Skip to content

Commit

Permalink
scanner: refactor keyword detection
Browse files Browse the repository at this point in the history
  • Loading branch information
vrischmann committed Feb 3, 2024
1 parent 9bb4ca1 commit a9c0006
Showing 1 changed file with 28 additions and 28 deletions.
56 changes: 28 additions & 28 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,13 @@ static bool lookahead_buffer_find_char(LookaheadBuffer *buffer,

// Tries to find the keyword `str` in the character stream of `lexer`.
//
// Since TSLexer doesn't allow backtracking and we need it to lookup different
// keywords, we have to implement backtracking ourselves.
// Since TSLexer doesn't allow backtracking and we need it to lookup
// different keywords, we have to implement backtracking ourselves.
//
// It's relatively simple:
// * if we have any buffered data, try it first
// * otherwise pull from the stream while simultaneously adding to the buffer
// * otherwise pull from the stream while simultaneously adding to the
// buffer
//
// The next call will have the buffer populated.
static bool lookahead_buffer_find_keyword(LookaheadBuffer *buffer,
Expand Down Expand Up @@ -155,6 +156,19 @@ static bool is_element_text_terminator_for_import_expression(int ch) {
return false;
}

const char *statement_keywords[] = {
// Comments
"//",
"/*",
// Other statements
"if ",
"else ",
"for ",
"switch ",
};
const int statement_keywords_count =
sizeof(statement_keywords) / sizeof(const char *);

static bool scan_element_text(Scanner *scanner, TSLexer *lexer) {
lexer->result_symbol = ELEMENT_TEXT;

Expand All @@ -172,32 +186,17 @@ static bool scan_element_text(Scanner *scanner, TSLexer *lexer) {
return false;
}

// Detect if the node starts with a comment
if (lookahead_buffer_find_keyword(&buffer, lexer, "//")) {
goto done;
}
if (lookahead_buffer_find_keyword(&buffer, lexer, "/*")) {
goto done;
}

// 1. Detect if the node starts with a keyword that makes it a statement
// instead of a text element
//
// Since we're looking for a multicharacter token we need backtracking but
// TSLexer doesn't provide it so we have to do it ourselves.
// Detect if the node starts with a keyword that makes it a statement instead.
for (size_t i = 0; i < statement_keywords_count; i++) {
const char *keyword = statement_keywords[i];

if (lookahead_buffer_find_keyword(&buffer, lexer, "if ")) {
goto done;
}
if (lookahead_buffer_find_keyword(&buffer, lexer, "else ")) {
goto done;
}
if (lookahead_buffer_find_keyword(&buffer, lexer, "for ")) {
goto done;
}
if (lookahead_buffer_find_keyword(&buffer, lexer, "switch ")) {
goto done;
// Since we're looking for a multicharacter token we need backtracking but
// TSLexer doesn't provide it so we have to do it ourselves.
if (lookahead_buffer_find_keyword(&buffer, lexer, keyword)) {
goto done;
}
}

// Try for a "@" which signals a component import expression
if (lookahead_buffer_find_keyword(&buffer, lexer, "@")) {
scanner->saw_at_symbol = true;
Expand All @@ -211,7 +210,8 @@ static bool scan_element_text(Scanner *scanner, TSLexer *lexer) {
goto done;
}

// If we saw a @ symbol, we could be in an import expression.
// If we saw a @ symbol, we could be in an import expression and the
// terminator characters differ.
if (scanner->saw_at_symbol) {
if (lookahead_buffer_find_char(
&buffer, is_element_text_terminator_for_import_expression)) {
Expand Down

0 comments on commit a9c0006

Please sign in to comment.