From 754fd13dfdae967a368dfbf4041a577a3b099b05 Mon Sep 17 00:00:00 2001 From: Blendi Date: Tue, 21 Apr 2026 22:57:49 +0200 Subject: [PATCH] lexer: comments --- src/lexer.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++- src/lexer.h | 3 ++ src/main.c | 2 +- src/parser.c | 2 +- 4 files changed, 90 insertions(+), 3 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index 8b8d136..7105218 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -82,7 +82,6 @@ noom_uint_t noomL_getsymbol(const char* s, noom_LuaVersion version) { // TODO: m } noom_uint_t noomL_getnumber(const char* s, noomL_ErrorType* error, noom_LuaVersion version) { // TODO: more number kinds idk - // lazy af rn noom_uint_t len = 0; if (s[0] == '0' && noomL_lower(s[1]) == 'x') { @@ -192,6 +191,74 @@ noom_uint_t noomL_getnumber(const char* s, noomL_ErrorType* error, noom_LuaVersi return 0; } +noom_uint_t noomL_getcomment(const char* str, noomL_ErrorType* error) { + if (str[0] == '-' && str[1] == '-') { + noom_uint_t len = 2; + noom_uint_t longb_len = 0; + + // check for long bracket + int is_long = 0; // int for bools :fire: + + if (str[len] == '[') { + len++; + + while (str[len] == '=') { longb_len++; len++; } + + if (str[len] == '[') { + // yay long bracket! + is_long = 1; + len++; + } + } + + if (is_long) { + + // oh boy. + + while (1) { + if (str[len] == ']') { // maybe this is it! + len++; + + int success = 0; + noom_uint_t spos = len; // after the ] intentionally + noom_uint_t testlong = 0; + + while (str[len] == '=') { testlong++; len++; } + + if (str[len] == ']') { // actual long bracket! holy shit! + len++; + if (testlong == longb_len) { + // we're done! + success = 1; + } + } + + if (success) { + return len; + } else { + len = spos; + } + } else if (str[len] == '\0') { // comment never finished + *error = NOOML_ERROR_UNFINISHED_COMMENT; + return 0; + } else { + len++; // just some character. + } + } + + } else { + // reset to remove stuff, in case we hit like --[===hello, technically not required but a good idea + len = 2; + + while (str[len] != '\0' && str[len] != '\n') len++; + + return len; + } + } + + return 0; +} + int noomL_iskeyword(const char* s, noom_uint_t len, noom_LuaVersion version) { if (noom_streql(s, len, "true", 4)) return 1; if (noom_streql(s, len, "false", 5)) return 1; @@ -242,6 +309,8 @@ const char *noomL_formatTokenType(noomL_TokenType token_type) { return "symbol"; case NOOML_TOKEN_NUMBER: return "number"; + case NOOML_TOKEN_COMMENT: + return "comment"; default: return "unknown"; } @@ -295,6 +364,21 @@ noomL_ErrorType noomL_lex(const char* s, noom_uint_t start, noomL_Token* token, if (err != NOOML_ERROR_NONE) return err; } } + + { + noomL_ErrorType err = NOOML_ERROR_NONE; + noom_uint_t commentLen = noomL_getcomment(str, &err); + + if (commentLen) { + token->type = NOOML_TOKEN_COMMENT; + token->offset = start; + token->length = commentLen; + + return NOOML_ERROR_NONE; + } else { + if (err != NOOML_ERROR_NONE) return err; + } + } { noom_uint_t symbolLen = noomL_getsymbol(str, version); diff --git a/src/lexer.h b/src/lexer.h index d3ce324..a22e89a 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -9,6 +9,7 @@ typedef enum noomL_TokenType { NOOML_TOKEN_STRING, NOOML_TOKEN_SYMBOL, NOOML_TOKEN_NUMBER, + NOOML_TOKEN_COMMENT, NOOML_TOKEN_TCOUNT, } noomL_TokenType; @@ -17,6 +18,8 @@ typedef enum noomL_ErrorType { NOOML_ERROR_NONE = 0, NOOML_ERROR_UNKNOWN, NOOML_ERROR_MALFORMED_NUM, + + NOOML_ERROR_UNFINISHED_COMMENT } noomL_ErrorType; typedef struct noomL_Token { diff --git a/src/main.c b/src/main.c index 6bf3524..ea8af11 100644 --- a/src/main.c +++ b/src/main.c @@ -33,7 +33,7 @@ void print_node(noomP_Node* node, noom_uint_t depth) { int main(int argc, char** argv) { // uhh uhhh uhhhhh - const char* code = "local a = 52 local b = 2"; + const char* code = "--[=[i\nam\na\nlong\ncomment]]lololnotoveryet]==]nah lol]=] --local a = 2\nlocal b = 3"; noom_uint_t pos = 0; printf("LEX OUTPUT:\n"); diff --git a/src/parser.c b/src/parser.c index dbfeb42..b6380ea 100644 --- a/src/parser.c +++ b/src/parser.c @@ -39,7 +39,7 @@ int noomP_peek(noomP_Parser* parser, noomL_Token* token) { int success = noomL_lex(parser->code, parser->lex_offset, token, parser->version); if (success != 0) return -1; // TODO: proper error propogation and stuff - if (token->type == NOOML_TOKEN_WHITESPACE) { + if (token->type == NOOML_TOKEN_WHITESPACE || token->type == NOOML_TOKEN_COMMENT) { // peek changes state, but only if it's one of these useless tokens anyway. parser->lex_offset += token->length; continue;