improve number lexing

This commit is contained in:
2026-04-20 16:14:56 +02:00
parent bf52cf70ab
commit 0e6a08d702
3 changed files with 86 additions and 17 deletions

View File

@@ -18,6 +18,17 @@ int noomL_iswhitespace(char c) {
return c == ' ' || c == '\r' || c == '\n' || c == '\t' || c == '\v' || c == '\f';
}
/*
 * Fold an uppercase letter ('A'..'Z') to its lowercase counterpart.
 * Every other character is handed back unchanged, widened to int.
 */
int noomL_lower(char c) {
    const int is_upper = (c >= 'A') && (c <= 'Z');
    // 'a' - 'A' is the constant offset between the two letter cases
    return is_upper ? c + ('a' - 'A') : c;
}
/*
 * Returns 1 when `c` can appear in a hexadecimal literal, 0 otherwise:
 * either noomL_isnumber() accepts it, or it is a letter in 'a'..'f'
 * (checked case-insensitively via noomL_lower()).
 */
int noomL_ishex(char c) {
    if (noomL_isnumber(c)) {
        return 1;
    }
    const int folded = noomL_lower(c);
    return folded >= 'a' && folded <= 'f';
}
noom_uint_t noomL_getsymbol(const char* s, noom_LuaVersion version) { // TODO: maybe find some less shit crap holy crap
if (noom_startswith(s, "...")) return 3;
@@ -70,13 +81,64 @@ noom_uint_t noomL_getsymbol(const char* s, noom_LuaVersion version) { // TODO: m
return 0; // no symbol
}
noom_uint_t noomL_getnumber(const char* s, noom_LuaVersion version) { // TODO: more number kinds idk
noom_uint_t noomL_getnumber(const char* s, noomL_ErrorType* error, noom_LuaVersion version) { // TODO: more number kinds idk
// lazy af rn
noom_uint_t len = 0;
if (s[0] == '0' && noomL_lower(s[1]) == 'x') {
len = 2;
while (noomL_isnumber(s[len])) len++;
while (noomL_ishex(s[len])) {
len++;
}
return len;
if (len == 2) { // nothing after the x; malformed number.
*error = NOOML_ERROR_MALFORMED_NUM;
return 0;
}
return len;
} else {
while (noomL_isnumber(s[len])) { // int part
len++;
}
if (s[len] == '.') { // it's-a me, decimal number
len++;
while (noomL_isnumber(s[len])) { // decimal numbering
len++;
}
if (len == 1) { // only a . is an invalid number (it's a symbol instead!)
// don't error; it's just a symbol, everything's okay.
return 0;
}
}
if (noomL_lower(s[len]) == 'e') { // exponent
len++;
// sign for exponent
if (s[len] == '-' || s[len] == '+') len++;
noom_uint_t slen = len;
while (noomL_isnumber(s[len])) { // the exponent
len++;
}
// exponent has no numbers in it, malformed
if (slen == len) {
*error = NOOML_ERROR_MALFORMED_NUM;
return 0;
}
}
return len;
}
return 0;
}
int noomL_iskeyword(const char* s, noom_uint_t len, noom_LuaVersion version) {
@@ -168,6 +230,21 @@ noomL_ErrorType noomL_lex(const char* s, noom_uint_t start, noomL_Token* token,
return NOOML_ERROR_NONE;
}
{ // must be above symbols because `.2` is a number
noomL_ErrorType err = NOOML_ERROR_NONE;
noom_uint_t numberLen = noomL_getnumber(str, &err, version);
if (numberLen) {
token->type = NOOML_TOKEN_NUMBER;
token->offset = start;
token->length = numberLen;
return NOOML_ERROR_NONE;
} else {
if (err != NOOML_ERROR_NONE) return err;
}
}
{
noom_uint_t symbolLen = noomL_getsymbol(str, version);
@@ -180,17 +257,6 @@ noomL_ErrorType noomL_lex(const char* s, noom_uint_t start, noomL_Token* token,
}
}
{
noom_uint_t numberLen = noomL_getnumber(str, version);
if (numberLen) {
token->type = NOOML_TOKEN_NUMBER;
token->offset = start;
token->length = numberLen;
return NOOML_ERROR_NONE;
}
}
// god damn it we errorrreed
return NOOML_ERROR_UNKNOWN;

View File

@@ -15,7 +15,8 @@ typedef enum noomL_TokenType {
/*
 * Error codes reported by the lexer.
 * NOOML_ERROR_NONE is pinned to 0; the remaining codes follow sequentially.
 */
typedef enum noomL_ErrorType {
    NOOML_ERROR_NONE = 0,       // success, no error
    NOOML_ERROR_UNKNOWN,        // input did not match any known token
    NOOML_ERROR_MALFORMED_NUM,  // text starts like a number but is not a valid one
} noomL_ErrorType;
typedef struct noomL_Token {
@@ -29,9 +30,11 @@ int noomL_isalpha(char c);
int noomL_isnumber(char c);
int noomL_isalphanum(char c);
int noomL_iswhitespace(char c);
int noomL_lower(char c);
int noomL_ishex(char c);
noom_uint_t noomL_getsymbol(const char* s, noom_LuaVersion version);
noom_uint_t noomL_getnumber(const char* s, noom_LuaVersion version);
noom_uint_t noomL_getnumber(const char* s, noomL_ErrorType* error, noom_LuaVersion version);
const char *noomL_formatTokenType(noomL_TokenType token_type);

View File

@@ -33,7 +33,7 @@ void print_node(noomP_Node* node, noom_uint_t depth) {
int main(int argc, char** argv) {
// uhh uhhh uhhhhh
const char* code = "local a, b = true, false\nif a == b then elseif a or b then end";
const char* code = "local a = 0xff + 5e-2 + .1e2";
noom_uint_t pos = 0;
printf("LEX OUTPUT:\n");