forked from NeoFlock/noom
588 lines
14 KiB
C
588 lines
14 KiB
C
#include "types.h"
|
|
#include "helper.h"
|
|
#include "lexer.h"
|
|
|
|
// Reports whether `c` is a letter in the range 'a'..'z' or 'A'..'Z'.
// Returns 1 for a letter and 0 for anything else.
int noomL_isalpha(char c) {
    const int lowercase = (c >= 'a' && c <= 'z');
    const int uppercase = (c >= 'A' && c <= 'Z');

    return lowercase || uppercase;
}
|
|
|
|
// Reports whether `c` is a decimal digit ('0' through '9').
// Returns 1 for a digit and 0 otherwise.
int noomL_isnumber(char c) {
    if (c < '0') {
        return 0;
    }

    return c <= '9';
}
|
|
|
|
// Reports whether `c` is accepted by noomL_isalpha or noomL_isnumber.
// Returns 1 if so and 0 otherwise.
int noomL_isalphanum(char c) {
    if (noomL_isnumber(c)) {
        return 1;
    }

    return noomL_isalpha(c);
}
|
|
|
|
// Reports whether `c` is one of the whitespace characters the lexer skips:
// space, tab, newline, carriage return, vertical tab or form feed.
// Returns 1 for whitespace and 0 otherwise.
int noomL_iswhitespace(char c) {
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\v':
    case '\f':
        return 1;
    default:
        return 0;
    }
}
|
|
|
|
// Converts an uppercase letter 'A'..'Z' to its lowercase counterpart.
// Every other character is returned unchanged. The result is an int.
int noomL_lower(char c) {
    const int is_upper = (c >= 'A' && c <= 'Z');

    return is_upper ? c + ('a' - 'A') : c;
}
|
|
|
|
// Reports whether `c` is a hexadecimal digit: a decimal digit or a letter
// 'a'..'f' in either case. Returns 1 if so and 0 otherwise.
int noomL_ishex(char c) {
    const int folded = noomL_lower(c); // case-fold once so 'A'..'F' match too

    if (folded >= 'a' && folded <= 'f') {
        return 1;
    }

    return noomL_isnumber(c);
}
|
|
|
|
// Returns the length of the operator/punctuation symbol at the start of `s`,
// or 0 when `s` does not start with a symbol known for `version`.
//
// Candidates are tried in table order and the first prefix match wins, so a
// longer symbol must be listed before any symbol that is a prefix of it
// (`...` before `..` before `.`, `//` before `/`, `~=` before `~`, ...).
noom_uint_t noomL_getsymbol(const char* s, noom_LuaVersion version) {
    enum { SYM_ANY, SYM_LUA52, SYM_LUA53 };

    static const struct {
        const char* text;
        noom_uint_t length;
        int gate; // which Lua version (if any) the symbol requires
    } symbols[] = {
        {"...", 3, SYM_ANY},

        {"==", 2, SYM_ANY},
        {"~=", 2, SYM_ANY},
        {"<=", 2, SYM_ANY},
        {">=", 2, SYM_ANY},
        {"..", 2, SYM_ANY},

        {"::", 2, SYM_LUA52},

        {"//", 2, SYM_LUA53},
        {">>", 2, SYM_LUA53},
        {"<<", 2, SYM_LUA53},
        {"&", 1, SYM_LUA53},
        {"|", 1, SYM_LUA53},
        {"~", 1, SYM_LUA53},

        {"+", 1, SYM_ANY},
        {"-", 1, SYM_ANY},
        {"*", 1, SYM_ANY},
        {"/", 1, SYM_ANY},
        {"%", 1, SYM_ANY},
        {"^", 1, SYM_ANY},
        {"#", 1, SYM_ANY},
        {"<", 1, SYM_ANY},
        {">", 1, SYM_ANY},

        {"=", 1, SYM_ANY},
        {",", 1, SYM_ANY},

        {"(", 1, SYM_ANY},
        {")", 1, SYM_ANY},
        {"{", 1, SYM_ANY},
        {"}", 1, SYM_ANY},
        {"[", 1, SYM_ANY},
        {"]", 1, SYM_ANY},

        {":", 1, SYM_ANY},
        {".", 1, SYM_ANY},

        {";", 1, SYM_ANY},
    };

    for (noom_uint_t i = 0; i < sizeof symbols / sizeof symbols[0]; i++) {
        // skip symbols that the requested Lua version does not have yet
        if (symbols[i].gate == SYM_LUA52 && version < NOOM_VERSION_52) {
            continue;
        }
        if (symbols[i].gate == SYM_LUA53 && version < NOOM_VERSION_53) {
            continue;
        }

        if (noom_startswith(s, symbols[i].text)) {
            return symbols[i].length;
        }
    }

    return 0; // no symbol
}
|
|
|
|
noom_uint_t noomL_getnumber(const char* s, noomL_ErrorType* error, noom_LuaVersion version) { // TODO: more number kinds idk
|
|
noom_uint_t len = 0;
|
|
|
|
if (s[0] == '0' && noomL_lower(s[1]) == 'x') {
|
|
len = 2;
|
|
|
|
while (noomL_ishex(s[len])) {
|
|
len++;
|
|
}
|
|
|
|
if (version >= NOOM_VERSION_52) { // 5.2 added exponent and decimal to hex literals.
|
|
|
|
if (s[len] == '.') { // decimals in hex. smh.
|
|
len++;
|
|
|
|
while (noomL_ishex(s[len])) {
|
|
len++;
|
|
}
|
|
|
|
if (len == 3) { // only 0x. is a malformed number, even if followed by an exponent
|
|
*error = NOOML_ERROR_MALFORMED_NUM;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if (noomL_lower(s[len]) == 'p') {
|
|
len++;
|
|
|
|
// sign for exponent
|
|
if (s[len] == '-' || s[len] == '+') len++;
|
|
|
|
noom_uint_t slen = len;
|
|
|
|
while (noomL_isnumber(s[len])) {
|
|
len++;
|
|
}
|
|
|
|
if (slen == len) { // nothing after `p`
|
|
*error = NOOML_ERROR_MALFORMED_NUM;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
if (len == 2) { // nothing after the x; malformed number.
|
|
*error = NOOML_ERROR_MALFORMED_NUM;
|
|
return 0;
|
|
}
|
|
|
|
// check if identifier starter, if so, it's malformed (you can't do a=0xffl=2 or whatevs)
|
|
if (version >= NOOM_VERSION_51) { // always true for now, but if 5.0 this shouldn't happen.
|
|
if (noomL_isalpha(s[len]) || s[len] == '_') {
|
|
*error = NOOML_ERROR_MALFORMED_NUM;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return len;
|
|
} else {
|
|
while (noomL_isnumber(s[len])) { // int part
|
|
len++;
|
|
}
|
|
|
|
if (s[len] == '.') { // it's-a me, decimal number
|
|
len++;
|
|
|
|
while (noomL_isnumber(s[len])) { // decimal numbering
|
|
len++;
|
|
}
|
|
|
|
if (len == 1) { // only a . is an invalid number (it's a symbol instead!)
|
|
// don't error; it's just a symbol, everything's okay.
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if (noomL_lower(s[len]) == 'e') { // exponent
|
|
len++;
|
|
|
|
// sign for exponent
|
|
if (s[len] == '-' || s[len] == '+') len++;
|
|
|
|
noom_uint_t slen = len;
|
|
|
|
while (noomL_isnumber(s[len])) { // the exponent
|
|
len++;
|
|
}
|
|
|
|
// exponent has no numbers in it, malformed
|
|
if (slen == len) {
|
|
*error = NOOML_ERROR_MALFORMED_NUM;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// check if identifier starter, if so, it's malformed (you can't do a=53b=2 or whatevs)
|
|
if (version >= NOOM_VERSION_51) { // always true for now, but if 5.0 this shouldn't happen.
|
|
if (noomL_isalpha(s[len]) || s[len] == '_') {
|
|
*error = NOOML_ERROR_MALFORMED_NUM;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Measures the comment at the start of `str`.
//
// Returns 0 (leaving `*error` untouched) when `str` does not start with `--`.
// A line comment runs up to, but not including, the next '\n' or the
// terminating NUL. A long comment opens with `--[`, any number of `=`, then
// `[`, and runs through the matching `]`, the same number of `=`, then `]`.
// If the input ends before that closer is found, `*error` is set to
// NOOML_ERROR_UNFINISHED_COMMENT and 0 is returned.
noom_uint_t noomL_getcomment(const char* str, noomL_ErrorType* error) {
    if (str[0] != '-' || str[1] != '-') {
        return 0;
    }

    noom_uint_t pos = 2;
    noom_uint_t level = 0; // number of `=` in the opening long bracket
    int long_form = 0;

    // check for long bracket
    if (str[pos] == '[') {
        pos++;

        for (; str[pos] == '='; pos++) {
            level++;
        }

        if (str[pos] == '[') {
            long_form = 1;
            pos++;
        }
    }

    if (!long_form) {
        // Rewind in case something like `--[===hello` was partially consumed
        // above; the whole rest of the line is the comment.
        pos = 2;

        while (str[pos] != '\0' && str[pos] != '\n') {
            pos++;
        }

        return pos;
    }

    for (;;) {
        const char current = str[pos];

        if (current == '\0') { // comment never finished
            *error = NOOML_ERROR_UNFINISHED_COMMENT;
            return 0;
        }

        pos++;

        if (current != ']') {
            continue; // just some character.
        }

        // Candidate closer: count the `=` run and require a second `]`.
        const noom_uint_t resume = pos; // intentionally after the first `]`
        noom_uint_t closing = 0;

        while (str[pos] == '=') {
            closing++;
            pos++;
        }

        if (str[pos] == ']' && closing == level) {
            return pos + 1;
        }

        // Not the closer; rescan from just after the first `]` so that input
        // like `]=]==]` still finds the real closer.
        pos = resume;
    }
}
|
|
|
|
noom_uint_t noomL_getstring(const char* s, noomL_ErrorType* error, noom_LuaVersion version) {
|
|
noom_uint_t len = 0;
|
|
if (s[len] == '"' || s[len] == '\'') {
|
|
char starter = s[len]; // either `'` or `"`
|
|
|
|
len++; // double quoted string
|
|
|
|
while (1) {
|
|
if (s[len] == starter) {
|
|
// it's over
|
|
len++;
|
|
return len; // found a whole string!
|
|
} else if (s[len] == '\\') {
|
|
len++; // oh boy!
|
|
if (s[len] == '\\') {
|
|
len++;
|
|
} else if (s[len] == 'a') {
|
|
len++;
|
|
} else if (s[len] == 'b') {
|
|
len++;
|
|
} else if (s[len] == 'f') {
|
|
len++;
|
|
} else if (s[len] == 'n') {
|
|
len++;
|
|
} else if (s[len] == 'r') {
|
|
len++;
|
|
} else if (s[len] == 't') {
|
|
len++;
|
|
} else if (s[len] == 'v') {
|
|
len++;
|
|
|
|
// both string using single or double quote can have either escaped inside
|
|
} else if (s[len] == '"') {
|
|
len++;
|
|
} else if (s[len] == '\'') {
|
|
len++;
|
|
} else if (s[len] == '\n') {
|
|
len++;
|
|
} else if (s[len] == '\r' && s[len+1] == '\n') { // fuck windows :fire:
|
|
len += 2;
|
|
|
|
} else if (noomL_isnumber(s[len])) {
|
|
// fuckkkk
|
|
noom_uint_t count = 0;
|
|
for (noom_uint_t i = 0; i < 3; i++) {
|
|
if (noomL_isnumber(s[len + i])) count++; else break;
|
|
}
|
|
if (count == 3) { // could be too big
|
|
if ((s[len] > '2') || (s[len] == '2' && (s[len+1] > '5' || (s[len+1] == '5' && s[len+2] > '5')))) {
|
|
// >255, i could also make it a number first but meh
|
|
*error = NOOML_ERROR_DECIMAL_ESCAPE_TOO_BIG;
|
|
return 0;
|
|
}
|
|
}
|
|
// count can't be 0 because this if wouldn't run.
|
|
len += count;
|
|
|
|
} else if (s[len] == 'x' && version >= NOOM_VERSION_52) {
|
|
len++;
|
|
|
|
if ((!noomL_ishex(s[len])) || (!noomL_ishex(s[len+1]))) {
|
|
*error = NOOML_ERROR_HEX_ESCAPE_INVALID;
|
|
return 0;
|
|
}
|
|
len += 2;
|
|
|
|
} else if (s[len] == 'z' && version >= NOOM_VERSION_52) {
|
|
len++;
|
|
while (noomL_iswhitespace(s[len])) len++;
|
|
|
|
} else if (s[len] == 'u' && version >= NOOM_VERSION_53) {
|
|
len++;
|
|
if (s[len] != '{') {
|
|
*error = NOOML_ERROR_UNICODE_ESCAPE_UNOPENED;
|
|
return 0;
|
|
}
|
|
len++;
|
|
|
|
while (s[len] == '0') len++; // remove leading zeroes.
|
|
|
|
noom_uint_t hexlen = 0;
|
|
while (noomL_ishex(s[len + hexlen])) hexlen++;
|
|
|
|
// 5.3 allows <= 10ffff
|
|
// whereas 5.4 allows <= 7fffffff
|
|
// fuck my life
|
|
if (version == NOOM_VERSION_53) {
|
|
if (hexlen == 6) {
|
|
if (s[len] > '1' || (s[len] == '1' && s[len+1] > '0')) {
|
|
*error = NOOML_ERROR_UNICODE_ESCAPE_TOO_BIG;
|
|
return 0;
|
|
}
|
|
} else if (hexlen > 6) {
|
|
*error = NOOML_ERROR_UNICODE_ESCAPE_TOO_BIG;
|
|
return 0;
|
|
}
|
|
} else if (version >= NOOM_VERSION_54) {
|
|
if (hexlen == 8) {
|
|
if (s[len] > '7') {
|
|
*error = NOOML_ERROR_UNICODE_ESCAPE_TOO_BIG;
|
|
return 0;
|
|
}
|
|
} else if (hexlen > 8) {
|
|
*error = NOOML_ERROR_UNICODE_ESCAPE_TOO_BIG;
|
|
return 0;
|
|
}
|
|
}
|
|
len += hexlen;
|
|
|
|
if (s[len] != '}') {
|
|
*error = NOOML_ERROR_UNICODE_ESCAPE_UNCLOSED;
|
|
return 0;
|
|
}
|
|
len++;
|
|
|
|
} else if (s[len] == '\0') {
|
|
// no. just leave it for the string to find afterward.
|
|
} else {
|
|
len++; // allow any random escape
|
|
}
|
|
} else if (s[len] == '\0') {
|
|
*error = NOOML_ERROR_UNFINISHED_STRING;
|
|
return 0;
|
|
} else if (s[len] == '\n') {
|
|
// unfinished because you can't have a newline in it
|
|
*error = NOOML_ERROR_UNFINISHED_STRING;
|
|
return 0;
|
|
|
|
} else {
|
|
len++; // anything else is just a thing in the string.
|
|
}
|
|
}
|
|
} else if (s[len] == '[') { // potential multi-line string
|
|
len++;
|
|
|
|
noom_uint_t order = 0;
|
|
int succ = 0;
|
|
|
|
while (s[len] == '=') { order++; len++; }
|
|
|
|
if (s[len] == '[') { len++; succ = 1; }
|
|
|
|
if (succ) { // it is a multi-line string.
|
|
while (1) {
|
|
if (s[len] == ']') { // potential ender
|
|
len++;
|
|
noom_uint_t order2 = 0;
|
|
noom_uint_t startp = len; // intentionally after the `]`
|
|
|
|
while (s[len] == '=') { order2++; len++; }
|
|
|
|
if (s[len] == ']' && order == order2) { // holy shit it's real
|
|
len++;
|
|
|
|
return len;
|
|
} else {
|
|
// nope.
|
|
len = startp; // go back just in case like ]=]==]
|
|
}
|
|
|
|
} else if (s[len] == '\0') {
|
|
*error = NOOML_ERROR_UNFINISHED_LONG_STRING;
|
|
return 0;
|
|
} else {
|
|
len++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Reports whether the `len` characters at `s` spell a reserved word for the
// given Lua version. `goto` only counts from NOOM_VERSION_52 on.
// Returns 1 for a keyword and 0 otherwise.
int noomL_iskeyword(const char* s, noom_uint_t len, noom_LuaVersion version) {
    static const struct {
        const char* word;
        noom_uint_t length;
    } reserved[] = {
        {"true", 4},
        {"false", 5},
        {"nil", 3},

        {"if", 2},
        {"then", 4},
        {"else", 4},
        {"elseif", 6},

        {"and", 3},
        {"or", 2},
        {"not", 3},

        {"local", 5},

        {"for", 3},
        {"function", 8},
        {"do", 2},
        {"until", 5},
        {"while", 5},
        {"repeat", 6},
        {"end", 3},
        {"in", 2},
        {"return", 6},
        {"break", 5},
    };

    for (noom_uint_t i = 0; i < sizeof reserved / sizeof reserved[0]; i++) {
        if (noom_streql(s, len, reserved[i].word, reserved[i].length)) {
            return 1;
        }
    }

    if (version >= NOOM_VERSION_52 && noom_streql(s, len, "goto", 4)) {
        return 1;
    }

    return 0;
}
|
|
|
|
// Returns a human-readable name for `token_type`.
// The result is a string literal; values not listed below yield "unknown".
const char *noomL_formatTokenType(noomL_TokenType token_type) {
    const char *label = "unknown";

    switch (token_type) {
    case NOOML_TOKEN_EOF:
        label = "EOF";
        break;
    case NOOML_TOKEN_KEYWORD:
        label = "keyword";
        break;
    case NOOML_TOKEN_WHITESPACE:
        label = "whitespace";
        break;
    case NOOML_TOKEN_IDENTIFIER:
        label = "identifier";
        break;
    case NOOML_TOKEN_STRING:
        label = "string";
        break;
    case NOOML_TOKEN_SYMBOL:
        label = "symbol";
        break;
    case NOOML_TOKEN_NUMBER:
        label = "number";
        break;
    case NOOML_TOKEN_COMMENT:
        label = "comment";
        break;
    default:
        break;
    }

    return label;
}
|
|
|
|
noomL_ErrorType noomL_lex(const char* s, noom_uint_t start, noomL_Token* token, noom_LuaVersion version) {
|
|
const char* str = s + start;
|
|
|
|
if (str[0] == '\0') {
|
|
token->type = NOOML_TOKEN_EOF;
|
|
token->offset = start;
|
|
token->length = 0;
|
|
|
|
return NOOML_ERROR_NONE;
|
|
}
|
|
|
|
if (noomL_iswhitespace(str[0])) {
|
|
token->type = NOOML_TOKEN_WHITESPACE; // TODO: maybe handle multiple at once? for now it should be okay
|
|
token->offset = start;
|
|
token->length = 1;
|
|
|
|
return NOOML_ERROR_NONE;
|
|
}
|
|
|
|
if (str[0] == '_' || noomL_isalpha(str[0])) { // TODO: maybe abstract into function for "can start ident"?
|
|
noom_uint_t len = 1;
|
|
while (str[len] == '_' || noomL_isalphanum(str[len])) // same here
|
|
len++;
|
|
|
|
token->type = NOOML_TOKEN_IDENTIFIER;
|
|
if (noomL_iskeyword(str, len, version)) token->type = NOOML_TOKEN_KEYWORD;
|
|
|
|
token->offset = start;
|
|
token->length = len;
|
|
|
|
return NOOML_ERROR_NONE;
|
|
}
|
|
|
|
{ // must be above symbols because `.2` is a number
|
|
noomL_ErrorType err = NOOML_ERROR_NONE;
|
|
noom_uint_t numberLen = noomL_getnumber(str, &err, version);
|
|
|
|
if (numberLen) {
|
|
token->type = NOOML_TOKEN_NUMBER;
|
|
token->offset = start;
|
|
token->length = numberLen;
|
|
|
|
return NOOML_ERROR_NONE;
|
|
} else {
|
|
if (err != NOOML_ERROR_NONE) return err;
|
|
}
|
|
}
|
|
|
|
{
|
|
noomL_ErrorType err = NOOML_ERROR_NONE;
|
|
noom_uint_t commentLen = noomL_getcomment(str, &err);
|
|
|
|
if (commentLen) {
|
|
token->type = NOOML_TOKEN_COMMENT;
|
|
token->offset = start;
|
|
token->length = commentLen;
|
|
|
|
return NOOML_ERROR_NONE;
|
|
} else {
|
|
if (err != NOOML_ERROR_NONE) return err;
|
|
}
|
|
}
|
|
|
|
{
|
|
noomL_ErrorType err = NOOML_ERROR_NONE;
|
|
noom_uint_t stringLen = noomL_getstring(str, &err, version);
|
|
|
|
if (stringLen) {
|
|
token->type = NOOML_TOKEN_STRING;
|
|
token->offset = start;
|
|
token->length = stringLen;
|
|
|
|
return NOOML_ERROR_NONE;
|
|
} else {
|
|
if (err != NOOML_ERROR_NONE) return err;
|
|
}
|
|
}
|
|
|
|
{
|
|
noom_uint_t symbolLen = noomL_getsymbol(str, version);
|
|
|
|
if (symbolLen) {
|
|
token->type = NOOML_TOKEN_SYMBOL;
|
|
token->offset = start;
|
|
token->length = symbolLen;
|
|
|
|
return NOOML_ERROR_NONE;
|
|
}
|
|
}
|
|
|
|
|
|
// god damn it we errorrreed
|
|
return NOOML_ERROR_UNKNOWN;
|
|
}
|
|
|