Files
noom/src/parser.c
2026-04-16 11:53:26 +02:00

534 lines
14 KiB
C

#include "parser.h"
#include "helper.h"
// Lexes the token at parser->lex_offset into *token without consuming it.
// Whitespace tokens are the exception: they are consumed (lex_offset is moved
// past them) and lexing is retried, so the caller never sees one.
// Returns 0 once a non-whitespace token has been stored in *token, or -1 when
// noomL_lex reports a failure (non-zero result).
int noomP_peek(noomP_Parser* parser, noomL_Token* token) {
    for (;;) {
        if (noomL_lex(parser->code, parser->lex_offset, token) != 0) {
            return -1; // TODO: proper error propagation and stuff
        }
        if (token->type != NOOML_TOKEN_WHITESPACE) {
            return 0;
        }
        // peek changes state, but only to step over one of these useless tokens.
        parser->lex_offset += token->length;
    }
}
// Consumes `token` by moving parser->lex_offset forward by token->length.
// The caller is expected to already hold the token (e.g. from noomP_peek).
// An EOF token is never consumed, so the offset stays put at end of input.
void noomP_skip(noomP_Parser* parser, noomL_Token* token) {
    if (token->type != NOOML_TOKEN_EOF) {
        parser->lex_offset += token->length;
    }
}
// Allocates a node with room for two subnodes and pushes it onto the parser's
// node chain (node->previous_node points at the former parser->last_node).
// `type` and `source_offset` are NOT set here; the caller fills them in.
// Returns 0 if either allocation fails; in that case the chain is untouched.
noomP_Node* noomP_allocNode(noomP_Parser* parser) {
    noomP_Node* fresh = noom_alloc(sizeof(noomP_Node));
    if (fresh == 0) return 0;

    fresh->subnodes = noom_alloc(sizeof(noomP_Node*) * 2);
    if (fresh->subnodes == 0) {
        noom_free(fresh);
        return 0;
    }
    fresh->subnode_cap = 2;
    fresh->subnodec = 0;

    // Link into the chain only once the node is fully built.
    fresh->previous_node = parser->last_node;
    parser->last_node = fresh;
    return fresh;
}
// Appends `subnode` to node->subnodes, doubling the array's capacity when it
// is full.
// Returns 0 on success, -1 if the array could not be grown. On failure the
// node is left exactly as it was (same array, same capacity, same count), so
// it stays valid and its existing subnodes are not leaked.
int noomP_addSubnode(noomP_Node* node, noomP_Node* subnode) {
    if (node->subnodec == node->subnode_cap) {
        // Keep the old pointer until the resize is known to have worked.
        // NOTE(review): assumes noom_realloc follows realloc semantics (the
        // original allocation stays valid when it returns 0) -- confirm in helper.h.
        void* grown = noom_realloc(node->subnodes, sizeof(noomP_Node*) * node->subnode_cap * 2);
        if (grown == 0) return -1;
        node->subnodes = grown;
        node->subnode_cap = node->subnode_cap * 2;
    }
    node->subnodes[node->subnodec++] = subnode;
    return 0;
}
// Parses a single primary expression: a number, an identifier, or one of the
// keywords `true`, `false`, `nil`.
// On a match the token is consumed and a new leaf node is returned whose
// source_offset is the token's offset (the literal's value is not decoded
// here; only its location is recorded).
// Returns 0 when the next token is not a primary expression (nothing is
// consumed), when the lexer fails, or when node allocation fails.
noomP_Node* noomP_parseRawExpression(noomP_Parser* parser) {
    noomL_Token token;
    // A failed peek may leave `token` unset, so it must not be inspected.
    if (noomP_peek(parser, &token) != 0) return 0;

    int node_type;
    if (token.type == NOOML_TOKEN_NUMBER) {
        // uhh figure it out, future me!
        node_type = NOOMP_NODE_NUMBERLITERAL;
    } else if (token.type == NOOML_TOKEN_IDENTIFIER) {
        node_type = NOOMP_NODE_VARIABLE;
    } else if (token.type == NOOML_TOKEN_KEYWORD) {
        if (noom_streql(parser->code + token.offset, token.length, "true", 4)) {
            node_type = NOOMP_NODE_BOOLEANLITERAL;
        } else if (noom_streql(parser->code + token.offset, token.length, "false", 5)) {
            node_type = NOOMP_NODE_BOOLEANLITERAL;
        } else if (noom_streql(parser->code + token.offset, token.length, "nil", 3)) {
            node_type = NOOMP_NODE_NILLITERAL;
        } else {
            return 0; // some other keyword: not an expression
        }
    } else {
        return 0;
    }

    noomP_skip(parser, &token);
    noomP_Node* leaf = noomP_allocNode(parser);
    if (leaf == 0) return 0;
    leaf->type = node_type;
    leaf->source_offset = token.offset;
    return leaf;
}
// Looks up the binding powers of `token` when it is used as an infix operator.
// On a match, *a and *b are written (noomP_parseOperatorExpression passes its
// left and right binding power variables, in that order) and 1 is returned.
// Returns 0, leaving *a and *b untouched, when the token is not a known infix
// operator.
// An entry whose first power is greater than its second is right associative.
int noomP_infixOperatorBP(noomP_Parser* parser, noomL_Token* token, noom_uint_t* a, noom_uint_t* b) { // todo: maybe make this not pointer? we'll see
    struct infix_entry {
        const char* text;
        int text_len;
        noom_uint_t first_bp;
        noom_uint_t second_bp;
    };
    static const struct infix_entry symbol_table[] = {
        {"+",  1,  90, 100},
        {"-",  1,  90, 100},
        {"*",  1, 110, 120},
        {"/",  1, 110, 120},
        {"%",  1, 110, 120},
        {"^",  1, 140, 130}, // right associative
        {"..", 2,  80,  70}, // right associative
        // oh boy.
        {"<",  1,  50,  60},
        {">",  1,  50,  60},
        {"<=", 2,  50,  60},
        {">=", 2,  50,  60},
        {"~=", 2,  50,  60},
        {"==", 2,  50,  60},
    };
    static const struct infix_entry keyword_table[] = {
        {"and", 3, 30, 40},
        {"or",  2, 10, 20},
    };

    const struct infix_entry* table;
    unsigned int count;
    if (token->type == NOOML_TOKEN_SYMBOL) {
        table = symbol_table;
        count = sizeof(symbol_table) / sizeof(symbol_table[0]);
    } else if (token->type == NOOML_TOKEN_KEYWORD) {
        table = keyword_table;
        count = sizeof(keyword_table) / sizeof(keyword_table[0]);
    } else {
        return 0;
    }

    for (unsigned int i = 0; i < count; i++) {
        if (noom_streql(parser->code + token->offset, token->length, table[i].text, table[i].text_len)) {
            *a = table[i].first_bp;
            *b = table[i].second_bp;
            return 1;
        }
    }
    return 0;
}
// Returns the binding power of `token` when used as a prefix (unary) operator,
// or 0 if it is not one. The symbols `-`, `#`, `~` and the keyword `not` all
// share the same power, 125.
noom_uint_t noomP_prefixOperatorBP(noomP_Parser* parser, noomL_Token* token) { // todo: maybe make this not pointer? we'll see
    int is_prefix = 0;
    if (token->type == NOOML_TOKEN_SYMBOL) {
        is_prefix = noom_streql(parser->code + token->offset, token->length, "-", 1)
                 || noom_streql(parser->code + token->offset, token->length, "#", 1)
                 || noom_streql(parser->code + token->offset, token->length, "~", 1);
    } else if (token->type == NOOML_TOKEN_KEYWORD) {
        is_prefix = noom_streql(parser->code + token->offset, token->length, "not", 3) ? 1 : 0;
    }
    if (is_prefix) {
        return 125;
    }
    return 0;
}
// Precedence-climbing expression parser.
//   min_bp  - infix operators whose left binding power is below this are not
//             consumed; they are left for the caller.
//   predlhs - if non-zero, used as the already-parsed left operand; otherwise
//             a prefix-operator expression or a primary expression is parsed.
// Returns the root of the parsed expression, or 0 on a syntax error, a lexer
// failure, or an allocation failure. Nodes created before a failure stay
// linked in the parser's node chain (see noomP_allocNode).
noomP_Node* noomP_parseOperatorExpression(noomP_Parser* parser, noom_uint_t min_bp, noomP_Node* predlhs) {
    noomL_Token token;
    noomP_Node* lhs = predlhs;
    // eof check is 2 hard
    if (lhs == 0) {
        // A failed peek may leave `token` unset, so bail out before reading it.
        if (noomP_peek(parser, &token) != 0) return 0;
        noom_uint_t bp = noomP_prefixOperatorBP(parser, &token);
        if (bp != 0) { // prefix operator
            noomP_skip(parser, &token);
            noomP_Node* child = noomP_parseOperatorExpression(parser, bp, 0);
            if (child == 0) return 0;
            lhs = noomP_allocNode(parser);
            if (lhs == 0) return 0;
            lhs->type = NOOMP_NODE_UNARYOPERATOR;
            lhs->source_offset = token.offset; // the operator! we need this to check what it was when compiling.
            if (noomP_addSubnode(lhs, child) != 0) return 0;
        } else { // wasn't a prefix op, so it must be a raw expression
            lhs = noomP_parseRawExpression(parser);
            if (lhs == 0) return 0;
        }
    }

    for (;;) { // infix operator time!!
        if (noomP_peek(parser, &token) != 0) return 0;
        noom_uint_t lbp;
        noom_uint_t rbp;
        // also sets lbp and rbp
        if (noomP_infixOperatorBP(parser, &token, &lbp, &rbp) == 0) break;
        if (lbp < min_bp) break; // binds too loosely for this level; caller handles it
        noom_uint_t op_loc = token.offset;
        noomP_skip(parser, &token); // remove operator
        noomP_Node* rhs = noomP_parseOperatorExpression(parser, rbp, 0);
        if (rhs == 0) return 0;
        noomP_Node* new_node = noomP_allocNode(parser);
        if (new_node == 0) return 0;
        new_node->type = NOOMP_NODE_BINARYOPERATOR;
        new_node->source_offset = op_loc;
        // Subnode order is fixed: [0] = left operand, [1] = right operand.
        if (noomP_addSubnode(new_node, lhs) != 0) return 0;
        if (noomP_addSubnode(new_node, rhs) != 0) return 0;
        lhs = new_node;
    }
    return lhs;
}
// Parses a complete expression. Starts the operator parser with a minimum
// binding power of 0 and no pre-parsed left operand.
// Returns the expression's root node, or 0 on failure.
noomP_Node* noomP_parseExpression(noomP_Parser* parser) {
    const noom_uint_t lowest_bp = 0;
    noomP_Node* const no_lhs = 0;
    return noomP_parseOperatorExpression(parser, lowest_bp, no_lhs);
}
// Parses statements into a BLOCK node until the next token is one of the
// keywords `end`, `elseif` or `else`. The terminating keyword is NOT
// consumed; the caller decides what to do with it.
// The block starter has been eaten already by the caller.
// Returns the block node, or 0 on allocation failure, lexer failure, or when a
// statement fails to parse.
noomP_Node* noomP_parseBlock(noomP_Parser* parser) { // stops on end, else or elseif.
    noomP_Node* node = noomP_allocNode(parser);
    if (node == 0) return 0; // OOM :(
    node->type = NOOMP_NODE_BLOCK;
    node->source_offset = parser->lex_offset;

    noomL_Token token;
    for (;;) {
        // check if end reached; a failed peek may leave `token` unset, so stop
        if (noomP_peek(parser, &token) != 0) return 0;
        if (token.type == NOOML_TOKEN_KEYWORD) {
            if (noom_streql(parser->code + token.offset, token.length, "end", 3)) {
                break;
            } else if (noom_streql(parser->code + token.offset, token.length, "elseif", 6)) {
                break;
            } else if (noom_streql(parser->code + token.offset, token.length, "else", 4)) {
                break;
            }
        }
        noomP_Node* stmt = noomP_parseStatement(parser);
        if (stmt == 0) return 0;
        if (noomP_addSubnode(node, stmt) != 0) return 0;
    }
    return node;
}
noomP_Node* noomP_parseRawStatement(noomP_Parser* parser) {
noomL_Token token;
noomP_peek(parser, &token);
if (token.type == NOOML_TOKEN_KEYWORD) {
if (noom_streql(parser->code + token.offset, token.length, "local", 5)) {
noomP_skip(parser, &token);
noomP_Node* localNode = noomP_allocNode(parser);
if (localNode == 0) return 0; // no memory :(
localNode->source_offset = token.offset;
localNode->type = NOOMP_NODE_LOCALDECLARATION;
while (1) {
noomP_peek(parser, &token);
if (token.type != NOOML_TOKEN_IDENTIFIER) return 0;
noomP_skip(parser, &token);
noomP_Node* varname = noomP_allocNode(parser);
if (varname == 0) return 0;
varname->type = NOOMP_NODE_VARNAME;
varname->source_offset = token.offset;
noomP_addSubnode(localNode, varname);
noomP_peek(parser, &token);
if (token.type == NOOML_TOKEN_SYMBOL) {
if (noom_streql(parser->code + token.offset, token.length, "=", 1)) {
noomP_skip(parser, &token);
break;
} else if (noom_streql(parser->code + token.offset, token.length, ",", 1)) {
noomP_skip(parser, &token);
} else {
return 0; // unexpected token
}
} else {
return 0; // unexpected token
}
}
// equals has already been eaten by loop (thank you loop)
while (1) {
noomP_Node *expr = noomP_parseExpression(parser);
if (expr == 0) return 0;
noomP_addSubnode(localNode, expr);
noomP_peek(parser, &token);
if (token.type == NOOML_TOKEN_SYMBOL) {
if (noom_streql(parser->code + token.offset, token.length, ",", 1)) {
noomP_skip(parser, &token);
} else {
break;
}
} else {
break;
}
}
return localNode;
} else if (noom_streql(parser->code + token.offset, token.length, "if", 2)) {
noomP_skip(parser, &token);
noomP_Node* ifStatement = noomP_allocNode(parser);
if (ifStatement == 0) return 0;
ifStatement->type = NOOMP_NODE_IFSTATEMENT;
ifStatement->source_offset = token.offset;
noomP_Node* condition = noomP_parseExpression(parser);
if (condition == 0) return 0;
noomP_addSubnode(ifStatement, condition);
noomP_peek(parser, &token);
if (token.type != NOOML_TOKEN_KEYWORD) return 0; // unexpected
if (!noom_streql(parser->code + token.offset, token.length, "then", 4)) return 0; // unexpected
noomP_skip(parser, &token);
noomP_Node* block = noomP_parseBlock(parser);
if (block == 0) return 0;
noomP_addSubnode(ifStatement, block);
while (1) { // else, elseif
noomP_peek(parser, &token);
if (token.type != NOOML_TOKEN_KEYWORD) return 0; // unexpected
if (noom_streql(parser->code + token.offset, token.length, "elseif", 6)) {
noomP_skip(parser, &token);
noomP_Node* elseIfCondition = noomP_parseExpression(parser);
if (elseIfCondition == 0) return 0;
noomP_addSubnode(ifStatement, elseIfCondition);
// now we need to check for "then"
noomP_peek(parser, &token);
if (token.type != NOOML_TOKEN_KEYWORD) return 0; // unexpected
if (!noom_streql(parser->code + token.offset, token.length, "then", 4)) return 0; // unexpected
noomP_skip(parser, &token);
// now the block
noomP_Node* elseIfBlock = noomP_parseBlock(parser);
if (elseIfBlock == 0) return 0;
noomP_addSubnode(ifStatement, elseIfBlock);
// could be even more
} else if (noom_streql(parser->code + token.offset, token.length, "else", 4)) {
noomP_skip(parser, &token);
noomP_Node* elseBlock = noomP_parseBlock(parser);
if (elseBlock == 0) return 0;
// we know it's an else if it's an odd number. no need to do anything special.
noomP_addSubnode(ifStatement, elseBlock);
break; // this must be the last one; end is handled after the loop
} else if (noom_streql(parser->code + token.offset, token.length, "end", 3)) {
break; // will check for end outside the loop because else and things
} else {
// unexpected
return 0;
}
}
noomP_peek(parser, &token);
if (token.type != NOOML_TOKEN_KEYWORD) return 0; // unexpected
if (!noom_streql(parser->code + token.offset, token.length, "end", 3)) return 0; // unexpected
noomP_skip(parser, &token);
return ifStatement;
}
}
while (1) {
noomP_peek(parser, &token);
if (token.type == NOOML_TOKEN_SYMBOL) {
if (noom_streql(parser->code + token.offset, token.length, ";", 1)) {
noomP_skip(parser, &token);
continue;
}
}
break;
}
return 0;
}
// Parses one statement via noomP_parseRawStatement and then consumes any
// number of `;` symbols that follow it.
// Returns the statement node, or 0 if the statement failed to parse or the
// lexer failed while looking for trailing semicolons.
noomP_Node* noomP_parseStatement(noomP_Parser* parser) {
    noomP_Node* stmt = noomP_parseRawStatement(parser);
    if (stmt == 0) return 0;

    noomL_Token token;
    for (;;) {
        // A failed peek may leave `token` unset; report the error instead of
        // inspecting it.
        if (noomP_peek(parser, &token) != 0) return 0;
        if (token.type != NOOML_TOKEN_SYMBOL) break;
        if (!noom_streql(parser->code + token.offset, token.length, ";", 1)) break;
        noomP_skip(parser, &token);
    }
    return stmt;
}
// Parses `code` into a PROGRAM node whose subnodes are the top-level
// statements, in source order.
//   outpointer - receives the PROGRAM node; written only on success.
//   last_node  - receives the head of the parser's node chain (the most
//                recently allocated node; earlier ones are reachable through
//                previous_node). Written on success AND on failure so nodes
//                allocated before an error stay reachable.
//                NOTE(review): presumably the caller releases nodes by walking
//                this chain -- confirm against the caller.
// Returns 0 on success, -1 on a lexer failure, syntax error or allocation
// failure.
int noomP_parse(const char* code, const char* filename, noomP_Node** outpointer, noomP_Node** last_node) {
    noomP_Parser parser;
    if (noomP_initParser(&parser, code, filename) != 0) return -1;

    noomL_Token token;
    int status = 0;
    noomP_Node* root = noomP_allocNode(&parser);
    if (root == 0) {
        status = -1;
    } else {
        root->source_offset = parser.lex_offset;
        root->type = NOOMP_NODE_PROGRAM;
    }

    while (status == 0) {
        // A failed peek may leave `token` unset, so it must not be inspected.
        if (noomP_peek(&parser, &token) != 0) {
            status = -1;
            break;
        }
        if (token.type == NOOML_TOKEN_EOF) break;
        noomP_Node* child = noomP_parseStatement(&parser);
        if (child == 0 || noomP_addSubnode(root, child) != 0) {
            status = -1;
            break;
        }
    }

    if (status == 0) {
        *outpointer = root;
    }
    // `parser` is a local; without this the chain head is lost on failure.
    *last_node = parser.last_node;
    return status;
}
// Resets `parser` to the start of `code`: lexing begins at offset 0 and the
// node chain is empty. `code` and `filename` are stored as given (not copied).
// Always returns 0.
int noomP_initParser(noomP_Parser* parser, const char* code, const char* filename) {
    parser->last_node = (void *)0;
    parser->lex_offset = 0;
    parser->filename = filename;
    parser->code = code;
    return 0;
}