// Forked from NeoFlock/noom.
#include "parser.h"
|
|
#include "helper.h"
|
|
|
|
// Lex the token at the parser's current offset into `*token` without
// consuming it. Whitespace tokens are the one exception: they are stepped
// over (advancing parser->lex_offset) until a non-whitespace token is found.
//
// Returns 0 when `*token` holds a non-whitespace token, -1 as soon as
// noomL_lex reports a failure.
int noomP_peek(noomP_Parser* parser, noomL_Token* token) {
    for (;;) {
        // TODO: proper error propagation and stuff
        if (noomL_lex(parser->code, parser->lex_offset, token) != 0) {
            return -1;
        }

        if (token->type != NOOML_TOKEN_WHITESPACE) {
            return 0;
        }

        // Peeking mutates the parser only to discard whitespace, which no
        // caller ever wants to see.
        parser->lex_offset += token->length;
    }
}
|
|
|
|
// Consume `token`, which the caller has already obtained (normally through
// noomP_peek), by moving the lex offset past it. An EOF token is never
// consumed, so the offset stays put once the end has been reached.
void noomP_skip(noomP_Parser* parser, noomL_Token* token) {
    if (token->type != NOOML_TOKEN_EOF) {
        parser->lex_offset += token->length;
    }
}
|
|
|
|
// Allocate a node with an empty child array (room for two children) and
// push it onto the parser's chain of nodes: the new node's previous_node
// points at the old parser->last_node, and parser->last_node becomes the
// new node.
//
// `type` and `source_offset` are left for the caller to fill in.
//
// Returns the node, or 0 if either allocation fails. On failure nothing is
// left allocated and the parser's chain is unchanged.
noomP_Node* noomP_allocNode(noomP_Parser* parser) {
    enum { INITIAL_SUBNODE_CAP = 2 };

    noomP_Node* fresh = noom_alloc(sizeof(noomP_Node));
    if (fresh == 0) {
        return 0;
    }

    fresh->subnodes = noom_alloc(sizeof(noomP_Node*) * INITIAL_SUBNODE_CAP);
    if (fresh->subnodes == 0) {
        noom_free(fresh);
        return 0;
    }
    fresh->subnode_cap = INITIAL_SUBNODE_CAP;
    fresh->subnodec = 0;

    // Link into the chain only once the node is fully usable.
    fresh->previous_node = parser->last_node;
    parser->last_node = fresh;

    return fresh;
}
|
|
|
|
// Append `subnode` to the end of `node`'s child array, doubling the array's
// capacity first when it is full.
//
// The grown pointer and the new capacity are stored only after the
// reallocation succeeded, so on failure `node` still owns its old array with
// its old capacity and children, and can be used or freed normally.
// (Assumes noom_realloc behaves like realloc: returns 0 on failure and
// leaves the original block valid - TODO confirm.)
//
// Returns 0 on success, -1 if the array could not be grown.
int noomP_addSubnode(noomP_Node* node, noomP_Node* subnode) {
    if (node->subnodec == node->subnode_cap) {
        void* grown = noom_realloc(node->subnodes, sizeof(noomP_Node*) * (node->subnode_cap * 2));
        if (grown == 0) {
            return -1;
        }

        node->subnodes = grown;
        node->subnode_cap = node->subnode_cap * 2;
    }

    node->subnodes[node->subnodec++] = subnode;

    return 0;
}
|
|
|
|
noomP_Node* noomP_parseRawExpression(noomP_Parser* parser) {
|
|
noomL_Token token;
|
|
noomP_peek(parser, &token);
|
|
|
|
if (token.type == NOOML_TOKEN_NUMBER) {
|
|
// uhh figure it out, future me!
|
|
noomP_skip(parser, &token);
|
|
|
|
noomP_Node* numNode = noomP_allocNode(parser);
|
|
if (numNode == 0) return 0;
|
|
|
|
numNode->type = NOOMP_NODE_NUMBERLITERAL;
|
|
numNode->source_offset = token.offset;
|
|
|
|
return numNode;
|
|
} else if (token.type == NOOML_TOKEN_IDENTIFIER) {
|
|
noomP_skip(parser, &token);
|
|
|
|
noomP_Node* varNode = noomP_allocNode(parser);
|
|
if (varNode == 0) return 0;
|
|
|
|
varNode->type = NOOMP_NODE_VARIABLE;
|
|
varNode->source_offset = token.offset;
|
|
|
|
return varNode;
|
|
} else if (token.type == NOOML_TOKEN_KEYWORD) {
|
|
if (noom_streql(parser->code + token.offset, token.length, "true", 4)) {
|
|
noomP_skip(parser, &token);
|
|
|
|
noomP_Node* litNode = noomP_allocNode(parser);
|
|
if (litNode == 0) return 0;
|
|
|
|
litNode->type = NOOMP_NODE_BOOLEANLITERAL;
|
|
litNode->source_offset = token.offset;
|
|
|
|
return litNode;
|
|
} else if (noom_streql(parser->code + token.offset, token.length, "false", 5)) {
|
|
noomP_skip(parser, &token);
|
|
|
|
noomP_Node* litNode = noomP_allocNode(parser);
|
|
if (litNode == 0) return 0;
|
|
|
|
litNode->type = NOOMP_NODE_BOOLEANLITERAL;
|
|
litNode->source_offset = token.offset;
|
|
|
|
return litNode;
|
|
} else if (noom_streql(parser->code + token.offset, token.length, "nil", 3)) {
|
|
noomP_skip(parser, &token);
|
|
|
|
noomP_Node* litNode = noomP_allocNode(parser);
|
|
if (litNode == 0) return 0;
|
|
|
|
litNode->type = NOOMP_NODE_NILLITERAL;
|
|
litNode->source_offset = token.offset;
|
|
|
|
return litNode;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Look up the binding powers of `token` when used as an infix operator.
//
// On a match, `*a` receives the left binding power and `*b` the right one,
// and 1 is returned. Left-associative operators have a < b; the
// right-associative ones (`^` and `..`) have a > b. If the token is not an
// infix operator, 0 is returned and `*a` / `*b` are left untouched.
//
// todo: maybe make this not pointer? we'll see
int noomP_infixOperatorBP(noomP_Parser* parser, noomL_Token* token, noom_uint_t* a, noom_uint_t* b) {
    struct entry {
        const char* text;
        noom_uint_t len;
        noom_uint_t left;
        noom_uint_t right;
    };

    // Entries are tried top to bottom.
    static const struct entry symbols[] = {
        { "+",  1,  90, 100 },
        { "-",  1,  90, 100 },

        { "*",  1, 110, 120 },
        { "/",  1, 110, 120 },
        { "%",  1, 110, 120 },

        { "^",  1, 140, 130 }, // right associative

        { "..", 2,  80,  70 }, // right associative

        // comparisons
        { "<",  1,  50,  60 },
        { ">",  1,  50,  60 },
        { "<=", 2,  50,  60 },
        { ">=", 2,  50,  60 },
        { "~=", 2,  50,  60 },
        { "==", 2,  50,  60 },
    };
    static const struct entry keywords[] = {
        { "and", 3, 30, 40 },
        { "or",  2, 10, 20 },
    };

    const struct entry* table;
    noom_uint_t count;

    if (token->type == NOOML_TOKEN_SYMBOL) {
        table = symbols;
        count = sizeof(symbols) / sizeof(symbols[0]);
    } else if (token->type == NOOML_TOKEN_KEYWORD) {
        table = keywords;
        count = sizeof(keywords) / sizeof(keywords[0]);
    } else {
        return 0;
    }

    const char* text = parser->code + token->offset;

    for (noom_uint_t i = 0; i < count; i++) {
        if (noom_streql(text, token->length, table[i].text, table[i].len)) {
            *a = table[i].left;
            *b = table[i].right;
            return 1;
        }
    }

    return 0;
}
|
|
|
|
// Return the binding power of `token` when used as a prefix (unary)
// operator, or 0 if it is not one. The symbols `-`, `#`, `~` and the keyword
// `not` all share the same power, which sits between the multiplicative
// operators and `^` in the infix table.
//
// todo: maybe make this not pointer? we'll see
noom_uint_t noomP_prefixOperatorBP(noomP_Parser* parser, noomL_Token* token) {
    enum { PREFIX_BP = 125 };

    if (token->type == NOOML_TOKEN_SYMBOL) {
        const char* text = parser->code + token->offset;

        if (noom_streql(text, token->length, "-", 1)
            || noom_streql(text, token->length, "#", 1)
            || noom_streql(text, token->length, "~", 1)) {
            return PREFIX_BP;
        }

        return 0;
    }

    if (token->type == NOOML_TOKEN_KEYWORD
        && noom_streql(parser->code + token->offset, token->length, "not", 3)) {
        return PREFIX_BP;
    }

    return 0;
}
|
|
|
|
// Precedence-climbing (binding power) expression parser.
//
// parser  - parser state; tokens are consumed from it.
// min_bp  - minimum left binding power an infix operator needs in order to
//           be absorbed by this call; weaker operators are left for the
//           caller.
// predlhs - an already-parsed left operand, or 0 to parse one here (either a
//           prefix operator applied to an operand, or a raw operand).
//
// Unary nodes get one child, binary nodes get [lhs, rhs]. In both cases
// source_offset points at the operator token so the operator can be
// identified later from the source text.
//
// Returns the root of the parsed expression, or 0 on lexer failure, on a
// missing operand, or when memory runs out.
noomP_Node* noomP_parseOperatorExpression(noomP_Parser* parser, noom_uint_t min_bp, noomP_Node* predlhs) {
    noomL_Token token;
    noomP_Node* lhs = predlhs;

    if (noomP_peek(parser, &token) != 0) {
        return 0;
    }

    // eof check is 2 hard

    if (lhs == 0) {
        noom_uint_t bp = noomP_prefixOperatorBP(parser, &token);

        if (bp != 0) { // prefix operator
            noomP_skip(parser, &token);

            noomP_Node* child = noomP_parseOperatorExpression(parser, bp, 0);
            if (child == 0) {
                return 0;
            }

            lhs = noomP_allocNode(parser);
            if (lhs == 0) {
                return 0;
            }

            lhs->type = NOOMP_NODE_UNARYOPERATOR;
            lhs->source_offset = token.offset; // the operator! we need this to check what it was when compiling.

            if (noomP_addSubnode(lhs, child) != 0) {
                return 0;
            }
        } else { // wasn't a prefix op, so it has to be a raw operand
            lhs = noomP_parseRawExpression(parser);
            if (lhs == 0) {
                return 0;
            }
        }
    }

    while (1) { // infix operator time!!
        noom_uint_t lbp = 0;
        noom_uint_t rbp = 0;

        if (noomP_peek(parser, &token) != 0) {
            return 0;
        }
        noom_uint_t op_loc = token.offset;

        // also sets lbp and rbp
        if (noomP_infixOperatorBP(parser, &token, &lbp, &rbp) == 0) {
            break;
        }

        // Operator binds too weakly for this level; leave it unconsumed.
        if (lbp < min_bp) {
            break;
        }

        noomP_skip(parser, &token); // remove operator

        noomP_Node* rhs = noomP_parseOperatorExpression(parser, rbp, 0);
        if (rhs == 0) {
            return 0;
        }

        noomP_Node* new_node = noomP_allocNode(parser);
        if (new_node == 0) {
            return 0;
        }

        new_node->type = NOOMP_NODE_BINARYOPERATOR;
        new_node->source_offset = op_loc;

        if (noomP_addSubnode(new_node, lhs) != 0) {
            return 0;
        }
        if (noomP_addSubnode(new_node, rhs) != 0) {
            return 0;
        }

        lhs = new_node;
    }

    return lhs;
}
|
|
|
|
// Parse a complete expression: start the operator parser with the lowest
// minimum binding power (0) and no pre-parsed left operand, so every infix
// operator is accepted. Returns the expression root, or 0 on failure.
noomP_Node* noomP_parseExpression(noomP_Parser* parser) {
    noomP_Node* expression = noomP_parseOperatorExpression(parser, 0, 0);

    return expression;
}
|
|
|
|
// Parse statements into a NOOMP_NODE_BLOCK node until the next token is one
// of the keywords `end`, `elseif` or `else`. The terminating keyword is left
// unconsumed for the caller, and the token that opened the block is expected
// to have been consumed already.
//
// Returns the block node, or 0 if the lexer fails, a statement fails to
// parse (which includes reaching end of input before a terminator), or
// memory runs out.
noomP_Node* noomP_parseBlock(noomP_Parser* parser) {
    noomP_Node* node = noomP_allocNode(parser);
    if (node == 0) {
        return 0; // OOM :(
    }

    node->type = NOOMP_NODE_BLOCK;
    node->source_offset = parser->lex_offset;

    noomL_Token token;

    while (1) {
        if (noomP_peek(parser, &token) != 0) {
            return 0;
        }

        // check if end reached
        if (token.type == NOOML_TOKEN_KEYWORD) {
            const char* text = parser->code + token.offset;

            if (noom_streql(text, token.length, "end", 3)
                || noom_streql(text, token.length, "elseif", 6)
                || noom_streql(text, token.length, "else", 4)) {
                break;
            }
        }

        noomP_Node* stmt = noomP_parseStatement(parser);
        if (stmt == 0) {
            return 0;
        }

        if (noomP_addSubnode(node, stmt) != 0) {
            return 0;
        }
    }

    return node;
}
|
|
|
|
noomP_Node* noomP_parseRawStatement(noomP_Parser* parser) {
|
|
noomL_Token token;
|
|
noomP_peek(parser, &token);
|
|
|
|
if (token.type == NOOML_TOKEN_KEYWORD) {
|
|
if (noom_streql(parser->code + token.offset, token.length, "local", 5)) {
|
|
noomP_skip(parser, &token);
|
|
|
|
noomP_Node* localNode = noomP_allocNode(parser);
|
|
if (localNode == 0) return 0; // no memory :(
|
|
|
|
localNode->source_offset = token.offset;
|
|
localNode->type = NOOMP_NODE_LOCALDECLARATION;
|
|
|
|
while (1) {
|
|
noomP_peek(parser, &token);
|
|
|
|
if (token.type != NOOML_TOKEN_IDENTIFIER) return 0;
|
|
noomP_skip(parser, &token);
|
|
|
|
noomP_Node* varname = noomP_allocNode(parser);
|
|
if (varname == 0) return 0;
|
|
|
|
varname->type = NOOMP_NODE_VARNAME;
|
|
varname->source_offset = token.offset;
|
|
|
|
noomP_addSubnode(localNode, varname);
|
|
|
|
noomP_peek(parser, &token);
|
|
|
|
if (token.type == NOOML_TOKEN_SYMBOL) {
|
|
if (noom_streql(parser->code + token.offset, token.length, "=", 1)) {
|
|
noomP_skip(parser, &token);
|
|
break;
|
|
} else if (noom_streql(parser->code + token.offset, token.length, ",", 1)) {
|
|
noomP_skip(parser, &token);
|
|
} else {
|
|
return 0; // unexpected token
|
|
}
|
|
} else {
|
|
return 0; // unexpected token
|
|
}
|
|
}
|
|
|
|
// equals has already been eaten by loop (thank you loop)
|
|
|
|
while (1) {
|
|
noomP_Node *expr = noomP_parseExpression(parser);
|
|
if (expr == 0) return 0;
|
|
|
|
noomP_addSubnode(localNode, expr);
|
|
|
|
noomP_peek(parser, &token);
|
|
|
|
if (token.type == NOOML_TOKEN_SYMBOL) {
|
|
if (noom_streql(parser->code + token.offset, token.length, ",", 1)) {
|
|
noomP_skip(parser, &token);
|
|
} else {
|
|
break;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
return localNode;
|
|
} else if (noom_streql(parser->code + token.offset, token.length, "if", 2)) {
|
|
noomP_skip(parser, &token);
|
|
|
|
noomP_Node* ifStatement = noomP_allocNode(parser);
|
|
if (ifStatement == 0) return 0;
|
|
|
|
ifStatement->type = NOOMP_NODE_IFSTATEMENT;
|
|
ifStatement->source_offset = token.offset;
|
|
|
|
noomP_Node* condition = noomP_parseExpression(parser);
|
|
if (condition == 0) return 0;
|
|
|
|
noomP_addSubnode(ifStatement, condition);
|
|
|
|
noomP_peek(parser, &token);
|
|
|
|
if (token.type != NOOML_TOKEN_KEYWORD) return 0; // unexpected
|
|
if (!noom_streql(parser->code + token.offset, token.length, "then", 4)) return 0; // unexpected
|
|
|
|
noomP_skip(parser, &token);
|
|
|
|
noomP_Node* block = noomP_parseBlock(parser);
|
|
if (block == 0) return 0;
|
|
|
|
noomP_addSubnode(ifStatement, block);
|
|
|
|
while (1) { // else, elseif
|
|
noomP_peek(parser, &token);
|
|
|
|
if (token.type != NOOML_TOKEN_KEYWORD) return 0; // unexpected
|
|
|
|
if (noom_streql(parser->code + token.offset, token.length, "elseif", 6)) {
|
|
noomP_skip(parser, &token);
|
|
noomP_Node* elseIfCondition = noomP_parseExpression(parser);
|
|
if (elseIfCondition == 0) return 0;
|
|
|
|
noomP_addSubnode(ifStatement, elseIfCondition);
|
|
|
|
// now we need to check for "then"
|
|
noomP_peek(parser, &token);
|
|
|
|
if (token.type != NOOML_TOKEN_KEYWORD) return 0; // unexpected
|
|
if (!noom_streql(parser->code + token.offset, token.length, "then", 4)) return 0; // unexpected
|
|
noomP_skip(parser, &token);
|
|
|
|
// now the block
|
|
noomP_Node* elseIfBlock = noomP_parseBlock(parser);
|
|
if (elseIfBlock == 0) return 0;
|
|
|
|
noomP_addSubnode(ifStatement, elseIfBlock);
|
|
|
|
// could be even more
|
|
} else if (noom_streql(parser->code + token.offset, token.length, "else", 4)) {
|
|
noomP_skip(parser, &token);
|
|
|
|
noomP_Node* elseBlock = noomP_parseBlock(parser);
|
|
if (elseBlock == 0) return 0;
|
|
|
|
// we know it's an else if it's an odd number. no need to do anything special.
|
|
noomP_addSubnode(ifStatement, elseBlock);
|
|
|
|
break; // this must be the last one; end is handled after the loop
|
|
} else if (noom_streql(parser->code + token.offset, token.length, "end", 3)) {
|
|
break; // will check for end outside the loop because else and things
|
|
} else {
|
|
// unexpected
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
noomP_peek(parser, &token);
|
|
if (token.type != NOOML_TOKEN_KEYWORD) return 0; // unexpected
|
|
if (!noom_streql(parser->code + token.offset, token.length, "end", 3)) return 0; // unexpected
|
|
noomP_skip(parser, &token);
|
|
|
|
return ifStatement;
|
|
}
|
|
}
|
|
|
|
while (1) {
|
|
noomP_peek(parser, &token);
|
|
if (token.type == NOOML_TOKEN_SYMBOL) {
|
|
if (noom_streql(parser->code + token.offset, token.length, ";", 1)) {
|
|
noomP_skip(parser, &token);
|
|
continue;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Parse one statement and then consume every `;` that directly follows it.
//
// Returns the statement node, or 0 if the statement could not be parsed or
// the lexer fails while looking for trailing semicolons (the token is never
// inspected after a failed peek).
noomP_Node* noomP_parseStatement(noomP_Parser* parser) {
    noomP_Node* stmt = noomP_parseRawStatement(parser);
    if (stmt == 0) {
        return 0;
    }

    noomL_Token token;

    while (1) {
        if (noomP_peek(parser, &token) != 0) {
            return 0;
        }

        if (token.type != NOOML_TOKEN_SYMBOL) {
            break;
        }
        if (!noom_streql(parser->code + token.offset, token.length, ";", 1)) {
            break;
        }

        noomP_skip(parser, &token);
    }

    return stmt;
}
|
|
|
|
// Free every node on the chain that ends at `last`, following previous_node
// links. Each node's child array is released together with the node; the
// children themselves sit on the same chain, so they are freed exactly once.
static void noomP_discardNodes(noomP_Node* last) {
    while (last != 0) {
        noomP_Node* previous = last->previous_node;

        if (last->subnodes != 0) {
            noom_free(last->subnodes);
        }
        noom_free(last);

        last = previous;
    }
}

// Parse `code` into a NOOMP_NODE_PROGRAM node whose children are the
// top-level statements, in source order.
//
// code       - source text to parse.
// filename   - stored in the parser state; not otherwise used here.
// outpointer - on success, receives the program node.
// last_node  - on success, receives the most recently allocated node, from
//              which every node of this parse can be reached through
//              previous_node.
//
// Returns 0 on success. Returns -1 on lexer failure, syntax error or
// allocation failure; in that case all nodes allocated during the parse are
// freed (they would otherwise be unreachable, since the parser state is
// local to this function) and neither output is written.
int noomP_parse(const char* code, const char* filename, noomP_Node** outpointer, noomP_Node** last_node) {
    noomP_Parser parser;
    noomP_initParser(&parser, code, filename);

    noomP_Node* node = noomP_allocNode(&parser);
    if (node == 0) {
        return -1;
    }

    node->source_offset = parser.lex_offset;
    node->type = NOOMP_NODE_PROGRAM;

    noomL_Token token;

    while (1) {
        if (noomP_peek(&parser, &token) != 0) {
            goto fail;
        }
        if (token.type == NOOML_TOKEN_EOF) {
            break;
        }

        noomP_Node* child = noomP_parseStatement(&parser);
        if (child == 0) {
            goto fail;
        }

        if (noomP_addSubnode(node, child) != 0) {
            goto fail;
        }
    }

    *outpointer = node;
    *last_node = parser.last_node;

    return 0;

fail:
    noomP_discardNodes(parser.last_node);
    return -1;
}
|
|
|
|
// Reset `parser` so that it reads `code` from the beginning with an empty
// node chain. `code` and `filename` are stored as given, not copied.
// Always returns 0.
int noomP_initParser(noomP_Parser* parser, const char* code, const char* filename) {
    // Nothing has been lexed or allocated yet.
    parser->last_node = 0;
    parser->lex_offset = 0;

    parser->filename = filename;
    parser->code = code;

    return 0;
}
|