#include "parser.h"
#include "helper.h"

// NOTE(review): noomP_parseStatement and noomP_initParser are called before
// their definitions in this file; this assumes parser.h declares them.

// Binding power shared by every prefix operator: above "*" (110/120) and
// below "^" (140/130), so "-x^2" parses as -(x^2).
enum { NOOMP_PREFIX_BP = 125 };

// One row of the infix operator tables: operator spelling plus its left and
// right binding powers. lbp < rbp gives a left associative operator,
// lbp > rbp a right associative one.
typedef struct {
    const char* text;
    noom_uint_t length;
    noom_uint_t lbp;
    noom_uint_t rbp;
} noomP_InfixRule;

static const noomP_InfixRule noomP_symbolRules[] = {
    {"+", 1, 90, 100},
    {"-", 1, 90, 100},
    {"*", 1, 110, 120},
    {"/", 1, 110, 120},
    {"%", 1, 110, 120},
    {"^", 1, 140, 130}, // right associative
    {"..", 2, 80, 70},  // right associative
    {"<", 1, 50, 60},
    {">", 1, 50, 60},
    {"<=", 2, 50, 60},
    {">=", 2, 50, 60},
    {"~=", 2, 50, 60},
    {"==", 2, 50, 60},
};

static const noomP_InfixRule noomP_keywordRules[] = {
    {"and", 3, 30, 40},
    {"or", 2, 10, 20},
};

// Returns non-zero when the source text covered by `token` equals `text`
// (which is `length` bytes long), as decided by noom_streql.
static int noomP_tokenIs(const noomP_Parser* parser, const noomL_Token* token, const char* text, noom_uint_t length) {
    return noom_streql(parser->code + token->offset, token->length, text, length);
}

// Skips `token`, then allocates a node of the given type whose source_offset
// is the token's offset. Returns 0 when the allocation fails.
static noomP_Node* noomP_consumeAsNode(noomP_Parser* parser, noomL_Token* token, int type) {
    noomP_skip(parser, token);
    noomP_Node* node = noomP_allocNode(parser);
    if (node == 0) return 0;
    node->type = type;
    node->source_offset = token->offset;
    return node;
}

// Peeks the next token into `token` and consumes it if it is the keyword
// `text`. Returns 0 on success, -1 on a lex failure or any other token.
static int noomP_expectKeyword(noomP_Parser* parser, noomL_Token* token, const char* text, noom_uint_t length) {
    if (noomP_peek(parser, token) != 0) return -1;
    if (token->type != NOOML_TOKEN_KEYWORD) return -1; // unexpected
    if (!noomP_tokenIs(parser, token, text, length)) return -1; // unexpected
    noomP_skip(parser, token);
    return 0;
}

// Consumes any run of ";" symbols. Returns 0 once a different token is next,
// -1 when lexing fails.
static int noomP_skipSemicolons(noomP_Parser* parser) {
    noomL_Token token;
    for (;;) {
        if (noomP_peek(parser, &token) != 0) return -1;
        if (token.type != NOOML_TOKEN_SYMBOL) return 0;
        if (!noomP_tokenIs(parser, &token, ";", 1)) return 0;
        noomP_skip(parser, &token);
    }
}

// Returns non-zero when `token` is one of the keywords that close a block:
// "end", "elseif" or "else".
static int noomP_isBlockTerminator(const noomP_Parser* parser, const noomL_Token* token) {
    if (token->type != NOOML_TOKEN_KEYWORD) return 0;
    return noomP_tokenIs(parser, token, "end", 3)
        || noomP_tokenIs(parser, token, "elseif", 6)
        || noomP_tokenIs(parser, token, "else", 4);
}

/**
 * Lexes the next non-whitespace token at the parser's current offset into
 * `token` without consuming it.
 *
 * Whitespace tokens ARE consumed (lex_offset advances past them); that is the
 * only state change peek makes.
 *
 * @return 0 on success, -1 when noomL_lex reports a failure.
 */
int noomP_peek(noomP_Parser* parser, noomL_Token* token) {
    for (;;) {
        if (noomL_lex(parser->code, parser->lex_offset, token) != 0) {
            return -1; // TODO: proper error propagation and stuff
        }
        if (token->type != NOOML_TOKEN_WHITESPACE) {
            return 0;
        }
        // peek changes state, but only if it's one of these useless tokens anyway.
        parser->lex_offset += token->length;
    }
}

/**
 * Consumes `token` by advancing the lex offset by its length. The caller is
 * expected to have peeked the token already. An EOF token is never consumed.
 */
void noomP_skip(noomP_Parser* parser, noomL_Token* token) {
    if (token->type == NOOML_TOKEN_EOF) return;
    parser->lex_offset += token->length;
}

/**
 * Allocates a node with room for two subnodes and links it into the parser's
 * chain of allocated nodes (previous_node / last_node).
 *
 * The node's type and source_offset are left for the caller to fill in.
 *
 * @return the new node, or 0 when either allocation fails (in which case the
 *         parser's chain is left untouched).
 */
noomP_Node* noomP_allocNode(noomP_Parser* parser) {
    noomP_Node* node = noom_alloc(sizeof(noomP_Node));
    if (node == 0) return 0;

    node->subnodes = noom_alloc(sizeof(noomP_Node*) * 2);
    if (node->subnodes == 0) {
        noom_free(node);
        return 0;
    }
    node->subnodec = 0;
    node->subnode_cap = 2;

    node->previous_node = parser->last_node;
    parser->last_node = node;
    return node;
}

/**
 * Appends `subnode` to `node`, doubling the subnode array when it is full.
 *
 * @return 0 on success, -1 when the array could not be grown. On failure the
 *         node keeps its previous array, count and capacity.
 */
int noomP_addSubnode(noomP_Node* node, noomP_Node* subnode) {
    if (node->subnodec == node->subnode_cap) {
        // Grow through a temporary so a failed realloc neither loses the old
        // array nor leaves subnode_cap claiming room that does not exist.
        void* grown = noom_realloc(node->subnodes, sizeof(noomP_Node*) * (node->subnode_cap * 2));
        if (grown == 0) return -1;
        node->subnodes = grown;
        node->subnode_cap = node->subnode_cap * 2;
    }
    node->subnodes[node->subnodec++] = subnode;
    return 0;
}

/**
 * Parses a single operand: a number, an identifier, or one of the keywords
 * "true", "false" or "nil".
 *
 * @return the literal/variable node, or 0 on lex failure, allocation failure
 *         or when the next token is none of the above (nothing is consumed in
 *         that last case, apart from leading whitespace).
 */
noomP_Node* noomP_parseRawExpression(noomP_Parser* parser) {
    noomL_Token token;
    if (noomP_peek(parser, &token) != 0) return 0;

    if (token.type == NOOML_TOKEN_NUMBER) {
        // uhh figure it out, future me!
        return noomP_consumeAsNode(parser, &token, NOOMP_NODE_NUMBERLITERAL);
    }
    if (token.type == NOOML_TOKEN_IDENTIFIER) {
        return noomP_consumeAsNode(parser, &token, NOOMP_NODE_VARIABLE);
    }
    if (token.type == NOOML_TOKEN_KEYWORD) {
        // "true" and "false" share a node type; source_offset tells them apart.
        if (noomP_tokenIs(parser, &token, "true", 4) || noomP_tokenIs(parser, &token, "false", 5)) {
            return noomP_consumeAsNode(parser, &token, NOOMP_NODE_BOOLEANLITERAL);
        }
        if (noomP_tokenIs(parser, &token, "nil", 3)) {
            return noomP_consumeAsNode(parser, &token, NOOMP_NODE_NILLITERAL);
        }
    }
    return 0;
}

/**
 * Looks up the binding powers of `token` as an infix operator.
 *
 * @param a receives the left binding power when the token is an operator
 * @param b receives the right binding power when the token is an operator
 * @return 1 when the token is an infix operator (and *a / *b were written),
 *         0 otherwise (*a and *b are left untouched).
 */
int noomP_infixOperatorBP(noomP_Parser* parser, noomL_Token* token, noom_uint_t* a, noom_uint_t* b) {
    // todo: maybe make this not pointer? we'll see
    const noomP_InfixRule* rules;
    noom_uint_t count;

    if (token->type == NOOML_TOKEN_SYMBOL) {
        rules = noomP_symbolRules;
        count = sizeof(noomP_symbolRules) / sizeof(noomP_symbolRules[0]);
    } else if (token->type == NOOML_TOKEN_KEYWORD) {
        rules = noomP_keywordRules;
        count = sizeof(noomP_keywordRules) / sizeof(noomP_keywordRules[0]);
    } else {
        return 0;
    }

    for (noom_uint_t i = 0; i < count; i++) {
        if (noomP_tokenIs(parser, token, rules[i].text, rules[i].length)) {
            *a = rules[i].lbp;
            *b = rules[i].rbp;
            return 1;
        }
    }
    return 0;
}

/**
 * Returns the binding power of `token` as a prefix operator ("-", "#", "~"
 * or "not"), or 0 when it is not a prefix operator.
 */
noom_uint_t noomP_prefixOperatorBP(noomP_Parser* parser, noomL_Token* token) {
    // todo: maybe make this not pointer? we'll see
    if (token->type == NOOML_TOKEN_SYMBOL) {
        if (noomP_tokenIs(parser, token, "-", 1)
            || noomP_tokenIs(parser, token, "#", 1)
            || noomP_tokenIs(parser, token, "~", 1)) {
            return NOOMP_PREFIX_BP;
        }
    } else if (token->type == NOOML_TOKEN_KEYWORD) {
        if (noomP_tokenIs(parser, token, "not", 3)) {
            return NOOMP_PREFIX_BP;
        }
    }
    return 0;
}

/**
 * Precedence-climbing expression parser.
 *
 * @param min_bp  infix operators whose left binding power is below this are
 *                left for the caller
 * @param predlhs an already parsed left-hand side, or 0 to parse one here
 *                (either a prefix-operator expression or a raw operand)
 * @return the expression node, or 0 on lex failure, allocation failure or a
 *         missing operand.
 */
noomP_Node* noomP_parseOperatorExpression(noomP_Parser* parser, noom_uint_t min_bp, noomP_Node* predlhs) {
    noomL_Token token;
    noomP_Node* lhs = predlhs;

    // eof check is 2 hard
    if (noomP_peek(parser, &token) != 0) return 0;

    if (lhs == 0) {
        // prefix operator?
        noom_uint_t bp = noomP_prefixOperatorBP(parser, &token);
        if (bp != 0) {
            noomP_skip(parser, &token);
            noomP_Node* child = noomP_parseOperatorExpression(parser, bp, 0);
            if (child == 0) return 0;

            lhs = noomP_allocNode(parser);
            if (lhs == 0) return 0;
            lhs->type = NOOMP_NODE_UNARYOPERATOR;
            // the operator! we need this to check what it was when compiling.
            lhs->source_offset = token.offset;
            if (noomP_addSubnode(lhs, child) != 0) return 0;
        } else {
            // wasn't prefix op, raw?
            lhs = noomP_parseRawExpression(parser);
            if (lhs == 0) return 0;
        }
    }

    for (;;) {
        // infix operator time!!
        noom_uint_t lbp = 0;
        noom_uint_t rbp = 0;

        if (noomP_peek(parser, &token) != 0) return 0;
        noom_uint_t op_loc = token.offset;

        // also sets lbp and rbp
        if (!noomP_infixOperatorBP(parser, &token, &lbp, &rbp)) break;
        if (lbp < min_bp) break; // joever

        noomP_skip(parser, &token); // remove operator

        noomP_Node* rhs = noomP_parseOperatorExpression(parser, rbp, 0);
        if (rhs == 0) return 0;

        noomP_Node* new_node = noomP_allocNode(parser);
        if (new_node == 0) return 0;
        new_node->type = NOOMP_NODE_BINARYOPERATOR;
        new_node->source_offset = op_loc;
        if (noomP_addSubnode(new_node, lhs) != 0) return 0;
        if (noomP_addSubnode(new_node, rhs) != 0) return 0;

        lhs = new_node;
    }

    return lhs;
}

/**
 * Parses a full expression (no minimum binding power, no pre-parsed operand).
 * Returns 0 on failure.
 */
noomP_Node* noomP_parseExpression(noomP_Parser* parser) {
    return noomP_parseOperatorExpression(parser, 0, 0);
}

/**
 * Parses statements into a block node until "end", "else" or "elseif" is the
 * next token. The terminating keyword is NOT consumed, and the keyword that
 * opened the block must already have been consumed by the caller.
 *
 * @return the block node, or 0 on lex failure, allocation failure or when a
 *         statement fails to parse.
 */
noomP_Node* noomP_parseBlock(noomP_Parser* parser) {
    noomP_Node* node = noomP_allocNode(parser);
    if (node == 0) return 0; // OOM :(
    node->type = NOOMP_NODE_BLOCK;
    node->source_offset = parser->lex_offset;

    noomL_Token token;
    for (;;) {
        // check if end reached
        if (noomP_peek(parser, &token) != 0) return 0;
        if (noomP_isBlockTerminator(parser, &token)) break;

        noomP_Node* stmt = noomP_parseStatement(parser);
        if (stmt == 0) return 0;
        if (noomP_addSubnode(node, stmt) != 0) return 0;
    }

    return node;
}

// Parses "local name {, name} = expr {, expr}". `token` holds the peeked
// "local" keyword on entry. The resulting node's subnodes are the VARNAME
// nodes followed by the initializer expressions. A declaration without "="
// is rejected.
static noomP_Node* noomP_parseLocalDeclaration(noomP_Parser* parser, noomL_Token* token) {
    noomP_Node* localNode = noomP_consumeAsNode(parser, token, NOOMP_NODE_LOCALDECLARATION);
    if (localNode == 0) return 0; // no memory :(

    // Names, separated by "," and closed by "=".
    for (;;) {
        if (noomP_peek(parser, token) != 0) return 0;
        if (token->type != NOOML_TOKEN_IDENTIFIER) return 0;

        noomP_Node* varname = noomP_consumeAsNode(parser, token, NOOMP_NODE_VARNAME);
        if (varname == 0) return 0;
        if (noomP_addSubnode(localNode, varname) != 0) return 0;

        if (noomP_peek(parser, token) != 0) return 0;
        if (token->type != NOOML_TOKEN_SYMBOL) return 0; // unexpected token
        if (noomP_tokenIs(parser, token, "=", 1)) {
            noomP_skip(parser, token);
            break;
        }
        if (!noomP_tokenIs(parser, token, ",", 1)) return 0; // unexpected token
        noomP_skip(parser, token);
    }

    // equals has already been eaten by loop (thank you loop)
    for (;;) {
        noomP_Node* expr = noomP_parseExpression(parser);
        if (expr == 0) return 0;
        if (noomP_addSubnode(localNode, expr) != 0) return 0;

        if (noomP_peek(parser, token) != 0) return 0;
        if (token->type != NOOML_TOKEN_SYMBOL) break;
        if (!noomP_tokenIs(parser, token, ",", 1)) break;
        noomP_skip(parser, token);
    }

    return localNode;
}

// Parses "<condition> then <block>" and appends the condition and the block,
// in that order, to `ifStatement`. Returns 0 on success, -1 on failure.
static int noomP_parseConditionAndBlock(noomP_Parser* parser, noomP_Node* ifStatement, noomL_Token* token) {
    noomP_Node* condition = noomP_parseExpression(parser);
    if (condition == 0) return -1;
    if (noomP_addSubnode(ifStatement, condition) != 0) return -1;

    if (noomP_expectKeyword(parser, token, "then", 4) != 0) return -1;

    noomP_Node* block = noomP_parseBlock(parser);
    if (block == 0) return -1;
    return noomP_addSubnode(ifStatement, block);
}

// Parses "if c then b {elseif c then b} [else b] end". `token` holds the
// peeked "if" keyword on entry. Subnodes are condition/block pairs; an odd
// subnode count means the last subnode is the else block.
static noomP_Node* noomP_parseIfStatement(noomP_Parser* parser, noomL_Token* token) {
    noomP_Node* ifStatement = noomP_consumeAsNode(parser, token, NOOMP_NODE_IFSTATEMENT);
    if (ifStatement == 0) return 0;

    if (noomP_parseConditionAndBlock(parser, ifStatement, token) != 0) return 0;

    for (;;) {
        // else, elseif
        if (noomP_peek(parser, token) != 0) return 0;
        if (token->type != NOOML_TOKEN_KEYWORD) return 0; // unexpected

        if (noomP_tokenIs(parser, token, "elseif", 6)) {
            noomP_skip(parser, token);
            if (noomP_parseConditionAndBlock(parser, ifStatement, token) != 0) return 0;
            continue; // could be even more
        }
        if (noomP_tokenIs(parser, token, "else", 4)) {
            noomP_skip(parser, token);
            noomP_Node* elseBlock = noomP_parseBlock(parser);
            if (elseBlock == 0) return 0;
            // we know it's an else if it's an odd number. no need to do anything special.
            if (noomP_addSubnode(ifStatement, elseBlock) != 0) return 0;
            break; // this must be the last one; end is handled after the loop
        }
        if (noomP_tokenIs(parser, token, "end", 3)) {
            break; // will check for end outside the loop because else and things
        }
        return 0; // unexpected
    }

    if (noomP_expectKeyword(parser, token, "end", 3) != 0) return 0;
    return ifStatement;
}

/**
 * Parses one statement without its trailing semicolons. Only "local"
 * declarations and "if" statements are recognised.
 *
 * @return the statement node, or 0 on failure. When the next token starts
 *         neither statement kind, any ";" symbols that are next are consumed
 *         and 0 is returned.
 */
noomP_Node* noomP_parseRawStatement(noomP_Parser* parser) {
    noomL_Token token;
    if (noomP_peek(parser, &token) != 0) return 0;

    if (token.type == NOOML_TOKEN_KEYWORD) {
        if (noomP_tokenIs(parser, &token, "local", 5)) {
            return noomP_parseLocalDeclaration(parser, &token);
        }
        if (noomP_tokenIs(parser, &token, "if", 2)) {
            return noomP_parseIfStatement(parser, &token);
        }
    }

    // Not a known statement: eat stray semicolons, then report failure.
    (void)noomP_skipSemicolons(parser);
    return 0;
}

/**
 * Parses one statement followed by any number of ";" separators.
 *
 * @return the statement node, or 0 when the statement fails to parse or
 *         lexing fails while skipping the separators.
 */
noomP_Node* noomP_parseStatement(noomP_Parser* parser) {
    noomP_Node* stmt = noomP_parseRawStatement(parser);
    if (stmt == 0) return 0;
    if (noomP_skipSemicolons(parser) != 0) return 0;
    return stmt;
}

/**
 * Parses `code` into a program node whose subnodes are the top-level
 * statements.
 *
 * @param outpointer receives the program node on success; untouched on failure
 * @param last_node  receives the most recently allocated node (the head of
 *                   the previous_node chain) on success AND on failure, so
 *                   the nodes allocated so far stay reachable to the caller
 * @return 0 on success, -1 on lex failure, allocation failure or a statement
 *         that fails to parse.
 */
int noomP_parse(const char* code, const char* filename, noomP_Node** outpointer, noomP_Node** last_node) {
    noomP_Parser parser;
    noomP_initParser(&parser, code, filename);

    noomL_Token token;
    noomP_Node* node = noomP_allocNode(&parser);
    if (node == 0) goto fail;
    node->source_offset = parser.lex_offset;
    node->type = NOOMP_NODE_PROGRAM;

    for (;;) {
        if (noomP_peek(&parser, &token) != 0) goto fail;
        if (token.type == NOOML_TOKEN_EOF) break;

        noomP_Node* child = noomP_parseStatement(&parser);
        if (child == 0) goto fail;
        if (noomP_addSubnode(node, child) != 0) goto fail;
    }

    *outpointer = node;
    *last_node = parser.last_node;
    return 0;

fail:
    // Hand back the chain head even on failure; without it every node
    // allocated so far would be unreachable.
    *last_node = parser.last_node;
    return -1;
}

/**
 * Resets `parser` to the start of `code` with an empty node chain. The
 * `code` and `filename` pointers are stored, not copied. Always returns 0.
 */
int noomP_initParser(noomP_Parser* parser, const char* code, const char* filename) {
    parser->code = code;
    parser->filename = filename;
    parser->lex_offset = 0;
    parser->last_node = (void *)0;
    return 0;
}