/*!
    @file src/Parser/Parser.hpp
    @brief Parser definition (Pratt + hand-written recursive descent)

    @author PuqiAR (im@puqiar.top)
    @date 2026-02-14
*/

#pragma once

// NOTE(review): the header names of the eight #include directives below are
// missing from the text under review; they are reproduced unchanged and must
// be restored from the original file.
#include
#include
#include
#include
#include
#include
#include
#include

namespace Fig
{
    /*!
        @brief Syntax analyser that pulls tokens from a Lexer on demand.

        Tokens are lexed lazily and cached in `buffer`; `index` is the position
        of the current token inside that cache, so lookahead (peekToken) can
        buffer tokens without moving the cursor.

        NOTE(review): the element types of `buffer`, `stateStack`, `stopAt` and
        the terminator sets, and the `&` of `currentState()`, were reconstructed
        from how the members are used in this header — confirm against the
        original file.
    */
    class Parser
    {
    private:
        Lexer         &lexer;
        SourceManager &srcManager;

        size_t          index = 0; // index of the current token inside buffer
        DynArray<Token> buffer;    // every token lexed so far, in source order

        String fileName;

        bool isEOF = false; // true once the current token is the EndOfFile token

        /*!
            @brief Lexes one token and appends it to `buffer`.

            A lexer error is reported through ReportError and terminates the
            process with std::exit(-1). Both nextToken and peekToken go through
            this helper so that they fail in the same way.
        */
        void lexIntoBuffer()
        {
            auto result = lexer.NextToken();
            if (!result)
            {
                ReportError(result.error(), srcManager);
                std::exit(-1);
            }
            buffer.push_back(result.value());
        }

        /*!
            @brief Tells whether the EndOfFile token has already been buffered.
            @return true when the last buffered token is EndOfFile.
        */
        bool lexerExhausted()
        {
            return !buffer.empty() && buffer.back().type == TokenType::EndOfFile;
        }

        /*!
            @brief Advances the cursor by one token and returns the new current token.

            Re-uses a token that an earlier peekToken() buffered when there is
            one, otherwise lexes a fresh token. Must not be called once the
            current token is EndOfFile (asserted).
        */
        Token nextToken()
        {
            assert(!isEOF && "nextToken: eof but called nextToken");
            if (index + 1 < buffer.size())
            {
                ++index; // already lexed by a previous lookahead
            }
            else
            {
                lexIntoBuffer();
                // Point at the slot just filled. This keeps index == 0 for the
                // very first token instead of running one past the end.
                index = buffer.size() - 1;
            }

            const Token &token = buffer[index];
            if (token.type == TokenType::EndOfFile)
            {
                isEOF = true;
            }
            return token;
        }

        /*!
            @brief Returns the token before the current one.
            @return The previous token, or the current token when there is no previous one.
        */
        Token prevToken()
        {
            if (buffer.empty() || index == 0)
            {
                return currentToken();
            }
            return buffer[index - 1];
        }

        /*!
            @brief Returns the current token, lexing the first token if nothing was read yet.
        */
        Token currentToken()
        {
            if (buffer.empty())
            {
                return nextToken();
            }
            return buffer[index];
        }

        /*!
            @brief Returns the token `lookahead` positions after the current one
                   without moving the cursor.

            @param lookahead Distance from the current token (1 = the next token).
            @return The requested token, or the EndOfFile token when the input
                    ends before that position.

            Must not be called once the current token is EndOfFile (asserted).
        */
        Token peekToken(size_t lookahead = 1)
        {
            assert(!isEOF && "peekToken: eof but called peekToken");

            if (buffer.empty())
            {
                currentToken(); // make sure buffer[index] is the current token
            }

            size_t peekIndex = index + lookahead;
            while (peekIndex >= buffer.size() && !lexerExhausted())
            {
                lexIntoBuffer();
            }
            if (peekIndex >= buffer.size()) // the input has fewer tokens than requested
            {
                return buffer.back(); // back() is the EndOfFile token
            }
            return buffer[peekIndex];
        }

        /*!
            @brief Returns the current token and advances past it.

            When the current token is EndOfFile the cursor stays where it is
            and the EndOfFile token is returned.
        */
        Token consumeToken()
        {
            Token current = currentToken();
            // currentToken() may just have lexed EndOfFile as the very first
            // token, so test the flag after fetching and not before.
            if (!isEOF)
            {
                nextToken();
            }
            return current;
        }

        /*!
            @brief Consumes the current token if it has the given type.
            @return true when the token matched and was consumed.
        */
        bool match(TokenType type)
        {
            if (currentToken().type == type)
            {
                consumeToken();
                return true;
            }
            return false;
        }

        /*!
            @brief Builds a SyntaxError of the form "expect 'X' in <stmt>, got `<token type>`".

            @param stmtType Name of the construct being parsed.
            @param expect   What the parser expected to see.
            @param tokenGot The token that was found; also supplies the source location.
            @param loc      Call site inside the parser (defaults to the caller).
        */
        Error makeUnexpectTokenError(const String        &stmtType,
                                     const String        &expect,
                                     const Token         &tokenGot,
                                     std::source_location loc = std::source_location::current())
        {
            return Error(ErrorType::SyntaxError,
                         std::format("expect '{}' in {}, got `{}`",
                                     expect,
                                     stmtType,
                                     magic_enum::enum_name(tokenGot.type)),
                         "none",
                         makeSourceLocation(tokenGot),
                         loc);
        }

        /*!
            @brief Builds the "expect ';' after statement" SyntaxError located at the current token.
            @param loc Call site inside the parser (defaults to the caller).
        */
        Error makeExpectSemicolonError(std::source_location loc = std::source_location::current())
        {
            return Error(ErrorType::SyntaxError,
                         "expect ';' after statement",
                         "insert ';'",
                         makeSourceLocation(currentToken()),
                         loc);
        }

    public:
        /*!
            @brief One entry of the parser state stack: what is being parsed and
                   which extra token types end the current construct.
        */
        struct State
        {
            enum StateType : std::uint8_t
            {
                Standby,

                ParsingLiteralExpr,
                ParsingIdentiExpr,
                ParsingInfixExpr,
                ParsingPrefixExpr,
                ParsingIndexExpr,
                ParsingCallExpr,

                ParsingVarDecl,
                ParsingIf,
                ParsingWhile,
                ParsingFnDefStmt,
                ParsingReturn,
                ParsingBreak,
                ParsingContinue,

                ParsingNamedTypeExpr,
            } type = StateType::Standby;

            std::unordered_set<TokenType> stopAt = {};
        };

    private:
        /*!
            @brief Returns the immutable default terminator set: ; ) ] } , EOF
        */
        const std::unordered_set<TokenType> &getBaseTerminators()
        {
            static const std::unordered_set<TokenType> baseTerminators = {TokenType::Semicolon,
                                                                          TokenType::RightParen,
                                                                          TokenType::RightBracket,
                                                                          TokenType::RightBrace,
                                                                          TokenType::Comma,
                                                                          TokenType::EndOfFile};
            return baseTerminators;
        }

        /*!
            @brief Returns the mutable terminator set, initialised from the base set.

            The set is a function-local static, so it is one object shared by
            every call (and by every Parser instance).
        */
        std::unordered_set<TokenType> &getTerminators()
        {
            /*
                Syntax terminators:
                    ; ) ] } , EOF
            */
            static std::unordered_set<TokenType> terminators(getBaseTerminators());
            return terminators;
        }

        /*!
            @brief Restores the mutable terminator set to the base set.
        */
        void resetTermintors()
        {
            getTerminators() = getBaseTerminators();
        }

        /*!
            @brief Decides whether the current token ends the construct being parsed.
            @return true when the token type is in the terminator set or in the
                    `stopAt` set of any state on the stack.
        */
        bool shouldTerminate()
        {
            const Token &token       = currentToken();
            const auto  &terminators = getTerminators();

            if (terminators.contains(token.type))
            {
                return true;
            }

            // innermost state first
            for (auto it = stateStack.rbegin(); it != stateStack.rend(); ++it)
            {
                if (it->stopAt.contains(token.type))
                {
                    return true;
                }
            }
            return false;
        }

        DynArray<State> stateStack;

        /*!
            @brief Returns the innermost state. The stack must not be empty.
        */
        State &currentState()
        {
            return stateStack.back();
        }

        /*!
            @brief Pushes a new innermost state.
        */
        void pushState(State _state)
        {
            stateStack.push_back(std::move(_state));
        }

        /*!
            @brief Pops the innermost state; does nothing on an empty stack.
        */
        void popState()
        {
            if (!stateStack.empty())
            {
                stateStack.pop_back();
            }
        }

        /*!
            @brief Scope guard: pushes a state on construction and pops one on destruction.
        */
        class StateProtector
        {
            Parser *parser;

        public:
            StateProtector(Parser *p, const State &newState) : parser(p)
            {
                parser->pushState(newState);
            }

            ~StateProtector()
            {
                parser->popState();
            }

            // non-copyable
            StateProtector(const StateProtector &)            = delete;
            StateProtector &operator=(const StateProtector &) = delete;
        };

    public:
        /*!
            @brief Creates a parser over the given lexer and pushes the initial Standby state.

            @param _lexer      Token source; held by reference.
            @param _srcManager Used to map token offsets to line/column; held by reference.
            @param _fileName   File name recorded in the source locations of errors.
        */
        Parser(Lexer &_lexer, SourceManager &_srcManager, String _fileName) :
            lexer(_lexer), srcManager(_srcManager), fileName(std::move(_fileName))
        {
            pushState(State());
        }

    private:
        /*!
            @brief Builds the SourceLocation of a token.

            Line and column come from the SourceManager; the name of the
            current parser state is passed as the last argument.
        */
        SourceLocation makeSourceLocation(const Token &tok)
        {
            auto [line, column] = srcManager.GetLineColumn(tok.index);
            return SourceLocation(SourcePosition(line, column, tok.length),
                                  fileName,
                                  "[internal parser]",
                                  magic_enum::enum_name(currentState().type).data());
        }

        // NOTE(review): the template arguments of the `Result` return types
        // below are missing from the text under review (see `Result, Error>`);
        // the declarations are reproduced unchanged and must be restored from
        // the original header.

        /* TypeExpressions */
        Result parseNamedTypeExpr(); // current token is an identifier
        Result parseTypeExpr();

        /* Expressions */
        Result parseLiteralExpr();     // called when the current token is a literal
        Result parseIdentiExpr();      // called when the current token is an identifier
        Result parseInfixExpr(Expr *); // called recursively by parseExpression, current token is the operator
        Result parsePrefixExpr();      // called recursively by parseExpression, current token is the operator
        Result parseIndexExpr(Expr *); // called by parseExpression, current token is `[`
        Result parseCallExpr(Expr *);  // called by parseExpression, current token is `(`

        Result parseExpression(BindingPower = 0);

        /* Statements */
        Result parseBlockStmt();         // current token is {
        Result parseVarDecl(bool);       // called by parseStatement, current token is var
        Result parseIfStmt();            // called by parseStatement, current token is if
        Result parseWhileStmt();         // called by parseStatement, current token is while
        Result, Error> parseFnParams();  // called by parseFnDefStmt or for a lambda
        Result parseFnDefStmt(bool);     // called by parseStatement, current token is func
        Result parseReturnStmt();        // called by parseStatement, current token is return
        // continue / break are parsed in a single step directly by parseStatement

        Result parseStatement();

    public:
        Result Parse();
    };

// Replaces the stopAt set of the innermost parser state. The expansion already
// ends with ';'.
#define SET_STOP_AT(...) currentState().stopAt = {__VA_ARGS__};

} // namespace Fig