完成Parser定义以及表达式解析

2026-02-14 23:03:46 +08:00
parent 35e479fd05
commit 878157c2fc
19 changed files with 771 additions and 102 deletions
--- a/src/Parser/ExprParser.cpp
+++ b/src/Parser/ExprParser.cpp
@@ -0,0 +1,159 @@
+/*!
+    @file src/Parser/ExprParser.cpp
+    @brief 语法分析器(Pratt + 手动递归下降) 表达式解析实现 (pratt)
+    @author PuqiAR (im@puqiar.top)
+    @date 2026-02-14
+*/
+
+#include <Parser/Parser.hpp>
+
+namespace Fig
+{
+    /*!
+        @brief Builds a LiteralExpr node from the current token.
+        Called by parseExpression when the current token is a literal.
+        Switches state to ParsingLiteralExpr and consumes exactly one token.
+        @return the freshly allocated node; this function does not release it.
+    */
+    Result<LiteralExpr *, Error> Parser::parseLiteralExpr()
+    {
+        // state must be updated first: makeSourcelocation() records it in the location.
+        state = State::ParsingLiteralExpr;
+
+        const Token consumed = consumeToken();
+        return new LiteralExpr(consumed, makeSourcelocation(consumed));
+    }
+    /*!
+        @brief Builds an IdentiExpr node from the current token.
+        Called by parseExpression when the current token is an identifier.
+        Switches state to ParsingIdentiExpr and consumes exactly one token; the
+        identifier text is fetched from the source manager using the token's
+        index and length.
+        @return the freshly allocated node; this function does not release it.
+    */
+    Result<IdentiExpr *, Error> Parser::parseIdentiExpr()
+    {
+        // state must be updated first: makeSourcelocation() records it in the location.
+        state = State::ParsingIdentiExpr;
+
+        const Token name_token = consumeToken();
+        return new IdentiExpr(srcManager.GetSub(name_token.index, name_token.length),
+            makeSourcelocation(name_token));
+    }
+
+    /*!
+        @brief Parses "<op> <rhs>" and combines it with an already parsed left operand.
+        Called by parseExpression when the current token is a binary operator.
+        @param lhs left operand that becomes the first child of the new node
+        @return the new InfixExpr, or the error produced while parsing the right operand
+    */
+    Result<InfixExpr *, Error> Parser::parseInfixExpr(Expr *lhs)
+    {
+        state = State::ParsingInfixExpr;
+
+        const Token    operator_token = consumeToken();
+        BinaryOperator binary_op      = TokenToBinaryOp(operator_token);
+
+        // The operator's right binding power bounds how much the right operand may absorb.
+        const auto right_side = parseExpression(GetBinaryOpRBp(binary_op));
+        if (right_side)
+        {
+            Expr *rhs = *right_side;
+            return new InfixExpr(lhs, binary_op, rhs);
+        }
+        return std::unexpected(right_side.error());
+    }
+
+    /*!
+        @brief Parses "<unary-op> <operand>".
+        Called by parseExpression when the current token is a unary operator.
+        @return the new PrefixExpr, or the error produced while parsing the operand
+    */
+    Result<PrefixExpr *, Error> Parser::parsePrefixExpr()
+    {
+        state = State::ParsingPrefixExpr;
+
+        const Token   operator_token = consumeToken();
+        UnaryOperator unary_op       = TokenToUnaryOp(operator_token);
+
+        // The operator's right binding power bounds how much the operand may absorb.
+        const auto operand = parseExpression(GetUnaryOpRBp(unary_op));
+        if (operand)
+        {
+            Expr *rhs = *operand;
+            return new PrefixExpr(unary_op, rhs);
+        }
+        return std::unexpected(operand.error());
+    }
+
+    /*!
+        @brief Returns the token types that end expression parsing for the current state.
+        Every state currently maps to the same set: end of file and semicolon.
+        @return a copy of the terminator set
+    */
+    std::unordered_set<TokenType> Parser::getTerminators()
+    {
+        static const std::unordered_set<TokenType> defaultTerminators{TokenType::EndOfFile, TokenType::Semicolon};
+
+        switch (state)
+        {
+            // No state overrides the terminators yet; state-specific cases go here.
+            default: return defaultTerminators;
+        }
+    }
+    /*!
+        @brief Tells whether the current token ends the expression being parsed.
+        @return true when the current token's type is in getTerminators()
+    */
+    bool Parser::shouldTerminate()
+    {
+        const TokenType current_type = currentToken().type;
+        return getTerminators().contains(current_type);
+    }
+
+    /*!
+        @brief Pratt expression parser.
+
+        First parses a leading operand (identifier, literal or prefix expression),
+        then keeps folding "<binary-op> <rhs>" pairs into it while the next
+        operator's left binding power is strictly greater than @p rbp.
+
+        @param rbp right binding power of the operator to the left of this
+                   expression (the declaration defaults it to 0)
+        @return the root of the parsed expression, or an Error:
+                ExpectedExpression when no operand starts at the current token,
+                otherwise whatever error a nested parse produced.
+
+        NOTE(review): on an error return the already built left-hand subtree is
+        not released here - confirm who owns AST nodes.
+    */
+    Result<Expr *, Error> Parser::parseExpression(BindingPower rbp)
+    {
+        Expr *lhs = nullptr;
+
+        // Copied on purpose: consuming tokens below may grow the token buffer.
+        const Token first = currentToken();
+
+        if (first.isIdentifier())
+        {
+            const auto &lhs_result = parseIdentiExpr();
+            if (!lhs_result)
+            {
+                return std::unexpected(lhs_result.error());
+            }
+            lhs = *lhs_result;
+        }
+        else if (first.isLiteral())
+        {
+            const auto &lhs_result = parseLiteralExpr();
+            if (!lhs_result)
+            {
+                return std::unexpected(lhs_result.error());
+            }
+            lhs = *lhs_result;
+        }
+        else if (IsTokenOp(first.type, false)) // unary operator?
+        {
+            const auto &lhs_result = parsePrefixExpr();
+            if (!lhs_result)
+            {
+                return std::unexpected(lhs_result.error());
+            }
+            lhs = *lhs_result;
+        }
+
+        if (!lhs)
+        {
+            // Nothing was consumed on this path. Anchor the diagnostic on the
+            // token before the offending one when there is one; when the
+            // offending token is the only token buffered so far there is no
+            // previous token to ask for, so anchor on the offending token itself.
+            const Token &anchor = buffer.size() < 2 ? first : prevToken();
+            return std::unexpected(Error(ErrorType::ExpectedExpression,
+                "expected expression",
+                "insert expressions",
+                makeSourcelocation(anchor)));
+        }
+
+        while (true)
+        {
+            const Token token = currentToken();
+            if (shouldTerminate())
+            {
+                return lhs;
+            }
+
+            // TODO: postfix operators. They bind so tightly that they almost
+            // always attach directly to the operand, so they can be combined
+            // here without going through the binding-power comparison.
+            if (!IsTokenOp(token.type /* isBinary = true */)) // not a binary operator
+            {
+                return lhs;
+            }
+
+            BinaryOperator op  = TokenToBinaryOp(token);
+            BindingPower   lbp = GetBinaryOpLBp(op);
+            if (rbp >= lbp)
+            {
+                // The operator on our left binds at least as tightly as this
+                // one, so lhs belongs to it: hand lhs back to the caller.
+                return lhs;
+            }
+
+            const auto &result = parseInfixExpr(lhs);
+            if (!result)
+            {
+                // Same error propagation form as every other failure path.
+                return std::unexpected(result.error());
+            }
+            lhs = *result;
+        }
+    }
+
+}; // namespace Fig
--- a/src/Parser/Parser.cpp
+++ b/src/Parser/Parser.cpp
@@ -0,0 +1,18 @@
+/*!
+    @file src/Parser/Parser.cpp
+    @brief 语法分析器(Pratt + 手动递归下降) 实现
+    @author PuqiAR (im@puqiar.top)
+    @date 2026-02-14
+*/
+
+
+#include <Parser/Parser.hpp>
+
+namespace Fig
+{
+    /*!
+        @brief Placeholder for whole-file parsing.
+        @return always an empty node list; nothing is parsed yet.
+    */
+    DynArray<AstNode *> Parser::parseAll()
+    {
+        return DynArray<AstNode *>();
+    }
+};
--- a/src/Parser/Parser.hpp
+++ b/src/Parser/Parser.hpp
@@ -0,0 +1,154 @@
+/*!
+    @file src/Parser/Parser.hpp
+    @brief 语法分析器(Pratt + 手动递归下降) 定义
+    @author PuqiAR (im@puqiar.top)
+    @date 2026-02-14
+*/
+
+#pragma once
+
+#include <Ast/Ast.hpp>
+#include <Deps/Deps.hpp>
+#include <Error/Error.hpp>
+#include <Lexer/Lexer.hpp>
+#include <Token/Token.hpp>
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+
+#include <unordered_set>
+#include <utility>
+
+namespace Fig
+{
+
+    /*!
+        @brief Pratt + hand-written recursive-descent parser.
+
+        Tokens are pulled lazily from the lexer into `buffer`; `index` is the
+        position of the current token inside that buffer. Once the EndOfFile
+        token has been buffered the lexer is never asked again, and the cursor
+        never moves past that token.
+
+        References returned by the token accessors point into `buffer`; any later
+        call that lexes more tokens may invalidate them, so copy the Token if it
+        must outlive such a call.
+    */
+    class Parser
+    {
+    private:
+        Lexer         &lexer;
+        SourceManager &srcManager;
+
+        size_t          index = 0; // position of the current token inside buffer
+        DynArray<Token> buffer;    // every token lexed so far, in source order
+
+        String fileName;
+
+        bool isEOF = false; // true once the EndOfFile token has been appended to buffer
+
+        // Lexes one token and appends it to buffer.
+        // A lexer error is reported and terminates the process.
+        void lexOneToken()
+        {
+            const auto &result = lexer.NextToken();
+            if (!result)
+            {
+                ReportError(result.error(), srcManager);
+                std::exit(-1);
+            }
+            const Token &token = result.value();
+            if (token.type == TokenType::EndOfFile)
+            {
+                isEOF = true;
+            }
+            buffer.push_back(token);
+        }
+
+        // Lexes until buffer[target] exists or the EndOfFile token has been buffered.
+        // Afterwards buffer is never empty.
+        void fillBufferTo(size_t target)
+        {
+            while (target >= buffer.size() && !isEOF)
+            {
+                lexOneToken();
+            }
+        }
+
+        // Advances the cursor by one token and returns the new current token.
+        // On the EndOfFile token the cursor stays where it is.
+        // The returned reference points into buffer, never at a local.
+        const Token &nextToken()
+        {
+            fillBufferTo(index + 1);
+            if (index + 1 < buffer.size())
+            {
+                ++index;
+            }
+            return buffer[index];
+        }
+
+        // Returns the token just before the current one.
+        // When the current token is the first token there is none, so the
+        // current token is returned instead of reading out of bounds.
+        const Token &prevToken()
+        {
+            if (index == 0)
+            {
+                return currentToken();
+            }
+            return buffer[index - 1];
+        }
+
+        // Returns the token under the cursor, lexing it first if necessary.
+        const Token &currentToken()
+        {
+            fillBufferTo(index);
+            return buffer[index];
+        }
+
+        // Returns the token `lookahead` positions after the current one without
+        // moving the cursor. Looking past the end yields the EndOfFile token.
+        const Token &peekToken(size_t lookahead = 1)
+        {
+            const size_t peekIndex = index + lookahead;
+            fillBufferTo(peekIndex);
+            if (peekIndex >= buffer.size()) // not that many tokens left
+            {
+                return buffer.back(); // back is the EndOfFile token
+            }
+            return buffer[peekIndex];
+        }
+
+        // Returns a copy of the current token and advances past it.
+        // On the EndOfFile token it returns that token and does not advance.
+        Token consumeToken()
+        {
+            Token consumed = currentToken();
+            nextToken();
+            return consumed;
+        }
+
+    public:
+        // What the parser is working on; recorded in every SourceLocation made
+        // by makeSourcelocation and used by getTerminators to pick terminators.
+        enum class State : std::uint8_t
+        {
+            Standby,
+
+            ParsingLiteralExpr,
+            ParsingIdentiExpr,
+
+            ParsingInfixExpr,
+            ParsingPrefixExpr,
+        } state;
+
+        // Both references are stored, not copied: the lexer and the source
+        // manager must outlive the parser.
+        Parser(Lexer &_lexer, SourceManager &_srcManager, String _fileName) :
+            lexer(_lexer), srcManager(_srcManager), fileName(std::move(_fileName)), state(State::Standby)
+        {
+        }
+
+    private:
+        // Builds the location of `tok` (line/column looked up from its index,
+        // plus its length), tagged with the file name and the current state name.
+        SourceLocation makeSourcelocation(const Token &tok)
+        {
+            auto [line, column] = srcManager.GetLineColumn(tok.index);
+            return SourceLocation(
+                SourcePosition(
+                    line,
+                    column,
+                    tok.length
+                ), fileName, "[internal parser]", magic_enum::enum_name(state).data());
+        }
+
+        Result<LiteralExpr *, Error> parseLiteralExpr(); // call when the current token is a literal
+        Result<IdentiExpr *, Error>  parseIdentiExpr();  // call when the current token is an identifier
+
+        Result<InfixExpr *, Error>  parseInfixExpr(Expr *);  // called from parseExpression; current token is a binary operator
+        Result<PrefixExpr *, Error> parsePrefixExpr(); // called from parseExpression; current token is a unary operator
+
+        std::unordered_set<TokenType> getTerminators(); // terminator token types for the current state
+        bool shouldTerminate(); // whether the current token ends expression parsing in the current state
+
+    public:
+        Result<Expr *, Error> parseExpression(BindingPower = 0);
+
+        DynArray<AstNode *> parseAll();
+    };
+}; // namespace Fig
--- a/src/Parser/ParserTest.cpp
+++ b/src/Parser/ParserTest.cpp
@@ -0,0 +1,29 @@
+#include <Parser/Parser.hpp>
+#include <iostream>
+
+// Parser smoke test: reads a Fig source file, parses one expression from it and
+// prints the expression's string form.
+//
+// Usage: ParserTest [path-to-source]
+// Without an argument the original developer-local default path is used.
+// Returns 1 when the file cannot be read or the expression fails to parse.
+int main(int argc, char **argv)
+{
+    using namespace Fig;
+
+    const bool hasPathArg = argc > 1;
+
+    String fileName = hasPathArg ? String(argv[1]) : String("test.fig");
+    String filePath = hasPathArg
+                          ? String(argv[1])
+                          : String("T:/Files/Maker/Code/MyCodingLanguage/The Fig Project/Fig/test.fig");
+    SourceManager srcManager(filePath);
+
+    String source = srcManager.Read();
+    if (!srcManager.read)
+    {
+        std::cerr << "Couldn't read file\n";
+        return 1;
+    }
+
+    Lexer lexer(source, fileName);
+    Parser parser(lexer, srcManager, fileName);
+    const auto &result = parser.parseExpression();
+    if (!result)
+    {
+        ReportError(result.error(), srcManager);
+        return 1;
+    }
+    Expr *expr = *result;
+    std::cout << expr->toString() << '\n';
+}