From 878157c2fcd9988b1f71aefe1950157fb4d9cfc2 Mon Sep 17 00:00:00 2001 From: PuqiAR Date: Sat, 14 Feb 2026 23:03:46 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90Parser=E5=AE=9A=E4=B9=89?= =?UTF-8?q?=E4=BB=A5=E5=8F=8A=E8=A1=A8=E8=BE=BE=E5=BC=8F=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Ast/Ast.hpp | 6 + src/Ast/Base.hpp | 7 +- src/Ast/Expr/IdentiExpr.hpp | 7 ++ src/Ast/Expr/InfixExpr.hpp | 39 +++++++ src/Ast/Expr/LiteralExpr.hpp | 10 +- src/Ast/Expr/PrefixExpr.hpp | 39 +++++++ src/Ast/Operator.cpp | 95 ++++++++++++++-- src/Ast/Operator.hpp | 27 ++++- src/Deps/Deps.hpp | 5 + src/Error/Error.cpp | 18 +-- src/Error/Error.hpp | 163 +++++++++++++++------------- src/Lexer/Lexer.hpp | 3 +- src/Parser/ExprParser.cpp | 159 +++++++++++++++++++++++++++ src/Parser/Parser.cpp | 18 +++ src/Parser/Parser.hpp | 154 ++++++++++++++++++++++++++ src/Parser/ParserTest.cpp | 29 +++++ src/SourceManager/SourceManager.hpp | 61 ++++++++++- src/Token/Token.hpp | 14 +++ xmake.lua | 19 +++- 19 files changed, 771 insertions(+), 102 deletions(-) create mode 100644 src/Ast/Expr/InfixExpr.hpp create mode 100644 src/Ast/Expr/PrefixExpr.hpp create mode 100644 src/Parser/ExprParser.cpp create mode 100644 src/Parser/Parser.cpp create mode 100644 src/Parser/Parser.hpp create mode 100644 src/Parser/ParserTest.cpp diff --git a/src/Ast/Ast.hpp b/src/Ast/Ast.hpp index 1bdffcd..cf33c15 100644 --- a/src/Ast/Ast.hpp +++ b/src/Ast/Ast.hpp @@ -6,3 +6,9 @@ */ #pragma once + +#include +#include +#include +#include + diff --git a/src/Ast/Base.hpp b/src/Ast/Base.hpp index 26af312..417ee97 100644 --- a/src/Ast/Base.hpp +++ b/src/Ast/Base.hpp @@ -21,14 +21,15 @@ namespace Fig IdentiExpr, // 标识符表达式 LiteralExpr, // 字面量表达式 - UnaryExpr, // 一元表达式 - BinaryExpr, // 二元表达式 - TernaryExpr, // 三元表达式 + PrefixExpr, // 一元 前缀表达式 + InfixExpr, // 二元 中缀表达式 }; struct AstNode { AstType type = AstType::AstNode; SourceLocation location; + + virtual String toString() const = 0; }; struct Expr : public AstNode diff --git a/src/Ast/Expr/IdentiExpr.hpp b/src/Ast/Expr/IdentiExpr.hpp index f8b1d51..8efd47c 100644 --- a/src/Ast/Expr/IdentiExpr.hpp +++ b/src/Ast/Expr/IdentiExpr.hpp @@ -6,7 +6,9 @@ */ #pragma once + #include +#include namespace Fig { @@ -25,5 +27,10 @@ namespace Fig name = std::move(_name); location = std::move(_loc); } + + virtual String toString() const override + { + return std::format("", name); + } }; }; \ No newline at end of file diff --git a/src/Ast/Expr/InfixExpr.hpp b/src/Ast/Expr/InfixExpr.hpp new file mode 100644 index 0000000..e26c855 --- /dev/null +++ b/src/Ast/Expr/InfixExpr.hpp @@ -0,0 +1,39 @@ +/*! + @file src/Ast/Expr/InfixExpr.hpp + @brief 中缀表达式定义 + @author PuqiAR (im@puqiar.top) + @date 2026-02-14 +*/ + +#pragma once + +#include +#include + +#include + +namespace Fig +{ + struct InfixExpr final : Expr + { + Expr *left; + BinaryOperator op; + Expr *right; + + InfixExpr() + { + type = AstType::InfixExpr; + } + InfixExpr(Expr *_left, BinaryOperator _op, Expr *_right) : + left(_left), op(_op), right(_right) + { + type = AstType::InfixExpr; + location = _left->location; + } + + virtual String toString() const override + { + return std::format("", left->toString(), magic_enum::enum_name(op), right->toString()); + } + }; +}; // namespace Fig \ No newline at end of file diff --git a/src/Ast/Expr/LiteralExpr.hpp b/src/Ast/Expr/LiteralExpr.hpp index af23f1c..f4c927d 100644 --- a/src/Ast/Expr/LiteralExpr.hpp +++ b/src/Ast/Expr/LiteralExpr.hpp @@ -10,6 +10,8 @@ #include #include +#include + namespace Fig { struct LiteralExpr final : Expr @@ -20,9 +22,15 @@ namespace Fig { type = AstType::LiteralExpr; } - LiteralExpr(const Token& token) : token(token) + LiteralExpr(const Token& token, SourceLocation _location) : token(token) { type = AstType::LiteralExpr; + location = std::move(_location); + } + + virtual String toString() const override + { + return std::format("", magic_enum::enum_name(token.type)); } }; }; // namespace Fig \ No newline at end of file diff --git a/src/Ast/Expr/PrefixExpr.hpp b/src/Ast/Expr/PrefixExpr.hpp new file mode 100644 index 0000000..88753ae --- /dev/null +++ b/src/Ast/Expr/PrefixExpr.hpp @@ -0,0 +1,39 @@ +/*! + @file src/Ast/Expr/PrefixExpr.hpp + @brief 前缀表达式定义 + @author PuqiAR (im@puqiar.top) + @date 2026-02-14 +*/ + +#pragma once + +#include +#include + +#include + +namespace Fig +{ + struct PrefixExpr final : Expr + { + UnaryOperator op; + Expr *operand; + + PrefixExpr() + { + type = AstType::PrefixExpr; + } + + PrefixExpr(UnaryOperator _op, Expr *_operand) : + op(_op), operand(_operand) + { + type = AstType::PrefixExpr; + location = _operand->location; + } + + virtual String toString() const override + { + return std::format("", magic_enum::enum_name(op), operand->toString()); + } + }; +}; \ No newline at end of file diff --git a/src/Ast/Operator.cpp b/src/Ast/Operator.cpp index db9e501..8074c5f 100644 --- a/src/Ast/Operator.cpp +++ b/src/Ast/Operator.cpp @@ -22,8 +22,10 @@ namespace Fig HashMap &GetBinaryOpMap() { - static HashMap binaryOpMap{{TokenType::Plus, BinaryOperator::Add}, + static HashMap binaryOpMap{ + {TokenType::Plus, BinaryOperator::Add}, {TokenType::Minus, BinaryOperator::Subtract}, + {TokenType::Asterisk, BinaryOperator::Multiply}, {TokenType::Slash, BinaryOperator::Divide}, {TokenType::Percent, BinaryOperator::Modulo}, @@ -42,30 +44,54 @@ namespace Fig {TokenType::Power, BinaryOperator::Power}, {TokenType::Assign, BinaryOperator::Assign}, + {TokenType::PlusEqual, BinaryOperator::AddAssign}, + {TokenType::MinusEqual, BinaryOperator::SubAssign}, + {TokenType::AsteriskEqual, BinaryOperator::MultiplyAssign}, + {TokenType::SlashEqual, BinaryOperator::DivideAssign}, + {TokenType::PercentEqual, BinaryOperator::ModuloAssign}, + {TokenType::CaretEqual, BinaryOperator::BitXorAssign}, {TokenType::Pipe, BinaryOperator::BitAnd}, {TokenType::Ampersand, BinaryOperator::BitAnd}, {TokenType::ShiftLeft, BinaryOperator::ShiftLeft}, - {TokenType::ShiftRight, BinaryOperator::ShiftRight}}; + {TokenType::ShiftRight, BinaryOperator::ShiftRight}, + + {TokenType::Dot, BinaryOperator::MemberAccess}, + }; return binaryOpMap; } - // 赋值 < 三元 < 逻辑或 < 逻辑与 < 位运算 < 比较 < 位移 < 加减 < 乘除 < 幂 < 一元 + // 赋值 < 三元 < 逻辑或 < 逻辑与 < 位运算 < 比较 < 位移 < 加减 < 乘除 < 幂 < 一元 < 成员访问 < (后缀) + + /* + 暂划分: + 二元运算符:0 - 20000 + 一元运算符:20001 - 40000 + 后缀/成员/其他:40001 - 60001 + + */ HashMap &GetUnaryOpBindingPowerMap() { static HashMap unbpm{ - {UnaryOperator::BitNot, 10000}, - {UnaryOperator::Negate, 10000}, - {UnaryOperator::Not, 10000}, - {UnaryOperator::AddressOf, 10000}, + {UnaryOperator::BitNot, 20001}, + {UnaryOperator::Negate, 20001}, + {UnaryOperator::Not, 20001}, + {UnaryOperator::AddressOf, 20001}, }; return unbpm; } HashMap &GetBinaryOpBindingPowerMap() { - static HashMap bnbpm{{BinaryOperator::Assign, 100}, + static HashMap bnbpm{ + {BinaryOperator::Assign, 100}, + {BinaryOperator::AddAssign, 100}, + {BinaryOperator::SubAssign, 100}, + {BinaryOperator::MultiplyAssign, 100}, + {BinaryOperator::DivideAssign, 100}, + {BinaryOperator::ModuloAssign, 100}, + {BinaryOperator::BitXorAssign, 100}, {BinaryOperator::LogicalOr, 500}, {BinaryOperator::LogicalAnd, 550}, @@ -93,8 +119,51 @@ namespace Fig {BinaryOperator::Divide, 4500}, {BinaryOperator::Power, 5000}, + + {BinaryOperator::MemberAccess, 40001}, }; + return bnbpm; } + + BindingPower GetUnaryOpRBp(UnaryOperator op) + { + return GetUnaryOpBindingPowerMap().at(op); + } + + BindingPower GetBinaryOpLBp(BinaryOperator op) + { + return GetBinaryOpBindingPowerMap().at(op); + } + + BindingPower GetBinaryOpRBp(BinaryOperator op) + { + /* + 右结合,左绑定力 >= 右 + a = b = c + a = (b = c) + a.b.c + */ + switch (op) + { + case BinaryOperator::Assign: return GetBinaryOpLBp(op); + case BinaryOperator::AddAssign: return GetBinaryOpLBp(op); + case BinaryOperator::SubAssign: return GetBinaryOpLBp(op); + case BinaryOperator::MultiplyAssign: return GetBinaryOpLBp(op); + case BinaryOperator::DivideAssign: return GetBinaryOpLBp(op); + case BinaryOperator::ModuloAssign: return GetBinaryOpLBp(op); + case BinaryOperator::BitXorAssign: return GetBinaryOpLBp(op); + case BinaryOperator::Power: return GetBinaryOpLBp(op); + + default: + /* + 左结合, 左绑定力 < 右 + a * b * c + (a * b) * c + */ + return GetBinaryOpLBp(op) + 1; + } + } + bool IsTokenOp(TokenType type, bool binary /* = true*/) { if (binary) @@ -103,4 +172,14 @@ namespace Fig } return GetUnaryOpMap().contains(type); } + + UnaryOperator TokenToUnaryOp(const Token &token) + { + return GetUnaryOpMap().at(token.type); + } + BinaryOperator TokenToBinaryOp(const Token &token) + { + return GetBinaryOpMap().at(token.type); + } + }; // namespace Fig \ No newline at end of file diff --git a/src/Ast/Operator.hpp b/src/Ast/Operator.hpp index a612a08..4bb4179 100644 --- a/src/Ast/Operator.hpp +++ b/src/Ast/Operator.hpp @@ -35,22 +35,31 @@ namespace Fig Greater, // 大于 > LessEqual, // 小于等于 <= GreaterEqual, // 大于等于 >= - - Is, // is操作符 + + Is, // is操作符 LogicalAnd, // 逻辑与 && / and LogicalOr, // 逻辑或 || / or Power, // 幂运算 ** - Assign, // 赋值(修改) = + Assign, // 赋值(修改) = + AddAssign, // += + SubAssign, // -= + MultiplyAssign, // *= + DivideAssign, // /= + ModuloAssign, // %= + BitXorAssign, // ^= // 位运算 - BitAnd, // 按位与 & - BitOr, // 按位或 | + BitAnd, // 按位与 & + BitOr, // 按位或 | BitXor, // 异或 ^ ShiftLeft, // 左移 ShiftRight, // 右移 + + // 成员访问 + MemberAccess, // . }; using BindingPower = unsigned int; @@ -61,5 +70,13 @@ namespace Fig HashMap &GetUnaryOpBindingPowerMap(); HashMap &GetBinaryOpBindingPowerMap(); + BindingPower GetUnaryOpRBp(UnaryOperator); + + BindingPower GetBinaryOpLBp(BinaryOperator); + BindingPower GetBinaryOpRBp(BinaryOperator); + bool IsTokenOp(TokenType type, bool binary = true); + + UnaryOperator TokenToUnaryOp(const Token &); + BinaryOperator TokenToBinaryOp(const Token &); }; // namespace Fig \ No newline at end of file diff --git a/src/Deps/Deps.hpp b/src/Deps/Deps.hpp index f2386b0..3170952 100644 --- a/src/Deps/Deps.hpp +++ b/src/Deps/Deps.hpp @@ -13,7 +13,12 @@ #include #include +#include +#include +#include +#include #include +#include namespace Fig { diff --git a/src/Error/Error.cpp b/src/Error/Error.cpp index 8228504..4a4d426 100644 --- a/src/Error/Error.cpp +++ b/src/Error/Error.cpp @@ -36,19 +36,21 @@ namespace Fig const char *ErrorTypeToString(ErrorType type) { + using enum ErrorType; switch (type) { - case ErrorType::UnusedSymbol: return "UnusedSymbol"; + case UnusedSymbol: return "UnusedSymbol"; - case ErrorType::MayBeNull: return "MaybeNull"; + case MayBeNull: return "MaybeNull"; - case ErrorType::UnterminatedString: return "UnterminatedString"; - case ErrorType::UnterminatedComments: return "UnterminatedComments"; - case ErrorType::InvalidNumberLiteral: return "InvalidNumberLiteral"; - case ErrorType::InvalidCharacter: return "InvalidCharacter"; - case Fig::ErrorType::InvalidSymbol: return "InvalidSymbol"; + case UnterminatedString: return "UnterminatedString"; + case UnterminatedComments: return "UnterminatedComments"; + case InvalidNumberLiteral: return "InvalidNumberLiteral"; + case InvalidCharacter: return "InvalidCharacter"; + case InvalidSymbol: return "InvalidSymbol"; - case ErrorType::SyntaxError: return "SyntaxError"; + case ExpectedExpression: return "ExpectedExpression"; + case SyntaxError: return "SyntaxError"; // default: return "Some one forgot to add case to `ErrorTypeToString`"; } diff --git a/src/Error/Error.hpp b/src/Error/Error.hpp index 189ab3e..0c855bf 100644 --- a/src/Error/Error.hpp +++ b/src/Error/Error.hpp @@ -7,10 +7,11 @@ #pragma once -#include #include +#include #include + #include namespace Fig @@ -22,16 +23,23 @@ namespace Fig */ enum class ErrorType : unsigned int { + /* Minor */ UnusedSymbol = 0, + /* Medium */ MayBeNull = 1001, + /* Critical */ + + // lexer errors UnterminatedString = 2001, UnterminatedComments, InvalidNumberLiteral, InvalidCharacter, InvalidSymbol, + // parser errors + ExpectedExpression, SyntaxError, }; @@ -40,109 +48,118 @@ namespace Fig struct Error { ErrorType type; - String message; - String suggestion; + String message; + String suggestion; - SourceLocation location; + SourceLocation location; std::source_location thrower_loc; Error() {} - Error(ErrorType _type, - const String &_message, - const String &_suggestion, - const SourceLocation &_location, - const std::source_location &_throwerloc = std::source_location::current()) + Error(ErrorType _type, + const String &_message, + const String &_suggestion, + const SourceLocation &_location, + const std::source_location &_throwerloc = std::source_location::current()) { - type = _type; - message = _message; - suggestion = _suggestion; - location = _location; + type = _type; + message = _message; + suggestion = _suggestion; + location = _location; thrower_loc = _throwerloc; } }; namespace TerminalColors { - constexpr const char *Reset = "\033[0m"; - constexpr const char *Bold = "\033[1m"; - constexpr const char *Dim = "\033[2m"; - constexpr const char *Italic = "\033[3m"; + constexpr const char *Reset = "\033[0m"; + constexpr const char *Bold = "\033[1m"; + constexpr const char *Dim = "\033[2m"; + constexpr const char *Italic = "\033[3m"; constexpr const char *Underline = "\033[4m"; - constexpr const char *Blink = "\033[5m"; - constexpr const char *Reverse = "\033[7m"; // 前背景反色 - constexpr const char *Hidden = "\033[8m"; // 隐藏文本 - constexpr const char *Strike = "\033[9m"; // 删除线 + constexpr const char *Blink = "\033[5m"; + constexpr const char *Reverse = "\033[7m"; // 前背景反色 + constexpr const char *Hidden = "\033[8m"; // 隐藏文本 + constexpr const char *Strike = "\033[9m"; // 删除线 - constexpr const char *Black = "\033[30m"; - constexpr const char *Red = "\033[31m"; - constexpr const char *Green = "\033[32m"; - constexpr const char *Yellow = "\033[33m"; - constexpr const char *Blue = "\033[34m"; + constexpr const char *Black = "\033[30m"; + constexpr const char *Red = "\033[31m"; + constexpr const char *Green = "\033[32m"; + constexpr const char *Yellow = "\033[33m"; + constexpr const char *Blue = "\033[34m"; constexpr const char *Magenta = "\033[35m"; - constexpr const char *Cyan = "\033[36m"; - constexpr const char *White = "\033[37m"; + constexpr const char *Cyan = "\033[36m"; + constexpr const char *White = "\033[37m"; - constexpr const char *LightBlack = "\033[90m"; - constexpr const char *LightRed = "\033[91m"; - constexpr const char *LightGreen = "\033[92m"; - constexpr const char *LightYellow = "\033[93m"; - constexpr const char *LightBlue = "\033[94m"; + constexpr const char *LightBlack = "\033[90m"; + constexpr const char *LightRed = "\033[91m"; + constexpr const char *LightGreen = "\033[92m"; + constexpr const char *LightYellow = "\033[93m"; + constexpr const char *LightBlue = "\033[94m"; constexpr const char *LightMagenta = "\033[95m"; - constexpr const char *LightCyan = "\033[96m"; - constexpr const char *LightWhite = "\033[97m"; + constexpr const char *LightCyan = "\033[96m"; + constexpr const char *LightWhite = "\033[97m"; - constexpr const char *DarkRed = "\033[38;2;128;0;0m"; - constexpr const char *DarkGreen = "\033[38;2;0;100;0m"; - constexpr const char *DarkYellow = "\033[38;2;128;128;0m"; - constexpr const char *DarkBlue = "\033[38;2;0;0;128m"; + constexpr const char *DarkRed = "\033[38;2;128;0;0m"; + constexpr const char *DarkGreen = "\033[38;2;0;100;0m"; + constexpr const char *DarkYellow = "\033[38;2;128;128;0m"; + constexpr const char *DarkBlue = "\033[38;2;0;0;128m"; constexpr const char *DarkMagenta = "\033[38;2;100;0;100m"; - constexpr const char *DarkCyan = "\033[38;2;0;128;128m"; - constexpr const char *DarkGray = "\033[38;2;64;64;64m"; - constexpr const char *Gray = "\033[38;2;128;128;128m"; - constexpr const char *Silver = "\033[38;2;192;192;192m"; + constexpr const char *DarkCyan = "\033[38;2;0;128;128m"; + constexpr const char *DarkGray = "\033[38;2;64;64;64m"; + constexpr const char *Gray = "\033[38;2;128;128;128m"; + constexpr const char *Silver = "\033[38;2;192;192;192m"; - constexpr const char *Navy = "\033[38;2;0;0;128m"; - constexpr const char *RoyalBlue = "\033[38;2;65;105;225m"; + constexpr const char *Navy = "\033[38;2;0;0;128m"; + constexpr const char *RoyalBlue = "\033[38;2;65;105;225m"; constexpr const char *ForestGreen = "\033[38;2;34;139;34m"; - constexpr const char *Olive = "\033[38;2;128;128;0m"; - constexpr const char *Teal = "\033[38;2;0;128;128m"; - constexpr const char *Maroon = "\033[38;2;128;0;0m"; - constexpr const char *Purple = "\033[38;2;128;0;128m"; - constexpr const char *Orange = "\033[38;2;255;165;0m"; - constexpr const char *Gold = "\033[38;2;255;215;0m"; - constexpr const char *Pink = "\033[38;2;255;192;203m"; - constexpr const char *Crimson = "\033[38;2;220;20;60m"; + constexpr const char *Olive = "\033[38;2;128;128;0m"; + constexpr const char *Teal = "\033[38;2;0;128;128m"; + constexpr const char *Maroon = "\033[38;2;128;0;0m"; + constexpr const char *Purple = "\033[38;2;128;0;128m"; + constexpr const char *Orange = "\033[38;2;255;165;0m"; + constexpr const char *Gold = "\033[38;2;255;215;0m"; + constexpr const char *Pink = "\033[38;2;255;192;203m"; + constexpr const char *Crimson = "\033[38;2;220;20;60m"; - constexpr const char *OnBlack = "\033[40m"; - constexpr const char *OnRed = "\033[41m"; - constexpr const char *OnGreen = "\033[42m"; - constexpr const char *OnYellow = "\033[43m"; - constexpr const char *OnBlue = "\033[44m"; + constexpr const char *OnBlack = "\033[40m"; + constexpr const char *OnRed = "\033[41m"; + constexpr const char *OnGreen = "\033[42m"; + constexpr const char *OnYellow = "\033[43m"; + constexpr const char *OnBlue = "\033[44m"; constexpr const char *OnMagenta = "\033[45m"; - constexpr const char *OnCyan = "\033[46m"; - constexpr const char *OnWhite = "\033[47m"; + constexpr const char *OnCyan = "\033[46m"; + constexpr const char *OnWhite = "\033[47m"; - constexpr const char *OnLightBlack = "\033[100m"; - constexpr const char *OnLightRed = "\033[101m"; - constexpr const char *OnLightGreen = "\033[102m"; - constexpr const char *OnLightYellow = "\033[103m"; - constexpr const char *OnLightBlue = "\033[104m"; + constexpr const char *OnLightBlack = "\033[100m"; + constexpr const char *OnLightRed = "\033[101m"; + constexpr const char *OnLightGreen = "\033[102m"; + constexpr const char *OnLightYellow = "\033[103m"; + constexpr const char *OnLightBlue = "\033[104m"; constexpr const char *OnLightMagenta = "\033[105m"; - constexpr const char *OnLightCyan = "\033[106m"; - constexpr const char *OnLightWhite = "\033[107m"; + constexpr const char *OnLightCyan = "\033[106m"; + constexpr const char *OnLightWhite = "\033[107m"; - constexpr const char *OnDarkBlue = "\033[48;2;0;0;128m"; + constexpr const char *OnDarkBlue = "\033[48;2;0;0;128m"; constexpr const char *OnGreenYellow = "\033[48;2;173;255;47m"; - constexpr const char *OnOrange = "\033[48;2;255;165;0m"; - constexpr const char *OnGray = "\033[48;2;128;128;128m"; + constexpr const char *OnOrange = "\033[48;2;255;165;0m"; + constexpr const char *OnGray = "\033[48;2;128;128;128m"; }; // namespace TerminalColors inline uint8_t ErrorLevel(ErrorType t) { unsigned int id = static_cast(t); - if (id <= 1000) { return 1; } - if (id > 1000 && id <= 2000) { return 2; } - if (id > 2000) { return 3; } + if (id <= 1000) + { + return 1; + } + if (id > 1000 && id <= 2000) + { + return 2; + } + if (id > 2000) + { + return 3; + } return 0; } diff --git a/src/Lexer/Lexer.hpp b/src/Lexer/Lexer.hpp index b29b8bc..1dc3f9f 100644 --- a/src/Lexer/Lexer.hpp +++ b/src/Lexer/Lexer.hpp @@ -153,8 +153,9 @@ namespace Fig fileName = std::move(_fileName); } - SourceLocation makeSourceLocation(const SourcePosition ¤t_pos) + SourceLocation makeSourceLocation(SourcePosition current_pos) { + current_pos.tok_length = 1; return SourceLocation( current_pos, fileName, "[internal lexer]", String(magic_enum::enum_name(state).data())); } diff --git a/src/Parser/ExprParser.cpp b/src/Parser/ExprParser.cpp new file mode 100644 index 0000000..a7b35bc --- /dev/null +++ b/src/Parser/ExprParser.cpp @@ -0,0 +1,159 @@ +/*! + @file src/Parser/ExprParser.hpp + @brief 语法分析器(Pratt + 手动递归下降) 表达式解析实现 (pratt) + @author PuqiAR (im@puqiar.top) + @date 2026-02-14 +*/ + +#include + +namespace Fig +{ + Result Parser::parseLiteralExpr() // 当前token为literal时调用 + { + state = State::ParsingLiteralExpr; + const Token &literal_token = consumeToken(); + LiteralExpr *node = new LiteralExpr(literal_token, makeSourcelocation(literal_token)); + return node; + } + Result Parser::parseIdentiExpr() // 当前token为Identifier调用 + { + state = State::ParsingIdentiExpr; + const Token &identifier = consumeToken(); + IdentiExpr *node = + new IdentiExpr(srcManager.GetSub(identifier.index, identifier.length), makeSourcelocation(identifier)); + return node; + } + + Result Parser::parseInfixExpr(Expr *lhs) // 当前token为 op + { + state = State::ParsingInfixExpr; + const Token &op_token = consumeToken(); + BinaryOperator op = TokenToBinaryOp(op_token); + BindingPower rbp = GetBinaryOpRBp(op); + + const auto &rhs_result = parseExpression(rbp); + if (!rhs_result) + { + return std::unexpected(rhs_result.error()); + } + Expr *rhs = *rhs_result; + + InfixExpr *node = new InfixExpr(lhs, op, rhs); + return node; + } + + Result Parser::parsePrefixExpr() // 当前token为op + { + state = State::ParsingPrefixExpr; + const Token &op_token = consumeToken(); + UnaryOperator op = TokenToUnaryOp(op_token); + + BindingPower rbp = GetUnaryOpRBp(op); + const auto &rhs_result = parseExpression(rbp); + if (!rhs_result) + { + return std::unexpected(rhs_result.error()); + } + + Expr *rhs = *rhs_result; + PrefixExpr *node = new PrefixExpr(op, rhs); + return node; + } + + std::unordered_set Parser::getTerminators() // 返回当前state的终止条件(终止符) + { + using enum State; + + static const std::unordered_set baseTerminators = {TokenType::EndOfFile, TokenType::Semicolon}; + + switch (state) + { + default: return baseTerminators; + } + } + bool Parser::shouldTerminate() + { + const Token &token = currentToken(); + const auto &terminators = getTerminators(); + return terminators.contains(token.type); + } + + Result Parser::parseExpression(BindingPower rbp) + { + Expr *lhs = nullptr; + Token token = currentToken(); + + if (token.isIdentifier()) + { + const auto &lhs_result = parseIdentiExpr(); + if (!lhs_result) + { + return std::unexpected(lhs_result.error()); + } + lhs = *lhs_result; + } + else if (token.isLiteral()) + { + const auto &lhs_result = parseLiteralExpr(); + if (!lhs_result) + { + return std::unexpected(lhs_result.error()); + } + lhs = *lhs_result; + } + else if (IsTokenOp(token.type, false)) // 是否是一元运算符 + { + const auto &lhs_result = parsePrefixExpr(); + if (!lhs_result) + { + return std::unexpected(lhs_result.error()); + } + lhs = *lhs_result; + } + + if (!lhs) + { + return std::unexpected(Error(ErrorType::ExpectedExpression, + "expected expression", + "insert expressions", + makeSourcelocation(prevToken()))); + } + + while (true) + { + token = currentToken(); + if (shouldTerminate()) + { + return lhs; + } + + if (IsTokenOp(token.type /* isBinary = true */)) // 是否为二元运算符 + { + BinaryOperator op = TokenToBinaryOp(token); + BindingPower lbp = GetBinaryOpLBp(op); + if (rbp >= lbp) + { + // 前操作数的右绑定力比当前操作数的左绑定力大 + // lhs被吸走 + return lhs; + } + + const auto &result = parseInfixExpr(lhs); + if (!result) + { + return result; + } + lhs = *result; + } + // 后缀运算符优先级非常大,几乎永远跟在操作数后面,因此我们可以直接结合 + // 而不用走正常路径 + else if (0) {} + else + { + return lhs; + } + } + } + +}; // namespace Fig \ No newline at end of file diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp new file mode 100644 index 0000000..ebe8a85 --- /dev/null +++ b/src/Parser/Parser.cpp @@ -0,0 +1,18 @@ +/*! + @file src/Parser/Parser.cpp + @brief 语法分析器(Pratt + 手动递归下降) 实现 + @author PuqiAR (im@puqiar.top) + @date 2026-02-14 +*/ + + +#include + +namespace Fig +{ + DynArray Parser::parseAll() + { + DynArray nodes; + return nodes; + } +}; \ No newline at end of file diff --git a/src/Parser/Parser.hpp b/src/Parser/Parser.hpp new file mode 100644 index 0000000..9bb7ce4 --- /dev/null +++ b/src/Parser/Parser.hpp @@ -0,0 +1,154 @@ +/*! + @file src/Parser/Parser.hpp + @brief 语法分析器(Pratt + 手动递归下降) 定义 + @author PuqiAR (im@puqiar.top) + @date 2026-02-14 +*/ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace Fig +{ + + class Parser + { + private: + Lexer &lexer; + SourceManager &srcManager; + + size_t index = 0; // token在buffer下标 + DynArray buffer; + + String fileName; + + bool isEOF = false; + + const Token &nextToken() + { + assert(!isEOF && "nextToken: eof but called nextToken"); + if (index + 1 < buffer.size()) + { + return buffer[++index]; + } + const auto &result = lexer.NextToken(); + if (!result) + { + ReportError(result.error(), srcManager); + std::exit(-1); + } + const Token &token = result.value(); + if (token.type == TokenType::EndOfFile) + { + isEOF = true; + } + buffer.push_back(token); + index++; + return token; + } + + inline const Token &prevToken() + { + return buffer[buffer.size() - 2]; + } + + inline const Token ¤tToken() + { + if (buffer.empty()) + { + return nextToken(); + } + return buffer.back(); + } + + const Token &peekToken(size_t lookahead = 1) + { + assert(!isEOF && "peekToken: eof but called peekToken"); + + size_t peekIndex = index + lookahead; + while (peekIndex >= buffer.size() && !isEOF) + { + const auto &result = lexer.NextToken(); + if (!result) + { + ReportError(result.error(), srcManager); + std::abort(); + } + const Token &token = result.value(); + if (token.type == TokenType::EndOfFile) + { + isEOF = true; + } + buffer.push_back(token); + } + if (peekIndex >= buffer.size()) // 没有那么多token + { + return buffer.back(); // back是EOF Token + } + return buffer[peekIndex]; + } + + inline Token consumeToken() + { + if (isEOF) + return buffer.back(); + Token current = currentToken(); + nextToken(); + return current; + } + + public: + enum class State : std::uint8_t + { + Standby, + + ParsingLiteralExpr, + ParsingIdentiExpr, + + ParsingInfixExpr, + ParsingPrefixExpr, + } state; + + Parser(Lexer &_lexer, SourceManager &_srcManager, String _fileName) : + lexer(_lexer), srcManager(_srcManager), fileName(std::move(_fileName)) + { + state = State::Standby; + } + + private: + SourceLocation makeSourcelocation(const Token &tok) + { + auto [line, column] = srcManager.GetLineColumn(tok.index); + return SourceLocation( + SourcePosition( + line, + column, + tok.length + ), fileName, "[internal parser]", magic_enum::enum_name(state).data()); + } + + Result parseLiteralExpr(); // 当前token为literal时调用 + Result parseIdentiExpr(); // 当前token为Identifier调用 + + Result parseInfixExpr(Expr *); // 由 parseExpression递归调用, 当前token为op + Result parsePrefixExpr(); // 由 parseExpression递归调用, 当前token为op + + std::unordered_set getTerminators(); // 返回当前state的终止条件(终止符) + bool shouldTerminate(); // 通过state判断该不该终止表达式解析 + + public: + Result parseExpression(BindingPower = 0); + + DynArray parseAll(); + }; +}; // namespace Fig \ No newline at end of file diff --git a/src/Parser/ParserTest.cpp b/src/Parser/ParserTest.cpp new file mode 100644 index 0000000..4db3de3 --- /dev/null +++ b/src/Parser/ParserTest.cpp @@ -0,0 +1,29 @@ +#include +#include + +int main() +{ + using namespace Fig; + + String fileName = "test.fig"; + String filePath = "T:/Files/Maker/Code/MyCodingLanguage/The Fig Project/Fig/test.fig"; + SourceManager srcManager(filePath); + + String source = srcManager.Read(); + if (!srcManager.read) + { + std::cerr << "Couldn't read file"; + return 1; + } + + Lexer lexer(source, fileName); + Parser parser(lexer, srcManager, fileName); + const auto &result = parser.parseExpression(); + if (!result) + { + ReportError(result.error(), srcManager); + return 1; + } + Expr *expr = *result; + std::cout << expr->toString() << '\n'; +} \ No newline at end of file diff --git a/src/SourceManager/SourceManager.hpp b/src/SourceManager/SourceManager.hpp index a49db27..21c4736 100644 --- a/src/SourceManager/SourceManager.hpp +++ b/src/SourceManager/SourceManager.hpp @@ -20,6 +20,29 @@ namespace Fig String filePath; String source; std::vector lines; + std::vector lineStartIndex; // 每行在整个源字符串中的起始 index + + void preprocessLineIndices() + { + lineStartIndex.clear(); + lineStartIndex.push_back(0); + + for (size_t i = 0; i < source.length(); ++i) + { + if (source[i] == U'\n') + { + lineStartIndex.push_back(i + 1); + } + else if (source[i] == U'\r') + { + // 处理 CRLF,只在 \n 处记录 + if (i + 1 < source.length() && source[i + 1] == U'\n') + continue; + + lineStartIndex.push_back(i + 1); + } + } + } public: bool read = false; @@ -37,7 +60,12 @@ namespace Fig source += line + '\n'; lines.push_back(String(line)); } + if (lines.empty()) + { + lines.push_back(String()); // 填充一个空的 + } read = true; + preprocessLineIndices(); return source; } @@ -49,7 +77,7 @@ namespace Fig return _line <= lines.size() && _line >= 1; } - String GetLine(size_t _line) const + const String &GetLine(size_t _line) const { assert(_line <= lines.size() && "SourceManager: GetLine failed, index out of range"); return lines[_line - 1]; @@ -64,5 +92,36 @@ namespace Fig { return source; } + + std::pair GetLineColumn(size_t index) const + { + if (lineStartIndex.empty()) + { + return {1, 1}; + } + + // clamp index 到合法范围(Parser报错可能传入EOF位置) + // size_t lastLine = lineStartIndex.size() - 1; + if (index < lineStartIndex[0]) + { + return {1, 1}; + } + + // upper_bound 找到第一个 > index 的行起点 + auto it = std::ranges::upper_bound(lineStartIndex.begin(), lineStartIndex.end(), index); + + size_t line; + if (it == lineStartIndex.begin()) + { + line = 0; + } + else + { + line = static_cast(it - lineStartIndex.begin() - 1); + } + + size_t column = index - lineStartIndex[line] + 1; + return {line + 1, column}; + } }; }; \ No newline at end of file diff --git a/src/Token/Token.hpp b/src/Token/Token.hpp index dc982b1..46b808e 100644 --- a/src/Token/Token.hpp +++ b/src/Token/Token.hpp @@ -111,12 +111,14 @@ namespace Fig NotEqual, // != LessEqual, // <= GreaterEqual, // >= + PlusEqual, // += MinusEqual, // -= AsteriskEqual, // *= SlashEqual, // /= PercentEqual, // %= CaretEqual, // ^= + DoublePlus, // ++ DoubleMinus, // -- DoubleAmpersand, // && @@ -150,5 +152,17 @@ namespace Fig return type == TokenType::LiteralNull || type == TokenType::LiteralTrue || type == TokenType::LiteralFalse || type == TokenType::LiteralNumber || type == TokenType::LiteralString; } + + Token &operator=(const Token &other) + { + if (this == &other) + { + return *this; + } + index = other.index; + length = other.length; + type = other.type; + return *this; + } }; } // namespace Fig \ No newline at end of file diff --git a/xmake.lua b/xmake.lua index 8b475d2..b1c40b9 100644 --- a/xmake.lua +++ b/xmake.lua @@ -35,12 +35,27 @@ target("LexerTest") add_files("src/Lexer/Lexer.cpp") add_files("src/Lexer/LexerTest.cpp") + +target("ParserTest") + add_files("src/Core/*.cpp") + add_files("src/Token/Token.cpp") + add_files("src/Error/Error.cpp") + add_files("src/Lexer/Lexer.cpp") + + add_files("src/Ast/Operator.cpp") + add_files("src/Parser/ExprParser.cpp") + add_files("src/Parser/Parser.cpp") + + add_files("src/Parser/ParserTest.cpp") + target("Fig") add_files("src/Core/*.cpp") add_files("src/Token/Token.cpp") - add_files("src/Error/Error.cpp") - add_files("src/Lexer/Lexer.cpp") + + add_files("src/Ast/Operator.cpp") + add_files("src/Parser/ExprParser.cpp") + add_files("src/Parser/Parser.cpp") add_files("src/main.cpp") \ No newline at end of file