完成Parser定义以及表达式解析

This commit is contained in:
2026-02-14 23:03:46 +08:00
parent 35e479fd05
commit 878157c2fc
19 changed files with 771 additions and 102 deletions

View File

@@ -6,3 +6,9 @@
*/
#pragma once
#include <Ast/Expr/IdentiExpr.hpp>
#include <Ast/Expr/InfixExpr.hpp>
#include <Ast/Expr/LiteralExpr.hpp>
#include <Ast/Expr/PrefixExpr.hpp>

View File

@@ -21,14 +21,15 @@ namespace Fig
IdentiExpr, // 标识符表达式
LiteralExpr, // 字面量表达式
UnaryExpr, // 一元表达式
BinaryExpr, // 二元表达式
TernaryExpr, // 三元表达式
PrefixExpr, // 一元 前缀表达式
InfixExpr, // 二元 中缀表达式
};
/// Common base of every AST node.
struct AstNode
{
    AstType        type = AstType::AstNode; // kind tag; derived constructors overwrite it
    SourceLocation location;                // source location associated with this node

    // This is a polymorphic base (it declares a virtual function), so the
    // destructor must be virtual: otherwise destroying a derived node through
    // an AstNode*/Expr* is undefined behaviour.
    virtual ~AstNode() = default;

    /// Human-readable rendering of the node.
    virtual String toString() const = 0;
};
struct Expr : public AstNode

View File

@@ -6,7 +6,9 @@
*/
#pragma once
#include <Ast/Base.hpp>
#include <Deps/Deps.hpp>
namespace Fig
{
@@ -25,5 +27,10 @@ namespace Fig
name = std::move(_name);
location = std::move(_loc);
}
/// Renders this node as `<IdentiExpr: NAME>`, where NAME is the stored identifier text.
virtual String toString() const override
{
    String rendered = std::format("<IdentiExpr: {}>", name);
    return rendered;
}
};
};

View File

@@ -0,0 +1,39 @@
/*!
@file src/Ast/Expr/InfixExpr.hpp
@brief 中缀表达式定义
@author PuqiAR (im@puqiar.top)
@date 2026-02-14
*/
#pragma once
#include <Ast/Base.hpp>
#include <Ast/Operator.hpp>
#include <Deps/Deps.hpp>
namespace Fig
{
/// Binary (infix) expression node: `left op right`.
///
/// The operand pointers are raw and are not released by this type
/// (there is no destructor); whoever allocates the nodes owns them.
struct InfixExpr final : Expr
{
    Expr          *left  = nullptr; // left operand; null only on a default-constructed node
    BinaryOperator op{};            // value-initialised so a default-constructed node is never indeterminate
    Expr          *right = nullptr; // right operand; null only on a default-constructed node

    /// Creates an empty node that is only tagged as InfixExpr.
    InfixExpr()
    {
        type = AstType::InfixExpr;
    }

    /// Creates `_left _op _right`.
    /// The node takes its source location from the left operand when one is given.
    InfixExpr(Expr *_left, BinaryOperator _op, Expr *_right) :
        left(_left), op(_op), right(_right)
    {
        type = AstType::InfixExpr;
        if (left != nullptr) // guard: do not dereference a missing operand
        {
            location = left->location;
        }
    }

    /// Renders the node as `<InfixExpr: 'LEFT' OP 'RIGHT'>`.
    /// A missing operand is rendered as `<null>` instead of being dereferenced.
    virtual String toString() const override
    {
        return std::format("<InfixExpr: '{}' {} '{}'>",
                           left ? left->toString() : String("<null>"),
                           magic_enum::enum_name(op),
                           right ? right->toString() : String("<null>"));
    }
};
}; // namespace Fig

View File

@@ -10,6 +10,8 @@
#include <Ast/Base.hpp>
#include <Token/Token.hpp>
#include <Deps/Deps.hpp>
namespace Fig
{
struct LiteralExpr final : Expr
@@ -20,9 +22,15 @@ namespace Fig
{
type = AstType::LiteralExpr;
}
LiteralExpr(const Token& token) : token(token)
LiteralExpr(const Token& token, SourceLocation _location) : token(token)
{
type = AstType::LiteralExpr;
location = std::move(_location);
}
/// Renders this node as `<LiteralExpr: KIND>`, where KIND is the enumerator
/// name of the stored token's type.
virtual String toString() const override
{
    const auto kindName = magic_enum::enum_name(token.type);
    return std::format("<LiteralExpr: {}>", kindName);
}
};
}; // namespace Fig

View File

@@ -0,0 +1,39 @@
/*!
@file src/Ast/Expr/PrefixExpr.hpp
@brief 前缀表达式定义
@author PuqiAR (im@puqiar.top)
@date 2026-02-14
*/
#pragma once
#include <Ast/Operator.hpp>
#include <Ast/Base.hpp>
#include <Deps/Deps.hpp>
namespace Fig
{
/// Unary prefix expression node: `op operand`.
///
/// The operand pointer is raw and is not released by this type
/// (there is no destructor); whoever allocates the nodes owns them.
struct PrefixExpr final : Expr
{
    UnaryOperator op{};               // value-initialised so a default-constructed node is never indeterminate
    Expr         *operand = nullptr;  // operand; null only on a default-constructed node

    /// Creates an empty node that is only tagged as PrefixExpr.
    PrefixExpr()
    {
        type = AstType::PrefixExpr;
    }

    /// Creates `_op _operand`.
    /// The node takes its source location from the operand when one is given.
    PrefixExpr(UnaryOperator _op, Expr *_operand) :
        op(_op), operand(_operand)
    {
        type = AstType::PrefixExpr;
        if (operand != nullptr) // guard: do not dereference a missing operand
        {
            location = operand->location;
        }
    }

    /// Renders the node as `<PrefixExpr: OP 'OPERAND'>`.
    /// A missing operand is rendered as `<null>` instead of being dereferenced.
    virtual String toString() const override
    {
        return std::format("<PrefixExpr: {} '{}'>",
                           magic_enum::enum_name(op),
                           operand ? operand->toString() : String("<null>"));
    }
};
};

View File

@@ -22,8 +22,10 @@ namespace Fig
HashMap<TokenType, BinaryOperator> &GetBinaryOpMap()
{
static HashMap<TokenType, BinaryOperator> binaryOpMap{{TokenType::Plus, BinaryOperator::Add},
static HashMap<TokenType, BinaryOperator> binaryOpMap{
{TokenType::Plus, BinaryOperator::Add},
{TokenType::Minus, BinaryOperator::Subtract},
{TokenType::Asterisk, BinaryOperator::Multiply},
{TokenType::Slash, BinaryOperator::Divide},
{TokenType::Percent, BinaryOperator::Modulo},
@@ -42,30 +44,54 @@ namespace Fig
{TokenType::Power, BinaryOperator::Power},
{TokenType::Assign, BinaryOperator::Assign},
{TokenType::PlusEqual, BinaryOperator::AddAssign},
{TokenType::MinusEqual, BinaryOperator::SubAssign},
{TokenType::AsteriskEqual, BinaryOperator::MultiplyAssign},
{TokenType::SlashEqual, BinaryOperator::DivideAssign},
{TokenType::PercentEqual, BinaryOperator::ModuloAssign},
{TokenType::CaretEqual, BinaryOperator::BitXorAssign},
{TokenType::Pipe, BinaryOperator::BitOr}, // '|' is bitwise OR; it was mapped to BitAnd like '&'
{TokenType::Ampersand, BinaryOperator::BitAnd},
{TokenType::ShiftLeft, BinaryOperator::ShiftLeft},
{TokenType::ShiftRight, BinaryOperator::ShiftRight}};
{TokenType::ShiftRight, BinaryOperator::ShiftRight},
{TokenType::Dot, BinaryOperator::MemberAccess},
};
return binaryOpMap;
}
// 赋值 < 三元 < 逻辑或 < 逻辑与 < 位运算 < 比较 < 位移 < 加减 < 乘除 < 幂 < 一元
// 赋值 < 三元 < 逻辑或 < 逻辑与 < 位运算 < 比较 < 位移 < 加减 < 乘除 < 幂 < 一元 < 成员访问 < (后缀)
/*
暂划分:
二元运算符0 - 20000
一元运算符20001 - 40000
后缀/成员/其他40001 - 60001
*/
HashMap<UnaryOperator, BindingPower> &GetUnaryOpBindingPowerMap()
{
static HashMap<UnaryOperator, BindingPower> unbpm{
{UnaryOperator::BitNot, 10000},
{UnaryOperator::Negate, 10000},
{UnaryOperator::Not, 10000},
{UnaryOperator::AddressOf, 10000},
{UnaryOperator::BitNot, 20001},
{UnaryOperator::Negate, 20001},
{UnaryOperator::Not, 20001},
{UnaryOperator::AddressOf, 20001},
};
return unbpm;
}
HashMap<BinaryOperator, BindingPower> &GetBinaryOpBindingPowerMap()
{
static HashMap<BinaryOperator, BindingPower> bnbpm{{BinaryOperator::Assign, 100},
static HashMap<BinaryOperator, BindingPower> bnbpm{
{BinaryOperator::Assign, 100},
{BinaryOperator::AddAssign, 100},
{BinaryOperator::SubAssign, 100},
{BinaryOperator::MultiplyAssign, 100},
{BinaryOperator::DivideAssign, 100},
{BinaryOperator::ModuloAssign, 100},
{BinaryOperator::BitXorAssign, 100},
{BinaryOperator::LogicalOr, 500},
{BinaryOperator::LogicalAnd, 550},
@@ -93,8 +119,51 @@ namespace Fig
{BinaryOperator::Divide, 4500},
{BinaryOperator::Power, 5000},
{BinaryOperator::MemberAccess, 40001},
};
return bnbpm;
}
/// Right binding power of a prefix (unary) operator.
/// Looked up in the table returned by GetUnaryOpBindingPowerMap().
/// NOTE(review): `at` presumably throws for an operator missing from the table
/// if HashMap follows std::unordered_map - confirm.
BindingPower GetUnaryOpRBp(UnaryOperator op)
{
    const auto &powerTable = GetUnaryOpBindingPowerMap();
    return powerTable.at(op);
}
/// Left binding power of an infix (binary) operator.
/// Looked up in the table returned by GetBinaryOpBindingPowerMap().
/// NOTE(review): `at` presumably throws for an operator missing from the table
/// if HashMap follows std::unordered_map - confirm.
BindingPower GetBinaryOpLBp(BinaryOperator op)
{
    const auto &powerTable = GetBinaryOpBindingPowerMap();
    return powerTable.at(op);
}
/// Right binding power of an infix (binary) operator, derived from its left
/// binding power.
///
/// - Assignment operators and Power are meant to be right-associative
///   (`a = b = c` should read as `a = (b = c)`): they return the left binding
///   power unchanged.
/// - Every other operator is meant to be left-associative
///   (`a * b * c` reads as `(a * b) * c`): it returns left binding power + 1.
///
/// NOTE(review): MemberAccess is not in the right-associative list below, so
/// `a.b.c` takes the left-associative branch.
BindingPower GetBinaryOpRBp(BinaryOperator op)
{
    const BindingPower leftPower = GetBinaryOpLBp(op);

    switch (op)
    {
        // right-associative group
        case BinaryOperator::Assign:
        case BinaryOperator::AddAssign:
        case BinaryOperator::SubAssign:
        case BinaryOperator::MultiplyAssign:
        case BinaryOperator::DivideAssign:
        case BinaryOperator::ModuloAssign:
        case BinaryOperator::BitXorAssign:
        case BinaryOperator::Power:
            return leftPower;

        // everything else: left-associative
        default:
            return leftPower + 1;
    }
}
bool IsTokenOp(TokenType type, bool binary /* = true*/)
{
if (binary)
@@ -103,4 +172,14 @@ namespace Fig
}
return GetUnaryOpMap().contains(type);
}
/// Maps a token to the unary operator registered for its type in GetUnaryOpMap().
/// Callers are expected to have checked IsTokenOp(type, false) first.
UnaryOperator TokenToUnaryOp(const Token &token)
{
    const auto &unaryOps = GetUnaryOpMap();
    return unaryOps.at(token.type);
}
/// Maps a token to the binary operator registered for its type in GetBinaryOpMap().
/// Callers are expected to have checked IsTokenOp(type) first.
BinaryOperator TokenToBinaryOp(const Token &token)
{
    const auto &binaryOps = GetBinaryOpMap();
    return binaryOps.at(token.type);
}
}; // namespace Fig

View File

@@ -35,22 +35,31 @@ namespace Fig
Greater, // 大于 >
LessEqual, // 小于等于 <=
GreaterEqual, // 大于等于 >=
Is, // is操作符
Is, // is操作符
LogicalAnd, // 逻辑与 && / and
LogicalOr, // 逻辑或 || / or
Power, // 幂运算 **
Assign, // 赋值(修改) =
Assign, // 赋值(修改) =
AddAssign, // +=
SubAssign, // -=
MultiplyAssign, // *=
DivideAssign, // /=
ModuloAssign, // %=
BitXorAssign, // ^=
// 位运算
BitAnd, // 按位与 &
BitOr, // 按位或 |
BitAnd, // 按位与 &
BitOr, // 按位或 |
BitXor, // 异或 ^
ShiftLeft, // 左移
ShiftRight, // 右移
// 成员访问
MemberAccess, // .
};
using BindingPower = unsigned int;
@@ -61,5 +70,13 @@ namespace Fig
HashMap<UnaryOperator, BindingPower> &GetUnaryOpBindingPowerMap();
HashMap<BinaryOperator, BindingPower> &GetBinaryOpBindingPowerMap();
BindingPower GetUnaryOpRBp(UnaryOperator);
BindingPower GetBinaryOpLBp(BinaryOperator);
BindingPower GetBinaryOpRBp(BinaryOperator);
bool IsTokenOp(TokenType type, bool binary = true);
UnaryOperator TokenToUnaryOp(const Token &);
BinaryOperator TokenToBinaryOp(const Token &);
}; // namespace Fig

View File

@@ -13,7 +13,12 @@
#include <Deps/String/CharUtils.hpp>
#include <Deps/String/String.hpp>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <expected>
#include <format>
namespace Fig
{

View File

@@ -36,19 +36,21 @@ namespace Fig
const char *ErrorTypeToString(ErrorType type)
{
using enum ErrorType;
switch (type)
{
case ErrorType::UnusedSymbol: return "UnusedSymbol";
case UnusedSymbol: return "UnusedSymbol";
case ErrorType::MayBeNull: return "MaybeNull";
case MayBeNull: return "MaybeNull";
case ErrorType::UnterminatedString: return "UnterminatedString";
case ErrorType::UnterminatedComments: return "UnterminatedComments";
case ErrorType::InvalidNumberLiteral: return "InvalidNumberLiteral";
case ErrorType::InvalidCharacter: return "InvalidCharacter";
case Fig::ErrorType::InvalidSymbol: return "InvalidSymbol";
case UnterminatedString: return "UnterminatedString";
case UnterminatedComments: return "UnterminatedComments";
case InvalidNumberLiteral: return "InvalidNumberLiteral";
case InvalidCharacter: return "InvalidCharacter";
case InvalidSymbol: return "InvalidSymbol";
case ErrorType::SyntaxError: return "SyntaxError";
case ExpectedExpression: return "ExpectedExpression";
case SyntaxError: return "SyntaxError";
// default: return "Some one forgot to add case to `ErrorTypeToString`";
}

View File

@@ -7,10 +7,11 @@
#pragma once
#include <Deps/Deps.hpp>
#include <Core/SourceLocations.hpp>
#include <Deps/Deps.hpp>
#include <SourceManager/SourceManager.hpp>
#include <source_location>
namespace Fig
@@ -22,16 +23,23 @@ namespace Fig
*/
// Error kinds, grouped into numeric bands. ErrorLevel() in this header derives
// the severity from the numeric id: id <= 1000 -> level 1, 1001..2000 -> level 2,
// id > 2000 -> level 3. Enumerators without an explicit value continue from the
// previous one, so inserting a new enumerator shifts the ids of those after it.
enum class ErrorType : unsigned int
{
/* Minor (ids 0..1000) */
UnusedSymbol = 0,
/* Medium (ids 1001..2000) */
MayBeNull = 1001,
/* Critical (ids above 2000) */
// lexer errors
UnterminatedString = 2001,
UnterminatedComments,
InvalidNumberLiteral,
InvalidCharacter,
InvalidSymbol,
// parser errors
ExpectedExpression,
SyntaxError,
};
@@ -40,109 +48,118 @@ namespace Fig
struct Error
{
ErrorType type;
String message;
String suggestion;
String message;
String suggestion;
SourceLocation location;
SourceLocation location;
std::source_location thrower_loc;
Error() {}
Error(ErrorType _type,
const String &_message,
const String &_suggestion,
const SourceLocation &_location,
const std::source_location &_throwerloc = std::source_location::current())
Error(ErrorType _type,
const String &_message,
const String &_suggestion,
const SourceLocation &_location,
const std::source_location &_throwerloc = std::source_location::current())
{
type = _type;
message = _message;
suggestion = _suggestion;
location = _location;
type = _type;
message = _message;
suggestion = _suggestion;
location = _location;
thrower_loc = _throwerloc;
}
};
namespace TerminalColors
{
constexpr const char *Reset = "\033[0m";
constexpr const char *Bold = "\033[1m";
constexpr const char *Dim = "\033[2m";
constexpr const char *Italic = "\033[3m";
constexpr const char *Reset = "\033[0m";
constexpr const char *Bold = "\033[1m";
constexpr const char *Dim = "\033[2m";
constexpr const char *Italic = "\033[3m";
constexpr const char *Underline = "\033[4m";
constexpr const char *Blink = "\033[5m";
constexpr const char *Reverse = "\033[7m"; // 前背景反色
constexpr const char *Hidden = "\033[8m"; // 隐藏文本
constexpr const char *Strike = "\033[9m"; // 删除线
constexpr const char *Blink = "\033[5m";
constexpr const char *Reverse = "\033[7m"; // 前背景反色
constexpr const char *Hidden = "\033[8m"; // 隐藏文本
constexpr const char *Strike = "\033[9m"; // 删除线
constexpr const char *Black = "\033[30m";
constexpr const char *Red = "\033[31m";
constexpr const char *Green = "\033[32m";
constexpr const char *Yellow = "\033[33m";
constexpr const char *Blue = "\033[34m";
constexpr const char *Black = "\033[30m";
constexpr const char *Red = "\033[31m";
constexpr const char *Green = "\033[32m";
constexpr const char *Yellow = "\033[33m";
constexpr const char *Blue = "\033[34m";
constexpr const char *Magenta = "\033[35m";
constexpr const char *Cyan = "\033[36m";
constexpr const char *White = "\033[37m";
constexpr const char *Cyan = "\033[36m";
constexpr const char *White = "\033[37m";
constexpr const char *LightBlack = "\033[90m";
constexpr const char *LightRed = "\033[91m";
constexpr const char *LightGreen = "\033[92m";
constexpr const char *LightYellow = "\033[93m";
constexpr const char *LightBlue = "\033[94m";
constexpr const char *LightBlack = "\033[90m";
constexpr const char *LightRed = "\033[91m";
constexpr const char *LightGreen = "\033[92m";
constexpr const char *LightYellow = "\033[93m";
constexpr const char *LightBlue = "\033[94m";
constexpr const char *LightMagenta = "\033[95m";
constexpr const char *LightCyan = "\033[96m";
constexpr const char *LightWhite = "\033[97m";
constexpr const char *LightCyan = "\033[96m";
constexpr const char *LightWhite = "\033[97m";
constexpr const char *DarkRed = "\033[38;2;128;0;0m";
constexpr const char *DarkGreen = "\033[38;2;0;100;0m";
constexpr const char *DarkYellow = "\033[38;2;128;128;0m";
constexpr const char *DarkBlue = "\033[38;2;0;0;128m";
constexpr const char *DarkRed = "\033[38;2;128;0;0m";
constexpr const char *DarkGreen = "\033[38;2;0;100;0m";
constexpr const char *DarkYellow = "\033[38;2;128;128;0m";
constexpr const char *DarkBlue = "\033[38;2;0;0;128m";
constexpr const char *DarkMagenta = "\033[38;2;100;0;100m";
constexpr const char *DarkCyan = "\033[38;2;0;128;128m";
constexpr const char *DarkGray = "\033[38;2;64;64;64m";
constexpr const char *Gray = "\033[38;2;128;128;128m";
constexpr const char *Silver = "\033[38;2;192;192;192m";
constexpr const char *DarkCyan = "\033[38;2;0;128;128m";
constexpr const char *DarkGray = "\033[38;2;64;64;64m";
constexpr const char *Gray = "\033[38;2;128;128;128m";
constexpr const char *Silver = "\033[38;2;192;192;192m";
constexpr const char *Navy = "\033[38;2;0;0;128m";
constexpr const char *RoyalBlue = "\033[38;2;65;105;225m";
constexpr const char *Navy = "\033[38;2;0;0;128m";
constexpr const char *RoyalBlue = "\033[38;2;65;105;225m";
constexpr const char *ForestGreen = "\033[38;2;34;139;34m";
constexpr const char *Olive = "\033[38;2;128;128;0m";
constexpr const char *Teal = "\033[38;2;0;128;128m";
constexpr const char *Maroon = "\033[38;2;128;0;0m";
constexpr const char *Purple = "\033[38;2;128;0;128m";
constexpr const char *Orange = "\033[38;2;255;165;0m";
constexpr const char *Gold = "\033[38;2;255;215;0m";
constexpr const char *Pink = "\033[38;2;255;192;203m";
constexpr const char *Crimson = "\033[38;2;220;20;60m";
constexpr const char *Olive = "\033[38;2;128;128;0m";
constexpr const char *Teal = "\033[38;2;0;128;128m";
constexpr const char *Maroon = "\033[38;2;128;0;0m";
constexpr const char *Purple = "\033[38;2;128;0;128m";
constexpr const char *Orange = "\033[38;2;255;165;0m";
constexpr const char *Gold = "\033[38;2;255;215;0m";
constexpr const char *Pink = "\033[38;2;255;192;203m";
constexpr const char *Crimson = "\033[38;2;220;20;60m";
constexpr const char *OnBlack = "\033[40m";
constexpr const char *OnRed = "\033[41m";
constexpr const char *OnGreen = "\033[42m";
constexpr const char *OnYellow = "\033[43m";
constexpr const char *OnBlue = "\033[44m";
constexpr const char *OnBlack = "\033[40m";
constexpr const char *OnRed = "\033[41m";
constexpr const char *OnGreen = "\033[42m";
constexpr const char *OnYellow = "\033[43m";
constexpr const char *OnBlue = "\033[44m";
constexpr const char *OnMagenta = "\033[45m";
constexpr const char *OnCyan = "\033[46m";
constexpr const char *OnWhite = "\033[47m";
constexpr const char *OnCyan = "\033[46m";
constexpr const char *OnWhite = "\033[47m";
constexpr const char *OnLightBlack = "\033[100m";
constexpr const char *OnLightRed = "\033[101m";
constexpr const char *OnLightGreen = "\033[102m";
constexpr const char *OnLightYellow = "\033[103m";
constexpr const char *OnLightBlue = "\033[104m";
constexpr const char *OnLightBlack = "\033[100m";
constexpr const char *OnLightRed = "\033[101m";
constexpr const char *OnLightGreen = "\033[102m";
constexpr const char *OnLightYellow = "\033[103m";
constexpr const char *OnLightBlue = "\033[104m";
constexpr const char *OnLightMagenta = "\033[105m";
constexpr const char *OnLightCyan = "\033[106m";
constexpr const char *OnLightWhite = "\033[107m";
constexpr const char *OnLightCyan = "\033[106m";
constexpr const char *OnLightWhite = "\033[107m";
constexpr const char *OnDarkBlue = "\033[48;2;0;0;128m";
constexpr const char *OnDarkBlue = "\033[48;2;0;0;128m";
constexpr const char *OnGreenYellow = "\033[48;2;173;255;47m";
constexpr const char *OnOrange = "\033[48;2;255;165;0m";
constexpr const char *OnGray = "\033[48;2;128;128;128m";
constexpr const char *OnOrange = "\033[48;2;255;165;0m";
constexpr const char *OnGray = "\033[48;2;128;128;128m";
}; // namespace TerminalColors
inline uint8_t ErrorLevel(ErrorType t)
{
unsigned int id = static_cast<int>(t);
if (id <= 1000) { return 1; }
if (id > 1000 && id <= 2000) { return 2; }
if (id > 2000) { return 3; }
if (id <= 1000)
{
return 1;
}
if (id > 1000 && id <= 2000)
{
return 2;
}
if (id > 2000)
{
return 3;
}
return 0;
}

View File

@@ -153,8 +153,9 @@ namespace Fig
fileName = std::move(_fileName);
}
SourceLocation makeSourceLocation(const SourcePosition &current_pos)
SourceLocation makeSourceLocation(SourcePosition current_pos)
{
current_pos.tok_length = 1;
return SourceLocation(
current_pos, fileName, "[internal lexer]", String(magic_enum::enum_name(state).data()));
}

159
src/Parser/ExprParser.cpp Normal file
View File

@@ -0,0 +1,159 @@
/*!
@file src/Parser/ExprParser.cpp
@brief 语法分析器(Pratt + 手动递归下降) 表达式解析实现 (pratt)
@author PuqiAR (im@puqiar.top)
@date 2026-02-14
*/
#include <Parser/Parser.hpp>
namespace Fig
{
Result<LiteralExpr *, Error> Parser::parseLiteralExpr() // 当前token为literal时调用
{
state = State::ParsingLiteralExpr;
const Token &literal_token = consumeToken();
LiteralExpr *node = new LiteralExpr(literal_token, makeSourcelocation(literal_token));
return node;
}
Result<IdentiExpr *, Error> Parser::parseIdentiExpr() // 当前token为Identifier调用
{
state = State::ParsingIdentiExpr;
const Token &identifier = consumeToken();
IdentiExpr *node =
new IdentiExpr(srcManager.GetSub(identifier.index, identifier.length), makeSourcelocation(identifier));
return node;
}
/// Parses the `op rhs` tail of an infix expression whose left operand is `lhs`.
/// Must be called while the current token is the binary operator.
/// The right operand is parsed with the operator's right binding power; an
/// error from that parse is passed on unchanged.
Result<InfixExpr *, Error> Parser::parseInfixExpr(Expr *lhs)
{
    state = State::ParsingInfixExpr;

    const Token          opToken = consumeToken();
    const BinaryOperator op      = TokenToBinaryOp(opToken);

    const auto &rhs = parseExpression(GetBinaryOpRBp(op));
    if (rhs)
    {
        return new InfixExpr(lhs, op, *rhs);
    }
    return std::unexpected(rhs.error());
}
/// Parses a prefix expression. Must be called while the current token is a
/// unary operator. The operand is parsed with the operator's right binding
/// power; an error from that parse is passed on unchanged.
Result<PrefixExpr *, Error> Parser::parsePrefixExpr()
{
    state = State::ParsingPrefixExpr;

    const Token         opToken = consumeToken();
    const UnaryOperator op      = TokenToUnaryOp(opToken);

    const auto &operand = parseExpression(GetUnaryOpRBp(op));
    if (operand)
    {
        return new PrefixExpr(op, *operand);
    }
    return std::unexpected(operand.error());
}
/// Returns the token types that end expression parsing in the current state.
/// Every state currently shares the same base set (end of file and semicolon);
/// state-specific sets can be added here later.
std::unordered_set<TokenType> Parser::getTerminators()
{
    static const std::unordered_set<TokenType> baseTerminators{TokenType::EndOfFile, TokenType::Semicolon};
    return baseTerminators;
}
bool Parser::shouldTerminate()
{
const Token &token = currentToken();
const auto &terminators = getTerminators();
return terminators.contains(token.type);
}
/// Pratt expression parser.
///
/// @param rbp  Right binding power of the operator to the left of this
///             sub-expression (0 at the top level). Parsing stops as soon as
///             the next infix operator binds less tightly than `rbp`.
/// @return     The parsed expression, or the first error encountered.
///
/// Right-associative operators report a right binding power equal to their
/// left binding power, so the loop must keep going on equality and stop only
/// when `rbp` is strictly greater. Stopping on `>=` made `a = b = c` parse as
/// `(a = b) = c`.
Result<Expr *, Error> Parser::parseExpression(BindingPower rbp)
{
    Expr *lhs   = nullptr;
    Token token = currentToken();

    // Prefix position: identifier, literal, or unary operator.
    if (token.isIdentifier())
    {
        const auto &lhs_result = parseIdentiExpr();
        if (!lhs_result)
        {
            return std::unexpected(lhs_result.error());
        }
        lhs = *lhs_result;
    }
    else if (token.isLiteral())
    {
        const auto &lhs_result = parseLiteralExpr();
        if (!lhs_result)
        {
            return std::unexpected(lhs_result.error());
        }
        lhs = *lhs_result;
    }
    else if (IsTokenOp(token.type, false)) // unary operator?
    {
        const auto &lhs_result = parsePrefixExpr();
        if (!lhs_result)
        {
            return std::unexpected(lhs_result.error());
        }
        lhs = *lhs_result;
    }

    if (!lhs)
    {
        return std::unexpected(Error(ErrorType::ExpectedExpression,
                                     "expected expression",
                                     "insert expressions",
                                     makeSourcelocation(prevToken())));
    }

    // Infix position: keep extending `lhs` while the next operator binds
    // tightly enough.
    while (true)
    {
        token = currentToken();
        if (shouldTerminate())
        {
            return lhs;
        }
        if (!IsTokenOp(token.type /* binary = true */))
        {
            // Not a binary operator: the expression ends here.
            // Postfix operators are not handled yet.
            return lhs;
        }

        const BinaryOperator op  = TokenToBinaryOp(token);
        const BindingPower   lbp = GetBinaryOpLBp(op);
        if (rbp > lbp)
        {
            // The operator on our left binds tighter than this one,
            // so `lhs` belongs to it.
            return lhs;
        }

        const auto &result = parseInfixExpr(lhs);
        if (!result)
        {
            return std::unexpected(result.error());
        }
        lhs = *result;
    }
}
}; // namespace Fig

18
src/Parser/Parser.cpp Normal file
View File

@@ -0,0 +1,18 @@
/*!
@file src/Parser/Parser.cpp
@brief 语法分析器(Pratt + 手动递归下降) 实现
@author PuqiAR (im@puqiar.top)
@date 2026-02-14
*/
#include <Parser/Parser.hpp>
namespace Fig
{
/// Parses the whole input into top-level nodes.
/// Not implemented yet: always returns an empty array.
DynArray<AstNode *> Parser::parseAll()
{
    return DynArray<AstNode *>{};
}
};

154
src/Parser/Parser.hpp Normal file
View File

@@ -0,0 +1,154 @@
/*!
@file src/Parser/Parser.hpp
@brief 语法分析器(Pratt + 手动递归下降) 定义
@author PuqiAR (im@puqiar.top)
@date 2026-02-14
*/
#pragma once
#include <Ast/Ast.hpp>
#include <Deps/Deps.hpp>
#include <Error/Error.hpp>
#include <Lexer/Lexer.hpp>
#include <Token/Token.hpp>
#include <cstddef>
#include <cstdlib>
#include <unordered_set>
namespace Fig
{
class Parser
{
private:
Lexer &lexer;
SourceManager &srcManager;
size_t index = 0; // token在buffer下标
DynArray<Token> buffer;
String fileName;
bool isEOF = false;
const Token &nextToken()
{
assert(!isEOF && "nextToken: eof but called nextToken");
if (index + 1 < buffer.size())
{
return buffer[++index];
}
const auto &result = lexer.NextToken();
if (!result)
{
ReportError(result.error(), srcManager);
std::exit(-1);
}
const Token &token = result.value();
if (token.type == TokenType::EndOfFile)
{
isEOF = true;
}
buffer.push_back(token);
index++;
return token;
}
inline const Token &prevToken()
{
return buffer[buffer.size() - 2];
}
inline const Token &currentToken()
{
if (buffer.empty())
{
return nextToken();
}
return buffer.back();
}
const Token &peekToken(size_t lookahead = 1)
{
assert(!isEOF && "peekToken: eof but called peekToken");
size_t peekIndex = index + lookahead;
while (peekIndex >= buffer.size() && !isEOF)
{
const auto &result = lexer.NextToken();
if (!result)
{
ReportError(result.error(), srcManager);
std::abort();
}
const Token &token = result.value();
if (token.type == TokenType::EndOfFile)
{
isEOF = true;
}
buffer.push_back(token);
}
if (peekIndex >= buffer.size()) // 没有那么多token
{
return buffer.back(); // back是EOF Token
}
return buffer[peekIndex];
}
inline Token consumeToken()
{
if (isEOF)
return buffer.back();
Token current = currentToken();
nextToken();
return current;
}
public:
enum class State : std::uint8_t
{
Standby,
ParsingLiteralExpr,
ParsingIdentiExpr,
ParsingInfixExpr,
ParsingPrefixExpr,
} state;
Parser(Lexer &_lexer, SourceManager &_srcManager, String _fileName) :
lexer(_lexer), srcManager(_srcManager), fileName(std::move(_fileName))
{
state = State::Standby;
}
private:
SourceLocation makeSourcelocation(const Token &tok)
{
auto [line, column] = srcManager.GetLineColumn(tok.index);
return SourceLocation(
SourcePosition(
line,
column,
tok.length
), fileName, "[internal parser]", magic_enum::enum_name(state).data());
}
Result<LiteralExpr *, Error> parseLiteralExpr(); // 当前token为literal时调用
Result<IdentiExpr *, Error> parseIdentiExpr(); // 当前token为Identifier调用
Result<InfixExpr *, Error> parseInfixExpr(Expr *); // 由 parseExpression递归调用, 当前token为op
Result<PrefixExpr *, Error> parsePrefixExpr(); // 由 parseExpression递归调用, 当前token为op
std::unordered_set<TokenType> getTerminators(); // 返回当前state的终止条件(终止符)
bool shouldTerminate(); // 通过state判断该不该终止表达式解析
public:
Result<Expr *, Error> parseExpression(BindingPower = 0);
DynArray<AstNode *> parseAll();
};
}; // namespace Fig

29
src/Parser/ParserTest.cpp Normal file
View File

@@ -0,0 +1,29 @@
#include <Parser/Parser.hpp>
#include <iostream>
int main()
{
using namespace Fig;
String fileName = "test.fig";
String filePath = "T:/Files/Maker/Code/MyCodingLanguage/The Fig Project/Fig/test.fig";
SourceManager srcManager(filePath);
String source = srcManager.Read();
if (!srcManager.read)
{
std::cerr << "Couldn't read file";
return 1;
}
Lexer lexer(source, fileName);
Parser parser(lexer, srcManager, fileName);
const auto &result = parser.parseExpression();
if (!result)
{
ReportError(result.error(), srcManager);
return 1;
}
Expr *expr = *result;
std::cout << expr->toString() << '\n';
}

View File

@@ -20,6 +20,29 @@ namespace Fig
String filePath;
String source;
std::vector<String> lines;
std::vector<size_t> lineStartIndex; // 每行在整个源字符串中的起始 index
// Rebuilds `lineStartIndex`: entry k is the index in `source` at which line k
// (0-based) starts. A line ends at '\n' or at a '\r' that is not immediately
// followed by '\n'; for a CRLF pair only the '\n' records the next line start.
void preprocessLineIndices()
{
    lineStartIndex.clear();
    lineStartIndex.push_back(0); // the first line always starts at index 0

    for (size_t pos = 0; pos < source.length(); ++pos)
    {
        const bool isLineFeed = (source[pos] == U'\n');
        const bool isLoneCarriageReturn =
            !isLineFeed && source[pos] == U'\r'
            && !(pos + 1 < source.length() && source[pos + 1] == U'\n');

        if (isLineFeed || isLoneCarriageReturn)
        {
            lineStartIndex.push_back(pos + 1);
        }
    }
}
public:
bool read = false;
@@ -37,7 +60,12 @@ namespace Fig
source += line + '\n';
lines.push_back(String(line));
}
if (lines.empty())
{
lines.push_back(String()); // 填充一个空的
}
read = true;
preprocessLineIndices();
return source;
}
@@ -49,7 +77,7 @@ namespace Fig
return _line <= lines.size() && _line >= 1;
}
String GetLine(size_t _line) const
const String &GetLine(size_t _line) const
{
assert(_line <= lines.size() && "SourceManager: GetLine failed, index out of range");
return lines[_line - 1];
@@ -64,5 +92,36 @@ namespace Fig
{
return source;
}
std::pair<size_t, size_t> GetLineColumn(size_t index) const
{
if (lineStartIndex.empty())
{
return {1, 1};
}
// clamp index 到合法范围Parser报错可能传入EOF位置
// size_t lastLine = lineStartIndex.size() - 1;
if (index < lineStartIndex[0])
{
return {1, 1};
}
// upper_bound 找到第一个 > index 的行起点
auto it = std::ranges::upper_bound(lineStartIndex.begin(), lineStartIndex.end(), index);
size_t line;
if (it == lineStartIndex.begin())
{
line = 0;
}
else
{
line = static_cast<size_t>(it - lineStartIndex.begin() - 1);
}
size_t column = index - lineStartIndex[line] + 1;
return {line + 1, column};
}
};
};

View File

@@ -111,12 +111,14 @@ namespace Fig
NotEqual, // !=
LessEqual, // <=
GreaterEqual, // >=
PlusEqual, // +=
MinusEqual, // -=
AsteriskEqual, // *=
SlashEqual, // /=
PercentEqual, // %=
CaretEqual, // ^=
DoublePlus, // ++
DoubleMinus, // --
DoubleAmpersand, // &&
@@ -150,5 +152,17 @@ namespace Fig
return type == TokenType::LiteralNull || type == TokenType::LiteralTrue || type == TokenType::LiteralFalse
|| type == TokenType::LiteralNumber || type == TokenType::LiteralString;
}
// Copy assignment: copies index, length and type from `other`.
// Self-assignment is a no-op.
Token &operator=(const Token &other)
{
    if (this != &other)
    {
        index  = other.index;
        length = other.length;
        type   = other.type;
    }
    return *this;
}
};
} // namespace Fig

View File

@@ -35,12 +35,27 @@ target("LexerTest")
add_files("src/Lexer/Lexer.cpp")
add_files("src/Lexer/LexerTest.cpp")
-- ParserTest: builds the parser test program (src/Parser/ParserTest.cpp)
-- together with the sources it depends on: core, token, error reporting,
-- lexer, operator tables and the parser itself.
target("ParserTest")
add_files("src/Core/*.cpp")
add_files("src/Token/Token.cpp")
add_files("src/Error/Error.cpp")
add_files("src/Lexer/Lexer.cpp")
add_files("src/Ast/Operator.cpp")
add_files("src/Parser/ExprParser.cpp")
add_files("src/Parser/Parser.cpp")
add_files("src/Parser/ParserTest.cpp")
target("Fig")
add_files("src/Core/*.cpp")
add_files("src/Token/Token.cpp")
add_files("src/Error/Error.cpp")
add_files("src/Lexer/Lexer.cpp")
add_files("src/Ast/Operator.cpp")
add_files("src/Parser/ExprParser.cpp")
add_files("src/Parser/Parser.cpp")
add_files("src/main.cpp")