Files
Fig/src/Parser/Parser.hpp
PuqiAR 680197aafe Refactor: 重构Parser和AST结构,以支持新的语言特性
- 更新了 ParserTest,以改进文件路径处理和输出格式。
- 在 StmtParser 中新增了 parseConstDecl 和 parseForStmt 方法,用于处理常量声明和 for 循环。
- TypeExpr现归类为Expr。TypeExpr属于Expr,语义阶段视为Expr
- 添加了新的 AST 节点:PostfixExpr、TernaryExpr、ForStmt 和 ImportStmt,用于表示新的语法结构。
2026-06-06 22:12:04 +08:00

306 lines
9.5 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*!
@file src/Parser/Parser.hpp
@brief 语法分析器(Pratt + 手动递归下降) 定义
@author PuqiAR (im@puqiar.top)
@date 2026-03-08
*/
#pragma once
#include <Ast/Ast.hpp>
#include <Deps/Deps.hpp>
#include <Error/Diagnostics.hpp>
#include <Error/Error.hpp>
#include <Lexer/Lexer.hpp>
#include <Token/Token.hpp>
#include <Utils/Arena.hpp>
#include <cstddef>
#include <cstdlib>
#include <unordered_set>
namespace Fig
{
class Parser
{
private:
// Arena owned by this parser.
// NOTE(review): not used anywhere in this header; presumably backs AST node allocation — confirm in the implementation.
Arena arena;
// Token source; read on demand through lexer.NextToken().
Lexer &lexer;
// Queried by makeSourceLocation() to turn a token's index into a line/column pair.
SourceManager &srcManager;
size_t index = 0; // Position of the current Token inside buffer
DynArray<Token> buffer; // Cache of the Tokens already read from the Lexer
// File name handed to SourceLocation by makeSourceLocation().
String fileName;
// Set once an EndOfFile token has been buffered or the lexer reported an error;
// nextToken()/peekToken() stop reading from the lexer afterwards.
bool isEOF = false;
// Diagnostics sink supplied by the caller.
// NOTE(review): not referenced in this header; usage lives elsewhere.
Diagnostics &diagnostics;
std::optional<Error> lexerError; // Cached lexer error, so a lexing failure does not require exit/abort
// Lazily steps the cursor to the next token (comments are skipped) and returns that token.
//
// - If a successor is already buffered, the cursor simply moves onto it.
// - If the stream has ended (isEOF), the token under the cursor is returned and nothing moves.
// - Otherwise the lexer is read until a non-comment token arrives; that token is appended to
//   the buffer and becomes the current token.
// A lexer failure is stored in lexerError, marks the stream as ended and is surfaced as a
// synthetic EndOfFile token, so callers never have to abort.
Token nextToken()
{
    // A successor is already buffered (for example by peekToken): just move onto it.
    if (index + 1 < buffer.size())
    {
        ++index;
        return buffer[index];
    }
    // The stream has ended: stay on the token under the cursor.
    if (isEOF)
    {
        return buffer[index];
    }

    // Appends a token to the buffer, puts the cursor on it and yields a copy of it.
    const auto appendAndSelect = [this](const Token &appended) -> Token {
        buffer.push_back(appended);
        index = buffer.size() - 1;
        return buffer[index];
    };

    for (;;)
    {
        auto lexed = lexer.NextToken();
        if (!lexed)
        {
            lexerError = lexed.error();
            isEOF = true;
            Token eof = {0, 0, TokenType::EndOfFile};
            return appendAndSelect(eof);
        }

        const Token &fresh = lexed.value();
        if (fresh.type == TokenType::Comments)
        {
            continue; // comments never reach the buffer
        }
        if (fresh.type == TokenType::EndOfFile)
        {
            isEOF = true;
        }
        return appendAndSelect(fresh);
    }
}
// Returns the token immediately before the cursor.
// When the cursor is at position 0 there is no predecessor, so the first token is returned.
// If nothing has been buffered yet, the first token is fetched through currentToken() first,
// so the buffer is never indexed while empty.
inline Token prevToken()
{
    if (buffer.empty())
    {
        return currentToken(); // primes buffer[0] and returns it
    }
    return (index > 0) ? buffer[index - 1] : buffer[0];
}
// Returns the token under the cursor. While the buffer is still empty the first token is
// pulled from the lexer through nextToken().
inline Token currentToken()
{
    return buffer.empty() ? nextToken() : buffer[index];
}
// Lazily peeks at the token `lookahead` positions after the cursor without consuming anything.
//
// The lexer is read (comments skipped) only as far as needed to fill the buffer up to the
// requested position. If the stream ends first, the last buffered token is returned.
// A lexer failure is stored in lexerError, marks the stream as ended and appends a synthetic
// EndOfFile token, which is then what the peek returns.
//
// @param lookahead number of tokens to look past the current one (default 1).
// @return a copy of the requested token, or of the last buffered token when out of range.
Token peekToken(size_t lookahead = 1)
{
    const size_t targetIndex = index + lookahead;
    while (targetIndex >= buffer.size() && !isEOF)
    {
        auto result = lexer.NextToken();
        if (!result)
        {
            lexerError = result.error();
            isEOF = true;
            Token eof = {0, 0, TokenType::EndOfFile};
            buffer.push_back(eof);
            // The cursor is intentionally left untouched: a peek must not consume the
            // tokens that are buffered but not yet parsed.
            break;
        }
        if (result->type == TokenType::Comments)
            continue; // comments never reach the buffer
        if (result->type == TokenType::EndOfFile)
            isEOF = true;
        buffer.push_back(*result);
    }
    return (targetIndex >= buffer.size()) ? buffer.back() : buffer[targetIndex];
}
// Returns the current token and then steps the cursor past it.
// An EndOfFile token is returned without advancing.
inline Token consumeToken()
{
    Token consumed = currentToken();
    const bool atEnd = (consumed.type == TokenType::EndOfFile);
    if (!atEnd)
    {
        nextToken();
    }
    return consumed;
}
// Consumes the current token if it has the given type.
// @return true when a token was consumed, false when the current token has another type
//         (in which case nothing is consumed).
inline bool match(TokenType type)
{
    if (currentToken().type != type)
    {
        return false;
    }
    consumeToken();
    return true;
}
public:
// One entry of the parser's state stack.
struct State
{
    // Kind of construct being parsed. The enumerator name is embedded into source
    // locations by makeSourceLocation().
    enum StateType : std::uint8_t
    {
        Standby,
        ParsingLiteralExpr,
        ParsingIdentiExpr,
        ParsingInfixExpr,
        ParsingPrefixExpr,
        ParsingIndexExpr,
        ParsingCallExpr,
        ParsingLambdaExpr,
        ParsingNewExpr,
        ParsingVarDecl,
        ParsingIf,
        ParsingWhile,
        ParsingFnDefStmt,
        ParsingReturn,
        ParsingBreak,
        ParsingContinue,
        ParsingStructDef,
        ParsingTypeParameters,
        ParsingNamedTypeExpr,
        ParsingFnTypeExpr,
    };

    // Current kind; a default-constructed State is Standby.
    StateType type = StateType::Standby;
    // Extra token types for which shouldTerminate() reports true while this state is on
    // the stack; empty by default.
    std::unordered_set<TokenType> stopAt{};
};
private:
const std::unordered_set<TokenType> &getBaseTerminators()
{
static const std::unordered_set<TokenType> baseTerminators{
TokenType::Semicolon,
TokenType::RightParen,
TokenType::RightBracket,
TokenType::RightBrace,
TokenType::Comma,
TokenType::EndOfFile};
return baseTerminators;
}
bool shouldTerminate()
{
const Token &token = currentToken();
if (getBaseTerminators().contains(token.type))
return true;
for (auto it = stateStack.rbegin(); it < stateStack.rend(); ++it)
{
if (it->stopAt.contains(token.type))
return true;
}
return false;
}
// Stack of parser states. The constructor pushes one default State; shouldTerminate()
// consults the stopAt set of every entry, and currentState() exposes the last one.
DynArray<State> stateStack;
// Returns the most recently pushed state.
// Precondition: stateStack is not empty (the constructor pushes an initial State).
State &currentState()
{
    State &innermost = stateStack.back();
    return innermost;
}
// Pushes a new state on top of the state stack.
// The argument is taken by value and moved into the stack.
void pushState(State _state)
{
    stateStack.push_back(std::move(_state));
}
// Removes the most recently pushed state; does nothing when the stack is already empty.
void popState()
{
    if (stateStack.empty())
    {
        return;
    }
    stateStack.pop_back();
}
// Scope guard for the parser state stack: pushes a State on construction and pops one
// on destruction, so every exit path of a parse function restores the stack.
// Copying is disabled because a copy would pop the stack a second time.
struct StateProtector
{
    Parser *p; // parser whose state stack is guarded; must outlive this object

    // @param _p parser to operate on (must not be null).
    // @param _s state to push; taken by value and moved onto the stack.
    StateProtector(Parser *_p, State _s) : p(_p)
    {
        p->pushState(std::move(_s));
    }

    StateProtector(const StateProtector &) = delete;
    StateProtector &operator=(const StateProtector &) = delete;

    ~StateProtector()
    {
        p->popState();
    }
};
// Builds the SourceLocation describing where `tok` sits in the source.
// The line/column pair comes from srcManager; the location also carries the file name,
// the fixed tag "[internal parser]" and the name of the current parser state.
SourceLocation makeSourceLocation(const Token &tok)
{
    auto [line, column] = srcManager.GetLineColumn(tok.index);
    // Guard against absurd column numbers produced by misaligned parsing, which could
    // otherwise make the terminal output run out of memory.
    if (column > 5000)
    {
        column = 1;
    }

    const auto stateName = magic_enum::enum_name(currentState().type);
    return SourceLocation(
        SourcePosition(line, column, tok.length),
        fileName,
        "[internal parser]",
        stateName.data());
}
// Builds a SyntaxError for a token that does not fit the grammar.
// @param stmt   name of the construct being parsed (inserted into the message).
// @param exp    description of what was expected.
// @param got    offending token; its type name goes into the message and its position
//               becomes the error location.
// @param th_loc C++ source location of the caller (defaults to the call site).
inline Error makeUnexpectTokenError(
    const String &stmt,
    const String &exp,
    const Token &got,
    std::source_location th_loc = std::source_location::current())
{
    const auto gotName = magic_enum::enum_name(got.type);
    return Error(
        ErrorType::SyntaxError,
        std::format("expect '{}' in {}, got `{}`", exp, stmt, gotName),
        "none",
        makeSourceLocation(got),
        th_loc);
}
// Builds the "expect ';' after statement" SyntaxError located at the current token.
// Forwards to the overload that takes an explicit token.
// @param th_loc C++ source location of the caller (defaults to the call site).
inline Error
makeExpectSemicolonError(std::source_location th_loc = std::source_location::current())
{
    return makeExpectSemicolonError(currentToken(), th_loc);
}
// Builds the "expect ';' after statement" SyntaxError located at the given token.
// @param token  token whose position is reported.
// @param th_loc C++ source location of the caller (defaults to the call site).
inline Error makeExpectSemicolonError(
    const Token &token, std::source_location th_loc = std::source_location::current())
{
    auto where = makeSourceLocation(token);
    return Error(
        ErrorType::SyntaxError,
        "expect ';' after statement",
        "insert ';'",
        std::move(where),
        th_loc);
}
// Parsing routines. All of them are only declared here and report failure through the
// Error alternative of Result.
// NOTE(review): their exact grammar and token-consumption contract live in the
// implementation files — confirm there before relying on the notes below.

// --- Type expressions (returned as plain Expr* nodes) ---
// Return type mirrors the typeParameters member of StructDefStmt.
Result<decltype(StructDefStmt::typeParameters), Error> parseTypeParameters();
Result<Expr *, Error> parseTypeExpr();
Result<Expr *, Error> parseNamedTypeExpr();
Result<Expr *, Error> parseFnTypeExpr();
// --- Expressions ---
// Takes a BindingPower that defaults to 0.
// NOTE(review): presumably the minimum binding power of the Pratt loop — confirm.
Result<Expr *, Error> parseExpression(BindingPower = 0);
Result<Expr *, Error> parseLiteralExpr();
Result<Expr *, Error> parseIdentiExpr();
Result<Expr *, Error> parsePrefixExpr();
// The Expr* parameter of the next three is presumably the already-parsed left-hand
// operand / callee — confirm.
Result<Expr *, Error> parseInfixExpr(Expr *);
Result<Expr *, Error> parseIndexExpr(Expr *);
Result<Expr *, Error> parseCallExpr(Expr *);
Result<Expr *, Error> parseNewExpr();
Result<Expr *, Error> parseLambdaExpr();
// --- Statements and declarations ---
// NOTE(review): the meaning of the unnamed bool parameters below is not visible in
// this header — confirm against the definitions.
Result<BlockStmt *, Error> parseBlockStmt();
Result<VarDecl *, Error> parseVarDecl(bool);
Result<VarDecl *, Error> parseConstDecl(bool);
Result<IfStmt *, Error> parseIfStmt();
Result<WhileStmt *, Error> parseWhileStmt();
Result<DynArray<Param *>, Error> parseFnParams();
Result<FnDefStmt *, Error> parseFnDefStmt(bool);
Result<ReturnStmt *, Error> parseReturnStmt();
Result<Stmt *, Error> parseStructDef(bool);
Result<Stmt *, Error> parseInterfaceDef(bool);
Result<Stmt *, Error> parseImpl();
Result<Stmt *, Error> parseForStmt();
Result<Stmt *, Error> parseImportStmt();
Result<Stmt *, Error> parseStatement();
public:
// Creates a parser bound to the given lexer, source manager and diagnostics sink.
// The three references are stored, not copied, so the referenced objects must outlive
// the parser. An initial default (Standby) state is pushed so currentState() is valid
// from the start.
// @param _lexer       token source.
// @param _src         source manager used to resolve line/column information.
// @param _file        file name recorded in source locations (moved into the parser).
// @param _diagnostics diagnostics sink.
Parser(Lexer &_lexer, SourceManager &_src, String _file, Diagnostics &_diagnostics) :
    lexer(_lexer),
    srcManager(_src),
    fileName(std::move(_file)),
    diagnostics(_diagnostics)
{
    pushState(State{});
}
// Public entry point of the parser; yields either a Program* or an Error.
// Declared only — the definition is not part of this header.
// NOTE(review): presumably parses the whole token stream and reports a cached
// lexerError — confirm in the implementation.
Result<Program *, Error> Parse();
};
// Replaces the stopAt set of the most recently pushed parser state with the given token types.
// Expands to a call of currentState(), so it only works where that member is in scope.
// NOTE(review): the expansion already ends in ';', so `SET_STOP_AT(x);` produces an extra
// empty statement and an unbraced `if (c) SET_STOP_AT(x); else ...` does not compile.
#define SET_STOP_AT(...) currentState().stopAt = {__VA_ARGS__};
} // namespace Fig