//! GQL Parser (Recursive Descent)
//!
//! Parses GQL tokens into an AST according to ISO/IEC 39075:2024.
//! Entry point: Parser.parse() -> Query AST
const std = @import("std");
const lexer = @import("lexer.zig");
const ast = @import("ast.zig");
const Token = lexer.Token;
const TokenType = lexer.TokenType;
pub const Parser = struct {
tokens: []const Token,
pos: usize,
allocator: std.mem.Allocator,
const Self = @This();
pub fn init(tokens: []const Token, allocator: std.mem.Allocator) Self {
return Self{
.tokens = tokens,
.pos = 0,
.allocator = allocator,
};
}
/// Parse complete query
pub fn parse(self: *Self) !ast.Query {
var statements = std.ArrayList(ast.Statement).init(self.allocator);
errdefer {
for (statements.items) |*s| s.deinit();
statements.deinit();
}
while (!self.isAtEnd()) {
const stmt = try self.parseStatement();
try statements.append(stmt);
}
return ast.Query{
.allocator = self.allocator,
.statements = try statements.toOwnedSlice(),
};
}
// =========================================================================
// Statement parsing
// =========================================================================
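/// Dispatch on the statement's leading keyword: MATCH, CREATE, RETURN, or DELETE.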
fn parseStatement(self: *Self) !ast.Statement {
if (self.match(.match)) {
return ast.Statement{ .match = try self.parseMatchStatement() };
}
if (self.match(.create)) {
return ast.Statement{ .create = try self.parseCreateStatement() };
}
if (self.match(.return_keyword)) {
return ast.Statement{ .return_stmt = try self.parseReturnStatement() };
}
if (self.match(.delete)) {
return ast.Statement{ .delete = try self.parseDeleteStatement() };
}
return error.UnexpectedToken;
}
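/// MATCH <graph pattern> [WHERE <expression>]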
fn parseMatchStatement(self: *Self) !ast.MatchStatement {
const pattern = try self.parseGraphPattern();
errdefer pattern.deinit();
var where: ?ast.Expression = null;
if (self.match(.where)) {
where = try self.parseExpression();
}
return ast.MatchStatement{
.allocator = self.allocator,
.pattern = pattern,
.where = where,
};
}
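/// CREATE <graph pattern>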
fn parseCreateStatement(self: *Self) !ast.CreateStatement {
const pattern = try self.parseGraphPattern();
return ast.CreateStatement{
.allocator = self.allocator,
.pattern = pattern,
};
}
fn parseDeleteStatement(self: *Self) !ast.DeleteStatement {
// Simple: DELETE identifier [, identifier]*
var targets = std.ArrayList(ast.Identifier).init(self.allocator);
errdefer {
for (targets.items) |*t| t.deinit();
targets.deinit();
}
while (true) {
const ident = try self.parseIdentifier();
try targets.append(ident);
if (!self.match(.comma)) break;
}
return ast.DeleteStatement{
.allocator = self.allocator,
.targets = try targets.toOwnedSlice(),
};
}
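/// RETURN <expression> [AS <alias>] [, <expression> [AS <alias>]]*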
fn parseReturnStatement(self: *Self) !ast.ReturnStatement {
var items = std.ArrayList(ast.ReturnItem).init(self.allocator);
errdefer {
for (items.items) |*i| i.deinit();
items.deinit();
}
while (true) {
const expr = try self.parseExpression();
var alias: ?ast.Identifier = null;
if (self.match(.as_keyword)) {
alias = try self.parseIdentifier();
}
try items.append(ast.ReturnItem{
.expression = expr,
.alias = alias,
});
if (!self.match(.comma)) break;
}
return ast.ReturnStatement{
.allocator = self.allocator,
.items = try items.toOwnedSlice(),
};
}
// =========================================================================
// Pattern parsing
// =========================================================================
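/// One or more comma-separated path patterns.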
fn parseGraphPattern(self: *Self) !ast.GraphPattern {
var paths = std.ArrayList(ast.PathPattern).init(self.allocator);
errdefer {
for (paths.items) |*p| p.deinit();
paths.deinit();
}
while (true) {
const path = try self.parsePathPattern();
try paths.append(path);
if (!self.match(.comma)) break;
}
return ast.GraphPattern{
.allocator = self.allocator,
.paths = try paths.toOwnedSlice(),
};
}
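/// A node followed by zero or more (edge, node) pairs, e.g. (a)-[e:TYPE]->(b).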
fn parsePathPattern(self: *Self) !ast.PathPattern {
var elements = std.ArrayList(ast.PathElement).init(self.allocator);
errdefer {
for (elements.items) |*e| e.deinit();
elements.deinit();
}
// Must start with a node
const node = try self.parseNodePattern();
try elements.append(ast.PathElement{ .node = node });
// Optional: edge - node - edge - node ...
while (self.check(.minus) or self.check(.arrow_left)) {
const edge = try self.parseEdgePattern();
try elements.append(ast.PathElement{ .edge = edge });
const next_node = try self.parseNodePattern();
try elements.append(ast.PathElement{ .node = next_node });
}
return ast.PathPattern{
.allocator = self.allocator,
.elements = try elements.toOwnedSlice(),
};
}
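/// Node pattern: '(' [variable] (':' label)* [property map] ')'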
fn parseNodePattern(self: *Self) !ast.NodePattern {
_ = try self.consume(.left_paren, "Expected '('");
// Optional variable: (n) or (:Label)
var variable: ?ast.Identifier = null;
if (self.check(.identifier)) {
variable = try self.parseIdentifier();
}
// Optional labels: (:Label1:Label2)
var labels = std.ArrayList(ast.Identifier).init(self.allocator);
errdefer {
for (labels.items) |*l| l.deinit();
labels.deinit();
}
while (self.match(.colon)) {
const label = try self.parseIdentifier();
try labels.append(label);
}
// Optional properties: ({key: value})
var properties: ?ast.PropertyMap = null;
if (self.check(.left_brace)) {
properties = try self.parsePropertyMap();
}
_ = try self.consume(.right_paren, "Expected ')'");
return ast.NodePattern{
.allocator = self.allocator,
.variable = variable,
.labels = try labels.toOwnedSlice(),
.properties = properties,
};
}
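/// Edge pattern: '-[' [variable] (':' type)* [property map] ['*' quantifier] ']->',
/// or the '<-[...]-'  form for incoming edges.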
fn parseEdgePattern(self: *Self) !ast.EdgePattern {
var direction: ast.EdgeDirection = .outgoing;
// Check for incoming: <-
if (self.match(.arrow_left)) {
direction = .incoming;
} else if (self.match(.minus)) {
direction = .outgoing;
}
// Edge details in brackets: -[r:TYPE]-
var variable: ?ast.Identifier = null;
var types = std.ArrayList(ast.Identifier).init(self.allocator);
errdefer {
for (types.items) |*t| t.deinit();
types.deinit();
}
var properties: ?ast.PropertyMap = null;
var quantifier: ?ast.Quantifier = null;
if (self.match(.left_bracket)) {
// Variable: [r]
if (self.check(.identifier)) {
variable = try self.parseIdentifier();
}
// Type: [:TRUST]
while (self.match(.colon)) {
const edge_type = try self.parseIdentifier();
try types.append(edge_type);
}
// Properties: [{level: 3}]
if (self.check(.left_brace)) {
properties = try self.parsePropertyMap();
}
// Quantifier: [*1..3]
if (self.match(.star)) {
quantifier = try self.parseQuantifier();
}
_ = try self.consume(.right_bracket, "Expected ']'");
}
// Arrow end
if (direction == .outgoing) {
_ = try self.consume(.arrow_right, "Expected '->'");
} else {
// Incoming already consumed <-, now just need -
_ = try self.consume(.minus, "Expected '-'");
}
return ast.EdgePattern{
.allocator = self.allocator,
.direction = direction,
.variable = variable,
.types = try types.toOwnedSlice(),
.properties = properties,
.quantifier = quantifier,
};
}
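/// Variable-length quantifier following '*': [min] ['..' [max]], e.g. *1..3.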
fn parseQuantifier(self: *Self) !ast.Quantifier {
var min: ?u32 = null;
var max: ?u32 = null;
if (self.check(.integer_literal)) {
min = try self.parseInteger();
}
// Range separator '..' is lexed as two consecutive '.' tokens; both are
// required, so a lone '.' is a parse error rather than being silently consumed.
if (self.match(.dot)) {
_ = try self.consume(.dot, "Expected '..'");
if (self.check(.integer_literal)) {
max = try self.parseInteger();
}
}
return ast.Quantifier{
.min = min,
.max = max,
};
}
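/// Property map: '{' key ':' expression (',' key ':' expression)* '}'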
fn parsePropertyMap(self: *Self) !ast.PropertyMap {
_ = try self.consume(.left_brace, "Expected '{'");
var entries = std.ArrayList(ast.PropertyEntry).init(self.allocator);
errdefer {
for (entries.items) |*e| e.deinit();
entries.deinit();
}
while (!self.check(.right_brace) and !self.isAtEnd()) {
const key = try self.parseIdentifier();
_ = try self.consume(.colon, "Expected ':'");
const value = try self.parseExpression();
try entries.append(ast.PropertyEntry{
.key = key,
.value = value,
});
if (!self.match(.comma)) break;
}
_ = try self.consume(.right_brace, "Expected '}'");
return ast.PropertyMap{
.allocator = self.allocator,
.entries = try entries.toOwnedSlice(),
};
}
// =========================================================================
// Expression parsing
// =========================================================================
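// Precedence ladder (lowest binds loosest):
//   expression := or_expr
//   or_expr    := and_expr (OR and_expr)*
//   and_expr   := comparison (AND comparison)*
//   comparison := additive [(= | <> | < | <= | > | >=) additive]
//   additive   := primary   (arithmetic operators not yet implemented)
//   primary    := literal | identifier | identifier '.' identifier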
fn parseExpression(self: *Self) !ast.Expression {
return try self.parseOrExpression();
}
fn parseOrExpression(self: *Self) !ast.Expression {
var left = try self.parseAndExpression();
while (self.match(.or_keyword)) {
const right = try self.parseAndExpression();
// Create binary op
const left_ptr = try self.allocator.create(ast.Expression);
left_ptr.* = left;
const right_ptr = try self.allocator.create(ast.Expression);
right_ptr.* = right;
left = ast.Expression{
.binary_op = ast.BinaryOp{
.left = left_ptr,
.op = .or_op,
.right = right_ptr,
},
};
}
return left;
}
fn parseAndExpression(self: *Self) !ast.Expression {
var left = try self.parseComparison();
while (self.match(.and_keyword)) {
const right = try self.parseComparison();
const left_ptr = try self.allocator.create(ast.Expression);
left_ptr.* = left;
const right_ptr = try self.allocator.create(ast.Expression);
right_ptr.* = right;
left = ast.Expression{
.binary_op = ast.BinaryOp{
.left = left_ptr,
.op = .and_op,
.right = right_ptr,
},
};
}
return left;
}
fn parseComparison(self: *Self) !ast.Expression {
var left = try self.parseAdditive();
const op: ?ast.ComparisonOperator = blk: {
if (self.match(.eq)) break :blk .eq;
if (self.match(.neq)) break :blk .neq;
if (self.match(.lt)) break :blk .lt;
if (self.match(.lte)) break :blk .lte;
if (self.match(.gt)) break :blk .gt;
if (self.match(.gte)) break :blk .gte;
break :blk null;
};
if (op) |comparison_op| {
const right = try self.parseAdditive();
const left_ptr = try self.allocator.create(ast.Expression);
left_ptr.* = left;
const right_ptr = try self.allocator.create(ast.Expression);
right_ptr.* = right;
return ast.Expression{
.comparison = ast.Comparison{
.left = left_ptr,
.op = comparison_op,
.right = right_ptr,
},
};
}
return left;
}
fn parseAdditive(self: *Self) !ast.Expression {
// Simplified: additive operators ('+'/'-') are not implemented yet; fall through to primary.
return try self.parsePrimary();
}
fn parsePrimary(self: *Self) !ast.Expression {
if (self.match(.null_keyword)) {
return ast.Expression{ .literal = ast.Literal{ .null = {} } };
}
if (self.match(.true_keyword)) {
return ast.Expression{ .literal = ast.Literal{ .boolean = true } };
}
if (self.match(.false_keyword)) {
return ast.Expression{ .literal = ast.Literal{ .boolean = false } };
}
if (self.match(.string_literal)) {
return ast.Expression{ .literal = ast.Literal{ .string = self.previous().text } };
}
if (self.check(.integer_literal)) {
const val = try self.parseInteger();
return ast.Expression{ .literal = ast.Literal{ .integer = @intCast(val) } };
}
// Property access or identifier
if (self.check(.identifier)) {
const ident = try self.parseIdentifier();
if (self.match(.dot)) {
const property = try self.parseIdentifier();
return ast.Expression{
.property_access = ast.PropertyAccess{
.object = ident,
.property = property,
},
};
}
return ast.Expression{ .identifier = ident };
}
return error.UnexpectedToken;
}
// =========================================================================
// Helpers
// =========================================================================
fn parseIdentifier(self: *Self) !ast.Identifier {
const tok = try self.consume(.identifier, "Expected identifier");
return ast.Identifier{ .name = tok.text };
}
fn parseInteger(self: *Self) !u32 {
const tok = try self.consume(.integer_literal, "Expected integer");
return try std.fmt.parseInt(u32, tok.text, 10);
}
fn match(self: *Self, tok_type: TokenType) bool {
if (self.check(tok_type)) {
_ = self.advance();
return true;
}
return false;
}
fn check(self: *Self, tok_type: TokenType) bool {
if (self.isAtEnd()) return false;
return self.peek().type == tok_type;
}
fn advance(self: *Self) Token {
if (!self.isAtEnd()) self.pos += 1;
return self.previous();
}
fn isAtEnd(self: *Self) bool {
return self.peek().type == .eof;
}
fn peek(self: *Self) Token {
return self.tokens[self.pos];
}
fn previous(self: *Self) Token {
return self.tokens[self.pos - 1];
}
fn consume(self: *Self, tok_type: TokenType, message: []const u8) !Token {
if (self.check(tok_type)) return self.advance();
std.log.err("{s}, got {s}", .{ message, @tagName(self.peek().type) });
return error.UnexpectedToken;
}
};
// ============================================================================
// TESTS
// ============================================================================
test "Parser: simple MATCH" {
const allocator = std.testing.allocator;
const source = "MATCH (n:Identity) RETURN n";
var lex = lexer.Lexer.init(source, allocator);
const tokens = try lex.tokenize();
defer allocator.free(tokens);
var parser = Parser.init(tokens, allocator);
const query = try parser.parse();
defer query.deinit();
try std.testing.expectEqual(@as(usize, 2), query.statements.len);
try std.testing.expect(query.statements[0] == .match);
try std.testing.expect(query.statements[1] == .return_stmt);
}
test "Parser: path pattern" {
const allocator = std.testing.allocator;
const source = "MATCH (a)-[t:TRUST]->(b) RETURN a, b";
var lex = lexer.Lexer.init(source, allocator);
const tokens = try lex.tokenize();
defer allocator.free(tokens);
var parser = Parser.init(tokens, allocator);
const query = try parser.parse();
defer query.deinit();
try std.testing.expectEqual(@as(usize, 1), query.statements[0].match.pattern.paths.len);
}
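// The sketches below exercise WHERE, CREATE, and DELETE parsing. They assume
// the lexer in lexer.zig emits the token types referenced by the parser above
// (.where, .gte, .string_literal, ...) for these inputs; adjust the source
// strings if the lexer's keyword or literal syntax differs.
test "Parser: MATCH with WHERE comparison" {
    const allocator = std.testing.allocator;
    const source = "MATCH (n:Identity) WHERE n.level >= 3 RETURN n";
    var lex = lexer.Lexer.init(source, allocator);
    const tokens = try lex.tokenize();
    defer allocator.free(tokens);
    var parser = Parser.init(tokens, allocator);
    const query = try parser.parse();
    defer query.deinit();
    // MATCH ... WHERE is one statement, RETURN the second.
    try std.testing.expectEqual(@as(usize, 2), query.statements.len);
    // The WHERE clause parses into a comparison expression (n.level >= 3).
    try std.testing.expect(query.statements[0].match.where != null);
    try std.testing.expect(query.statements[0].match.where.? == .comparison);
}
test "Parser: CREATE and DELETE" {
    const allocator = std.testing.allocator;
    const source = "CREATE (n:Identity {name: \"alice\"}) DELETE n";
    var lex = lexer.Lexer.init(source, allocator);
    const tokens = try lex.tokenize();
    defer allocator.free(tokens);
    var parser = Parser.init(tokens, allocator);
    const query = try parser.parse();
    defer query.deinit();
    try std.testing.expectEqual(@as(usize, 2), query.statements.len);
    try std.testing.expect(query.statements[0] == .create);
    try std.testing.expect(query.statements[1] == .delete);
    // DELETE lists a single identifier target.
    try std.testing.expectEqual(@as(usize, 1), query.statements[1].delete.targets.len);
}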