diff --git a/AST.py b/AST.py new file mode 100644 index 0000000..fb59e3e --- /dev/null +++ b/AST.py @@ -0,0 +1,149 @@ +from enum import Enum + +class NodeType(Enum): + INTEGER = 1 + STRING = 2 + NOTE = 3 + BLOCK = 4 + ARGUMENTS = 5 + IDENTIFIER = 6 + ASSIGN = 7 + PROGRAM = 8 + ASTERISK = 9 + COLON = 10 + FUNCTION_CALL = 11 + COMMA = 12 + PERCENT = 13 + +class Node: + def __init__(self): + self.children = [] + + def __repr__(self): + return self.__str__() + + def __len__(self): + return len(self.children) + + def __getitem__(self, index): + return self.children[index] + + def append(self, node): + self.children.append(node) + + def pop(self, index): + return self.children.pop(index) + +class Program(Node): + def __init__(self): + Node.__init__(self) + self.type = NodeType.PROGRAM + + def __str__(self): + return "Program:\n" + "\n".join([str(e) for e in self.children]) + + +class BlockNode(Node): + def __init__(self): + Node.__init__(self) + self.type = NodeType.BLOCK + + def __str__(self): + return "B{\n" + "\n".join([str(e) for e in self.children]) + "\n}" + + +class ArgumentsNode(Node): + def __init__(self): + Node.__init__(self) + self.type = NodeType.ARGUMENTS + + def __str__(self): + return "@(" + ", ".join([str(e) for e in self.children]) + ")" + +class IdentifierNode(Node): + def __init__(self, identifier): + self.type = NodeType.IDENTIFIER + self.identifier = identifier + + def __str__(self): + return f"L'{self.identifier}'" + +class AssignExpression(Node): + def __init__(self, target, value): + self.type = NodeType.ASSIGN + self.target = target + self.value = value + + def __str__(self): + return f"A[{self.target} = {self.value}]" + +class AsteriskStatementNode(Node): + def __init__(self, iterator, statement): + self.type = NodeType.ASTERISK + self.iterator = iterator + self.statement = statement + + def __str__(self): + return f"*({self.iterator}: {self.statement})" + +class ColonNode(Node): + def __init__(self, a, b): + self.type = NodeType.COLON + self.a = a + self.b = b + + def __str__(self): + return f":({self.a}, {self.b})" + +class ExpressionNode(Node): + def __str__(self): + return f"{self.__class__.__name__}('{self.value}')" + + +class IntegerLiteralNode(ExpressionNode): + def __init__(self, value): + self.type = NodeType.INTEGER + self.value = value + + def __str__(self): + return f"i'{self.value}'" + +class StringLiteralNode(ExpressionNode): + def __init__(self, value): + self.type = NodeType.STRING + self.value = value + + def __str__(self): + return f"s'{self.value}'" + +class NoteLiteralNode(ExpressionNode): + def __init__(self, value): + self.type = NodeType.NOTE + self.value = value + + def __str__(self): + return f"n'{self.value}'" + +class FunctionCallNode(Node): + def __init__(self, identifier, arguments): + self.type = NodeType.FUNCTION_CALL + self.identifier = identifier + self.arguments = arguments + + def __str__(self): + return f"F({self.identifier}: {self.arguments})" + +class CommaNode(Node): + def __init__(self): + self.type = NodeType.COMMA + + def __str__(self): + return "[,]" + +class PercentNode(Node): + def __init__(self, value): + self.type = NodeType.PERCENT + self.value = value + + def __str__(self): + return f"%'{self.value}'" diff --git a/parser.py b/parser.py index 149bd1f..b004a30 100644 --- a/parser.py +++ b/parser.py @@ -1,178 +1,20 @@ -from enum import Enum from tokenizer import * from note import * -import json +from AST import * class ParseError(Exception): pass -class NodeType(Enum): - INTEGER = 1 - STRING = 2 - NOTE = 3 - BLOCK = 4 - ARGUMENTS = 5 - IDENTIFIER = 6 - ASSIGN = 7 - PROGRAM = 8 - ASTERISK = 9 - COLON = 10 - FUNCTION_CALL = 11 - COMMA = 12 - PERCENT = 13 - -class Node: - def __init__(self): - self.children = [] - - def __repr__(self): - return self.__str__() - - def __len__(self): - return len(self.children) - - def __getitem__(self, index): - return self.children[index] - - def append(self, node): - self.children.append(node) - - def pop(self, index): - return self.children.pop(index) - -class Program(Node): - def __init__(self): - Node.__init__(self) - self.type = NodeType.PROGRAM - - def __str__(self): - return "Program:\n" + "\n".join([str(e) for e in self.children]) - - -class BlockNode(Node): - def __init__(self): - Node.__init__(self) - self.type = NodeType.BLOCK - - def __str__(self): - return "B{\n" + "\n".join([str(e) for e in self.children]) + "\n}" - - -class ArgumentsNode(Node): - def __init__(self): - Node.__init__(self) - self.type = NodeType.ARGUMENTS - - def __str__(self): - return "@(" + ", ".join([str(e) for e in self.children]) + ")" - -class IdentifierNode(Node): - def __init__(self, identifier): - self.type = NodeType.IDENTIFIER - self.identifier = identifier - - def __str__(self): - return f"L'{self.identifier}'" - -class AssignExpression(Node): - def __init__(self, target, value): - self.type = NodeType.ASSIGN - self.target = target - self.value = value - - def __str__(self): - return f"A[{self.target} = {self.value}]" - -class AsteriskStatementNode(Node): - def __init__(self, iterator, statement): - self.type = NodeType.ASTERISK - self.iterator = iterator - self.statement = statement - - def __str__(self): - return f"*({self.iterator}: {self.statement})" - -class ColonNode(Node): - def __init__(self, a, b): - self.type = NodeType.COLON - self.a = a - self.b = b - - def __str__(self): - return f":({self.a}, {self.b})" - -class ExpressionNode(Node): - def __str__(self): - return f"{self.__class__.__name__}('{self.value}')" - - -class IntegerLiteralNode(ExpressionNode): - def __init__(self, value): - self.type = NodeType.INTEGER - self.value = value - - def __str__(self): - return f"i'{self.value}'" - -class StringLiteralNode(ExpressionNode): - def __init__(self, value): - self.type = NodeType.STRING - self.value = value - - def __str__(self): - return f"s'{self.value}'" - -class NoteLiteralNode(ExpressionNode): - def __init__(self, value): - self.type = NodeType.NOTE - self.value = value - - def __str__(self): - return f"n'{self.value}'" - -class FunctionCallNode(Node): - def __init__(self, identifier, arguments): - self.type = NodeType.FUNCTION_CALL - self.identifier = identifier - self.arguments = arguments - - def __str__(self): - return f"F({self.identifier}: {self.arguments})" - -class CommaNode(Node): - def __init__(self): - self.type = NodeType.COMMA - - def __str__(self): - return "[,]" - -class PercentNode(Node): - def __init__(self, value): - self.type = NodeType.PERCENT - self.value = value - - def __str__(self): - return f"%'{self.value}'" - def expectedFound(expected, found): raise ParseError(f"Expected: {expected}, found: {found}") def parseInteger(input, parent): - if input[0].type != TokenType.INTEGER: - expectedFound(TokenType.INTEGER, input[0].type) - return IntegerLiteralNode(int(input.pop(0).value)) -def parseString(input, parent): - if input[0].type != TokenType.STRING: - expectedFound(TokenType.STRING, input[0].type) - +def parseString(input, parent): return StringLiteralNode(input.pop(0).value[1:-1]) -def parseNote(input, parent): - if input[0].type != TokenType.NOTE: - expectedFound(TokenType.NOTE, input[0].type) - +def parseNote(input, parent): value = input.pop(0).value consumedChars = 1 notePitch = value[consumedChars] @@ -196,21 +38,20 @@ def parseNote(input, parent): return NoteLiteralNode(Note(notePitch, octave, duration)) def parseComma(input, parent): - if input[0].type != TokenType.COMMA: - expectedFound(TokenType.COMMA, input[0].type) input.pop(0) return CommaNode() def parseArguments(input, parent): - if input[0].type != TokenType.OPEN_PAREN: - expectedFound(TokenType.OPEN_PAREN, input[0].type) - + #import pdb; pdb.set_trace() input.pop(0) arguments = ArgumentsNode() while input[0].type != TokenType.CLOSE_PAREN: - arguments.append(parseArrayElement(input, arguments)) #TODO: parseExpression + argument = parseArrayElement(input, arguments) + if argument is None: + raise ParseError(f"Line: {input[0].pos[0]+1}, col: {input[0].pos[1]+1}: Invalid function argument '{input[0].value}'") + arguments.append(argument) #TODO: parseExpression if input[0].type != TokenType.CLOSE_PAREN: expectedFound(TokenType.CLOSE_PAREN, input[0].type) @@ -219,9 +60,6 @@ def parseArguments(input, parent): return arguments def parseBlock(input, parent): - if input[0].type != TokenType.OPEN_BRACKET: - expectedFound(TokenType.OPEN_BRACKET, input[0].type) - input.pop(0) block = BlockNode() @@ -235,27 +73,8 @@ def parseBlock(input, parent): return block -def parseIdentifier(input, parent): - if input[0].type != TokenType.IDENTIFIER: - expectedFound(TokenType.IDENTIFIER, input[0].type) - - return IdentifierNode(input.pop(0).value) - -def parseAssign(input, parent): - if input[0].type != TokenType.ASSIGN: - expectedFound(TokenType.ASSIGN, input[0].type) - - input.pop(0) - - target = parent.pop(-1) - value = parseExpression(input, parent) #TODO: only expressions! - - return AssignExpression(target, value) def parseAsterisk(input, parent): - if input[0].type != TokenType.ASTERISK: - expectedFound(TokenType.ASTERISK, input[0].type) - input.pop(0) iterator = parent.pop(-1) @@ -264,9 +83,6 @@ def parseAsterisk(input, parent): return AsteriskStatementNode(iterator, value) def parseColon(input, parent): - if input[0].type != TokenType.COLON: - expectedFound(TokenType.COLON, input[0].type) - input.pop(0) a = parent.pop(-1) @@ -274,28 +90,29 @@ def parseColon(input, parent): return ColonNode(a, b) -def parseFunctionCallOrIdentifier(input, parent): - if input[0].type != TokenType.IDENTIFIER: - expectedFound(TokenType.IDENTIFIER, input[0].type) - - if input[1].type == TokenType.OPEN_PAREN: - identifier = parseIdentifier(input, parent) +def parseFunctionCallOrAssignOrIdentifier(input, parent): + #import pdb; pdb.set_trace() + identifier = IdentifierNode(input.pop(0).value) + # Function call + if len(input) > 0 and input[0].type == TokenType.OPEN_PAREN: arguments = parseArguments(input, parent) return FunctionCallNode(identifier, arguments) - - return parseIdentifier(input, parent) + # Assign + if len(input) > 1 and input[0].type == TokenType.ASSIGN: + input.pop(0) + value = parseExpression(input, parent) #TODO: only expressions! + return AssignExpression(identifier, value) + + return identifier def parsePercent(input, parent): - if input[0].type != TokenType.PERCENT: - expectedFound(TokenType.PERCENT, input[0].type) - input.pop(0) value = parent.pop(-1) return PercentNode(value) -def parseExpression(input, parent): +def parseExpression(input, parent): type = input[0].type if type == TokenType.INTEGER: return parseInteger(input, parent) @@ -304,13 +121,11 @@ def parseExpression(input, parent): if type == TokenType.NOTE: return parseNote(input, parent) if type == TokenType.IDENTIFIER: - return parseFunctionCallOrIdentifier(input, parent) + return parseFunctionCallOrAssignOrIdentifier(input, parent) if type == TokenType.PERCENT: return parsePercent(input, parent) if type == TokenType.OPEN_PAREN: return parseArguments(input, parent) - if type == TokenType.ASSIGN: - return parseAssign(input, parent) if type == TokenType.COLON: return parseColon(input, parent) @@ -330,12 +145,11 @@ def parseStatement(input, parent): return parseExpression(input, parent) def parseToken(input, parent): - type = input[0].type - - return parseStatement(input, parent) + #import pdb; pdb.set_trace() + return parseStatement(input, parent) -def parseProgram(input): +def parse(input): root = Program() while len(input) > 0: root.append(parseToken(input, root)) @@ -343,12 +157,12 @@ def parseProgram(input): def test(): try: - with open('test2.lit', 'r') as source: + with open(sys.argv[1], 'r') as source: lines = [line.rstrip('\n') for line in source.readlines()] tokens = [token for token in tokenize(lines) if token.type != TokenType.COMMENT] - ast = parseProgram(tokens) + ast = parse(tokens) print(ast) except TokenizerError as e: