diff --git a/AST.py b/AST.py index 1aa99f6..42f319e 100644 --- a/AST.py +++ b/AST.py @@ -22,13 +22,17 @@ class Node: def pop(self, index): return self.children.pop(index) - def _print(self, level): - print(f"{pad(level)}{self.__class__.__name__}({self.parent.__class__.__name__}):") + def _print(self, level): + string = f"{pad(level)}{self.__class__.__name__}({self.parent.__class__.__name__}):\n" for child in self.children: if isinstance(child, str) or isinstance(child, int) or isinstance(child, Note): - print(pad(level+1) + f"'{child}'") + string += pad(level+1) + f"'{child}'\n" else: - child._print(level+1) + string += child._print(level+1) + return string + + def __str__(self): + return self._print(0) def pad(level): return (" " * level) @@ -37,26 +41,26 @@ class Program(Node): def __init__(self): Node.__init__(self, None, (-1, -1)) - def __str__(self): - return "Program:\n" + "\n".join([str(e) for e in self.children]) + #def __str__(self): + #return "Program:\n" + "\n".join([str(e) for e in self.children]) def print(self): - self._print(0) + print(self._print(0)) class BlockNode(Node): def __init__(self, parent, pos): Node.__init__(self, parent, pos) - def __str__(self): - return "B{\n" + "\n".join([str(e) for e in self.children]) + "\n}" + #def __str__(self): + #return "B{\n" + "\n".join([str(e) for e in self.children]) + "\n}" class ListNode(Node): def __init__(self, parent, pos): Node.__init__(self, parent, pos) - def __str__(self): - return "@(" + ", ".join([str(e) for e in self.children]) + ")" + #def __str__(self): + #return "@(" + ", ".join([str(e) for e in self.children]) + ")" class IdentifierNode(Node): def __init__(self, identifier, parent, pos): @@ -65,8 +69,8 @@ class IdentifierNode(Node): self.identifier = self.children[0] - def __str__(self): - return f"L'{self.identifier}'" + #def __str__(self): + #return f"L'{self.identifier}'" class AssignExpression(Node): def __init__(self, target, value, parent, pos): @@ -76,8 +80,8 @@ class AssignExpression(Node): self.target = self.children[0] self.value = self.children[1] - def __str__(self): - return f"A[{self.target} = {self.value}]" + #def __str__(self): + #return f"A[{self.target} = {self.value}]" class AsteriskStatementNode(Node): def __init__(self, iterator, statement, parent, pos): @@ -87,8 +91,8 @@ class AsteriskStatementNode(Node): self.iterator = self.children[0] self.statement = self.children[1] - def __str__(self): - return f"*({self.iterator}: {self.statement})" + #def __str__(self): + #return f"*({self.iterator}: {self.statement})" class ColonNode(Node): def __init__(self, a, b, parent, pos): @@ -98,16 +102,15 @@ class ColonNode(Node): self.a = self.children[0] self.b = self.children[1] - def __str__(self): - return f":({self.a}, {self.b})" + #def __str__(self): + #return f":({self.a}, {self.b})" class ExpressionNode(Node): def __init__(self, parent, pos): Node.__init__(self, parent, pos) - def __str__(self): - return f"{self.__class__.__name__}('{self.value}')" - + #def __str__(self): + #return f"{self.__class__.__name__}('{self.value}')" class IntegerLiteralNode(ExpressionNode): def __init__(self, value, parent, pos): @@ -116,8 +119,8 @@ class IntegerLiteralNode(ExpressionNode): self.value = self.children[0] - def __str__(self): - return f"i'{self.value}'" + #def __str__(self): + #return f"i'{self.value}'" class StringLiteralNode(ExpressionNode): def __init__(self, value, parent, pos): @@ -126,8 +129,8 @@ class StringLiteralNode(ExpressionNode): self.value = self.children[0] - def __str__(self): - return f"s'{self.value}'" + #def __str__(self): + #return f"s'{self.value}'" class NoteLiteralNode(ExpressionNode): def __init__(self, value, parent, pos): @@ -136,8 +139,8 @@ class NoteLiteralNode(ExpressionNode): self.value = self.children[0] - def __str__(self): - return f"n'{self.value.note}[{self.value.octave}, {self.value.duration}]'" + #def __str__(self): + #return f"n'{self.value.note}[{self.value.octave}, {self.value.duration}]'" class FunctionCallNode(Node): def __init__(self, identifier, arguments, parent, pos): @@ -147,15 +150,15 @@ class FunctionCallNode(Node): self.identifier = self.children[0] self.arguments = self.children[1] - def __str__(self): - return f"F({self.identifier}: {self.arguments})" + #def __str__(self): + #return f"F({self.identifier}: {self.arguments})" class CommaNode(Node): def __init__(self, parent, pos): Node.__init__(self, parent, pos) - def __str__(self): - return "[,]" + #def __str__(self): + #return "[,]" class PercentNode(Node): def __init__(self, value, parent, pos): @@ -164,8 +167,8 @@ class PercentNode(Node): self.value = self.children[0] - def __str__(self): - return f"%'{self.value}'" + #def __str__(self): + #return f"%'{self.value}'" class FunctionDefinitionNode(Node): def __init__(self, name, parameters, body, parent, pos): @@ -176,8 +179,8 @@ class FunctionDefinitionNode(Node): self.parameters = self.children[1] self.body = self.children[2] - def __str__(self): - return f"$F'{self.name}{self.parameters}{self.body}" + #def __str__(self): + #return f"$F'{self.name}{self.parameters}{self.body}" class ReturnNode(Node): def __init__(self, value, parent, pos): @@ -186,5 +189,15 @@ class ReturnNode(Node): self.value = self.children[0] - def __str__(self): - return f"Ret({self.value})" + #def __str__(self): + #return f"Ret({self.value})" + +class ListItemNode(Node): + def __init__(self, value, parent, pos): + Node.__init__(self, parent, pos) + self.children.append(value) + + self.value = self.children[0] + +class CloseListNode(Node): + pass diff --git a/Parser.py b/Parser.py index 1f4f42f..190238b 100644 --- a/Parser.py +++ b/Parser.py @@ -1,230 +1,117 @@ -from Tokenizer import * -from Note import * from AST import * +from Tokenizer import TokenType from Error import SyntaxException -def expectedFound(expected, found): - raise SyntaxException(None, f"Expected: {expected}, found: {found}") +def assertToken(expected, input): + if expected != input.current().type: + raise SyntaxException(input.current().pos, f"Expected '{expected}', found '{input.current().value}'") -def assertType(expected, found): - if expected != found: - raise SyntaxException(None, f"Expected: {expected}, found: {found}") +def runParsers(input, parent, parsers): + for parser in parsers: + value = parser(input, parent) + if value is not None: + return value + return None -def parseInteger(input, parent): - token = input.pop(0) - return IntegerLiteralNode(int(token.value), parent, token.pos) - -def parseString(input, parent): - token = input.pop(0) - return StringLiteralNode(token.value[1:-1], parent, token.pos) - -def parseNote(input, parent): - token = input.pop(0) - value = token.value - consumedChars = 1 - notePitch = value[consumedChars] - consumedChars += 1 - octave = 4 - duration = 4 - dot = False - if consumedChars < len(value) and value[consumedChars] in ('b', '#'): - notePitch += value[consumedChars] - consumedChars += 1 - if consumedChars < len(value) and re.match(r'\d', value[consumedChars]): - octave = int(value[consumedChars]) - consumedChars += 1 - if consumedChars < len(value) and value[consumedChars] == '.': - consumedChars += 1 - durationString = '' - while consumedChars < len(value) and re.match(r'\d', value[consumedChars]): - durationString += value[consumedChars] - consumedChars += 1 - duration = int(durationString) - if consumedChars < len(value) and value[consumedChars] == '.': - dot = True - consumedChars += 1 - - return NoteLiteralNode(Note(notePitch, octave, duration, dot), parent, token.pos) +def returnAndGoAhead(input, getValue): + value = getValue(input.current()) + input.ahead() + return value -def parseComma(input, parent): - token = input.pop(0) - return CommaNode(parent, token.pos) - -def parseList(input, parent): - token = input.pop(0) - - node = ListNode(parent, token.pos) - - while input[0].type != TokenType.CLOSE_PAREN: - element = parseArrayElement(input, node) - if element is None: - raise SyntaxException(input[0].pos, "Invalid element '{input[0].value}'") - node.append(element) - - if input[0].type != TokenType.CLOSE_PAREN: - expectedFound(TokenType.CLOSE_PAREN, input[0].type) - input.pop(0) - - return node - -def parseBlock(input, parent): - token = input.pop(0) - - block = BlockNode(parent, token.pos) - - while input[0].type != TokenType.CLOSE_BRACKET: - block.append(parseToken(input, block)) - - if input[0].type != TokenType.CLOSE_BRACKET: - expectedFound(TokenType.CLOSE_BRACKET, input[0].type) - input.pop(0) - - return block - - -def parseAsterisk(input, parent): - token = input.pop(0) - - iterator = parent.pop(-1) - value = parseStatement(input, parent) - - asterisk = AsteriskStatementNode(iterator, value, parent, token.pos) - iterator.parent = asterisk - value.parent = asterisk - return asterisk - -def parseNoteOrColon(input, parent): - note = parseNote(input, parent) - if len(input) > 1 and input[0].type == TokenType.COLON: - token = input.pop(0) - b = parseNote(input, parent) - if b is None: - raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'") - colon = ColonNode(note, b, parent, token.pos) - note.parent = colon - b.parent = colon - return colon - - return note - -def parseIntegerOrColonOrPercent(input, parent): - integer = parseInteger(input, parent) - if len(input) > 1 and input[0].type == TokenType.COLON: - token = input.pop(0) - b = parseInteger(input, parent) - if b is None: - raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'") - colon = ColonNode(integer, b, parent, token.pos) - integer.parent = colon - b.parent = colon - return colon - - if len(input) > 0 and input[0].type == TokenType.PERCENT: - input.pop(0) - percent = PercentNode(integer, parent, integer.pos) - integer.parent = percent - return percent - - return integer - -def parseFunctionCallOrAssignOrIdentifier(input, parent): - token = input.pop(0) - identifier = IdentifierNode(token.value, parent, token.pos) - # Function call - if len(input) > 0 and input[0].type == TokenType.OPEN_PAREN: - arguments = parseList(input, parent) - func = FunctionCallNode(identifier, arguments, parent, token.pos) - identifier.parent = func - arguments.parent = func - return func - # Assign - if len(input) > 1 and input[0].type == TokenType.ASSIGN: - token = input.pop(0) - value = parseExpression(input, parent) # - assign = AssignExpression(identifier, value, parent, token.pos) - identifier.parent = assign - value.parent = assign - return assign +# int -> INTEGER +def parseInteger(input, parent): + if input.current().type == TokenType.INTEGER: + integer = IntegerLiteralNode(input.current().value, parent, input.current().pos) + input.ahead() - return identifier + return integer + return None -def parseMinus(input, parent): - token = input.pop(0) - - value = parseInteger(input, parent) - - return IntegerLiteralNode(-value.value, parent, token.pos) +# list -> CLOSE_PAREN | expr listTail +def parseList(input, parent): + if input.current().type == TokenType.OPEN_PAREN: + node = ListNode(parent, input.current().pos) + input.ahead() + + # list -> CLOSE_PAREN (end of list) + if input.current().type == TokenType.CLOSE_PAREN: + close = CloseListNode(node, input.current().pos) + node.append(close) + input.ahead() + return node + + # list -> expr listTail + token = input.current() + expr = parseExpression(input, node) + item = ListItemNode(expr, node, token.pos) + node.append(item) + listTail = parseListTail(input, item) + item.append(listTail) + + #while input.current().type != TokenType.CLOSE_PAREN: + #elem = parseListTail(input, node) + #if elem is None: + #raise SyntaxException(input.current().pos, f"Invalid element '{input.current().value}'") + #node.append(elem) + return node + return None + -def parseFunctionDefinition(input, parent): - input.pop(0) +# listTail -> COMMA expr listTail | CLOSE_PAREN +def parseListTail(input, parent): + # listTail -> CLOSE_PAREN + if input.current().type == TokenType.CLOSE_PAREN: + close = CloseListNode(parent, input.current().pos) + input.ahead() + return close - assertType(TokenType.IDENTIFIER, input[0].type) - token = input.pop(0) - name = IdentifierNode(token.value, parent, token.pos) + assertToken(TokenType.COMMA, input) + input.ahead() + + expr = parseExpression(input, parent) + if expr is not None: + item = ListItemNode(expr, parent, expr.pos) + expr.parent = item + listTail = parseListTail(input, item) + item.append(listTail) + listTail.parent = item + input.ahead() + return item - assertType(TokenType.OPEN_PAREN, input[0].type) - parameters = parseList(input, parent) - - assertType(TokenType.OPEN_BRACKET, input[0].type) - body = parseBlock(input, parent) - - func = FunctionDefinitionNode(name, parameters, body, parent, token.pos) - name.parent = func - parameters.parent = func - body.parent = func - return func + return None -def parseReturn(input, parent): - token = input.pop(0) +def parseExpression(input, parent): + value = runParsers(input, parent, [ + parseInteger, + parseList + ]) - value = parseExpression(input, parent) + if value is None: + raise SyntaxException(input.current().pos, f"Expression expected") + + return value - returnNode = ReturnNode(value, parent, token.pos) - value.parent = returnNode - return returnNode -def parseExpression(input, parent): - type = input[0].type - if type == TokenType.FUNCTION: - return parseFunctionDefinition(input, parent) - if type == TokenType.RETURN: - return parseReturn(input, parent) - if type == TokenType.MINUS: - return parseMinus(input, parent) - if type == TokenType.INTEGER: - return parseIntegerOrColonOrPercent(input, parent) - if type == TokenType.STRING: - return parseString(input, parent) - if type == TokenType.NOTE: - return parseNoteOrColon(input, parent) - if type == TokenType.IDENTIFIER: - return parseFunctionCallOrAssignOrIdentifier(input, parent) - if type == TokenType.OPEN_PAREN: - return parseList(input, parent) - raise SyntaxException(input[0].pos, f"Unexpected character '{input[0].value}'") - -def parseArrayElement(input, parent): - type = input[0].type - if type == TokenType.COMMA: - return parseComma(input, parent) - return parseExpression(input, parent) - -def parseStatement(input, parent): - type = input[0].type - if type == TokenType.OPEN_BRACKET: - return parseBlock(input, parent) - if type == TokenType.ASTERISK: - return parseAsterisk(input, parent) +def parseToken(input, parent): + value = runParsers(input, parent, [ + parseExpression + ]) - return parseExpression(input, parent) + if value is None: + raise SyntaxException(input.current().pos, "Unknown statement") -def parseToken(input, parent): - #import pdb; pdb.set_trace() - return parseStatement(input, parent) - - -def parse(input): + return value + +def parse(input): root = Program() - while len(input) > 0: - root.append(parseToken(input, root)) + while input.notParsedTokensRemain(): + root.append(parseToken(input, root)) return root + + + + + + +def parseNote(input, parent): + pass diff --git a/Tokenizer.py b/Tokenizer.py index 2f09ec3..846458f 100644 --- a/Tokenizer.py +++ b/Tokenizer.py @@ -4,6 +4,51 @@ import re import sys from Error import SyntaxException +class Tokens: + def __init__(self, tokens = []): + self.tokens = tokens + self.cursor = 0 + self.snap = 0 + + def append(self, token): + self.tokens.append(token) + + def __getitem__(self, index): + return self.tokens[index] + + def current(self): + if self.cursor >= len(self.tokens): + raise RuntimeError(f"Cursor points to not existing token! Cursor = {self.cursor}, len = {len(self.tokens)}") + return self.tokens[self.cursor] + + def next(self): + return self.tokens[self.cursor+1] + + def prev(self): + return self.tokens[self.cursor-1] + + def hasMore(self, count): + return self.cursor + count < len(self.tokens) + + def ahead(self): + self.cursor += 1 + + def snapshot(self): + self.snapshot = self.cursor + + def reset(self): + self.cursor = self.snapshot + return self.tokens[self.cursor] + + def notParsedTokensRemain(self): + return self.cursor < len(self.tokens) + + def __str__(self): + return f"[Cursor: {self.cursor}\n{', '.join([str(token) for token in self.tokens])}]" + + def __repr__(self): + return self.__str__() + class TokenType(Enum): OPEN_PAREN = 1 CLOSE_PAREN = 2 @@ -189,7 +234,7 @@ tokenizers = ( ) def doTokenize(lines): - tokens = [] + tokens = [] for lineNumber, line in enumerate(lines): current = 0 while current < len(line): @@ -209,4 +254,4 @@ def doTokenize(lines): def tokenize(lines): tokens = doTokenize(lines) - return list(filter(lambda x: x.type != TokenType.COMMENT, tokens)) + return Tokens([ token for token in tokens if token.type != TokenType.COMMENT])