Refactor tokenizer
2  .gitignore  vendored  Normal file
@@ -0,0 +1,2 @@
__pycache__/
*.mus
251  Tokenizer.py
@@ -1,251 +0,0 @@
from enum import Enum
import time
import re
import sys
from Error import SyntaxException


class Tokens:
    def __init__(self, tokens = []):
        self.tokens = tokens
        self.cursor = 0
        self.snap = 0

    def append(self, token):
        self.tokens.append(token)

    def __getitem__(self, index):
        return self.tokens[index]

    def current(self):
        if self.cursor >= len(self.tokens):
            raise RuntimeError(f"Cursor points to not existing token! Cursor = {self.cursor}, len = {len(self.tokens)}")
        return self.tokens[self.cursor]

    def next(self, number=1):
        return self.tokens[self.cursor + number]

    def prev(self, number=1):
        return self.tokens[self.cursor - number]

    def hasMore(self, count=1):
        return self.cursor + count < len(self.tokens)

    def hasCurrent(self):
        return self.cursor < len(self.tokens)

    def ahead(self):
        self.cursor += 1

    def snapshot(self):
        self.snapshot = self.cursor

    def reset(self):
        self.cursor = self.snapshot
        return self.tokens[self.cursor]

    def __str__(self):
        return f"[Cursor: {self.cursor}\n{', '.join([str(token) for token in self.tokens])}]"

    def __repr__(self):
        return self.__str__()


class TokenType(Enum):
    OPEN_PAREN = 1
    CLOSE_PAREN = 2
    ASTERISK = 3
    STRING = 4
    IDENTIFIER = 5
    COMMA = 6
    INTEGER = 7
    OPEN_BRACKET = 8
    CLOSE_BRACKET = 9
    ASSIGN = 10
    COLON = 11
    NOTE = 12
    COMMENT = 13
    PERCENT = 14
    MINUS = 15
    FUNCTION = 16
    RETURN = 17
    DOT = 18


class Token:
    def __init__(self, type, value, pos):
        self.type = type
        self.value = value
        self.pos = pos

    def __str__(self):
        return "Token(" + str(self.type) + ", '" + self.value + "', " + str(self.pos) + ")"

    def __repr__(self):
        return self.__str__()


def tokenizeOpenParen(input, current, line):
    return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line)


def tokenizeChar(type, char, input, current, line):
    if input[current] == char:
        return (1, Token(type, input[current], (line, current)))
    return (0, None)


def tokenizeCloseParen(input, current, line):
    return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line)


def tokenizeAsterisk(input, current, line):
    return tokenizeChar(TokenType.ASTERISK, '*', input, current, line)


def tokenizeString(input, current, line):
    if input[current] == '"':
        value = input[current]
        char = ''
        consumedChars = 1
        while char != '"':
            if char is None: #TODO!!!
                print("String not terminated")
            char = input[current + consumedChars]
            value += char
            consumedChars += 1
        return (consumedChars, Token(TokenType.STRING, value, (line, current)))
    return (0, None)


def tokenizeRegexPattern(type, pattern, input, current, line):
    consumedChars = 0
    value = ''

    while current+consumedChars < len(input) and re.match(pattern, input[current+consumedChars]):
        value += input[current+consumedChars]
        consumedChars += 1
    return (consumedChars, Token(type, value, (line, current)) if consumedChars > 0 else None)


def tokenizeWhitespaces(input, current, line):
    return tokenizeRegexPattern(None, r'\s', input, current, line)


def tokenizeIdentifier(input, current, line):
    return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line)


def tokenizeComma(input, current, line):
    return tokenizeChar(TokenType.COMMA, ',', input, current, line)


def tokenizeInteger(input, current, line):
    return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line)


def tokenizeOpenBracket(input, current, line):
    return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line)


def tokenizeCloseBracket(input, current, line):
    return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line)


def tokenizeAssign(input, current, line):
    return tokenizeChar(TokenType.ASSIGN, '=', input, current, line)


def tokenizeColon(input, current, line):
    return tokenizeChar(TokenType.COLON, ':', input, current, line)


def tokenizeComment(input, current, line):
    if input[current] == '#':
        consumedChars = 0
        value = ''
        while current+consumedChars < len(input):
            value += input[current+consumedChars]
            consumedChars += 1
            pass
        return (consumedChars, Token(TokenType.COMMENT, value, (line, current)))
    return (0, None)


def tokenizeNote(input, current, line):
    consumedChars = 0
    value = ''
    if input[current] == '@':
        consumedChars += 1
        value += input[current]
        if input[current+consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and input[current+consumedChars] in ('b', '#'):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and input[current+consumedChars] == '.':
            duration = input[current+consumedChars]
            consumedChars += 1
            while current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
                duration += input[current+consumedChars]
                consumedChars += 1
            if current+consumedChars < len(input) and input[current+consumedChars] == 'd':
                duration += input[current+consumedChars]
                consumedChars += 1
            if len(duration) > 1:
                value += duration
            else:
                consumedChars -= 1
        return (consumedChars, Token(TokenType.NOTE, value, (line, current)))
    return (0, None)


def tokenizePercent(input, current, line):
    return tokenizeChar(TokenType.PERCENT, '%', input, current, line)


def tokenizeMinus(input, current, line):
    return tokenizeChar(TokenType.MINUS, '-', input, current, line)


def tokenizeFunction(input, current, line):
    return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line)


def tokenizeKeyword(type, keyword, input, current, line):
    if len(input) >= current+len(keyword) and input[current:current+len(keyword)] == keyword:
        return (len(keyword), Token(type, keyword, (line, current)))
    return (0, None)


def tokenizeReturn(input, current, line):
    return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line)


def tokenizeDot(input, current, line):
    return tokenizeChar(TokenType.DOT, '.', input, current, line)


tokenizers = (
    tokenizeOpenParen,
    tokenizeCloseParen,
    tokenizeAsterisk,
    tokenizeString,
    tokenizeFunction,
    tokenizeReturn,
    tokenizeInteger,
    tokenizeNote,
    tokenizeIdentifier,
    tokenizeComma,
    tokenizeOpenBracket,
    tokenizeCloseBracket,
    tokenizeAssign,
    tokenizeColon,
    tokenizePercent,
    tokenizeMinus,
    tokenizeDot,
    tokenizeComment,
    tokenizeWhitespaces,
)


def doTokenize(lines):
    tokens = []
    for lineNumber, line in enumerate(lines):
        current = 0
        while current < len(line):
            tokenized = False
            for tokenizer in tokenizers:
                consumedChars, value = tokenizer(line, current, lineNumber)
                if consumedChars > 0:
                    tokens.append(value)
                    current += consumedChars
                    tokenized = True
                    break

            if not tokenized:
                raise SyntaxException((lineNumber, current), f"Unknown symbol '{line[current]}'")

    return [token for token in tokens if token.type is not None]


def tokenize(lines):
    tokens = doTokenize(lines)
    return Tokens([token for token in tokens if token.type != TokenType.COMMENT])
26  main.py
@@ -1,26 +0,0 @@
from Tokenizer import tokenize
from Parser import parse
from Evaluator import evaluate
from Environment import createEnvironment
from Error import SyntaxException, RuntimeException
import sys

if __name__ == "__main__":
    try:
        with open(sys.argv[1], 'r') as source:
            lines = [line.rstrip('\n') for line in source.readlines()]

        env = createEnvironment()

        tokens = tokenize(lines)

        ast = parse(tokens)

        evaluate(ast, env)
    except SyntaxException as e:
        print(e.msg)
    except RuntimeException as e:
        print(e.msg)
    except KeyboardInterrupt:
        print("Program interrupted")
230  smnp/OldParser.py  Normal file
@@ -0,0 +1,230 @@
from Tokenizer import *
from Note import *
from AST import *
from Error import SyntaxException


def expectedFound(expected, found):
    raise SyntaxException(None, f"Expected: {expected}, found: {found}")


def assertType(expected, found):
    if expected != found:
        raise SyntaxException(None, f"Expected: {expected}, found: {found}")


def parseInteger(input, parent):
    token = input.pop(0)
    return IntegerLiteralNode(int(token.value), parent, token.pos)


def parseString(input, parent):
    token = input.pop(0)
    return StringLiteralNode(token.value[1:-1], parent, token.pos)


def parseNote(input, parent):
    token = input.pop(0)
    value = token.value
    consumedChars = 1
    notePitch = value[consumedChars]
    consumedChars += 1
    octave = 4
    duration = 4
    dot = False
    if consumedChars < len(value) and value[consumedChars] in ('b', '#'):
        notePitch += value[consumedChars]
        consumedChars += 1
    if consumedChars < len(value) and re.match(r'\d', value[consumedChars]):
        octave = int(value[consumedChars])
        consumedChars += 1
    if consumedChars < len(value) and value[consumedChars] == '.':
        consumedChars += 1
        durationString = ''
        while consumedChars < len(value) and re.match(r'\d', value[consumedChars]):
            durationString += value[consumedChars]
            consumedChars += 1
        duration = int(durationString)
        if consumedChars < len(value) and value[consumedChars] == '.':
            dot = True
            consumedChars += 1

    return NoteLiteralNode(Note(notePitch, octave, duration, dot), parent, token.pos)


def parseComma(input, parent):
    token = input.pop(0)
    return CommaNode(parent, token.pos)


def parseList(input, parent):
    token = input.pop(0)

    node = ListNode(parent, token.pos)

    while input[0].type != TokenType.CLOSE_PAREN:
        element = parseArrayElement(input, node)
        if element is None:
            raise SyntaxException(input[0].pos, f"Invalid element '{input[0].value}'")
        node.append(element)

    if input[0].type != TokenType.CLOSE_PAREN:
        expectedFound(TokenType.CLOSE_PAREN, input[0].type)
    input.pop(0)

    return node


def parseBlock(input, parent):
    token = input.pop(0)

    block = BlockNode(parent, token.pos)

    while input[0].type != TokenType.CLOSE_BRACKET:
        block.append(parseToken(input, block))

    if input[0].type != TokenType.CLOSE_BRACKET:
        expectedFound(TokenType.CLOSE_BRACKET, input[0].type)
    input.pop(0)

    return block


def parseAsterisk(input, parent):
    token = input.pop(0)

    iterator = parent.pop(-1)
    value = parseStatement(input, parent)

    asterisk = AsteriskStatementNode(iterator, value, parent, token.pos)
    iterator.parent = asterisk
    value.parent = asterisk
    return asterisk


def parseNoteOrColon(input, parent):
    note = parseNote(input, parent)
    if len(input) > 1 and input[0].type == TokenType.COLON:
        token = input.pop(0)
        b = parseNote(input, parent)
        if b is None:
            raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'")
        colon = ColonNode(note, b, parent, token.pos)
        note.parent = colon
        b.parent = colon
        return colon

    return note


def parseIntegerOrColonOrPercent(input, parent):
    integer = parseInteger(input, parent)
    if len(input) > 1 and input[0].type == TokenType.COLON:
        token = input.pop(0)
        b = parseInteger(input, parent)
        if b is None:
            raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'")
        colon = ColonNode(integer, b, parent, token.pos)
        integer.parent = colon
        b.parent = colon
        return colon

    if len(input) > 0 and input[0].type == TokenType.PERCENT:
        input.pop(0)
        percent = PercentNode(integer, parent, integer.pos)
        integer.parent = percent
        return percent

    return integer


def parseFunctionCallOrAssignOrIdentifier(input, parent):
    token = input.pop(0)
    identifier = IdentifierNode(token.value, parent, token.pos)
    # Function call
    if len(input) > 0 and input[0].type == TokenType.OPEN_PAREN:
        arguments = parseList(input, parent)
        func = FunctionCallNode(identifier, arguments, parent, token.pos)
        identifier.parent = func
        arguments.parent = func
        return func
    # Assign
    if len(input) > 1 and input[0].type == TokenType.ASSIGN:
        token = input.pop(0)
        value = parseExpression(input, parent)
        assign = AssignExpression(identifier, value, parent, token.pos)
        identifier.parent = assign
        value.parent = assign
        return assign

    return identifier


def parseMinus(input, parent):
    token = input.pop(0)

    value = parseInteger(input, parent)

    return IntegerLiteralNode(-value.value, parent, token.pos)


def parseFunctionDefinition(input, parent):
    input.pop(0)

    assertType(TokenType.IDENTIFIER, input[0].type)
    token = input.pop(0)
    name = IdentifierNode(token.value, parent, token.pos)

    assertType(TokenType.OPEN_PAREN, input[0].type)
    parameters = parseList(input, parent)

    assertType(TokenType.OPEN_BRACKET, input[0].type)
    body = parseBlock(input, parent)

    func = FunctionDefinitionNode(name, parameters, body, parent, token.pos)
    name.parent = func
    parameters.parent = func
    body.parent = func
    return func


def parseReturn(input, parent):
    token = input.pop(0)

    value = parseExpression(input, parent)

    returnNode = ReturnNode(value, parent, token.pos)
    value.parent = returnNode
    return returnNode


def parseExpression(input, parent):
    type = input[0].type
    if type == TokenType.FUNCTION:
        return parseFunctionDefinition(input, parent)
    if type == TokenType.RETURN:
        return parseReturn(input, parent)
    if type == TokenType.MINUS:
        return parseMinus(input, parent)
    if type == TokenType.INTEGER:
        return parseIntegerOrColonOrPercent(input, parent)
    if type == TokenType.STRING:
        return parseString(input, parent)
    if type == TokenType.NOTE:
        return parseNoteOrColon(input, parent)
    if type == TokenType.IDENTIFIER:
        return parseFunctionCallOrAssignOrIdentifier(input, parent)
    if type == TokenType.OPEN_PAREN:
        return parseList(input, parent)
    raise SyntaxException(input[0].pos, f"Unexpected character '{input[0].value}'")


def parseArrayElement(input, parent):
    type = input[0].type
    if type == TokenType.COMMA:
        return parseComma(input, parent)
    return parseExpression(input, parent)


def parseStatement(input, parent):
    type = input[0].type
    if type == TokenType.OPEN_BRACKET:
        return parseBlock(input, parent)
    if type == TokenType.ASTERISK:
        return parseAsterisk(input, parent)

    return parseExpression(input, parent)


def parseToken(input, parent):
    #import pdb; pdb.set_trace()
    return parseStatement(input, parent)


def parse(input):
    root = Program()
    while len(input) > 0:
        root.append(parseToken(input, root))
    return root
0  smnp/__init__.py  Normal file
4  smnp/__main__.py  Normal file
@@ -0,0 +1,4 @@
from smnp.main import main

if __name__ == "__main__":
    main()
@@ -1,5 +1,5 @@
 import sys
-from Evaluator import objectString
+from parser.Environment import objectString
 from Note import *
 import random
 import Synth
@@ -46,19 +46,6 @@ class Environment():
             return scope
         else:
             return scope
-
-def sample(args, env):
-    if len(args) == 1 and isinstance(args[0], list):
-        return _sample(args[0])
-    elif len(args) == 0:
-        return _sample(Note.range(Note(NotePitch.C), Note(NotePitch.H)))
-    elif all(isinstance(x, Note) for x in args):
-        return _sample(args)
-    else:
-        pass # not valid signature
-
-def _sample(list):
-    return list[int(random.uniform(0, len(list)))]
-
 def doPrint(args, env):
     print("".join([objectString(arg) for arg in args]))
0  smnp/error/__init__.py  Normal file
4  smnp/error/runtime.py  Normal file
@@ -0,0 +1,4 @@
class RuntimeException(Exception):
    def __init__(self, pos, msg):
        posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
        self.msg = f"Runtime error {posStr}:\n{msg}"
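As a reference for the formatting above, a minimal usage sketch (not part of the diff; the position and message are invented values):

# Sketch: pos is a 0-based (line, column) pair, rendered 1-based by the formatter.
from smnp.error.runtime import RuntimeException

e = RuntimeException((2, 4), "Undefined variable 'x'")
print(e.msg)
# Runtime error [line 3, col 5]:
# Undefined variable 'x'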
@@ -2,8 +2,3 @@ class SyntaxException(Exception):
     def __init__(self, pos, msg):
         posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
         self.msg = f"Syntax error {posStr}:\n{msg}"
-
-class RuntimeException(Exception):
-    def __init__(self, pos, msg):
-        posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
-        self.msg = f"Syntax error {posStr}:\n{msg}"
29  smnp/main.py  Normal file
@@ -0,0 +1,29 @@
import sys
from smnp.error.syntax import SyntaxException
from smnp.error.runtime import RuntimeException
from smnp.token.tokenizer import tokenize
#from Tokenizer import tokenize
#from Parser import parse
#from Evaluator import evaluate
#from Environment import createEnvironment
#from Error import SyntaxException, RuntimeException


def main():
    try:
        with open(sys.argv[1], 'r') as source:
            lines = [line.rstrip('\n') for line in source.readlines()]

        #env = createEnvironment()

        tokens = tokenize(lines)
        print(tokens)
        #ast = parse(tokens)

        #evaluate(ast, env)
    except SyntaxException as e:
        print(e.msg)
    except RuntimeException as e:
        print(e.msg)
    except KeyboardInterrupt:
        print("Program interrupted")
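With this layout the interpreter would presumably be launched as a module, e.g. "python -m smnp program.mus", via smnp/__main__.py above. A minimal sketch (not in the diff) of the same tokenize-and-print step done programmatically; "example.mus" is a made-up file name:

# Sketch only: mirrors what main() does, with a hypothetical source file.
from smnp.token.tokenizer import tokenize

with open("example.mus") as source:
    lines = [line.rstrip('\n') for line in source.readlines()]

print(tokenize(lines))  # a TokenList; whitespace and comments already filtered out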
1  smnp/token/__init__.py  Normal file
@@ -0,0 +1 @@
__all__ = ["tokenize"]
54  smnp/token/model.py  Normal file
@@ -0,0 +1,54 @@
class Token:
    def __init__(self, type, value, pos):
        self.type = type
        self.value = value
        self.pos = pos

    def __str__(self):
        return "Token(" + str(self.type) + ", '" + self.value + "', " + str(self.pos) + ")"

    def __repr__(self):
        return self.__str__()


class TokenList:
    def __init__(self, tokens=None):
        # A mutable default argument would be shared between instances,
        # so the empty default is created per instance instead.
        self.tokens = tokens if tokens is not None else []
        self.cursor = 0
        self.snap = 0  # cursor position saved by snapshot(), restored by reset()

    def append(self, token):
        self.tokens.append(token)

    def __getitem__(self, index):
        return self.tokens[index]

    def current(self):
        if self.cursor >= len(self.tokens):
            raise RuntimeError(f"Cursor points to not existing token! Cursor = {self.cursor}, len = {len(self.tokens)}")
        return self.tokens[self.cursor]

    def next(self, number=1):
        return self.tokens[self.cursor + number]

    def prev(self, number=1):
        return self.tokens[self.cursor - number]

    def hasMore(self, count=1):
        return self.cursor + count < len(self.tokens)

    def hasCurrent(self):
        return self.cursor < len(self.tokens)

    def ahead(self):
        self.cursor += 1

    def snapshot(self):
        # The saved position lives in self.snap; assigning to self.snapshot
        # would shadow this method with an integer.
        self.snap = self.cursor

    def reset(self):
        self.cursor = self.snap
        return self.tokens[self.cursor]

    def __str__(self):
        return f"[Cursor: {self.cursor}\n{', '.join([str(token) for token in self.tokens])}]"

    def __repr__(self):
        return self.__str__()
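A short sketch (not in the diff) of the TokenList cursor protocol, using a hand-built token list:

# Cursor navigation sketch with hand-made tokens.
from smnp.token.model import Token, TokenList
from smnp.token.type import TokenType

tokens = TokenList([
    Token(TokenType.IDENTIFIER, "print", (0, 0)),
    Token(TokenType.OPEN_PAREN, "(", (0, 5)),
    Token(TokenType.CLOSE_PAREN, ")", (0, 6)),
])

tokens.snapshot()        # save the current cursor position
tokens.ahead()           # advance to the next token
print(tokens.current())  # Token(TokenType.OPEN_PAREN, '(', (0, 5))
print(tokens.reset())    # back to the snapshot: the IDENTIFIER token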
81  smnp/token/tokenizer.py  Normal file
@@ -0,0 +1,81 @@
import sys
import time
import re
from smnp.error.syntax import SyntaxException
from smnp.token.type import TokenType
from smnp.token.model import Token, TokenList
from smnp.token.tools import tokenizeChar, tokenizeRegexPattern
from smnp.token.tokenizers.paren import tokenizeOpenParen, tokenizeCloseParen
from smnp.token.tokenizers.asterisk import tokenizeAsterisk
from smnp.token.tokenizers.whitespace import tokenizeWhitespaces
from smnp.token.tokenizers.identifier import tokenizeIdentifier
from smnp.token.tokenizers.comma import tokenizeComma
from smnp.token.tokenizers.string import tokenizeString
from smnp.token.tokenizers.integer import tokenizeInteger
from smnp.token.tokenizers.bracket import tokenizeOpenBracket, tokenizeCloseBracket
from smnp.token.tokenizers.assign import tokenizeAssign
from smnp.token.tokenizers.colon import tokenizeColon
from smnp.token.tokenizers.comment import tokenizeComment
from smnp.token.tokenizers.note import tokenizeNote
from smnp.token.tokenizers.function import tokenizeFunction
from smnp.token.tokenizers.ret import tokenizeReturn
from smnp.token.tokenizers.percent import tokenizePercent
from smnp.token.tokenizers.minus import tokenizeMinus
from smnp.token.tokenizers.dot import tokenizeDot


tokenizers = (
    tokenizeOpenParen,
    tokenizeCloseParen,
    tokenizeAsterisk,
    tokenizeString,
    tokenizeFunction,
    tokenizeReturn,
    tokenizeInteger,
    tokenizeNote,
    tokenizeIdentifier,
    tokenizeComma,
    tokenizeOpenBracket,
    tokenizeCloseBracket,
    tokenizeAssign,
    tokenizeColon,
    tokenizePercent,
    tokenizeMinus,
    tokenizeDot,
    tokenizeComment,
    tokenizeWhitespaces,
)


filters = [
    lambda token: token.type is not None,
    lambda token: token.type != TokenType.COMMENT
]


def tokenize(lines):
    tokens = []
    for lineNumber, line in enumerate(lines):
        current = 0
        while current < len(line):
            consumedChars, token = combinedTokenizer(line, current, lineNumber)

            if consumedChars == 0:
                raise SyntaxException((lineNumber, current), f"Unknown symbol '{line[current]}'")

            current += consumedChars
            tokens.append(token)

    # filterTokens yields a chain of generators; materialize it so TokenList
    # holds an indexable list.
    return TokenList(list(filterTokens(filters, tokens)))


def combinedTokenizer(line, current, lineNumber):
    for tokenizer in tokenizers:
        consumedChars, token = tokenizer(line, current, lineNumber)
        if consumedChars > 0:
            return (consumedChars, token)
    return (0, None)


def filterTokens(filters, tokens):
    if not filters:
        return tokens
    return filterTokens(filters[1:], (token for token in tokens if filters[0](token)))


__all__ = ["tokenize"]
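The dispatch contract above: every entry in the tokenizers tuple maps (line, position, lineNumber) to (consumedChars, token), where consumedChars == 0 means "no match, try the next one". A quick sketch with an invented input line:

# Sketch: whitespace tokens (type None) and COMMENT tokens are filtered out.
from smnp.token.tokenizer import tokenize

print(tokenize(["x = 5 # set x"]))
# TokenList containing IDENTIFIER 'x', ASSIGN '=', INTEGER '5'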
0  smnp/token/tokenizers/__init__.py  Normal file
5  smnp/token/tokenizers/assign.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeAssign(input, current, line):
    return tokenizeChar(TokenType.ASSIGN, '=', input, current, line)
5  smnp/token/tokenizers/asterisk.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeAsterisk(input, current, line):
    return tokenizeChar(TokenType.ASTERISK, '*', input, current, line)
8  smnp/token/tokenizers/bracket.py  Normal file
@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeOpenBracket(input, current, line):
    return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line)


def tokenizeCloseBracket(input, current, line):
    return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line)
5  smnp/token/tokenizers/colon.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeColon(input, current, line):
    return tokenizeChar(TokenType.COLON, ':', input, current, line)
5  smnp/token/tokenizers/comma.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeComma(input, current, line):
    return tokenizeChar(TokenType.COMMA, ',', input, current, line)
13  smnp/token/tokenizers/comment.py  Normal file
@@ -0,0 +1,13 @@
from smnp.token.type import TokenType
from smnp.token.model import Token


def tokenizeComment(input, current, line):
    if input[current] == '#':
        consumedChars = 0
        value = ''
        # A comment runs from '#' to the end of the line.
        while current+consumedChars < len(input):
            value += input[current+consumedChars]
            consumedChars += 1
        return (consumedChars, Token(TokenType.COMMENT, value, (line, current)))
    return (0, None)
5  smnp/token/tokenizers/dot.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeDot(input, current, line):
    return tokenizeChar(TokenType.DOT, '.', input, current, line)
5  smnp/token/tokenizers/function.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType


def tokenizeFunction(input, current, line):
    return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line)
5  smnp/token/tokenizers/identifier.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType


def tokenizeIdentifier(input, current, line):
    return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line)
5  smnp/token/tokenizers/integer.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType


def tokenizeInteger(input, current, line):
    return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line)
5  smnp/token/tokenizers/minus.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeMinus(input, current, line):
    return tokenizeChar(TokenType.MINUS, '-', input, current, line)
37  smnp/token/tokenizers/note.py  Normal file
@@ -0,0 +1,37 @@
import re
from smnp.token.type import TokenType
from smnp.token.model import Token


def tokenizeNote(input, current, line):
    consumedChars = 0
    value = ''
    if input[current] == '@':
        consumedChars += 1
        value += input[current]
        # Bounds check added: a trailing '@' at end of line would otherwise
        # raise an IndexError here.
        if current+consumedChars < len(input) and input[current+consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and input[current+consumedChars] in ('b', '#'):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and input[current+consumedChars] == '.':
            duration = input[current+consumedChars]
            consumedChars += 1
            while current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
                duration += input[current+consumedChars]
                consumedChars += 1
            if current+consumedChars < len(input) and input[current+consumedChars] == 'd':
                duration += input[current+consumedChars]
                consumedChars += 1
            if len(duration) > 1:
                value += duration
            else:
                consumedChars -= 1
        return (consumedChars, Token(TokenType.NOTE, value, (line, current)))
    return (0, None)
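Inferred from the branches above, a note literal is '@', a pitch letter, an optional accidental ('b' or '#'), an optional octave digit, and an optional '.'-prefixed duration that may end in 'd'. A sketch with a few invented literals:

# Sketch: each call returns (consumedChars, Token) for a made-up literal.
from smnp.token.tokenizers.note import tokenizeNote

for src in ["@c", "@C#4", "@d5.8", "@a.16d"]:
    print(tokenizeNote(src, 0, 0))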
8  smnp/token/tokenizers/paren.py  Normal file
@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeOpenParen(input, current, line):
    return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line)


def tokenizeCloseParen(input, current, line):
    return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line)
5  smnp/token/tokenizers/percent.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizePercent(input, current, line):
    return tokenizeChar(TokenType.PERCENT, '%', input, current, line)
5  smnp/token/tokenizers/ret.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType


def tokenizeReturn(input, current, line):
    return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line)
16  smnp/token/tokenizers/string.py  Normal file
@@ -0,0 +1,16 @@
from smnp.token.type import TokenType
from smnp.token.model import Token


def tokenizeString(input, current, line):
    if input[current] == '"':
        value = input[current]
        char = ''
        consumedChars = 1
        while char != '"':
            if current + consumedChars >= len(input): #TODO: raise a SyntaxException instead
                print("String not terminated")
                break
            char = input[current + consumedChars]
            value += char
            consumedChars += 1
        return (consumedChars, Token(TokenType.STRING, value, (line, current)))
    return (0, None)
4  smnp/token/tokenizers/whitespace.py  Normal file
@@ -0,0 +1,4 @@
from smnp.token.tools import tokenizeRegexPattern


def tokenizeWhitespaces(input, current, line):
    return tokenizeRegexPattern(None, r'\s', input, current, line)
21  smnp/token/tools.py  Normal file
@@ -0,0 +1,21 @@
import re
from smnp.token.model import Token


def tokenizeChar(type, char, input, current, line):
    if input[current] == char:
        return (1, Token(type, input[current], (line, current)))
    return (0, None)


def tokenizeRegexPattern(type, pattern, input, current, line):
    consumedChars = 0
    value = ''

    while current+consumedChars < len(input) and re.match(pattern, input[current+consumedChars]):
        value += input[current+consumedChars]
        consumedChars += 1
    return (consumedChars, Token(type, value, (line, current)) if consumedChars > 0 else None)


def tokenizeKeyword(type, keyword, input, current, line):
    if len(input) >= current+len(keyword) and input[current:current+len(keyword)] == keyword:
        return (len(keyword), Token(type, keyword, (line, current)))
    return (0, None)
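A quick sketch (not in the diff) of the three helpers on invented inputs; each returns (consumedChars, token) and consumes nothing on a miss:

# Sketch: positions are (line, column) pairs.
from smnp.token.tools import tokenizeChar, tokenizeKeyword, tokenizeRegexPattern
from smnp.token.type import TokenType

print(tokenizeChar(TokenType.COMMA, ',', ", rest", 0, 0))            # (1, COMMA token)
print(tokenizeRegexPattern(TokenType.INTEGER, r'\d', "123,", 0, 0))  # (3, INTEGER '123')
print(tokenizeKeyword(TokenType.RETURN, 'return', "return 1", 0, 0)) # (6, RETURN token)
print(tokenizeChar(TokenType.DOT, '.', "x", 0, 0))                   # (0, None) - no match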
21  smnp/token/type.py  Normal file
@@ -0,0 +1,21 @@
from enum import Enum


class TokenType(Enum):
    OPEN_PAREN = 1
    CLOSE_PAREN = 2
    ASTERISK = 3
    STRING = 4
    IDENTIFIER = 5
    COMMA = 6
    INTEGER = 7
    OPEN_BRACKET = 8
    CLOSE_BRACKET = 9
    ASSIGN = 10
    COLON = 11
    NOTE = 12
    COMMENT = 13
    PERCENT = 14
    MINUS = 15
    FUNCTION = 16
    RETURN = 17
    DOT = 18