diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5cc7e02 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +*.mus diff --git a/Tokenizer.py b/Tokenizer.py deleted file mode 100644 index 5b20bc3..0000000 --- a/Tokenizer.py +++ /dev/null @@ -1,251 +0,0 @@ -from enum import Enum -import time -import re -import sys -from Error import SyntaxException - -class Tokens: - def __init__(self, tokens = []): - self.tokens = tokens - self.cursor = 0 - self.snap = 0 - - def append(self, token): - self.tokens.append(token) - - def __getitem__(self, index): - return self.tokens[index] - - def current(self): - if self.cursor >= len(self.tokens): - raise RuntimeError(f"Cursor points to not existing token! Cursor = {self.cursor}, len = {len(self.tokens)}") - return self.tokens[self.cursor] - - def next(self, number=1): - return self.tokens[self.cursor + number] - - def prev(self, number=1): - return self.tokens[self.cursor - number] - - def hasMore(self, count=1): - return self.cursor + count < len(self.tokens) - - def hasCurrent(self): - return self.cursor < len(self.tokens) - - def ahead(self): - self.cursor += 1 - - def snapshot(self): - self.snapshot = self.cursor - - def reset(self): - self.cursor = self.snapshot - return self.tokens[self.cursor] - - def __str__(self): - return f"[Cursor: {self.cursor}\n{', '.join([str(token) for token in self.tokens])}]" - - def __repr__(self): - return self.__str__() - -class TokenType(Enum): - OPEN_PAREN = 1 - CLOSE_PAREN = 2 - ASTERISK = 3 - STRING = 4 - IDENTIFIER = 5 - COMMA = 6 - INTEGER = 7 - OPEN_BRACKET = 8 - CLOSE_BRACKET = 9 - ASSIGN = 10 - COLON = 11 - NOTE = 12 - COMMENT = 13 - PERCENT = 14 - MINUS = 15 - FUNCTION = 16 - RETURN = 17 - DOT = 18 - -class Token: - def __init__(self, type, value, pos): - self.type = type - self.value = value - self.pos = pos - def __str__(self): - return "Token(" + str(self.type) + ", '" + self.value + "', " + str(self.pos) + ")" - def __repr__(self): - return self.__str__() - -def tokenizeOpenParen(input, current, line): - return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line) - -def tokenizeChar(type, char, input, current, line): - if input[current] == char: - return (1, Token(type, input[current], (line, current))) - return (0, None) - -def tokenizeCloseParen(input, current, line): - return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line) - -def tokenizeAsterisk(input, current, line): - return tokenizeChar(TokenType.ASTERISK, '*', input, current, line) - -def tokenizeString(input, current, line): - if input[current] == '"': - value = input[current] - char = '' - consumedChars = 1 - while char != '"': - if char is None: #TODO!!! 
- print("String not terminated") - char = input[current + consumedChars] - value += char - consumedChars += 1 - return (consumedChars, Token(TokenType.STRING, value, (line, current))) - return (0, None) - -def tokenizeRegexPattern(type, pattern, input, current, line): - consumedChars = 0 - value = '' - - while current+consumedChars < len(input) and re.match(pattern, input[current+consumedChars]): - value += input[current+consumedChars] - consumedChars += 1 - return (consumedChars, Token(type, value, (line, current)) if consumedChars > 0 else None) - -def tokenizeWhitespaces(input, current, line): - return tokenizeRegexPattern(None, r'\s', input, current, line) - -def tokenizeIdentifier(input, current, line): - return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line) - -def tokenizeComma(input, current, line): - return tokenizeChar(TokenType.COMMA, ',', input, current, line) - -def tokenizeInteger(input, current, line): - return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line) - -def tokenizeOpenBracket(input, current, line): - return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line) - -def tokenizeCloseBracket(input, current, line): - return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line) - -def tokenizeAssign(input, current, line): - return tokenizeChar(TokenType.ASSIGN, '=', input, current, line) - -def tokenizeColon(input, current, line): - return tokenizeChar(TokenType.COLON, ':', input, current, line) - -def tokenizeComment(input, current, line): - if input[current] == '#': - consumedChars = 0 - value = '' - while current+consumedChars < len(input): - value += input[current+consumedChars] - consumedChars += 1 - pass - return (consumedChars, Token(TokenType.COMMENT, value, (line, current))) - return (0, None) - -def tokenizeNote(input, current, line): - consumedChars = 0 - value = '' - if input[current] == '@': - consumedChars += 1 - value += input[current] - if input[current+consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'): - value += input[current+consumedChars] - consumedChars += 1 - - if current+consumedChars < len(input) and input[current+consumedChars] in ('b', '#'): - value += input[current+consumedChars] - consumedChars += 1 - - if current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]): - value += input[current+consumedChars] - consumedChars += 1 - - if current+consumedChars < len(input) and input[current+consumedChars] == '.': - duration = input[current+consumedChars] - consumedChars += 1 - while current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]): - duration += input[current+consumedChars] - consumedChars += 1 - if current+consumedChars < len(input) and input[current+consumedChars] == 'd': - duration += input[current+consumedChars] - consumedChars += 1 - if len(duration) > 1: - value += duration - else: - consumedChars -= 1 - return (consumedChars, Token(TokenType.NOTE, value, (line, current))) - return (0, None) - -def tokenizePercent(input, current, line): - return tokenizeChar(TokenType.PERCENT, '%', input, current, line) - -def tokenizeMinus(input, current, line): - return tokenizeChar(TokenType.MINUS, '-', input, current, line) - -def tokenizeFunction(input, current, line): - return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line) - -def tokenizeKeyword(type, keyword, input, current, line): - if len(input) >= current+len(keyword) and 
input[current:current+len(keyword)] == keyword: - return (len(keyword), Token(type, keyword, (line, current))) - return (0, None) - -def tokenizeReturn(input, current, line): - return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line) - -def tokenizeDot(input, current, line): - return tokenizeChar(TokenType.DOT, '.', input, current, line) - -tokenizers = ( - tokenizeOpenParen, - tokenizeCloseParen, - tokenizeAsterisk, - tokenizeString, - tokenizeFunction, - tokenizeReturn, - tokenizeInteger, - tokenizeNote, - tokenizeIdentifier, - tokenizeComma, - tokenizeOpenBracket, - tokenizeCloseBracket, - tokenizeAssign, - tokenizeColon, - tokenizePercent, - tokenizeMinus, - tokenizeDot, - tokenizeComment, - tokenizeWhitespaces, -) - -def doTokenize(lines): - tokens = [] - for lineNumber, line in enumerate(lines): - current = 0 - while current < len(line): - tokenized = False - for tokenizer in tokenizers: - consumedChars, value = tokenizer(line, current, lineNumber) - if consumedChars > 0: - tokens.append(value) - current += consumedChars - tokenized = True - break - - if not tokenized: - raise SyntaxException((lineNumber, current), f"Unknown symbol '{line[current]}'") - - return [token for token in tokens if token.type is not None] - -def tokenize(lines): - tokens = doTokenize(lines) - return Tokens([ token for token in tokens if token.type != TokenType.COMMENT]) diff --git a/main.py b/main.py deleted file mode 100644 index 759c7bc..0000000 --- a/main.py +++ /dev/null @@ -1,26 +0,0 @@ -from Tokenizer import tokenize -from Parser import parse -from Evaluator import evaluate -from Environment import createEnvironment -from Error import SyntaxException, RuntimeException -import sys - -if __name__ == "__main__": - try: - with open(sys.argv[1], 'r') as source: - lines = [line.rstrip('\n') for line in source.readlines()] - - env = createEnvironment() - - tokens = tokenize(lines) - - ast = parse(tokens) - - evaluate(ast, env) - except SyntaxException as e: - print(e.msg) - except RuntimeException as e: - print(e.msg) - except KeyboardInterrupt: - print("Program interrupted") - diff --git a/AST.py b/smnp/AST.py similarity index 100% rename from AST.py rename to smnp/AST.py diff --git a/Audio.py b/smnp/Audio.py similarity index 100% rename from Audio.py rename to smnp/Audio.py diff --git a/Evaluator.py b/smnp/Evaluator.py similarity index 100% rename from Evaluator.py rename to smnp/Evaluator.py diff --git a/NoiseDetector.py b/smnp/NoiseDetector.py similarity index 100% rename from NoiseDetector.py rename to smnp/NoiseDetector.py diff --git a/Note.py b/smnp/Note.py similarity index 100% rename from Note.py rename to smnp/Note.py diff --git a/smnp/OldParser.py b/smnp/OldParser.py new file mode 100644 index 0000000..1f4f42f --- /dev/null +++ b/smnp/OldParser.py @@ -0,0 +1,230 @@ +from Tokenizer import * +from Note import * +from AST import * +from Error import SyntaxException + +def expectedFound(expected, found): + raise SyntaxException(None, f"Expected: {expected}, found: {found}") + +def assertType(expected, found): + if expected != found: + raise SyntaxException(None, f"Expected: {expected}, found: {found}") + +def parseInteger(input, parent): + token = input.pop(0) + return IntegerLiteralNode(int(token.value), parent, token.pos) + +def parseString(input, parent): + token = input.pop(0) + return StringLiteralNode(token.value[1:-1], parent, token.pos) + +def parseNote(input, parent): + token = input.pop(0) + value = token.value + consumedChars = 1 + notePitch = value[consumedChars] + 
consumedChars += 1 + octave = 4 + duration = 4 + dot = False + if consumedChars < len(value) and value[consumedChars] in ('b', '#'): + notePitch += value[consumedChars] + consumedChars += 1 + if consumedChars < len(value) and re.match(r'\d', value[consumedChars]): + octave = int(value[consumedChars]) + consumedChars += 1 + if consumedChars < len(value) and value[consumedChars] == '.': + consumedChars += 1 + durationString = '' + while consumedChars < len(value) and re.match(r'\d', value[consumedChars]): + durationString += value[consumedChars] + consumedChars += 1 + duration = int(durationString) + if consumedChars < len(value) and value[consumedChars] == '.': + dot = True + consumedChars += 1 + + return NoteLiteralNode(Note(notePitch, octave, duration, dot), parent, token.pos) + +def parseComma(input, parent): + token = input.pop(0) + return CommaNode(parent, token.pos) + +def parseList(input, parent): + token = input.pop(0) + + node = ListNode(parent, token.pos) + + while input[0].type != TokenType.CLOSE_PAREN: + element = parseArrayElement(input, node) + if element is None: + raise SyntaxException(input[0].pos, "Invalid element '{input[0].value}'") + node.append(element) + + if input[0].type != TokenType.CLOSE_PAREN: + expectedFound(TokenType.CLOSE_PAREN, input[0].type) + input.pop(0) + + return node + +def parseBlock(input, parent): + token = input.pop(0) + + block = BlockNode(parent, token.pos) + + while input[0].type != TokenType.CLOSE_BRACKET: + block.append(parseToken(input, block)) + + if input[0].type != TokenType.CLOSE_BRACKET: + expectedFound(TokenType.CLOSE_BRACKET, input[0].type) + input.pop(0) + + return block + + +def parseAsterisk(input, parent): + token = input.pop(0) + + iterator = parent.pop(-1) + value = parseStatement(input, parent) + + asterisk = AsteriskStatementNode(iterator, value, parent, token.pos) + iterator.parent = asterisk + value.parent = asterisk + return asterisk + +def parseNoteOrColon(input, parent): + note = parseNote(input, parent) + if len(input) > 1 and input[0].type == TokenType.COLON: + token = input.pop(0) + b = parseNote(input, parent) + if b is None: + raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'") + colon = ColonNode(note, b, parent, token.pos) + note.parent = colon + b.parent = colon + return colon + + return note + +def parseIntegerOrColonOrPercent(input, parent): + integer = parseInteger(input, parent) + if len(input) > 1 and input[0].type == TokenType.COLON: + token = input.pop(0) + b = parseInteger(input, parent) + if b is None: + raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'") + colon = ColonNode(integer, b, parent, token.pos) + integer.parent = colon + b.parent = colon + return colon + + if len(input) > 0 and input[0].type == TokenType.PERCENT: + input.pop(0) + percent = PercentNode(integer, parent, integer.pos) + integer.parent = percent + return percent + + return integer + +def parseFunctionCallOrAssignOrIdentifier(input, parent): + token = input.pop(0) + identifier = IdentifierNode(token.value, parent, token.pos) + # Function call + if len(input) > 0 and input[0].type == TokenType.OPEN_PAREN: + arguments = parseList(input, parent) + func = FunctionCallNode(identifier, arguments, parent, token.pos) + identifier.parent = func + arguments.parent = func + return func + # Assign + if len(input) > 1 and input[0].type == TokenType.ASSIGN: + token = input.pop(0) + value = parseExpression(input, parent) # + assign = AssignExpression(identifier, value, parent, token.pos) + 
identifier.parent = assign + value.parent = assign + return assign + + return identifier + +def parseMinus(input, parent): + token = input.pop(0) + + value = parseInteger(input, parent) + + return IntegerLiteralNode(-value.value, parent, token.pos) + +def parseFunctionDefinition(input, parent): + input.pop(0) + + assertType(TokenType.IDENTIFIER, input[0].type) + token = input.pop(0) + name = IdentifierNode(token.value, parent, token.pos) + + assertType(TokenType.OPEN_PAREN, input[0].type) + parameters = parseList(input, parent) + + assertType(TokenType.OPEN_BRACKET, input[0].type) + body = parseBlock(input, parent) + + func = FunctionDefinitionNode(name, parameters, body, parent, token.pos) + name.parent = func + parameters.parent = func + body.parent = func + return func + +def parseReturn(input, parent): + token = input.pop(0) + + value = parseExpression(input, parent) + + returnNode = ReturnNode(value, parent, token.pos) + value.parent = returnNode + return returnNode + +def parseExpression(input, parent): + type = input[0].type + if type == TokenType.FUNCTION: + return parseFunctionDefinition(input, parent) + if type == TokenType.RETURN: + return parseReturn(input, parent) + if type == TokenType.MINUS: + return parseMinus(input, parent) + if type == TokenType.INTEGER: + return parseIntegerOrColonOrPercent(input, parent) + if type == TokenType.STRING: + return parseString(input, parent) + if type == TokenType.NOTE: + return parseNoteOrColon(input, parent) + if type == TokenType.IDENTIFIER: + return parseFunctionCallOrAssignOrIdentifier(input, parent) + if type == TokenType.OPEN_PAREN: + return parseList(input, parent) + raise SyntaxException(input[0].pos, f"Unexpected character '{input[0].value}'") + +def parseArrayElement(input, parent): + type = input[0].type + if type == TokenType.COMMA: + return parseComma(input, parent) + return parseExpression(input, parent) + +def parseStatement(input, parent): + type = input[0].type + if type == TokenType.OPEN_BRACKET: + return parseBlock(input, parent) + if type == TokenType.ASTERISK: + return parseAsterisk(input, parent) + + return parseExpression(input, parent) + +def parseToken(input, parent): + #import pdb; pdb.set_trace() + return parseStatement(input, parent) + + +def parse(input): + root = Program() + while len(input) > 0: + root.append(parseToken(input, root)) + return root diff --git a/Parser.py b/smnp/Parser.py similarity index 100% rename from Parser.py rename to smnp/Parser.py diff --git a/Synth.py b/smnp/Synth.py similarity index 100% rename from Synth.py rename to smnp/Synth.py diff --git a/smnp/__init__.py b/smnp/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/smnp/__main__.py b/smnp/__main__.py new file mode 100644 index 0000000..b2a4429 --- /dev/null +++ b/smnp/__main__.py @@ -0,0 +1,4 @@ +from smnp.main import main + +if __name__ == "__main__": + main() diff --git a/Environment.py b/smnp/environment/Environment.py similarity index 94% rename from Environment.py rename to smnp/environment/Environment.py index 195198f..2ac28e8 100644 --- a/Environment.py +++ b/smnp/environment/Environment.py @@ -1,5 +1,5 @@ import sys -from Evaluator import objectString +from parser.Environment import objectString from Note import * import random import Synth @@ -46,19 +46,6 @@ class Environment(): return scope else: return scope - -def sample(args, env): - if len(args) == 1 and isinstance(args[0], list): - return _sample(args[0]) - elif len(args) == 0: - return _sample(Note.range(Note(NotePitch.C), Note(NotePitch.H))) - 
elif all(isinstance(x, Note) for x in args):
-        return _sample(args)
-    else:
-        pass # not valid signature
-
-def _sample(list):
-    return list[int(random.uniform(0, len(list)))]
 
 def doPrint(args, env):
     print("".join([objectString(arg) for arg in args]))
diff --git a/smnp/error/__init__.py b/smnp/error/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/smnp/error/runtime.py b/smnp/error/runtime.py
new file mode 100644
index 0000000..8a60c36
--- /dev/null
+++ b/smnp/error/runtime.py
@@ -0,0 +1,4 @@
+class RuntimeException(Exception):
+    def __init__(self, pos, msg):
+        posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
+        self.msg = f"Runtime error {posStr}:\n{msg}"
diff --git a/Error.py b/smnp/error/syntax.py
similarity index 50%
rename from Error.py
rename to smnp/error/syntax.py
index c1b588e..082fd24 100644
--- a/Error.py
+++ b/smnp/error/syntax.py
@@ -2,8 +2,3 @@ class SyntaxException(Exception):
     def __init__(self, pos, msg):
         posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
         self.msg = f"Syntax error {posStr}:\n{msg}"
-
-class RuntimeException(Exception):
-    def __init__(self, pos, msg):
-        posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
-        self.msg = f"Syntax error {posStr}:\n{msg}"
diff --git a/smnp/main.py b/smnp/main.py
new file mode 100644
index 0000000..b111f17
--- /dev/null
+++ b/smnp/main.py
@@ -0,0 +1,29 @@
+import sys
+from smnp.error.syntax import SyntaxException
+from smnp.error.runtime import RuntimeException
+from smnp.token.tokenizer import tokenize
+#from Tokenizer import tokenize
+#from Parser import parse
+#from Evaluator import evaluate
+#from Environment import createEnvironment
+#from Error import SyntaxException, RuntimeException
+
+def main():
+    try:
+        with open(sys.argv[1], 'r') as source:
+            lines = [line.rstrip('\n') for line in source.readlines()]
+
+        #env = createEnvironment()
+
+        tokens = tokenize(lines)
+        print(tokens)
+        #ast = parse(tokens)
+
+        #evaluate(ast, env)
+    except SyntaxException as e:
+        print(e.msg)
+    except RuntimeException as e:
+        print(e.msg)
+    except KeyboardInterrupt:
+        print("Program interrupted")
+
diff --git a/smnp/token/__init__.py b/smnp/token/__init__.py
new file mode 100644
index 0000000..e18e085
--- /dev/null
+++ b/smnp/token/__init__.py
@@ -0,0 +1 @@
+__all__ = ["tokenize"]
diff --git a/smnp/token/model.py b/smnp/token/model.py
new file mode 100644
index 0000000..4f77028
--- /dev/null
+++ b/smnp/token/model.py
@@ -0,0 +1,54 @@
+class Token:
+    def __init__(self, type, value, pos):
+        self.type = type
+        self.value = value
+        self.pos = pos
+    def __str__(self):
+        return "Token(" + str(self.type) + ", '" + self.value + "', " + str(self.pos) + ")"
+    def __repr__(self):
+        return self.__str__()
+
+class TokenList:
+    def __init__(self, tokens=None):
+        self.tokens = [] if tokens is None else tokens
+        self.cursor = 0
+        self.snap = 0
+
+    def append(self, token):
+        self.tokens.append(token)
+
+    def __getitem__(self, index):
+        return self.tokens[index]
+
+    def current(self):
+        if self.cursor >= len(self.tokens):
+            raise RuntimeError(f"Cursor points to not existing token! Cursor = {self.cursor}, len = {len(self.tokens)}")
+        return self.tokens[self.cursor]
+
+    def next(self, number=1):
+        return self.tokens[self.cursor + number]
+
+    def prev(self, number=1):
+        return self.tokens[self.cursor - number]
+
+    def hasMore(self, count=1):
+        return self.cursor + count < len(self.tokens)
+
+    def hasCurrent(self):
+        return self.cursor < len(self.tokens)
+
+    def ahead(self):
+        self.cursor += 1
+
+    def snapshot(self):
+        self.snap = self.cursor
+
+    def reset(self):
+        self.cursor = self.snap
+        return self.tokens[self.cursor]
+
+    def __str__(self):
+        return f"[Cursor: {self.cursor}\n{', '.join([str(token) for token in self.tokens])}]"
+
+    def __repr__(self):
+        return self.__str__()
diff --git a/smnp/token/tokenizer.py b/smnp/token/tokenizer.py
new file mode 100644
index 0000000..830dc16
--- /dev/null
+++ b/smnp/token/tokenizer.py
@@ -0,0 +1,81 @@
+import sys
+import time
+import re
+from smnp.error.syntax import SyntaxException
+from smnp.token.type import TokenType
+from smnp.token.model import Token, TokenList
+from smnp.token.tools import tokenizeChar, tokenizeRegexPattern
+from smnp.token.tokenizers.paren import tokenizeOpenParen, tokenizeCloseParen
+from smnp.token.tokenizers.asterisk import tokenizeAsterisk
+from smnp.token.tokenizers.whitespace import tokenizeWhitespaces
+from smnp.token.tokenizers.identifier import tokenizeIdentifier
+from smnp.token.tokenizers.comma import tokenizeComma
+from smnp.token.tokenizers.string import tokenizeString
+from smnp.token.tokenizers.integer import tokenizeInteger
+from smnp.token.tokenizers.bracket import tokenizeOpenBracket, tokenizeCloseBracket
+from smnp.token.tokenizers.assign import tokenizeAssign
+from smnp.token.tokenizers.colon import tokenizeColon
+from smnp.token.tokenizers.comment import tokenizeComment
+from smnp.token.tokenizers.note import tokenizeNote
+from smnp.token.tokenizers.function import tokenizeFunction
+from smnp.token.tokenizers.ret import tokenizeReturn
+from smnp.token.tokenizers.percent import tokenizePercent
+from smnp.token.tokenizers.minus import tokenizeMinus
+from smnp.token.tokenizers.dot import tokenizeDot
+
+tokenizers = (
+    tokenizeOpenParen,
+    tokenizeCloseParen,
+    tokenizeAsterisk,
+    tokenizeString,
+    tokenizeFunction,
+    tokenizeReturn,
+    tokenizeInteger,
+    tokenizeNote,
+    tokenizeIdentifier,
+    tokenizeComma,
+    tokenizeOpenBracket,
+    tokenizeCloseBracket,
+    tokenizeAssign,
+    tokenizeColon,
+    tokenizePercent,
+    tokenizeMinus,
+    tokenizeDot,
+    tokenizeComment,
+    tokenizeWhitespaces,
+)
+
+filters = [
+    lambda token: token.type is not None,
+    lambda token: token.type != TokenType.COMMENT
+]
+
+def tokenize(lines):
+    tokens = []
+    for lineNumber, line in enumerate(lines):
+        current = 0
+        while current < len(line):
+            consumedChars, token = combinedTokenizer(line, current, lineNumber)
+
+            if consumedChars == 0:
+                raise SyntaxException((lineNumber, current), f"Unknown symbol '{line[current]}'")
+
+            current += consumedChars
+            tokens.append(token)
+
+    return TokenList(filterTokens(filters, tokens))
+
+def combinedTokenizer(line, current, lineNumber):
+    for tokenizer in tokenizers:
+        consumedChars, token = tokenizer(line, current, lineNumber)
+        if consumedChars > 0:
+            return (consumedChars, token)
+    return (0, None)
+
+def filterTokens(filters, tokens):
+    if not filters:
+        return tokens
+
+    return filterTokens(filters[1:], [token for token in tokens if filters[0](token)])
+
+__all__ = ["tokenize"]
diff --git a/smnp/token/tokenizers/__init__.py b/smnp/token/tokenizers/__init__.py
new file
mode 100644 index 0000000..e69de29 diff --git a/smnp/token/tokenizers/assign.py b/smnp/token/tokenizers/assign.py new file mode 100644 index 0000000..ca61260 --- /dev/null +++ b/smnp/token/tokenizers/assign.py @@ -0,0 +1,5 @@ +from smnp.token.tools import tokenizeChar +from smnp.token.type import TokenType + +def tokenizeAssign(input, current, line): + return tokenizeChar(TokenType.ASSIGN, '=', input, current, line) diff --git a/smnp/token/tokenizers/asterisk.py b/smnp/token/tokenizers/asterisk.py new file mode 100644 index 0000000..42bb212 --- /dev/null +++ b/smnp/token/tokenizers/asterisk.py @@ -0,0 +1,5 @@ +from smnp.token.tools import tokenizeChar +from smnp.token.type import TokenType + +def tokenizeAsterisk(input, current, line): + return tokenizeChar(TokenType.ASTERISK, '*', input, current, line) diff --git a/smnp/token/tokenizers/bracket.py b/smnp/token/tokenizers/bracket.py new file mode 100644 index 0000000..d160461 --- /dev/null +++ b/smnp/token/tokenizers/bracket.py @@ -0,0 +1,8 @@ +from smnp.token.tools import tokenizeChar +from smnp.token.type import TokenType + +def tokenizeOpenBracket(input, current, line): + return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line) + +def tokenizeCloseBracket(input, current, line): + return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line) diff --git a/smnp/token/tokenizers/colon.py b/smnp/token/tokenizers/colon.py new file mode 100644 index 0000000..05a6571 --- /dev/null +++ b/smnp/token/tokenizers/colon.py @@ -0,0 +1,5 @@ +from smnp.token.tools import tokenizeChar +from smnp.token.type import TokenType + +def tokenizeColon(input, current, line): + return tokenizeChar(TokenType.COLON, ':', input, current, line) diff --git a/smnp/token/tokenizers/comma.py b/smnp/token/tokenizers/comma.py new file mode 100644 index 0000000..722dbba --- /dev/null +++ b/smnp/token/tokenizers/comma.py @@ -0,0 +1,5 @@ +from smnp.token.tools import tokenizeChar +from smnp.token.type import TokenType + +def tokenizeComma(input, current, line): + return tokenizeChar(TokenType.COMMA, ',', input, current, line) diff --git a/smnp/token/tokenizers/comment.py b/smnp/token/tokenizers/comment.py new file mode 100644 index 0000000..c5fd12c --- /dev/null +++ b/smnp/token/tokenizers/comment.py @@ -0,0 +1,13 @@ +from smnp.token.type import TokenType +from smnp.token.model import Token + +def tokenizeComment(input, current, line): + if input[current] == '#': + consumedChars = 0 + value = '' + while current+consumedChars < len(input): + value += input[current+consumedChars] + consumedChars += 1 + pass + return (consumedChars, Token(TokenType.COMMENT, value, (line, current))) + return (0, None) diff --git a/smnp/token/tokenizers/dot.py b/smnp/token/tokenizers/dot.py new file mode 100644 index 0000000..f959dc2 --- /dev/null +++ b/smnp/token/tokenizers/dot.py @@ -0,0 +1,5 @@ +from smnp.token.tools import tokenizeChar +from smnp.token.type import TokenType + +def tokenizeDot(input, current, line): + return tokenizeChar(TokenType.DOT, '.', input, current, line) diff --git a/smnp/token/tokenizers/function.py b/smnp/token/tokenizers/function.py new file mode 100644 index 0000000..2d12bab --- /dev/null +++ b/smnp/token/tokenizers/function.py @@ -0,0 +1,5 @@ +from smnp.token.tools import tokenizeKeyword +from smnp.token.type import TokenType + +def tokenizeFunction(input, current, line): + return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line) diff --git a/smnp/token/tokenizers/identifier.py b/smnp/token/tokenizers/identifier.py new 
file mode 100644 index 0000000..35b8835 --- /dev/null +++ b/smnp/token/tokenizers/identifier.py @@ -0,0 +1,5 @@ +from smnp.token.tools import tokenizeRegexPattern +from smnp.token.type import TokenType + +def tokenizeIdentifier(input, current, line): + return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line) diff --git a/smnp/token/tokenizers/integer.py b/smnp/token/tokenizers/integer.py new file mode 100644 index 0000000..48b31ed --- /dev/null +++ b/smnp/token/tokenizers/integer.py @@ -0,0 +1,5 @@ +from smnp.token.tools import tokenizeRegexPattern +from smnp.token.type import TokenType + +def tokenizeInteger(input, current, line): + return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line) diff --git a/smnp/token/tokenizers/minus.py b/smnp/token/tokenizers/minus.py new file mode 100644 index 0000000..7b40a2b --- /dev/null +++ b/smnp/token/tokenizers/minus.py @@ -0,0 +1,5 @@ +from smnp.token.tools import tokenizeChar +from smnp.token.type import TokenType + +def tokenizeMinus(input, current, line): + return tokenizeChar(TokenType.MINUS, '-', input, current, line) diff --git a/smnp/token/tokenizers/note.py b/smnp/token/tokenizers/note.py new file mode 100644 index 0000000..b0d3f7c --- /dev/null +++ b/smnp/token/tokenizers/note.py @@ -0,0 +1,37 @@ +import re +from smnp.token.type import TokenType +from smnp.token.model import Token + +def tokenizeNote(input, current, line): + consumedChars = 0 + value = '' + if input[current] == '@': + consumedChars += 1 + value += input[current] + if input[current+consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'): + value += input[current+consumedChars] + consumedChars += 1 + + if current+consumedChars < len(input) and input[current+consumedChars] in ('b', '#'): + value += input[current+consumedChars] + consumedChars += 1 + + if current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]): + value += input[current+consumedChars] + consumedChars += 1 + + if current+consumedChars < len(input) and input[current+consumedChars] == '.': + duration = input[current+consumedChars] + consumedChars += 1 + while current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]): + duration += input[current+consumedChars] + consumedChars += 1 + if current+consumedChars < len(input) and input[current+consumedChars] == 'd': + duration += input[current+consumedChars] + consumedChars += 1 + if len(duration) > 1: + value += duration + else: + consumedChars -= 1 + return (consumedChars, Token(TokenType.NOTE, value, (line, current))) + return (0, None) diff --git a/smnp/token/tokenizers/paren.py b/smnp/token/tokenizers/paren.py new file mode 100644 index 0000000..44d324c --- /dev/null +++ b/smnp/token/tokenizers/paren.py @@ -0,0 +1,8 @@ +from smnp.token.tools import tokenizeChar +from smnp.token.type import TokenType + +def tokenizeOpenParen(input, current, line): + return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line) + +def tokenizeCloseParen(input, current, line): + return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line) diff --git a/smnp/token/tokenizers/percent.py b/smnp/token/tokenizers/percent.py new file mode 100644 index 0000000..cccb638 --- /dev/null +++ b/smnp/token/tokenizers/percent.py @@ -0,0 +1,5 @@ +from smnp.token.tools import tokenizeChar +from smnp.token.type import TokenType + +def tokenizePercent(input, current, line): + return tokenizeChar(TokenType.PERCENT, '%', input, current, line) diff --git 
a/smnp/token/tokenizers/ret.py b/smnp/token/tokenizers/ret.py
new file mode 100644
index 0000000..2d9a387
--- /dev/null
+++ b/smnp/token/tokenizers/ret.py
@@ -0,0 +1,5 @@
+from smnp.token.tools import tokenizeKeyword
+from smnp.token.type import TokenType
+
+def tokenizeReturn(input, current, line):
+    return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line)
diff --git a/smnp/token/tokenizers/string.py b/smnp/token/tokenizers/string.py
new file mode 100644
index 0000000..d66beae
--- /dev/null
+++ b/smnp/token/tokenizers/string.py
@@ -0,0 +1,17 @@
+from smnp.token.type import TokenType
+from smnp.token.model import Token
+from smnp.error.syntax import SyntaxException
+
+def tokenizeString(input, current, line):
+    if input[current] == '"':
+        value = input[current]
+        char = ''
+        consumedChars = 1
+        while char != '"':
+            if current + consumedChars >= len(input):
+                raise SyntaxException((line, current), "String not terminated")
+            char = input[current + consumedChars]
+            value += char
+            consumedChars += 1
+        return (consumedChars, Token(TokenType.STRING, value, (line, current)))
+    return (0, None)
diff --git a/smnp/token/tokenizers/whitespace.py b/smnp/token/tokenizers/whitespace.py
new file mode 100644
index 0000000..a4cc2b3
--- /dev/null
+++ b/smnp/token/tokenizers/whitespace.py
@@ -0,0 +1,4 @@
+from smnp.token.tools import tokenizeRegexPattern
+
+def tokenizeWhitespaces(input, current, line):
+    return tokenizeRegexPattern(None, r'\s', input, current, line)
diff --git a/smnp/token/tools.py b/smnp/token/tools.py
new file mode 100644
index 0000000..9f4e902
--- /dev/null
+++ b/smnp/token/tools.py
@@ -0,0 +1,21 @@
+import re
+from smnp.token.model import Token
+
+def tokenizeChar(type, char, input, current, line):
+    if input[current] == char:
+        return (1, Token(type, input[current], (line, current)))
+    return (0, None)
+
+def tokenizeRegexPattern(type, pattern, input, current, line):
+    consumedChars = 0
+    value = ''
+
+    while current+consumedChars < len(input) and re.match(pattern, input[current+consumedChars]):
+        value += input[current+consumedChars]
+        consumedChars += 1
+    return (consumedChars, Token(type, value, (line, current)) if consumedChars > 0 else None)
+
+def tokenizeKeyword(type, keyword, input, current, line):
+    if len(input) >= current+len(keyword) and input[current:current+len(keyword)] == keyword:
+        return (len(keyword), Token(type, keyword, (line, current)))
+    return (0, None)
diff --git a/smnp/token/type.py b/smnp/token/type.py
new file mode 100644
index 0000000..9aff113
--- /dev/null
+++ b/smnp/token/type.py
@@ -0,0 +1,21 @@
+from enum import Enum
+
+class TokenType(Enum):
+    OPEN_PAREN = 1
+    CLOSE_PAREN = 2
+    ASTERISK = 3
+    STRING = 4
+    IDENTIFIER = 5
+    COMMA = 6
+    INTEGER = 7
+    OPEN_BRACKET = 8
+    CLOSE_BRACKET = 9
+    ASSIGN = 10
+    COLON = 11
+    NOTE = 12
+    COMMENT = 13
+    PERCENT = 14
+    MINUS = 15
+    FUNCTION = 16
+    RETURN = 17
+    DOT = 18
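
As a quick smoke test of the relocated tokenizer, something along these lines should work after this change (a sketch only: the sample input lines are invented, not taken from the repository; the package itself is run as python -m smnp <file> via smnp/__main__.py):

    # Sketch: feed a couple of hand-written source lines to the new tokenize()
    # entry point. 'x = 42' and '@C4.8' are assumed example inputs, not taken
    # from the repository's test files.
    from smnp.token.tokenizer import tokenize

    lines = ['x = 42', '@C4.8  # a note literal']
    tokens = tokenize(lines)  # returns a TokenList; whitespace and comment tokens are filtered out
    print(tokens)             # TokenList.__str__ shows the cursor position and every token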