Refactor tokenizer
2  .gitignore  vendored  Normal file
@@ -0,0 +1,2 @@
__pycache__/
*.mus
251  Tokenizer.py
@@ -1,251 +0,0 @@
from enum import Enum
import time
import re
import sys
from Error import SyntaxException


class Tokens:
    def __init__(self, tokens = []):
        self.tokens = tokens
        self.cursor = 0
        self.snap = 0

    def append(self, token):
        self.tokens.append(token)

    def __getitem__(self, index):
        return self.tokens[index]

    def current(self):
        if self.cursor >= len(self.tokens):
            raise RuntimeError(f"Cursor points to not existing token! Cursor = {self.cursor}, len = {len(self.tokens)}")
        return self.tokens[self.cursor]

    def next(self, number=1):
        return self.tokens[self.cursor + number]

    def prev(self, number=1):
        return self.tokens[self.cursor - number]

    def hasMore(self, count=1):
        return self.cursor + count < len(self.tokens)

    def hasCurrent(self):
        return self.cursor < len(self.tokens)

    def ahead(self):
        self.cursor += 1

    def snapshot(self):
        self.snapshot = self.cursor

    def reset(self):
        self.cursor = self.snapshot
        return self.tokens[self.cursor]

    def __str__(self):
        return f"[Cursor: {self.cursor}\n{', '.join([str(token) for token in self.tokens])}]"

    def __repr__(self):
        return self.__str__()


class TokenType(Enum):
    OPEN_PAREN = 1
    CLOSE_PAREN = 2
    ASTERISK = 3
    STRING = 4
    IDENTIFIER = 5
    COMMA = 6
    INTEGER = 7
    OPEN_BRACKET = 8
    CLOSE_BRACKET = 9
    ASSIGN = 10
    COLON = 11
    NOTE = 12
    COMMENT = 13
    PERCENT = 14
    MINUS = 15
    FUNCTION = 16
    RETURN = 17
    DOT = 18


class Token:
    def __init__(self, type, value, pos):
        self.type = type
        self.value = value
        self.pos = pos

    def __str__(self):
        return "Token(" + str(self.type) + ", '" + self.value + "', " + str(self.pos) + ")"

    def __repr__(self):
        return self.__str__()


def tokenizeOpenParen(input, current, line):
    return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line)


def tokenizeChar(type, char, input, current, line):
    if input[current] == char:
        return (1, Token(type, input[current], (line, current)))
    return (0, None)


def tokenizeCloseParen(input, current, line):
    return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line)


def tokenizeAsterisk(input, current, line):
    return tokenizeChar(TokenType.ASTERISK, '*', input, current, line)


def tokenizeString(input, current, line):
    if input[current] == '"':
        value = input[current]
        char = ''
        consumedChars = 1
        while char != '"':
            if char is None: #TODO!!!
                print("String not terminated")
            char = input[current + consumedChars]
            value += char
            consumedChars += 1
        return (consumedChars, Token(TokenType.STRING, value, (line, current)))
    return (0, None)


def tokenizeRegexPattern(type, pattern, input, current, line):
    consumedChars = 0
    value = ''

    while current+consumedChars < len(input) and re.match(pattern, input[current+consumedChars]):
        value += input[current+consumedChars]
        consumedChars += 1
    return (consumedChars, Token(type, value, (line, current)) if consumedChars > 0 else None)


def tokenizeWhitespaces(input, current, line):
    return tokenizeRegexPattern(None, r'\s', input, current, line)


def tokenizeIdentifier(input, current, line):
    return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line)


def tokenizeComma(input, current, line):
    return tokenizeChar(TokenType.COMMA, ',', input, current, line)


def tokenizeInteger(input, current, line):
    return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line)


def tokenizeOpenBracket(input, current, line):
    return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line)


def tokenizeCloseBracket(input, current, line):
    return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line)


def tokenizeAssign(input, current, line):
    return tokenizeChar(TokenType.ASSIGN, '=', input, current, line)


def tokenizeColon(input, current, line):
    return tokenizeChar(TokenType.COLON, ':', input, current, line)


def tokenizeComment(input, current, line):
    if input[current] == '#':
        consumedChars = 0
        value = ''
        while current+consumedChars < len(input):
            value += input[current+consumedChars]
            consumedChars += 1
            pass
        return (consumedChars, Token(TokenType.COMMENT, value, (line, current)))
    return (0, None)


def tokenizeNote(input, current, line):
    consumedChars = 0
    value = ''
    if input[current] == '@':
        consumedChars += 1
        value += input[current]
        if input[current+consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and input[current+consumedChars] in ('b', '#'):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and input[current+consumedChars] == '.':
            duration = input[current+consumedChars]
            consumedChars += 1
            while current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
                duration += input[current+consumedChars]
                consumedChars += 1
            if current+consumedChars < len(input) and input[current+consumedChars] == 'd':
                duration += input[current+consumedChars]
                consumedChars += 1
            if len(duration) > 1:
                value += duration
            else:
                consumedChars -= 1
        return (consumedChars, Token(TokenType.NOTE, value, (line, current)))
    return (0, None)


def tokenizePercent(input, current, line):
    return tokenizeChar(TokenType.PERCENT, '%', input, current, line)


def tokenizeMinus(input, current, line):
    return tokenizeChar(TokenType.MINUS, '-', input, current, line)


def tokenizeFunction(input, current, line):
    return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line)


def tokenizeKeyword(type, keyword, input, current, line):
    if len(input) >= current+len(keyword) and input[current:current+len(keyword)] == keyword:
        return (len(keyword), Token(type, keyword, (line, current)))
    return (0, None)


def tokenizeReturn(input, current, line):
    return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line)


def tokenizeDot(input, current, line):
    return tokenizeChar(TokenType.DOT, '.', input, current, line)


tokenizers = (
    tokenizeOpenParen,
    tokenizeCloseParen,
    tokenizeAsterisk,
    tokenizeString,
    tokenizeFunction,
    tokenizeReturn,
    tokenizeInteger,
    tokenizeNote,
    tokenizeIdentifier,
    tokenizeComma,
    tokenizeOpenBracket,
    tokenizeCloseBracket,
    tokenizeAssign,
    tokenizeColon,
    tokenizePercent,
    tokenizeMinus,
    tokenizeDot,
    tokenizeComment,
    tokenizeWhitespaces,
)


def doTokenize(lines):
    tokens = []
    for lineNumber, line in enumerate(lines):
        current = 0
        while current < len(line):
            tokenized = False
            for tokenizer in tokenizers:
                consumedChars, value = tokenizer(line, current, lineNumber)
                if consumedChars > 0:
                    tokens.append(value)
                    current += consumedChars
                    tokenized = True
                    break

            if not tokenized:
                raise SyntaxException((lineNumber, current), f"Unknown symbol '{line[current]}'")

    return [token for token in tokens if token.type is not None]


def tokenize(lines):
    tokens = doTokenize(lines)
    return Tokens([token for token in tokens if token.type != TokenType.COMMENT])
26  main.py
@@ -1,26 +0,0 @@
from Tokenizer import tokenize
from Parser import parse
from Evaluator import evaluate
from Environment import createEnvironment
from Error import SyntaxException, RuntimeException
import sys

if __name__ == "__main__":
    try:
        with open(sys.argv[1], 'r') as source:
            lines = [line.rstrip('\n') for line in source.readlines()]

        env = createEnvironment()

        tokens = tokenize(lines)

        ast = parse(tokens)

        evaluate(ast, env)
    except SyntaxException as e:
        print(e.msg)
    except RuntimeException as e:
        print(e.msg)
    except KeyboardInterrupt:
        print("Program interrupted")
230  smnp/OldParser.py  Normal file
@@ -0,0 +1,230 @@
from Tokenizer import *
from Note import *
from AST import *
from Error import SyntaxException


def expectedFound(expected, found):
    raise SyntaxException(None, f"Expected: {expected}, found: {found}")


def assertType(expected, found):
    if expected != found:
        raise SyntaxException(None, f"Expected: {expected}, found: {found}")


def parseInteger(input, parent):
    token = input.pop(0)
    return IntegerLiteralNode(int(token.value), parent, token.pos)


def parseString(input, parent):
    token = input.pop(0)
    return StringLiteralNode(token.value[1:-1], parent, token.pos)


def parseNote(input, parent):
    token = input.pop(0)
    value = token.value
    consumedChars = 1
    notePitch = value[consumedChars]
    consumedChars += 1
    octave = 4
    duration = 4
    dot = False
    if consumedChars < len(value) and value[consumedChars] in ('b', '#'):
        notePitch += value[consumedChars]
        consumedChars += 1
    if consumedChars < len(value) and re.match(r'\d', value[consumedChars]):
        octave = int(value[consumedChars])
        consumedChars += 1
    if consumedChars < len(value) and value[consumedChars] == '.':
        consumedChars += 1
        durationString = ''
        while consumedChars < len(value) and re.match(r'\d', value[consumedChars]):
            durationString += value[consumedChars]
            consumedChars += 1
        duration = int(durationString)
        if consumedChars < len(value) and value[consumedChars] == '.':
            dot = True
            consumedChars += 1

    return NoteLiteralNode(Note(notePitch, octave, duration, dot), parent, token.pos)


def parseComma(input, parent):
    token = input.pop(0)
    return CommaNode(parent, token.pos)


def parseList(input, parent):
    token = input.pop(0)

    node = ListNode(parent, token.pos)

    while input[0].type != TokenType.CLOSE_PAREN:
        element = parseArrayElement(input, node)
        if element is None:
            raise SyntaxException(input[0].pos, f"Invalid element '{input[0].value}'")
        node.append(element)

    if input[0].type != TokenType.CLOSE_PAREN:
        expectedFound(TokenType.CLOSE_PAREN, input[0].type)
    input.pop(0)

    return node


def parseBlock(input, parent):
    token = input.pop(0)

    block = BlockNode(parent, token.pos)

    while input[0].type != TokenType.CLOSE_BRACKET:
        block.append(parseToken(input, block))

    if input[0].type != TokenType.CLOSE_BRACKET:
        expectedFound(TokenType.CLOSE_BRACKET, input[0].type)
    input.pop(0)

    return block


def parseAsterisk(input, parent):
    token = input.pop(0)

    iterator = parent.pop(-1)
    value = parseStatement(input, parent)

    asterisk = AsteriskStatementNode(iterator, value, parent, token.pos)
    iterator.parent = asterisk
    value.parent = asterisk
    return asterisk


def parseNoteOrColon(input, parent):
    note = parseNote(input, parent)
    if len(input) > 1 and input[0].type == TokenType.COLON:
        token = input.pop(0)
        b = parseNote(input, parent)
        if b is None:
            raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'")
        colon = ColonNode(note, b, parent, token.pos)
        note.parent = colon
        b.parent = colon
        return colon

    return note


def parseIntegerOrColonOrPercent(input, parent):
    integer = parseInteger(input, parent)
    if len(input) > 1 and input[0].type == TokenType.COLON:
        token = input.pop(0)
        b = parseInteger(input, parent)
        if b is None:
            raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'")
        colon = ColonNode(integer, b, parent, token.pos)
        integer.parent = colon
        b.parent = colon
        return colon

    if len(input) > 0 and input[0].type == TokenType.PERCENT:
        input.pop(0)
        percent = PercentNode(integer, parent, integer.pos)
        integer.parent = percent
        return percent

    return integer


def parseFunctionCallOrAssignOrIdentifier(input, parent):
    token = input.pop(0)
    identifier = IdentifierNode(token.value, parent, token.pos)
    # Function call
    if len(input) > 0 and input[0].type == TokenType.OPEN_PAREN:
        arguments = parseList(input, parent)
        func = FunctionCallNode(identifier, arguments, parent, token.pos)
        identifier.parent = func
        arguments.parent = func
        return func
    # Assign
    if len(input) > 1 and input[0].type == TokenType.ASSIGN:
        token = input.pop(0)
        value = parseExpression(input, parent)
        assign = AssignExpression(identifier, value, parent, token.pos)
        identifier.parent = assign
        value.parent = assign
        return assign

    return identifier


def parseMinus(input, parent):
    token = input.pop(0)

    value = parseInteger(input, parent)

    return IntegerLiteralNode(-value.value, parent, token.pos)


def parseFunctionDefinition(input, parent):
    input.pop(0)

    assertType(TokenType.IDENTIFIER, input[0].type)
    token = input.pop(0)
    name = IdentifierNode(token.value, parent, token.pos)

    assertType(TokenType.OPEN_PAREN, input[0].type)
    parameters = parseList(input, parent)

    assertType(TokenType.OPEN_BRACKET, input[0].type)
    body = parseBlock(input, parent)

    func = FunctionDefinitionNode(name, parameters, body, parent, token.pos)
    name.parent = func
    parameters.parent = func
    body.parent = func
    return func


def parseReturn(input, parent):
    token = input.pop(0)

    value = parseExpression(input, parent)

    returnNode = ReturnNode(value, parent, token.pos)
    value.parent = returnNode
    return returnNode


def parseExpression(input, parent):
    type = input[0].type
    if type == TokenType.FUNCTION:
        return parseFunctionDefinition(input, parent)
    if type == TokenType.RETURN:
        return parseReturn(input, parent)
    if type == TokenType.MINUS:
        return parseMinus(input, parent)
    if type == TokenType.INTEGER:
        return parseIntegerOrColonOrPercent(input, parent)
    if type == TokenType.STRING:
        return parseString(input, parent)
    if type == TokenType.NOTE:
        return parseNoteOrColon(input, parent)
    if type == TokenType.IDENTIFIER:
        return parseFunctionCallOrAssignOrIdentifier(input, parent)
    if type == TokenType.OPEN_PAREN:
        return parseList(input, parent)
    raise SyntaxException(input[0].pos, f"Unexpected character '{input[0].value}'")


def parseArrayElement(input, parent):
    type = input[0].type
    if type == TokenType.COMMA:
        return parseComma(input, parent)
    return parseExpression(input, parent)


def parseStatement(input, parent):
    type = input[0].type
    if type == TokenType.OPEN_BRACKET:
        return parseBlock(input, parent)
    if type == TokenType.ASTERISK:
        return parseAsterisk(input, parent)

    return parseExpression(input, parent)


def parseToken(input, parent):
    #import pdb; pdb.set_trace()
    return parseStatement(input, parent)


def parse(input):
    root = Program()
    while len(input) > 0:
        root.append(parseToken(input, root))
    return root
0  smnp/__init__.py  Normal file
4  smnp/__main__.py  Normal file
@@ -0,0 +1,4 @@
from smnp.main import main

if __name__ == "__main__":
    main()
@@ -1,5 +1,5 @@
 import sys
-from Evaluator import objectString
+from parser.Environment import objectString
 from Note import *
 import random
 import Synth
@@ -46,19 +46,6 @@ class Environment():
             return scope
         else:
             return scope
-
-def sample(args, env):
-    if len(args) == 1 and isinstance(args[0], list):
-        return _sample(args[0])
-    elif len(args) == 0:
-        return _sample(Note.range(Note(NotePitch.C), Note(NotePitch.H)))
-    elif all(isinstance(x, Note) for x in args):
-        return _sample(args)
-    else:
-        pass # not valid signature
-
-def _sample(list):
-    return list[int(random.uniform(0, len(list)))]
-
 def doPrint(args, env):
     print("".join([objectString(arg) for arg in args]))
0  smnp/error/__init__.py  Normal file
4  smnp/error/runtime.py  Normal file
@@ -0,0 +1,4 @@
class RuntimeException(Exception):
    def __init__(self, pos, msg):
        posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
        self.msg = f"Runtime error {posStr}:\n{msg}"
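As a reference for the formatting above, a minimal usage sketch (not part of the diff; the position and message are invented values):

# Sketch: pos is a 0-based (line, column) pair, rendered 1-based by the formatter.
from smnp.error.runtime import RuntimeException

e = RuntimeException((2, 4), "Undefined variable 'x'")
print(e.msg)
# Runtime error [line 3, col 5]:
# Undefined variable 'x'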
@@ -2,8 +2,3 @@ class SyntaxException(Exception):
     def __init__(self, pos, msg):
         posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
         self.msg = f"Syntax error {posStr}:\n{msg}"
-
-class RuntimeException(Exception):
-    def __init__(self, pos, msg):
-        posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
-        self.msg = f"Syntax error {posStr}:\n{msg}"
29  smnp/main.py  Normal file
@@ -0,0 +1,29 @@
import sys
from smnp.error.syntax import SyntaxException
from smnp.error.runtime import RuntimeException
from smnp.token.tokenizer import tokenize
#from Tokenizer import tokenize
#from Parser import parse
#from Evaluator import evaluate
#from Environment import createEnvironment
#from Error import SyntaxException, RuntimeException


def main():
    try:
        with open(sys.argv[1], 'r') as source:
            lines = [line.rstrip('\n') for line in source.readlines()]

        #env = createEnvironment()

        tokens = tokenize(lines)
        print(tokens)
        #ast = parse(tokens)

        #evaluate(ast, env)
    except SyntaxException as e:
        print(e.msg)
    except RuntimeException as e:
        print(e.msg)
    except KeyboardInterrupt:
        print("Program interrupted")
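With this layout the interpreter would presumably be launched as a module, e.g. "python -m smnp program.mus", via smnp/__main__.py above. A minimal sketch (not in the diff) of the same tokenize-and-print step done programmatically; "example.mus" is a made-up file name:

# Sketch only: mirrors what main() does, with a hypothetical source file.
from smnp.token.tokenizer import tokenize

with open("example.mus") as source:
    lines = [line.rstrip('\n') for line in source.readlines()]

print(tokenize(lines))  # a TokenList; whitespace and comments already filtered out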
1  smnp/token/__init__.py  Normal file
@@ -0,0 +1 @@
__all__ = ["tokenize"]
54  smnp/token/model.py  Normal file
@@ -0,0 +1,54 @@
class Token:
    def __init__(self, type, value, pos):
        self.type = type
        self.value = value
        self.pos = pos

    def __str__(self):
        return "Token(" + str(self.type) + ", '" + self.value + "', " + str(self.pos) + ")"

    def __repr__(self):
        return self.__str__()


class TokenList:
    def __init__(self, tokens=None):
        # A mutable default argument would be shared between instances,
        # so the empty default is created per instance instead.
        self.tokens = tokens if tokens is not None else []
        self.cursor = 0
        self.snap = 0  # cursor position saved by snapshot(), restored by reset()

    def append(self, token):
        self.tokens.append(token)

    def __getitem__(self, index):
        return self.tokens[index]

    def current(self):
        if self.cursor >= len(self.tokens):
            raise RuntimeError(f"Cursor points to not existing token! Cursor = {self.cursor}, len = {len(self.tokens)}")
        return self.tokens[self.cursor]

    def next(self, number=1):
        return self.tokens[self.cursor + number]

    def prev(self, number=1):
        return self.tokens[self.cursor - number]

    def hasMore(self, count=1):
        return self.cursor + count < len(self.tokens)

    def hasCurrent(self):
        return self.cursor < len(self.tokens)

    def ahead(self):
        self.cursor += 1

    def snapshot(self):
        # The saved position lives in self.snap; assigning to self.snapshot
        # would shadow this method with an integer.
        self.snap = self.cursor

    def reset(self):
        self.cursor = self.snap
        return self.tokens[self.cursor]

    def __str__(self):
        return f"[Cursor: {self.cursor}\n{', '.join([str(token) for token in self.tokens])}]"

    def __repr__(self):
        return self.__str__()
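A short sketch (not in the diff) of the TokenList cursor protocol, using a hand-built token list:

# Cursor navigation sketch with hand-made tokens.
from smnp.token.model import Token, TokenList
from smnp.token.type import TokenType

tokens = TokenList([
    Token(TokenType.IDENTIFIER, "print", (0, 0)),
    Token(TokenType.OPEN_PAREN, "(", (0, 5)),
    Token(TokenType.CLOSE_PAREN, ")", (0, 6)),
])

tokens.snapshot()        # save the current cursor position
tokens.ahead()           # advance to the next token
print(tokens.current())  # Token(TokenType.OPEN_PAREN, '(', (0, 5))
print(tokens.reset())    # back to the snapshot: the IDENTIFIER token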
81  smnp/token/tokenizer.py  Normal file
@@ -0,0 +1,81 @@
import sys
import time
import re
from smnp.error.syntax import SyntaxException
from smnp.token.type import TokenType
from smnp.token.model import Token, TokenList
from smnp.token.tools import tokenizeChar, tokenizeRegexPattern
from smnp.token.tokenizers.paren import tokenizeOpenParen, tokenizeCloseParen
from smnp.token.tokenizers.asterisk import tokenizeAsterisk
from smnp.token.tokenizers.whitespace import tokenizeWhitespaces
from smnp.token.tokenizers.identifier import tokenizeIdentifier
from smnp.token.tokenizers.comma import tokenizeComma
from smnp.token.tokenizers.string import tokenizeString
from smnp.token.tokenizers.integer import tokenizeInteger
from smnp.token.tokenizers.bracket import tokenizeOpenBracket, tokenizeCloseBracket
from smnp.token.tokenizers.assign import tokenizeAssign
from smnp.token.tokenizers.colon import tokenizeColon
from smnp.token.tokenizers.comment import tokenizeComment
from smnp.token.tokenizers.note import tokenizeNote
from smnp.token.tokenizers.function import tokenizeFunction
from smnp.token.tokenizers.ret import tokenizeReturn
from smnp.token.tokenizers.percent import tokenizePercent
from smnp.token.tokenizers.minus import tokenizeMinus
from smnp.token.tokenizers.dot import tokenizeDot


tokenizers = (
    tokenizeOpenParen,
    tokenizeCloseParen,
    tokenizeAsterisk,
    tokenizeString,
    tokenizeFunction,
    tokenizeReturn,
    tokenizeInteger,
    tokenizeNote,
    tokenizeIdentifier,
    tokenizeComma,
    tokenizeOpenBracket,
    tokenizeCloseBracket,
    tokenizeAssign,
    tokenizeColon,
    tokenizePercent,
    tokenizeMinus,
    tokenizeDot,
    tokenizeComment,
    tokenizeWhitespaces,
)


filters = [
    lambda token: token.type is not None,
    lambda token: token.type != TokenType.COMMENT
]


def tokenize(lines):
    tokens = []
    for lineNumber, line in enumerate(lines):
        current = 0
        while current < len(line):
            consumedChars, token = combinedTokenizer(line, current, lineNumber)

            if consumedChars == 0:
                raise SyntaxException((lineNumber, current), f"Unknown symbol '{line[current]}'")

            current += consumedChars
            tokens.append(token)

    # filterTokens yields a chain of generators; materialize it so TokenList
    # holds an indexable list.
    return TokenList(list(filterTokens(filters, tokens)))


def combinedTokenizer(line, current, lineNumber):
    for tokenizer in tokenizers:
        consumedChars, token = tokenizer(line, current, lineNumber)
        if consumedChars > 0:
            return (consumedChars, token)
    return (0, None)


def filterTokens(filters, tokens):
    if not filters:
        return tokens
    return filterTokens(filters[1:], (token for token in tokens if filters[0](token)))


__all__ = ["tokenize"]
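The dispatch contract above: every entry in the tokenizers tuple maps (line, position, lineNumber) to (consumedChars, token), where consumedChars == 0 means "no match, try the next one". A quick sketch with an invented input line:

# Sketch: whitespace tokens (type None) and COMMENT tokens are filtered out.
from smnp.token.tokenizer import tokenize

print(tokenize(["x = 5 # set x"]))
# TokenList containing IDENTIFIER 'x', ASSIGN '=', INTEGER '5'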
0  smnp/token/tokenizers/__init__.py  Normal file
5  smnp/token/tokenizers/assign.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeAssign(input, current, line):
    return tokenizeChar(TokenType.ASSIGN, '=', input, current, line)
5  smnp/token/tokenizers/asterisk.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeAsterisk(input, current, line):
    return tokenizeChar(TokenType.ASTERISK, '*', input, current, line)
8  smnp/token/tokenizers/bracket.py  Normal file
@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeOpenBracket(input, current, line):
    return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line)


def tokenizeCloseBracket(input, current, line):
    return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line)
5  smnp/token/tokenizers/colon.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeColon(input, current, line):
    return tokenizeChar(TokenType.COLON, ':', input, current, line)
5  smnp/token/tokenizers/comma.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeComma(input, current, line):
    return tokenizeChar(TokenType.COMMA, ',', input, current, line)
13  smnp/token/tokenizers/comment.py  Normal file
@@ -0,0 +1,13 @@
from smnp.token.type import TokenType
from smnp.token.model import Token


def tokenizeComment(input, current, line):
    if input[current] == '#':
        consumedChars = 0
        value = ''
        # A comment runs from '#' to the end of the line.
        while current+consumedChars < len(input):
            value += input[current+consumedChars]
            consumedChars += 1
        return (consumedChars, Token(TokenType.COMMENT, value, (line, current)))
    return (0, None)
5  smnp/token/tokenizers/dot.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeDot(input, current, line):
    return tokenizeChar(TokenType.DOT, '.', input, current, line)
5  smnp/token/tokenizers/function.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType


def tokenizeFunction(input, current, line):
    return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line)
5  smnp/token/tokenizers/identifier.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType


def tokenizeIdentifier(input, current, line):
    return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line)
5  smnp/token/tokenizers/integer.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType


def tokenizeInteger(input, current, line):
    return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line)
5  smnp/token/tokenizers/minus.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeMinus(input, current, line):
    return tokenizeChar(TokenType.MINUS, '-', input, current, line)
37  smnp/token/tokenizers/note.py  Normal file
@@ -0,0 +1,37 @@
import re
from smnp.token.type import TokenType
from smnp.token.model import Token


def tokenizeNote(input, current, line):
    consumedChars = 0
    value = ''
    if input[current] == '@':
        consumedChars += 1
        value += input[current]
        # Bounds check added: a trailing '@' at end of line would otherwise
        # raise an IndexError here.
        if current+consumedChars < len(input) and input[current+consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and input[current+consumedChars] in ('b', '#'):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
            value += input[current+consumedChars]
            consumedChars += 1

        if current+consumedChars < len(input) and input[current+consumedChars] == '.':
            duration = input[current+consumedChars]
            consumedChars += 1
            while current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
                duration += input[current+consumedChars]
                consumedChars += 1
            if current+consumedChars < len(input) and input[current+consumedChars] == 'd':
                duration += input[current+consumedChars]
                consumedChars += 1
            if len(duration) > 1:
                value += duration
            else:
                consumedChars -= 1
        return (consumedChars, Token(TokenType.NOTE, value, (line, current)))
    return (0, None)
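Inferred from the branches above, a note literal is '@', a pitch letter, an optional accidental ('b' or '#'), an optional octave digit, and an optional '.'-prefixed duration that may end in 'd'. A sketch with a few invented literals:

# Sketch: each call returns (consumedChars, Token) for a made-up literal.
from smnp.token.tokenizers.note import tokenizeNote

for src in ["@c", "@C#4", "@d5.8", "@a.16d"]:
    print(tokenizeNote(src, 0, 0))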
8  smnp/token/tokenizers/paren.py  Normal file
@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizeOpenParen(input, current, line):
    return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line)


def tokenizeCloseParen(input, current, line):
    return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line)
5  smnp/token/tokenizers/percent.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType


def tokenizePercent(input, current, line):
    return tokenizeChar(TokenType.PERCENT, '%', input, current, line)
5  smnp/token/tokenizers/ret.py  Normal file
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType


def tokenizeReturn(input, current, line):
    return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line)
16  smnp/token/tokenizers/string.py  Normal file
@@ -0,0 +1,16 @@
from smnp.token.type import TokenType
from smnp.token.model import Token


def tokenizeString(input, current, line):
    if input[current] == '"':
        value = input[current]
        char = ''
        consumedChars = 1
        while char != '"':
            if current + consumedChars >= len(input): #TODO: raise a SyntaxException instead
                print("String not terminated")
                break
            char = input[current + consumedChars]
            value += char
            consumedChars += 1
        return (consumedChars, Token(TokenType.STRING, value, (line, current)))
    return (0, None)
4  smnp/token/tokenizers/whitespace.py  Normal file
@@ -0,0 +1,4 @@
from smnp.token.tools import tokenizeRegexPattern


def tokenizeWhitespaces(input, current, line):
    return tokenizeRegexPattern(None, r'\s', input, current, line)
21  smnp/token/tools.py  Normal file
@@ -0,0 +1,21 @@
import re
from smnp.token.model import Token


def tokenizeChar(type, char, input, current, line):
    if input[current] == char:
        return (1, Token(type, input[current], (line, current)))
    return (0, None)


def tokenizeRegexPattern(type, pattern, input, current, line):
    consumedChars = 0
    value = ''

    while current+consumedChars < len(input) and re.match(pattern, input[current+consumedChars]):
        value += input[current+consumedChars]
        consumedChars += 1
    return (consumedChars, Token(type, value, (line, current)) if consumedChars > 0 else None)


def tokenizeKeyword(type, keyword, input, current, line):
    if len(input) >= current+len(keyword) and input[current:current+len(keyword)] == keyword:
        return (len(keyword), Token(type, keyword, (line, current)))
    return (0, None)
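A quick sketch (not in the diff) of the three helpers on invented inputs; each returns (consumedChars, token) and consumes nothing on a miss:

# Sketch: positions are (line, column) pairs.
from smnp.token.tools import tokenizeChar, tokenizeKeyword, tokenizeRegexPattern
from smnp.token.type import TokenType

print(tokenizeChar(TokenType.COMMA, ',', ", rest", 0, 0))            # (1, COMMA token)
print(tokenizeRegexPattern(TokenType.INTEGER, r'\d', "123,", 0, 0))  # (3, INTEGER '123')
print(tokenizeKeyword(TokenType.RETURN, 'return', "return 1", 0, 0)) # (6, RETURN token)
print(tokenizeChar(TokenType.DOT, '.', "x", 0, 0))                   # (0, None) - no match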
21  smnp/token/type.py  Normal file
@@ -0,0 +1,21 @@
from enum import Enum


class TokenType(Enum):
    OPEN_PAREN = 1
    CLOSE_PAREN = 2
    ASTERISK = 3
    STRING = 4
    IDENTIFIER = 5
    COMMA = 6
    INTEGER = 7
    OPEN_BRACKET = 8
    CLOSE_BRACKET = 9
    ASSIGN = 10
    COLON = 11
    NOTE = 12
    COMMENT = 13
    PERCENT = 14
    MINUS = 15
    FUNCTION = 16
    RETURN = 17
    DOT = 18