Refactor tokenizer
.gitignore (vendored, new file) | 2
@@ -0,0 +1,2 @@
__pycache__/
*.mus
Tokenizer.py (deleted) | 251
@@ -1,251 +0,0 @@
from enum import Enum
import time
import re
import sys
from Error import SyntaxException

class Tokens:
    def __init__(self, tokens = []):
        self.tokens = tokens
        self.cursor = 0
        self.snap = 0

    def append(self, token):
        self.tokens.append(token)

    def __getitem__(self, index):
        return self.tokens[index]

    def current(self):
        if self.cursor >= len(self.tokens):
            raise RuntimeError(f"Cursor points to not existing token! Cursor = {self.cursor}, len = {len(self.tokens)}")
        return self.tokens[self.cursor]

    def next(self, number=1):
        return self.tokens[self.cursor + number]

    def prev(self, number=1):
        return self.tokens[self.cursor - number]

    def hasMore(self, count=1):
        return self.cursor + count < len(self.tokens)

    def hasCurrent(self):
        return self.cursor < len(self.tokens)

    def ahead(self):
        self.cursor += 1

    def snapshot(self):
        self.snapshot = self.cursor

    def reset(self):
        self.cursor = self.snapshot
        return self.tokens[self.cursor]

    def __str__(self):
        return f"[Cursor: {self.cursor}\n{', '.join([str(token) for token in self.tokens])}]"

    def __repr__(self):
        return self.__str__()

class TokenType(Enum):
    OPEN_PAREN = 1
    CLOSE_PAREN = 2
    ASTERISK = 3
    STRING = 4
    IDENTIFIER = 5
    COMMA = 6
    INTEGER = 7
    OPEN_BRACKET = 8
    CLOSE_BRACKET = 9
    ASSIGN = 10
    COLON = 11
    NOTE = 12
    COMMENT = 13
    PERCENT = 14
    MINUS = 15
    FUNCTION = 16
    RETURN = 17
    DOT = 18

class Token:
    def __init__(self, type, value, pos):
        self.type = type
        self.value = value
        self.pos = pos
    def __str__(self):
        return "Token(" + str(self.type) + ", '" + self.value + "', " + str(self.pos) + ")"
    def __repr__(self):
        return self.__str__()

def tokenizeOpenParen(input, current, line):
    return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line)

def tokenizeChar(type, char, input, current, line):
    if input[current] == char:
        return (1, Token(type, input[current], (line, current)))
    return (0, None)

def tokenizeCloseParen(input, current, line):
    return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line)

def tokenizeAsterisk(input, current, line):
    return tokenizeChar(TokenType.ASTERISK, '*', input, current, line)

def tokenizeString(input, current, line):
    if input[current] == '"':
        value = input[current]
        char = ''
        consumedChars = 1
        while char != '"':
            if char is None: #TODO!!!
                print("String not terminated")
            char = input[current + consumedChars]
            value += char
            consumedChars += 1
        return (consumedChars, Token(TokenType.STRING, value, (line, current)))
    return (0, None)

def tokenizeRegexPattern(type, pattern, input, current, line):
    consumedChars = 0
    value = ''

    while current+consumedChars < len(input) and re.match(pattern, input[current+consumedChars]):
        value += input[current+consumedChars]
        consumedChars += 1
    return (consumedChars, Token(type, value, (line, current)) if consumedChars > 0 else None)

def tokenizeWhitespaces(input, current, line):
    return tokenizeRegexPattern(None, r'\s', input, current, line)

def tokenizeIdentifier(input, current, line):
    return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line)

def tokenizeComma(input, current, line):
    return tokenizeChar(TokenType.COMMA, ',', input, current, line)

def tokenizeInteger(input, current, line):
    return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line)

def tokenizeOpenBracket(input, current, line):
    return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line)

def tokenizeCloseBracket(input, current, line):
    return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line)

def tokenizeAssign(input, current, line):
    return tokenizeChar(TokenType.ASSIGN, '=', input, current, line)

def tokenizeColon(input, current, line):
    return tokenizeChar(TokenType.COLON, ':', input, current, line)

def tokenizeComment(input, current, line):
    if input[current] == '#':
        consumedChars = 0
        value = ''
        while current+consumedChars < len(input):
            value += input[current+consumedChars]
            consumedChars += 1
        pass
        return (consumedChars, Token(TokenType.COMMENT, value, (line, current)))
    return (0, None)

def tokenizeNote(input, current, line):
    consumedChars = 0
    value = ''
    if input[current] == '@':
        consumedChars += 1
        value += input[current]
        if input[current+consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'):
            value += input[current+consumedChars]
            consumedChars += 1

            if current+consumedChars < len(input) and input[current+consumedChars] in ('b', '#'):
                value += input[current+consumedChars]
                consumedChars += 1

            if current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
                value += input[current+consumedChars]
                consumedChars += 1

            if current+consumedChars < len(input) and input[current+consumedChars] == '.':
                duration = input[current+consumedChars]
                consumedChars += 1
                while current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
                    duration += input[current+consumedChars]
                    consumedChars += 1
                if current+consumedChars < len(input) and input[current+consumedChars] == 'd':
                    duration += input[current+consumedChars]
                    consumedChars += 1
                if len(duration) > 1:
                    value += duration
                else:
                    consumedChars -= 1
            return (consumedChars, Token(TokenType.NOTE, value, (line, current)))
    return (0, None)

def tokenizePercent(input, current, line):
    return tokenizeChar(TokenType.PERCENT, '%', input, current, line)

def tokenizeMinus(input, current, line):
    return tokenizeChar(TokenType.MINUS, '-', input, current, line)

def tokenizeFunction(input, current, line):
    return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line)

def tokenizeKeyword(type, keyword, input, current, line):
    if len(input) >= current+len(keyword) and input[current:current+len(keyword)] == keyword:
        return (len(keyword), Token(type, keyword, (line, current)))
    return (0, None)

def tokenizeReturn(input, current, line):
    return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line)

def tokenizeDot(input, current, line):
    return tokenizeChar(TokenType.DOT, '.', input, current, line)

tokenizers = (
    tokenizeOpenParen,
    tokenizeCloseParen,
    tokenizeAsterisk,
    tokenizeString,
    tokenizeFunction,
    tokenizeReturn,
    tokenizeInteger,
    tokenizeNote,
    tokenizeIdentifier,
    tokenizeComma,
    tokenizeOpenBracket,
    tokenizeCloseBracket,
    tokenizeAssign,
    tokenizeColon,
    tokenizePercent,
    tokenizeMinus,
    tokenizeDot,
    tokenizeComment,
    tokenizeWhitespaces,
)

def doTokenize(lines):
    tokens = []
    for lineNumber, line in enumerate(lines):
        current = 0
        while current < len(line):
            tokenized = False
            for tokenizer in tokenizers:
                consumedChars, value = tokenizer(line, current, lineNumber)
                if consumedChars > 0:
                    tokens.append(value)
                    current += consumedChars
                    tokenized = True
                    break

            if not tokenized:
                raise SyntaxException((lineNumber, current), f"Unknown symbol '{line[current]}'")

    return [token for token in tokens if token.type is not None]

def tokenize(lines):
    tokens = doTokenize(lines)
    return Tokens([token for token in tokens if token.type != TokenType.COMMENT])
main.py (deleted) | 26
@@ -1,26 +0,0 @@
from Tokenizer import tokenize
from Parser import parse
from Evaluator import evaluate
from Environment import createEnvironment
from Error import SyntaxException, RuntimeException
import sys

if __name__ == "__main__":
    try:
        with open(sys.argv[1], 'r') as source:
            lines = [line.rstrip('\n') for line in source.readlines()]

        env = createEnvironment()

        tokens = tokenize(lines)

        ast = parse(tokens)

        evaluate(ast, env)
    except SyntaxException as e:
        print(e.msg)
    except RuntimeException as e:
        print(e.msg)
    except KeyboardInterrupt:
        print("Program interrupted")
smnp/OldParser.py (new file) | 230
@@ -0,0 +1,230 @@
from Tokenizer import *
from Note import *
from AST import *
from Error import SyntaxException

def expectedFound(expected, found):
    raise SyntaxException(None, f"Expected: {expected}, found: {found}")

def assertType(expected, found):
    if expected != found:
        raise SyntaxException(None, f"Expected: {expected}, found: {found}")

def parseInteger(input, parent):
    token = input.pop(0)
    return IntegerLiteralNode(int(token.value), parent, token.pos)

def parseString(input, parent):
    token = input.pop(0)
    return StringLiteralNode(token.value[1:-1], parent, token.pos)

def parseNote(input, parent):
    token = input.pop(0)
    value = token.value
    consumedChars = 1
    notePitch = value[consumedChars]
    consumedChars += 1
    octave = 4
    duration = 4
    dot = False
    if consumedChars < len(value) and value[consumedChars] in ('b', '#'):
        notePitch += value[consumedChars]
        consumedChars += 1
    if consumedChars < len(value) and re.match(r'\d', value[consumedChars]):
        octave = int(value[consumedChars])
        consumedChars += 1
    if consumedChars < len(value) and value[consumedChars] == '.':
        consumedChars += 1
        durationString = ''
        while consumedChars < len(value) and re.match(r'\d', value[consumedChars]):
            durationString += value[consumedChars]
            consumedChars += 1
        duration = int(durationString)
        if consumedChars < len(value) and value[consumedChars] == '.':
            dot = True
            consumedChars += 1

    return NoteLiteralNode(Note(notePitch, octave, duration, dot), parent, token.pos)

def parseComma(input, parent):
    token = input.pop(0)
    return CommaNode(parent, token.pos)

def parseList(input, parent):
    token = input.pop(0)

    node = ListNode(parent, token.pos)

    while input[0].type != TokenType.CLOSE_PAREN:
        element = parseArrayElement(input, node)
        if element is None:
            raise SyntaxException(input[0].pos, f"Invalid element '{input[0].value}'")
        node.append(element)

    if input[0].type != TokenType.CLOSE_PAREN:
        expectedFound(TokenType.CLOSE_PAREN, input[0].type)
    input.pop(0)

    return node

def parseBlock(input, parent):
    token = input.pop(0)

    block = BlockNode(parent, token.pos)

    while input[0].type != TokenType.CLOSE_BRACKET:
        block.append(parseToken(input, block))

    if input[0].type != TokenType.CLOSE_BRACKET:
        expectedFound(TokenType.CLOSE_BRACKET, input[0].type)
    input.pop(0)

    return block

def parseAsterisk(input, parent):
    token = input.pop(0)

    iterator = parent.pop(-1)
    value = parseStatement(input, parent)

    asterisk = AsteriskStatementNode(iterator, value, parent, token.pos)
    iterator.parent = asterisk
    value.parent = asterisk
    return asterisk

def parseNoteOrColon(input, parent):
    note = parseNote(input, parent)
    if len(input) > 1 and input[0].type == TokenType.COLON:
        token = input.pop(0)
        b = parseNote(input, parent)
        if b is None:
            raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'")
        colon = ColonNode(note, b, parent, token.pos)
        note.parent = colon
        b.parent = colon
        return colon

    return note

def parseIntegerOrColonOrPercent(input, parent):
    integer = parseInteger(input, parent)
    if len(input) > 1 and input[0].type == TokenType.COLON:
        token = input.pop(0)
        b = parseInteger(input, parent)
        if b is None:
            raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'")
        colon = ColonNode(integer, b, parent, token.pos)
        integer.parent = colon
        b.parent = colon
        return colon

    if len(input) > 0 and input[0].type == TokenType.PERCENT:
        input.pop(0)
        percent = PercentNode(integer, parent, integer.pos)
        integer.parent = percent
        return percent

    return integer

def parseFunctionCallOrAssignOrIdentifier(input, parent):
    token = input.pop(0)
    identifier = IdentifierNode(token.value, parent, token.pos)
    # Function call
    if len(input) > 0 and input[0].type == TokenType.OPEN_PAREN:
        arguments = parseList(input, parent)
        func = FunctionCallNode(identifier, arguments, parent, token.pos)
        identifier.parent = func
        arguments.parent = func
        return func
    # Assign
    if len(input) > 1 and input[0].type == TokenType.ASSIGN:
        token = input.pop(0)
        value = parseExpression(input, parent)
        assign = AssignExpression(identifier, value, parent, token.pos)
        identifier.parent = assign
        value.parent = assign
        return assign

    return identifier

def parseMinus(input, parent):
    token = input.pop(0)

    value = parseInteger(input, parent)

    return IntegerLiteralNode(-value.value, parent, token.pos)

def parseFunctionDefinition(input, parent):
    input.pop(0)

    assertType(TokenType.IDENTIFIER, input[0].type)
    token = input.pop(0)
    name = IdentifierNode(token.value, parent, token.pos)

    assertType(TokenType.OPEN_PAREN, input[0].type)
    parameters = parseList(input, parent)

    assertType(TokenType.OPEN_BRACKET, input[0].type)
    body = parseBlock(input, parent)

    func = FunctionDefinitionNode(name, parameters, body, parent, token.pos)
    name.parent = func
    parameters.parent = func
    body.parent = func
    return func

def parseReturn(input, parent):
    token = input.pop(0)

    value = parseExpression(input, parent)

    returnNode = ReturnNode(value, parent, token.pos)
    value.parent = returnNode
    return returnNode

def parseExpression(input, parent):
    type = input[0].type
    if type == TokenType.FUNCTION:
        return parseFunctionDefinition(input, parent)
    if type == TokenType.RETURN:
        return parseReturn(input, parent)
    if type == TokenType.MINUS:
        return parseMinus(input, parent)
    if type == TokenType.INTEGER:
        return parseIntegerOrColonOrPercent(input, parent)
    if type == TokenType.STRING:
        return parseString(input, parent)
    if type == TokenType.NOTE:
        return parseNoteOrColon(input, parent)
    if type == TokenType.IDENTIFIER:
        return parseFunctionCallOrAssignOrIdentifier(input, parent)
    if type == TokenType.OPEN_PAREN:
        return parseList(input, parent)
    raise SyntaxException(input[0].pos, f"Unexpected character '{input[0].value}'")

def parseArrayElement(input, parent):
    type = input[0].type
    if type == TokenType.COMMA:
        return parseComma(input, parent)
    return parseExpression(input, parent)

def parseStatement(input, parent):
    type = input[0].type
    if type == TokenType.OPEN_BRACKET:
        return parseBlock(input, parent)
    if type == TokenType.ASTERISK:
        return parseAsterisk(input, parent)

    return parseExpression(input, parent)

def parseToken(input, parent):
    #import pdb; pdb.set_trace()
    return parseStatement(input, parent)

def parse(input):
    root = Program()
    while len(input) > 0:
        root.append(parseToken(input, root))
    return root
smnp/__init__.py (new file, empty) | 0
smnp/__main__.py (new file) | 4
@@ -0,0 +1,4 @@
from smnp.main import main

if __name__ == "__main__":
    main()
@@ -1,5 +1,5 @@
import sys
from Evaluator import objectString
from parser.Environment import objectString
from Note import *
import random
import Synth
@@ -46,19 +46,6 @@ class Environment():
            return scope
        else:
            return scope

def sample(args, env):
    if len(args) == 1 and isinstance(args[0], list):
        return _sample(args[0])
    elif len(args) == 0:
        return _sample(Note.range(Note(NotePitch.C), Note(NotePitch.H)))
    elif all(isinstance(x, Note) for x in args):
        return _sample(args)
    else:
        pass # not valid signature

def _sample(list):
    return list[int(random.uniform(0, len(list)))]

def doPrint(args, env):
    print("".join([objectString(arg) for arg in args]))
smnp/error/__init__.py (new file, empty) | 0
smnp/error/runtime.py (new file) | 4
@@ -0,0 +1,4 @@
class RuntimeException(Exception):
    def __init__(self, pos, msg):
        posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
        self.msg = f"Runtime error {posStr}:\n{msg}"
@@ -2,8 +2,3 @@ class SyntaxException(Exception):
    def __init__(self, pos, msg):
        posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
        self.msg = f"Syntax error {posStr}:\n{msg}"

class RuntimeException(Exception):
    def __init__(self, pos, msg):
        posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
        self.msg = f"Syntax error {posStr}:\n{msg}"
smnp/main.py (new file) | 29
@@ -0,0 +1,29 @@
import sys
from smnp.error.syntax import SyntaxException
from smnp.error.runtime import RuntimeException
from smnp.token.tokenizer import tokenize
#from Tokenizer import tokenize
#from Parser import parse
#from Evaluator import evaluate
#from Environment import createEnvironment
#from Error import SyntaxException, RuntimeException

def main():
    try:
        with open(sys.argv[1], 'r') as source:
            lines = [line.rstrip('\n') for line in source.readlines()]

        #env = createEnvironment()

        tokens = tokenize(lines)
        print(tokens)
        #ast = parse(tokens)

        #evaluate(ast, env)
    except SyntaxException as e:
        print(e.msg)
    except RuntimeException as e:
        print(e.msg)
    except KeyboardInterrupt:
        print("Program interrupted")
smnp/token/__init__.py (new file) | 1
@@ -0,0 +1 @@
__all__ = ["tokenize"]
smnp/token/model.py (new file) | 54
@@ -0,0 +1,54 @@
class Token:
    def __init__(self, type, value, pos):
        self.type = type
        self.value = value
        self.pos = pos
    def __str__(self):
        return "Token(" + str(self.type) + ", '" + self.value + "', " + str(self.pos) + ")"
    def __repr__(self):
        return self.__str__()

class TokenList:
    def __init__(self, tokens = []):
        self.tokens = tokens
        self.cursor = 0
        self.snap = 0

    def append(self, token):
        self.tokens.append(token)

    def __getitem__(self, index):
        return self.tokens[index]

    def current(self):
        if self.cursor >= len(self.tokens):
            raise RuntimeError(f"Cursor points to a non-existing token! Cursor = {self.cursor}, len = {len(self.tokens)}")
        return self.tokens[self.cursor]

    def next(self, number=1):
        return self.tokens[self.cursor + number]

    def prev(self, number=1):
        return self.tokens[self.cursor - number]

    def hasMore(self, count=1):
        return self.cursor + count < len(self.tokens)

    def hasCurrent(self):
        return self.cursor < len(self.tokens)

    def ahead(self):
        self.cursor += 1

    def snapshot(self):
        # Store the saved position in self.snap; assigning to self.snapshot here
        # would shadow this method on the instance.
        self.snap = self.cursor

    def reset(self):
        self.cursor = self.snap
        return self.tokens[self.cursor]

    def __str__(self):
        return f"[Cursor: {self.cursor}\n{', '.join([str(token) for token in self.tokens])}]"

    def __repr__(self):
        return self.__str__()
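A minimal usage sketch of this cursor API (the tokens below are invented sample values, and it assumes snapshot() stores the saved position in self.snap as above):

from smnp.token.model import Token, TokenList
from smnp.token.type import TokenType

tokens = TokenList([
    Token(TokenType.IDENTIFIER, "play", (0, 0)),  # invented sample tokens
    Token(TokenType.OPEN_PAREN, "(", (0, 4)),
    Token(TokenType.NOTE, "@c", (0, 5)),
    Token(TokenType.CLOSE_PAREN, ")", (0, 7)),
])

tokens.snapshot()                                 # remember the current cursor (0)
while tokens.hasCurrent() and tokens.current().type != TokenType.NOTE:
    tokens.ahead()                                # advance to the first NOTE token
print(tokens.current())                           # Token(TokenType.NOTE, '@c', (0, 5))
print(tokens.reset())                             # cursor back to the snapshot: the 'play' token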
smnp/token/tokenizer.py (new file) | 81
@@ -0,0 +1,81 @@
import sys
import time
import re
from smnp.error.syntax import SyntaxException
from smnp.token.type import TokenType
from smnp.token.model import Token, TokenList
from smnp.token.tools import tokenizeChar, tokenizeRegexPattern
from smnp.token.tokenizers.paren import tokenizeOpenParen, tokenizeCloseParen
from smnp.token.tokenizers.asterisk import tokenizeAsterisk
from smnp.token.tokenizers.whitespace import tokenizeWhitespaces
from smnp.token.tokenizers.identifier import tokenizeIdentifier
from smnp.token.tokenizers.comma import tokenizeComma
from smnp.token.tokenizers.string import tokenizeString
from smnp.token.tokenizers.integer import tokenizeInteger
from smnp.token.tokenizers.bracket import tokenizeOpenBracket, tokenizeCloseBracket
from smnp.token.tokenizers.assign import tokenizeAssign
from smnp.token.tokenizers.colon import tokenizeColon
from smnp.token.tokenizers.comment import tokenizeComment
from smnp.token.tokenizers.note import tokenizeNote
from smnp.token.tokenizers.function import tokenizeFunction
from smnp.token.tokenizers.ret import tokenizeReturn
from smnp.token.tokenizers.percent import tokenizePercent
from smnp.token.tokenizers.minus import tokenizeMinus
from smnp.token.tokenizers.dot import tokenizeDot

tokenizers = (
    tokenizeOpenParen,
    tokenizeCloseParen,
    tokenizeAsterisk,
    tokenizeString,
    tokenizeFunction,
    tokenizeReturn,
    tokenizeInteger,
    tokenizeNote,
    tokenizeIdentifier,
    tokenizeComma,
    tokenizeOpenBracket,
    tokenizeCloseBracket,
    tokenizeAssign,
    tokenizeColon,
    tokenizePercent,
    tokenizeMinus,
    tokenizeDot,
    tokenizeComment,
    tokenizeWhitespaces,
)

filters = [
    lambda token: token.type is not None,
    lambda token: token.type != TokenType.COMMENT
]

def tokenize(lines):
    tokens = []
    for lineNumber, line in enumerate(lines):
        current = 0
        while current < len(line):
            consumedChars, token = combinedTokenizer(line, current, lineNumber)

            if consumedChars == 0:
                raise SyntaxException((lineNumber, current), f"Unknown symbol '{line[current]}'")

            current += consumedChars
            tokens.append(token)

    return TokenList(filterTokens(filters, tokens))

def combinedTokenizer(line, current, lineNumber):
    for tokenizer in tokenizers:
        consumedChars, token = tokenizer(line, current, lineNumber)
        if consumedChars > 0:
            return (consumedChars, token)
    return (0, None)

def filterTokens(filters, tokens):
    if not filters:
        return tokens

    return filterTokens(filters[1:], (token for token in tokens if filters[0](token)))

__all__ = ["tokenize"]
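As a quick smoke test of the new pipeline, a sketch of calling tokenize() directly; the two source lines are invented examples of what the tokenizers in this commit accept, not taken from the project:

from smnp.token.tokenizer import tokenize

lines = ['x = 3  # a comment', 'play(@c4, "loud")']  # hypothetical smnp input
token_list = tokenize(lines)                         # whitespace and comment tokens are filtered out
for token in token_list.tokens:
    print(token)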
smnp/token/tokenizers/__init__.py (new file, empty) | 0
smnp/token/tokenizers/assign.py (new file) | 5
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeAssign(input, current, line):
    return tokenizeChar(TokenType.ASSIGN, '=', input, current, line)
smnp/token/tokenizers/asterisk.py (new file) | 5
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeAsterisk(input, current, line):
    return tokenizeChar(TokenType.ASTERISK, '*', input, current, line)
smnp/token/tokenizers/bracket.py (new file) | 8
@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeOpenBracket(input, current, line):
    return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line)

def tokenizeCloseBracket(input, current, line):
    return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line)
smnp/token/tokenizers/colon.py (new file) | 5
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeColon(input, current, line):
    return tokenizeChar(TokenType.COLON, ':', input, current, line)
smnp/token/tokenizers/comma.py (new file) | 5
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeComma(input, current, line):
    return tokenizeChar(TokenType.COMMA, ',', input, current, line)
smnp/token/tokenizers/comment.py (new file) | 13
@@ -0,0 +1,13 @@
from smnp.token.type import TokenType
from smnp.token.model import Token

def tokenizeComment(input, current, line):
    if input[current] == '#':
        consumedChars = 0
        value = ''
        while current+consumedChars < len(input):
            value += input[current+consumedChars]
            consumedChars += 1
        pass
        return (consumedChars, Token(TokenType.COMMENT, value, (line, current)))
    return (0, None)
smnp/token/tokenizers/dot.py (new file) | 5
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeDot(input, current, line):
    return tokenizeChar(TokenType.DOT, '.', input, current, line)
smnp/token/tokenizers/function.py (new file) | 5
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType

def tokenizeFunction(input, current, line):
    return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line)
smnp/token/tokenizers/identifier.py (new file) | 5
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType

def tokenizeIdentifier(input, current, line):
    return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line)
smnp/token/tokenizers/integer.py (new file) | 5
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType

def tokenizeInteger(input, current, line):
    return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line)
smnp/token/tokenizers/minus.py (new file) | 5
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeMinus(input, current, line):
    return tokenizeChar(TokenType.MINUS, '-', input, current, line)
smnp/token/tokenizers/note.py (new file) | 37
@@ -0,0 +1,37 @@
import re
from smnp.token.type import TokenType
from smnp.token.model import Token

def tokenizeNote(input, current, line):
    consumedChars = 0
    value = ''
    if input[current] == '@':
        consumedChars += 1
        value += input[current]
        if input[current+consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'):
            value += input[current+consumedChars]
            consumedChars += 1

            if current+consumedChars < len(input) and input[current+consumedChars] in ('b', '#'):
                value += input[current+consumedChars]
                consumedChars += 1

            if current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
                value += input[current+consumedChars]
                consumedChars += 1

            if current+consumedChars < len(input) and input[current+consumedChars] == '.':
                duration = input[current+consumedChars]
                consumedChars += 1
                while current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
                    duration += input[current+consumedChars]
                    consumedChars += 1
                if current+consumedChars < len(input) and input[current+consumedChars] == 'd':
                    duration += input[current+consumedChars]
                    consumedChars += 1
                if len(duration) > 1:
                    value += duration
                else:
                    consumedChars -= 1
            return (consumedChars, Token(TokenType.NOTE, value, (line, current)))
    return (0, None)
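To make the note grammar concrete, a small sketch calling tokenizeNote() on a few hand-written literals ('@' plus a pitch letter, an optional 'b'/'#' accidental, an optional octave digit, and an optional '.' duration with a trailing 'd' for a dotted value; the inputs are illustrative):

from smnp.token.tokenizers.note import tokenizeNote

for text in ('@c', '@F#3', '@a4.8', '@H.16d'):
    consumed, token = tokenizeNote(text, 0, 0)
    print(consumed, token)  # e.g. 2 Token(TokenType.NOTE, '@c', (0, 0))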
smnp/token/tokenizers/paren.py (new file) | 8
@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeOpenParen(input, current, line):
    return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line)

def tokenizeCloseParen(input, current, line):
    return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line)
smnp/token/tokenizers/percent.py (new file) | 5
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizePercent(input, current, line):
    return tokenizeChar(TokenType.PERCENT, '%', input, current, line)
smnp/token/tokenizers/ret.py (new file) | 5
@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType

def tokenizeReturn(input, current, line):
    return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line)
smnp/token/tokenizers/string.py (new file) | 16
@@ -0,0 +1,16 @@
from smnp.token.type import TokenType
from smnp.token.model import Token

def tokenizeString(input, current, line):
    if input[current] == '"':
        value = input[current]
        char = ''
        consumedChars = 1
        while char != '"':
            if char is None: #TODO!!!
                print("String not terminated")
            char = input[current + consumedChars]
            value += char
            consumedChars += 1
        return (consumedChars, Token(TokenType.STRING, value, (line, current)))
    return (0, None)
smnp/token/tokenizers/whitespace.py (new file) | 4
@@ -0,0 +1,4 @@
from smnp.token.tools import tokenizeRegexPattern

def tokenizeWhitespaces(input, current, line):
    return tokenizeRegexPattern(None, r'\s', input, current, line)
smnp/token/tools.py (new file) | 21
@@ -0,0 +1,21 @@
import re
from smnp.token.model import Token

def tokenizeChar(type, char, input, current, line):
    if input[current] == char:
        return (1, Token(type, input[current], (line, current)))
    return (0, None)

def tokenizeRegexPattern(type, pattern, input, current, line):
    consumedChars = 0
    value = ''

    while current+consumedChars < len(input) and re.match(pattern, input[current+consumedChars]):
        value += input[current+consumedChars]
        consumedChars += 1
    return (consumedChars, Token(type, value, (line, current)) if consumedChars > 0 else None)

def tokenizeKeyword(type, keyword, input, current, line):
    if len(input) >= current+len(keyword) and input[current:current+len(keyword)] == keyword:
        return (len(keyword), Token(type, keyword, (line, current)))
    return (0, None)
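These helpers are the building blocks used by every module under smnp/token/tokenizers/. As a sketch of the pattern, a hypothetical tokenizer for a ';' separator and an 'if' keyword (neither SEMICOLON nor IF exists in TokenType in this commit; the string tags below are placeholders only):

from smnp.token.tools import tokenizeChar, tokenizeKeyword

def tokenizeSemicolon(input, current, line):
    # Same shape as dot.py or comma.py; a real version would use a TokenType member
    # and be added to the tokenizers tuple in smnp/token/tokenizer.py.
    return tokenizeChar("SEMICOLON", ';', input, current, line)

def tokenizeIf(input, current, line):
    # Same shape as the existing 'function' and 'return' keyword tokenizers.
    return tokenizeKeyword("IF", 'if', input, current, line)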
smnp/token/type.py (new file) | 21
@@ -0,0 +1,21 @@
from enum import Enum

class TokenType(Enum):
    OPEN_PAREN = 1
    CLOSE_PAREN = 2
    ASTERISK = 3
    STRING = 4
    IDENTIFIER = 5
    COMMA = 6
    INTEGER = 7
    OPEN_BRACKET = 8
    CLOSE_BRACKET = 9
    ASSIGN = 10
    COLON = 11
    NOTE = 12
    COMMENT = 13
    PERCENT = 14
    MINUS = 15
    FUNCTION = 16
    RETURN = 17
    DOT = 18