Refactor tokenizer

Bartłomiej Pluta
2019-07-03 01:55:08 +02:00
parent 8313d2dcfd
commit f826516d8f
41 changed files with 589 additions and 296 deletions

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeAssign(input, current, line):
    return tokenizeChar(TokenType.ASSIGN, '=', input, current, line)
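
Note: the tokenizeChar helper comes from smnp.token.tools, which is not shown in this diff. Judging from the call sites, it presumably looks roughly like the sketch below (an assumption, not the commit's actual code):

from smnp.token.model import Token

# Hypothetical sketch of smnp.token.tools.tokenizeChar.
# Emits a single-character token when the character at `current` equals `char`.
def tokenizeChar(type, char, input, current, line):
    if input[current] == char:
        return (1, Token(type, char, (line, current)))
    return (0, None)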

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeAsterisk(input, current, line):
    return tokenizeChar(TokenType.ASTERISK, '*', input, current, line)

View File

@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeOpenBracket(input, current, line):
    return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line)

def tokenizeCloseBracket(input, current, line):
    return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeColon(input, current, line):
    return tokenizeChar(TokenType.COLON, ':', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeComma(input, current, line):
    return tokenizeChar(TokenType.COMMA, ',', input, current, line)

View File

@@ -0,0 +1,13 @@
from smnp.token.type import TokenType
from smnp.token.model import Token

def tokenizeComment(input, current, line):
    # A comment starts with '#' and consumes every remaining character of the input
    if input[current] == '#':
        consumedChars = 0
        value = ''
        while current + consumedChars < len(input):
            value += input[current + consumedChars]
            consumedChars += 1
        return (consumedChars, Token(TokenType.COMMENT, value, (line, current)))
    return (0, None)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeDot(input, current, line):
    return tokenizeChar(TokenType.DOT, '.', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType

def tokenizeFunction(input, current, line):
    return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line)
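
Note: tokenizeKeyword is likewise imported from smnp.token.tools and not part of this diff. A plausible sketch follows; the word-boundary check is an assumption, but without something like it an input such as 'functions' would start with a FUNCTION token:

import re
from smnp.token.model import Token

# Hypothetical sketch of smnp.token.tools.tokenizeKeyword.
# Matches the whole keyword, requiring a non-word character (or end of input) after it.
def tokenizeKeyword(type, keyword, input, current, line):
    end = current + len(keyword)
    if input[current:end] == keyword and (end >= len(input) or not re.match(r'\w', input[end])):
        return (len(keyword), Token(type, keyword, (line, current)))
    return (0, None)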

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType

def tokenizeIdentifier(input, current, line):
    return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType

def tokenizeInteger(input, current, line):
    return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line)
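
The identifier and integer tokenizers both delegate to tokenizeRegexPattern, another smnp.token.tools helper this diff does not include. A sketch of the presumed behavior - consume consecutive characters matching a single-character pattern; the handling of a None token type is also an assumption (see the whitespace tokenizer at the end):

import re
from smnp.token.model import Token

# Hypothetical sketch of smnp.token.tools.tokenizeRegexPattern.
def tokenizeRegexPattern(type, pattern, input, current, line):
    consumedChars = 0
    value = ''
    while current + consumedChars < len(input) and re.match(pattern, input[current + consumedChars]):
        value += input[current + consumedChars]
        consumedChars += 1
    if consumedChars > 0:
        # A None type (used for whitespace) presumably produces no token at all
        token = Token(type, value, (line, current)) if type is not None else None
        return (consumedChars, token)
    return (0, None)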

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeMinus(input, current, line):
    return tokenizeChar(TokenType.MINUS, '-', input, current, line)

View File

@@ -0,0 +1,37 @@
import re
from smnp.token.type import TokenType
from smnp.token.model import Token

def tokenizeNote(input, current, line):
    consumedChars = 0
    value = ''
    if input[current] == '@':
        consumedChars += 1
        value += input[current]
        # Pitch letter (both German 'H' and English 'B' notations are accepted)
        if current + consumedChars < len(input) and input[current + consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'):
            value += input[current + consumedChars]
            consumedChars += 1
            # Optional accidental: flat 'b' or sharp '#'
            if current + consumedChars < len(input) and input[current + consumedChars] in ('b', '#'):
                value += input[current + consumedChars]
                consumedChars += 1
            # Optional octave digit
            if current + consumedChars < len(input) and re.match(r'\d', input[current + consumedChars]):
                value += input[current + consumedChars]
                consumedChars += 1
            # Optional duration: '.' followed by digits and an optional dotted-note marker 'd'
            if current + consumedChars < len(input) and input[current + consumedChars] == '.':
                duration = input[current + consumedChars]
                consumedChars += 1
                while current + consumedChars < len(input) and re.match(r'\d', input[current + consumedChars]):
                    duration += input[current + consumedChars]
                    consumedChars += 1
                if current + consumedChars < len(input) and input[current + consumedChars] == 'd':
                    duration += input[current + consumedChars]
                    consumedChars += 1
                if len(duration) > 1:
                    value += duration
                else:
                    # A lone '.' does not belong to the note - give it back
                    consumedChars -= 1
        return (consumedChars, Token(TokenType.NOTE, value, (line, current)))
    return (0, None)
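
For illustration, hand-tracing tokenizeNote on a full note literal (these values follow from the code above; they are not test output from this commit):

# pitch 'C', sharp '#', octave '4', duration '.8', dotted marker 'd'
tokenizeNote('@C#4.8d', 0, 1)
# -> (7, Token(TokenType.NOTE, '@C#4.8d', (1, 0)))

# A '@' followed by a non-pitch character still yields a one-character NOTE token
tokenizeNote('@x', 0, 1)
# -> (1, Token(TokenType.NOTE, '@', (1, 0)))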

View File

@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeOpenParen(input, current, line):
    return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line)

def tokenizeCloseParen(input, current, line):
    return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizePercent(input, current, line):
    return tokenizeChar(TokenType.PERCENT, '%', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType

def tokenizeReturn(input, current, line):
    return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line)

View File

@@ -0,0 +1,16 @@
from smnp.token.type import TokenType
from smnp.token.model import Token

def tokenizeString(input, current, line):
    if input[current] == '"':
        value = input[current]
        char = ''
        consumedChars = 1
        while char != '"':
            if current + consumedChars >= len(input):
                # TODO: proper error reporting for unterminated strings
                raise RuntimeError("String not terminated")
            char = input[current + consumedChars]
            value += char
            consumedChars += 1
        return (consumedChars, Token(TokenType.STRING, value, (line, current)))
    return (0, None)

View File

@@ -0,0 +1,4 @@
from smnp.token.tools import tokenizeRegexPattern

def tokenizeWhitespaces(input, current, line):
    return tokenizeRegexPattern(None, r'\s', input, current, line)
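
Every tokenizer in this commit shares the signature (input, current, line) -> (consumedChars, token), so a driver can simply try them in order. The actual dispatch loop lives elsewhere in this commit; a minimal sketch of how such a loop typically looks (names and error handling here are assumptions, not the commit's code):

def tokenizeLine(input, line, tokenizers):
    tokens = []
    current = 0
    while current < len(input):
        for tokenizer in tokenizers:
            consumedChars, token = tokenizer(input, current, line)
            if consumedChars > 0:
                if token is not None:  # whitespace consumes characters but emits no token
                    tokens.append(token)
                current += consumedChars
                break
        else:
            raise RuntimeError("Unexpected character %r at line %d, column %d" % (input[current], line, current))
    return tokens

Order matters in such a list: keyword tokenizers like tokenizeFunction and tokenizeReturn have to run before tokenizeIdentifier, or 'function' would be consumed as an identifier.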