Refactor tokenizer

Bartłomiej Pluta
2019-07-03 01:55:08 +02:00
parent 8313d2dcfd
commit f826516d8f
41 changed files with 589 additions and 296 deletions

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeAssign(input, current, line):
    return tokenizeChar(TokenType.ASSIGN, '=', input, current, line)
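
Note: the tokenizeChar helper comes from smnp.token.tools, which is not shown in this diff. Judging from the call sites, it presumably looks roughly like the sketch below (an assumption, not the commit's actual code):

from smnp.token.model import Token

# Hypothetical sketch of smnp.token.tools.tokenizeChar.
# Emits a single-character token when the character at `current` equals `char`.
def tokenizeChar(type, char, input, current, line):
    if input[current] == char:
        return (1, Token(type, char, (line, current)))
    return (0, None)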

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeAsterisk(input, current, line):
    return tokenizeChar(TokenType.ASTERISK, '*', input, current, line)

View File

@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeOpenBracket(input, current, line):
    return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line)

def tokenizeCloseBracket(input, current, line):
    return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeColon(input, current, line):
    return tokenizeChar(TokenType.COLON, ':', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeComma(input, current, line):
    return tokenizeChar(TokenType.COMMA, ',', input, current, line)

View File

@@ -0,0 +1,13 @@
from smnp.token.type import TokenType
from smnp.token.model import Token

def tokenizeComment(input, current, line):
    # A comment starts with '#' and consumes every remaining character of the input
    if input[current] == '#':
        consumedChars = 0
        value = ''
        while current + consumedChars < len(input):
            value += input[current + consumedChars]
            consumedChars += 1
        return (consumedChars, Token(TokenType.COMMENT, value, (line, current)))
    return (0, None)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeDot(input, current, line):
    return tokenizeChar(TokenType.DOT, '.', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType

def tokenizeFunction(input, current, line):
    return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line)
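
Note: tokenizeKeyword is likewise imported from smnp.token.tools and not part of this diff. A plausible sketch follows; the word-boundary check is an assumption, but without something like it an input such as 'functions' would start with a FUNCTION token:

import re
from smnp.token.model import Token

# Hypothetical sketch of smnp.token.tools.tokenizeKeyword.
# Matches the whole keyword, requiring a non-word character (or end of input) after it.
def tokenizeKeyword(type, keyword, input, current, line):
    end = current + len(keyword)
    if input[current:end] == keyword and (end >= len(input) or not re.match(r'\w', input[end])):
        return (len(keyword), Token(type, keyword, (line, current)))
    return (0, None)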

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType

def tokenizeIdentifier(input, current, line):
    return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType

def tokenizeInteger(input, current, line):
    return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line)
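
The identifier and integer tokenizers both delegate to tokenizeRegexPattern, another smnp.token.tools helper this diff does not include. A sketch of the presumed behavior - consume consecutive characters matching a single-character pattern; the handling of a None token type is also an assumption (see the whitespace tokenizer at the end):

import re
from smnp.token.model import Token

# Hypothetical sketch of smnp.token.tools.tokenizeRegexPattern.
def tokenizeRegexPattern(type, pattern, input, current, line):
    consumedChars = 0
    value = ''
    while current + consumedChars < len(input) and re.match(pattern, input[current + consumedChars]):
        value += input[current + consumedChars]
        consumedChars += 1
    if consumedChars > 0:
        # A None type (used for whitespace) presumably produces no token at all
        token = Token(type, value, (line, current)) if type is not None else None
        return (consumedChars, token)
    return (0, None)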

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeMinus(input, current, line):
    return tokenizeChar(TokenType.MINUS, '-', input, current, line)

View File

@@ -0,0 +1,37 @@
import re
from smnp.token.type import TokenType
from smnp.token.model import Token

def tokenizeNote(input, current, line):
    consumedChars = 0
    value = ''
    if input[current] == '@':
        consumedChars += 1
        value += input[current]
        # Pitch letter (both German 'H' and English 'B' notations are accepted)
        if current + consumedChars < len(input) and input[current + consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'):
            value += input[current + consumedChars]
            consumedChars += 1
            # Optional accidental: flat 'b' or sharp '#'
            if current + consumedChars < len(input) and input[current + consumedChars] in ('b', '#'):
                value += input[current + consumedChars]
                consumedChars += 1
            # Optional octave digit
            if current + consumedChars < len(input) and re.match(r'\d', input[current + consumedChars]):
                value += input[current + consumedChars]
                consumedChars += 1
            # Optional duration: '.' followed by digits and an optional dotted-note marker 'd'
            if current + consumedChars < len(input) and input[current + consumedChars] == '.':
                duration = input[current + consumedChars]
                consumedChars += 1
                while current + consumedChars < len(input) and re.match(r'\d', input[current + consumedChars]):
                    duration += input[current + consumedChars]
                    consumedChars += 1
                if current + consumedChars < len(input) and input[current + consumedChars] == 'd':
                    duration += input[current + consumedChars]
                    consumedChars += 1
                if len(duration) > 1:
                    value += duration
                else:
                    # A lone '.' does not belong to the note - give it back
                    consumedChars -= 1
        return (consumedChars, Token(TokenType.NOTE, value, (line, current)))
    return (0, None)
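
For illustration, hand-tracing tokenizeNote on a full note literal (these values follow from the code above; they are not test output from this commit):

# pitch 'C', sharp '#', octave '4', duration '.8', dotted marker 'd'
tokenizeNote('@C#4.8d', 0, 1)
# -> (7, Token(TokenType.NOTE, '@C#4.8d', (1, 0)))

# A '@' followed by a non-pitch character still yields a one-character NOTE token
tokenizeNote('@x', 0, 1)
# -> (1, Token(TokenType.NOTE, '@', (1, 0)))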

View File

@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizeOpenParen(input, current, line):
    return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line)

def tokenizeCloseParen(input, current, line):
    return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType

def tokenizePercent(input, current, line):
    return tokenizeChar(TokenType.PERCENT, '%', input, current, line)

View File

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType

def tokenizeReturn(input, current, line):
    return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line)

View File

@@ -0,0 +1,16 @@
from smnp.token.type import TokenType
from smnp.token.model import Token

def tokenizeString(input, current, line):
    if input[current] == '"':
        value = input[current]
        char = ''
        consumedChars = 1
        while char != '"':
            if current + consumedChars >= len(input):
                # TODO: proper error reporting for unterminated strings
                raise RuntimeError("String not terminated")
            char = input[current + consumedChars]
            value += char
            consumedChars += 1
        return (consumedChars, Token(TokenType.STRING, value, (line, current)))
    return (0, None)

View File

@@ -0,0 +1,4 @@
from smnp.token.tools import tokenizeRegexPattern

def tokenizeWhitespaces(input, current, line):
    return tokenizeRegexPattern(None, r'\s', input, current, line)
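
Every tokenizer in this commit shares the signature (input, current, line) -> (consumedChars, token), so a driver can simply try them in order. The actual dispatch loop lives elsewhere in this commit; a minimal sketch of how such a loop typically looks (names and error handling here are assumptions, not the commit's code):

def tokenizeLine(input, line, tokenizers):
    tokens = []
    current = 0
    while current < len(input):
        for tokenizer in tokenizers:
            consumedChars, token = tokenizer(input, current, line)
            if consumedChars > 0:
                if token is not None:  # whitespace consumes characters but emits no token
                    tokens.append(token)
                current += consumedChars
                break
        else:
            raise RuntimeError("Unexpected character %r at line %d, column %d" % (input[current], line, current))
    return tokens

Order matters in such a list: keyword tokenizers like tokenizeFunction and tokenizeReturn have to run before tokenizeIdentifier, or 'function' would be consumed as an identifier.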