Refactor tokenizer: remove the standalone colon token, make the colon the duration separator in notes, and create TokenType.TYPE

commit c1fbc2fe23
parent f7b8704516
Author: Bartłomiej Pluta
Date:   2019-07-05 23:09:27 +02:00

6 changed files with 43 additions and 30 deletions

View File (tokenizer registry module)

@@ -3,7 +3,6 @@ from smnp.token.model import TokenList
 from smnp.token.tokenizers.assign import tokenizeAssign
 from smnp.token.tokenizers.asterisk import tokenizeAsterisk
 from smnp.token.tokenizers.bracket import tokenizeOpenBracket, tokenizeCloseBracket
-from smnp.token.tokenizers.colon import tokenizeColon
 from smnp.token.tokenizers.comma import tokenizeComma
 from smnp.token.tokenizers.comment import tokenizeComment
 from smnp.token.tokenizers.dot import tokenizeDot
@@ -17,6 +16,7 @@ from smnp.token.tokenizers.percent import tokenizePercent
 from smnp.token.tokenizers.ret import tokenizeReturn
 from smnp.token.tokenizers.square import tokenizeOpenSquare, tokenizeCloseSquare
 from smnp.token.tokenizers.string import tokenizeString
+from smnp.token.tokenizers.type import tokenizeType
 from smnp.token.tokenizers.whitespace import tokenizeWhitespaces
 from smnp.token.type import TokenType
 
@@ -25,7 +25,8 @@ tokenizers = (
     tokenizeCloseParen,
     tokenizeOpenSquare,
     tokenizeCloseSquare,
     tokenizeAsterisk,
+    tokenizeType,
     tokenizeString,
     tokenizeFunction,
     tokenizeReturn,
@@ -36,7 +37,6 @@ tokenizers = (
     tokenizeOpenBracket,
     tokenizeCloseBracket,
     tokenizeAssign,
-    tokenizeColon,
     tokenizePercent,
     tokenizeMinus,
     tokenizeDot,

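The registry is an ordered tuple: the lexer presumably tries each tokenizer in turn and takes the first match, which is why tokenizeType is inserted ahead of tokenizeString and ahead of the identifier tokenizer further down the list, so that a bare word like "string" becomes a TYPE token rather than an IDENTIFIER. The dispatch loop itself is not part of this diff; what follows is a minimal first-match sketch under that assumption, placed in the same module as the tokenizers tuple above (the function name tokenize and the error handling are illustrative, not the project's actual code):

def tokenize(input, line=0):
    tokens = []
    current = 0
    while current < len(input):
        for tokenizer in tokenizers:
            # Every tokenizer follows the (consumedChars, token) convention:
            # (0, None) means "no match", a positive count advances the cursor.
            consumed, token = tokenizer(input, current, line)
            if consumed > 0:
                if token is not None:  # e.g. whitespace may consume without emitting
                    tokens.append(token)
                current += consumed
                break
        else:
            raise SyntaxError(f"unexpected character {input[current]!r} at {current}")
    return tokens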
View File: smnp/token/tokenizers/colon.py (deleted)

@@ -1,5 +0,0 @@
-from smnp.token.tools import tokenizeChar
-from smnp.token.type import TokenType
-
-def tokenizeColon(input, current, line):
-    return tokenizeChar(TokenType.COLON, ':', input, current, line)

View File: smnp/token/tokenizers/note.py

@@ -25,7 +25,7 @@ def tokenizeNote(input, current, line):
         octave = input[current+consumedChars]
         consumedChars += 1
-    if current+consumedChars < len(input) and input[current+consumedChars] == '^':
+    if current+consumedChars < len(input) and input[current+consumedChars] == ':':
         duration = ''
         consumedChars += 1
         while current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):

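The caret that previously introduced a note's duration is replaced with a colon, e.g. a4:8 instead of a4^8 (the exact note literal syntax is assumed here; the hunk only shows the octave and duration handling). A self-contained sketch of just the scanning step this hunk implements:

import re

def scanDuration(input, current):
    # After pitch and octave, ':' introduces an optional numeric duration.
    duration = ''
    if current < len(input) and input[current] == ':':
        current += 1
        while current < len(input) and re.match(r'\d', input[current]):
            duration += input[current]
            current += 1
    return duration, current

print(scanDuration("a4:16", 2))  # -> ('16', 5)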
View File: smnp/token/tokenizers/type.py (new file)

@@ -0,0 +1,8 @@
+from smnp.token.tools import tokenizeKeywords
+from smnp.token.type import TokenType
+from smnp.type.model import Type
+
+
+def tokenizeType(input, current, line):
+    types = [ type.name.lower() for type in Type ]
+    return tokenizeKeywords(TokenType.TYPE, input, current, line, *types)

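tokenizeType derives its keyword list from the Type enum at runtime, so any type added to smnp.type.model automatically becomes tokenizable under its lowercase name. Assuming, for illustration only, that Type had members INTEGER, STRING and NOTE (the real members live in smnp.type.model and are not shown in this commit):

from enum import Enum, auto

class Type(Enum):  # hypothetical stand-in for smnp.type.model.Type
    INTEGER = auto()
    STRING = auto()
    NOTE = auto()

types = [type.name.lower() for type in Type]
print(types)  # -> ['integer', 'string', 'note']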
View File: smnp/token/tools.py

@@ -1,6 +1,8 @@
 import re
 
 from smnp.token.model import Token
+
+
 def tokenizeChar(type, char, input, current, line):
     if input[current] == char:
         return (1, Token(type, input[current], (line, current)))
@@ -15,6 +17,13 @@ def tokenizeRegexPattern(type, pattern, input, current, line):
         consumedChars += 1
     return (consumedChars, Token(type, value, (line, current)) if consumedChars > 0 else None)
 
+def tokenizeKeywords(type, input, current, line, *keywords):
+    for keyword in keywords:
+        result = tokenizeKeyword(type, keyword, input, current, line)
+        if result[0] > 0:
+            return result
+    return (0, None)
+
 def tokenizeKeyword(type, keyword, input, current, line):
     if len(input) >= current+len(keyword) and input[current:current+len(keyword)] == keyword:
         return (len(keyword), Token(type, keyword, (line, current)))

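tokenizeKeywords is a first-match fold over tokenizeKeyword: it relies on the shared (consumedChars, token) convention, treating any positive consumed count as a hit and falling back to (0, None) when no keyword matches. A usage sketch built on the functions above (the sample input string is illustrative):

from smnp.token.tools import tokenizeKeywords
from smnp.token.type import TokenType

consumed, token = tokenizeKeywords(
    TokenType.TYPE, "string x", 0, 1, "integer", "string")
# "integer" does not match at position 0, "string" does:
# consumed == 6 and token == Token(TokenType.TYPE, "string", (1, 0))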
View File: smnp/token/type.py

@@ -1,23 +1,24 @@
-from enum import Enum
+from enum import Enum, auto
+
 
 class TokenType(Enum):
-    OPEN_PAREN = 1
-    CLOSE_PAREN = 2
-    ASTERISK = 3
-    STRING = 4
-    IDENTIFIER = 5
-    COMMA = 6
-    INTEGER = 7
-    OPEN_BRACKET = 8
-    CLOSE_BRACKET = 9
-    ASSIGN = 10
-    COLON = 11
-    NOTE = 12
-    COMMENT = 13
-    PERCENT = 14
-    MINUS = 15
-    FUNCTION = 16
-    RETURN = 17
-    DOT = 18
-    OPEN_SQUARE = 19
-    CLOSE_SQUARE = 20
+    OPEN_PAREN = auto()
+    CLOSE_PAREN = auto()
+    ASTERISK = auto()
+    STRING = auto()
+    IDENTIFIER = auto()
+    COMMA = auto()
+    INTEGER = auto()
+    OPEN_BRACKET = auto()
+    CLOSE_BRACKET = auto()
+    ASSIGN = auto()
+    NOTE = auto()
+    COMMENT = auto()
+    PERCENT = auto()
+    MINUS = auto()
+    FUNCTION = auto()
+    RETURN = auto()
+    DOT = auto()
+    OPEN_SQUARE = auto()
+    CLOSE_SQUARE = auto()
+    TYPE = auto()
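Moving the enum to auto() drops the hand-maintained numbering, which is what makes removing COLON and appending TYPE painless here: no other member has to be renumbered. auto() assigns consecutive values in definition order, so the values still exist but no longer have to be kept in sync by hand:

from enum import Enum, auto

class Color(Enum):  # illustrative enum, same mechanism as TokenType
    RED = auto()
    GREEN = auto()

print(Color.RED.value, Color.GREEN.value)  # -> 1 2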