diff --git a/smnp/token/tokenizer.py b/smnp/token/tokenizer.py
index 4540ba3..fe1cac8 100644
--- a/smnp/token/tokenizer.py
+++ b/smnp/token/tokenizer.py
@@ -1,62 +1,57 @@
 from smnp.error.syntax import SyntaxException
 from smnp.token.model import TokenList
-from smnp.token.tokenizers.assign import tokenizeAssign
-from smnp.token.tokenizers.asterisk import tokenizeAsterisk
-from smnp.token.tokenizers.bracket import tokenizeOpenBracket, tokenizeCloseBracket
-from smnp.token.tokenizers.comma import tokenizeComma
-from smnp.token.tokenizers.comment import tokenizeComment
-from smnp.token.tokenizers.dot import tokenizeDot
-from smnp.token.tokenizers.identifier import tokenizeIdentifier
-from smnp.token.tokenizers.integer import tokenizeInteger
-from smnp.token.tokenizers.keyword import tokenizeType, tokenizeFunction, tokenizeReturn, tokenizeExtend, \
-    tokenizeImport, tokenizeFrom, tokenizeAs
-from smnp.token.tokenizers.minus import tokenizeMinus
-from smnp.token.tokenizers.note import tokenizeNote
-from smnp.token.tokenizers.paren import tokenizeOpenParen, tokenizeCloseParen
-from smnp.token.tokenizers.percent import tokenizePercent
-from smnp.token.tokenizers.square import tokenizeOpenSquare, tokenizeCloseSquare
-from smnp.token.tokenizers.string import tokenizeString
-from smnp.token.tokenizers.whitespace import tokenizeWhitespaces
+from smnp.token.tokenizers.comment import commentTokenizer
+from smnp.token.tokenizers.identifier import identifierTokenizer
+from smnp.token.tokenizers.keyword import typeTokenizer
+from smnp.token.tokenizers.note import noteTokenizer
+from smnp.token.tokenizers.string import stringTokenizer
+from smnp.token.tokenizers.whitespace import whitespacesTokenizer
+from smnp.token.tools import defaultTokenizer, separated, regexPatternTokenizer
 from smnp.token.type import TokenType
 
-# TODO !!!
-# Enable tokenizer to detect separators of tokens
-# for example, "notes" instead of being tokenized
-# to IDENTIFIER(notes) is tokenized to [TYPE(note), IDENTIFIER(s)]
-
 tokenizers = (
-    tokenizeOpenParen,
-    tokenizeCloseParen,
-    tokenizeOpenSquare,
-    tokenizeCloseSquare,
-    tokenizeAsterisk,
-    tokenizeType,
-    tokenizeString,
-    tokenizeFunction,
-    tokenizeReturn,
-    tokenizeExtend,
-    tokenizeImport,
-    tokenizeFrom,
-    tokenizeAs,
-    tokenizeInteger,
-    tokenizeNote,
-    tokenizeIdentifier,
-    tokenizeComma,
-    tokenizeOpenBracket,
-    tokenizeCloseBracket,
-    tokenizeAssign,
-    tokenizePercent,
-    tokenizeMinus,
-    tokenizeDot,
-    tokenizeComment,
-    tokenizeWhitespaces,
+    # Characters
+    defaultTokenizer(TokenType.OPEN_BRACKET),
+    defaultTokenizer(TokenType.CLOSE_BRACKET),
+    defaultTokenizer(TokenType.OPEN_PAREN),
+    defaultTokenizer(TokenType.OPEN_SQUARE),
+    defaultTokenizer(TokenType.CLOSE_SQUARE),
+    defaultTokenizer(TokenType.CLOSE_PAREN),
+    defaultTokenizer(TokenType.ASTERISK),
+    defaultTokenizer(TokenType.ASSIGN),
+    defaultTokenizer(TokenType.COMMA),
+    defaultTokenizer(TokenType.MINUS),
+    defaultTokenizer(TokenType.DOT),
+
+    # Types
+    separated(regexPatternTokenizer(TokenType.INTEGER, r'\d')),
+    stringTokenizer,
+    typeTokenizer,
+    noteTokenizer,
+
+    # Keywords
+    separated(defaultTokenizer(TokenType.FUNCTION)),
+    separated(defaultTokenizer(TokenType.RETURN)),
+    separated(defaultTokenizer(TokenType.EXTEND)),
+    separated(defaultTokenizer(TokenType.IMPORT)),
+    separated(defaultTokenizer(TokenType.FROM)),
+    separated(defaultTokenizer(TokenType.AS)),
+
+    # Identifier (must not come before the keyword tokenizers!)
+    identifierTokenizer,
+
+    # Other
+    whitespacesTokenizer,
+    commentTokenizer,
 )
+
 filters = [
     lambda token: token.type is not None,
     lambda token: token.type != TokenType.COMMENT
 ]
+
 def tokenize(lines):
     tokens = []
     for lineNumber, line in enumerate(lines):
diff --git a/smnp/token/tokenizers/assign.py b/smnp/token/tokenizers/assign.py
deleted file mode 100644
index 0467fd9..0000000
--- a/smnp/token/tokenizers/assign.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from smnp.token.tools import charTokenizer
-from smnp.token.type import TokenType
-
-def tokenizeAssign(input, current, line):
-    return charTokenizer(TokenType.ASSIGN, '=')(input, current, line)
diff --git a/smnp/token/tokenizers/asterisk.py b/smnp/token/tokenizers/asterisk.py
deleted file mode 100644
index ee03f9d..0000000
--- a/smnp/token/tokenizers/asterisk.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from smnp.token.tools import charTokenizer
-from smnp.token.type import TokenType
-
-
-def tokenizeAsterisk(input, current, line):
-    return charTokenizer(TokenType.ASTERISK, '*')(input, current, line)
diff --git a/smnp/token/tokenizers/bracket.py b/smnp/token/tokenizers/bracket.py
deleted file mode 100644
index 3220ef7..0000000
--- a/smnp/token/tokenizers/bracket.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from smnp.token.tools import charTokenizer
-from smnp.token.type import TokenType
-
-
-def tokenizeOpenBracket(input, current, line):
-    return charTokenizer(TokenType.OPEN_BRACKET, '{')(input, current, line)
-
-
-def tokenizeCloseBracket(input, current, line):
-    return charTokenizer(TokenType.CLOSE_BRACKET, '}')(input, current, line)
diff --git a/smnp/token/tokenizers/comma.py b/smnp/token/tokenizers/comma.py
deleted file mode 100644
index 184e84c..0000000
--- a/smnp/token/tokenizers/comma.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from smnp.token.tools import charTokenizer
-from smnp.token.type import TokenType
-
-
-def tokenizeComma(input, current, line):
-    return charTokenizer(TokenType.COMMA, ',')(input, current, line)
diff --git a/smnp/token/tokenizers/comment.py b/smnp/token/tokenizers/comment.py
index c5fd12c..d15f938 100644
--- a/smnp/token/tokenizers/comment.py
+++ b/smnp/token/tokenizers/comment.py
@@ -1,7 +1,8 @@
-from smnp.token.type import TokenType
 from smnp.token.model import Token
+from smnp.token.type import TokenType
 
-def tokenizeComment(input, current, line):
+
+def commentTokenizer(input, current, line):
     if input[current] == '#':
         consumedChars = 0
         value = ''
diff --git a/smnp/token/tokenizers/dot.py b/smnp/token/tokenizers/dot.py
deleted file mode 100644
index 126e12c..0000000
--- a/smnp/token/tokenizers/dot.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from smnp.token.tools import charTokenizer
-from smnp.token.type import TokenType
-
-def tokenizeDot(input, current, line):
-    return charTokenizer(TokenType.DOT, '.')(input, current, line)
diff --git a/smnp/token/tokenizers/identifier.py b/smnp/token/tokenizers/identifier.py
index 6728039..bf679fb 100644
--- a/smnp/token/tokenizers/identifier.py
+++ b/smnp/token/tokenizers/identifier.py
@@ -1,6 +1,6 @@
 from smnp.token.tools import regexPatternTokenizer
 from smnp.token.type import TokenType
 
-def tokenizeIdentifier(input, current, line):
+def identifierTokenizer(input, current, line):
     # TODO: Disallow identifiers beginning with a number
     return regexPatternTokenizer(TokenType.IDENTIFIER, r'\w')(input, current, line)
diff --git a/smnp/token/tokenizers/integer.py b/smnp/token/tokenizers/integer.py
deleted file mode 100644
index b135444..0000000
--- a/smnp/token/tokenizers/integer.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from smnp.token.tools import regexPatternTokenizer, separate
-from smnp.token.type import TokenType
-
-def tokenizeInteger(input, current, line):
-    return separate(regexPatternTokenizer(TokenType.INTEGER, r'\d'))(input, current, line)
diff --git a/smnp/token/tokenizers/keyword.py b/smnp/token/tokenizers/keyword.py
index e0dd453..e1d2186 100644
--- a/smnp/token/tokenizers/keyword.py
+++ b/smnp/token/tokenizers/keyword.py
@@ -1,34 +1,10 @@
-from smnp.token.tools import keywordsTokenizer, keywordTokenizer, separate
+from smnp.token.tools import keywordsTokenizer, separated
 from smnp.token.type import TokenType
 from smnp.type.model import Type
 
 
-def tokenizeType(input, current, line):
-    types = [ type.name.lower() for type in Type ]
-    return separate(keywordsTokenizer(TokenType.TYPE, *types))(input, current, line)
+typeTokenizer = separated(keywordsTokenizer(TokenType.TYPE, *[type.name.lower() for type in Type]))
 
-
-def tokenizeReturn(input, current, line):
-    return separate(keywordTokenizer(TokenType.RETURN, 'return'))(input, current, line)
-
-
-def tokenizeFunction(input, current, line):
-    return separate(keywordTokenizer(TokenType.FUNCTION, 'function'))(input, current, line)
-
-
-def tokenizeExtend(input, current, line):
-    return separate(keywordTokenizer(TokenType.EXTEND, "extend"))(input, current, line)
-
-
-def tokenizeImport(input, current, line):
-    return separate(keywordTokenizer(TokenType.IMPORT, "import"))(input, current, line)
-
-
-def tokenizeFrom(input, current, line):
-    return separate(keywordTokenizer(TokenType.FROM, "from"))(input, current, line)
-
-
-def tokenizeAs(input, current, line):
-    return separate(keywordTokenizer(TokenType.AS, "as"))(input, current, line)
diff --git a/smnp/token/tokenizers/minus.py b/smnp/token/tokenizers/minus.py
deleted file mode 100644
index 38fb784..0000000
--- a/smnp/token/tokenizers/minus.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from smnp.token.tools import charTokenizer
-from smnp.token.type import TokenType
-
-def tokenizeMinus(input, current, line):
-    return charTokenizer(TokenType.MINUS, '-')(input, current, line)
diff --git a/smnp/token/tokenizers/note.py b/smnp/token/tokenizers/note.py
index cc2be13..672d66f 100644
--- a/smnp/token/tokenizers/note.py
+++ b/smnp/token/tokenizers/note.py
@@ -5,7 +5,7 @@ from smnp.token.model import Token
 from smnp.token.type import TokenType
 
 
-def tokenizeNote(input, current, line):
+def noteTokenizer(input, current, line):
     consumedChars = 0
     notePitch = None
     octave = None
diff --git a/smnp/token/tokenizers/paren.py b/smnp/token/tokenizers/paren.py
deleted file mode 100644
index 30fa9f5..0000000
--- a/smnp/token/tokenizers/paren.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from smnp.token.tools import charTokenizer
-from smnp.token.type import TokenType
-
-
-def tokenizeOpenParen(input, current, line):
-    return charTokenizer(TokenType.OPEN_PAREN, '(')(input, current, line)
-
-def tokenizeCloseParen(input, current, line):
-    return charTokenizer(TokenType.CLOSE_PAREN, ')')(input, current, line)
diff --git a/smnp/token/tokenizers/percent.py b/smnp/token/tokenizers/percent.py
deleted file mode 100644
index 1d16e08..0000000
--- a/smnp/token/tokenizers/percent.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from smnp.token.tools import charTokenizer
-from smnp.token.type import TokenType
-
-def tokenizePercent(input, current, line):
-    return charTokenizer(TokenType.PERCENT, '%')(input, current, line)
diff --git a/smnp/token/tokenizers/square.py b/smnp/token/tokenizers/square.py
deleted file mode 100644
index 0946b32..0000000
--- a/smnp/token/tokenizers/square.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from smnp.token.tools import charTokenizer
-from smnp.token.type import TokenType
-
-def tokenizeOpenSquare(input, current, line):
-    return charTokenizer(TokenType.OPEN_SQUARE, '[')(input, current, line)
-
-def tokenizeCloseSquare(input, current, line):
-    return charTokenizer(TokenType.CLOSE_SQUARE, ']')(input, current, line)
diff --git a/smnp/token/tokenizers/string.py b/smnp/token/tokenizers/string.py
index d66beae..a07a98e 100644
--- a/smnp/token/tokenizers/string.py
+++ b/smnp/token/tokenizers/string.py
@@ -1,13 +1,14 @@
-from smnp.token.type import TokenType
 from smnp.token.model import Token
+from smnp.token.type import TokenType
 
-def tokenizeString(input, current, line):
+
+def stringTokenizer(input, current, line):
     if input[current] == '"':
         value = input[current]
         char = ''
         consumedChars = 1
         while char != '"':
-            if char is None: #TODO!!!
+            if char is None:  # TODO!!!
                 print("String not terminated")
             char = input[current + consumedChars]
             value += char
diff --git a/smnp/token/tokenizers/whitespace.py b/smnp/token/tokenizers/whitespace.py
index a25168b..bb89c47 100644
--- a/smnp/token/tokenizers/whitespace.py
+++ b/smnp/token/tokenizers/whitespace.py
@@ -1,4 +1,3 @@
 from smnp.token.tools import regexPatternTokenizer
 
-def tokenizeWhitespaces(input, current, line):
-    return regexPatternTokenizer(None, r'\s')(input, current, line)
+whitespacesTokenizer = regexPatternTokenizer(None, r'\s')
diff --git a/smnp/token/tools.py b/smnp/token/tools.py
index eba097c..88d73fa 100644
--- a/smnp/token/tools.py
+++ b/smnp/token/tools.py
@@ -3,15 +3,6 @@ import re
 from smnp.token.model import Token
 
 
-def charTokenizer(type, char):
-    def tokenizer(input, current, line):
-        if input[current] == char:
-            return (1, Token(type, input[current], (line, current)))
-        return (0, None)
-
-    return tokenizer
-
-
 def regexPatternTokenizer(type, pattern):
     def tokenizer(input, current, line):
         consumedChars = 0
@@ -44,7 +35,11 @@ def keywordTokenizer(type, keyword):
     return tokenizer
 
 
-def separate(tokenizer, end=r"\W"):
+def defaultTokenizer(type):
+    return keywordTokenizer(type, type.key)
+
+
+def separated(tokenizer, end=r"\W"):
     def separated(input, current, line):
         consumedChars, token = tokenizer(input, current, line)
         if consumedChars > 0:
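
Reviewer note: the change replaces the per-character tokenizer modules with combinators from smnp/token/tools.py. A tokenizer here is a callable (input, current, line) -> (consumedChars, token-or-None), and separated() guards a match with a separator lookahead, which is what resolves the old TODO about "notes" being split into TYPE(note) + IDENTIFIER(s). Below is a minimal stand-alone sketch of that mechanism; the Token stand-in and the exact lookahead check (the separated() body is truncated after `if consumedChars > 0:` in this diff) are assumptions, not the repository's code.

import re
from collections import namedtuple

# Stand-in for smnp.token.model.Token (assumed shape: type, value, position).
Token = namedtuple('Token', 'type value pos')

def keywordTokenizer(type, keyword):
    # Match a fixed keyword at the current position and consume its length.
    def tokenizer(input, current, line):
        if input[current:current + len(keyword)] == keyword:
            return (len(keyword), Token(type, keyword, (line, current)))
        return (0, None)
    return tokenizer

def separated(tokenizer, end=r"\W"):
    # Accept the wrapped tokenizer's match only when a separator
    # (default: any non-word character) or end of input follows it.
    def wrapped(input, current, line):
        consumedChars, token = tokenizer(input, current, line)
        if consumedChars > 0:
            after = current + consumedChars
            if after >= len(input) or re.match(end, input[after]):
                return (consumedChars, token)
        return (0, None)  # assumption: reject the match rather than split the word
    return wrapped

note = keywordTokenizer('TYPE', 'note')
print(note('notes', 0, 1))             # (4, Token(...)): a bare keyword match is greedy
print(separated(note)('notes', 0, 1))  # (0, None): 's' follows, so 'notes' falls through to identifierTokenizer
print(separated(note)('note;', 0, 1))  # (4, Token(...)): ';' is a valid separator

This also shows why identifierTokenizer must stay after the keyword entries in the tokenizers tuple: conflicts are resolved by ordering, with the separator lookahead handling the prefix cases.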