import sys
import time
import re

from smnp.error.syntax import SyntaxException
from smnp.token.type import TokenType
from smnp.token.model import Token, TokenList
from smnp.token.tools import tokenizeChar, tokenizeRegexPattern
from smnp.token.tokenizers.paren import tokenizeOpenParen, tokenizeCloseParen
from smnp.token.tokenizers.asterisk import tokenizeAsterisk
from smnp.token.tokenizers.whitespace import tokenizeWhitespaces
from smnp.token.tokenizers.identifier import tokenizeIdentifier
from smnp.token.tokenizers.comma import tokenizeComma
from smnp.token.tokenizers.string import tokenizeString
from smnp.token.tokenizers.integer import tokenizeInteger
from smnp.token.tokenizers.bracket import tokenizeOpenBracket, tokenizeCloseBracket
from smnp.token.tokenizers.assign import tokenizeAssign
from smnp.token.tokenizers.colon import tokenizeColon
from smnp.token.tokenizers.comment import tokenizeComment
from smnp.token.tokenizers.note import tokenizeNote
from smnp.token.tokenizers.function import tokenizeFunction
from smnp.token.tokenizers.ret import tokenizeReturn
from smnp.token.tokenizers.percent import tokenizePercent
from smnp.token.tokenizers.minus import tokenizeMinus
from smnp.token.tokenizers.dot import tokenizeDot


# Tokenizers are tried in the order listed; the first one that consumes
# characters wins, so the keyword tokenizers (function, return) must come
# before the more general identifier tokenizer.
tokenizers = (
    tokenizeOpenParen,
    tokenizeCloseParen,
    tokenizeAsterisk,
    tokenizeString,
    tokenizeFunction,
    tokenizeReturn,
    tokenizeInteger,
    tokenizeNote,
    tokenizeIdentifier,
    tokenizeComma,
    tokenizeOpenBracket,
    tokenizeCloseBracket,
    tokenizeAssign,
    tokenizeColon,
    tokenizePercent,
    tokenizeMinus,
    tokenizeDot,
    tokenizeComment,
    tokenizeWhitespaces,
)


# Filters applied to the token stream after tokenization: tokens with no
# type and comment tokens are dropped.
filters = [
    lambda token: token.type is not None,
    lambda token: token.type != TokenType.COMMENT,
]


def tokenize(lines):
    """Turn an iterable of source lines into a TokenList.

    Each line is scanned left to right; the combined tokenizer consumes
    characters until the whole line is tokenized. A SyntaxException is
    raised at the first position no tokenizer can handle.
    """
    tokens = []
    for lineNumber, line in enumerate(lines):
        current = 0
        while current < len(line):
            consumedChars, token = combinedTokenizer(line, current, lineNumber)

            if consumedChars == 0:
                raise SyntaxException((lineNumber, current), f"Unknown symbol '{line[current]}'")

            current += consumedChars
            tokens.append(token)

    return TokenList(filterTokens(filters, tokens))


def combinedTokenizer(line, current, lineNumber):
    # Try every tokenizer in order; the first one that consumes at least
    # one character wins. (0, None) means nothing matched at this position.
    for tokenizer in tokenizers:
        consumedChars, token = tokenizer(line, current, lineNumber)
        if consumedChars > 0:
            return (consumedChars, token)
    return (0, None)


def filterTokens(filters, tokens):
    # Recursively apply each predicate, keeping only the tokens that satisfy it.
    if not filters:
        return tokens

    return list(filterTokens(filters[1:], (token for token in tokens if filters[0](token))))


__all__ = ["tokenize"]
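# Illustrative usage (a sketch, not part of this module's API): `tokenize`
# expects an iterable of source-line strings; the file name below is
# hypothetical.
#
#     with open("example.smnp") as source:
#         tokens = tokenize(source.readlines())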