Refactor tokenizer

Bartłomiej Pluta
2019-07-03 01:55:08 +02:00
parent 8313d2dcfd
commit f826516d8f
41 changed files with 589 additions and 296 deletions

2 .gitignore vendored Normal file

@@ -0,0 +1,2 @@
__pycache__/
*.mus

251 Tokenizer.py

@@ -1,251 +0,0 @@
from enum import Enum
import time
import re
import sys
from Error import SyntaxException
class Tokens:
def __init__(self, tokens = []):
self.tokens = tokens
self.cursor = 0
self.snap = 0
def append(self, token):
self.tokens.append(token)
def __getitem__(self, index):
return self.tokens[index]
def current(self):
if self.cursor >= len(self.tokens):
raise RuntimeError(f"Cursor points to not existing token! Cursor = {self.cursor}, len = {len(self.tokens)}")
return self.tokens[self.cursor]
def next(self, number=1):
return self.tokens[self.cursor + number]
def prev(self, number=1):
return self.tokens[self.cursor - number]
def hasMore(self, count=1):
return self.cursor + count < len(self.tokens)
def hasCurrent(self):
return self.cursor < len(self.tokens)
def ahead(self):
self.cursor += 1
def snapshot(self):
self.snapshot = self.cursor
def reset(self):
self.cursor = self.snapshot
return self.tokens[self.cursor]
def __str__(self):
return f"[Cursor: {self.cursor}\n{', '.join([str(token) for token in self.tokens])}]"
def __repr__(self):
return self.__str__()
class TokenType(Enum):
OPEN_PAREN = 1
CLOSE_PAREN = 2
ASTERISK = 3
STRING = 4
IDENTIFIER = 5
COMMA = 6
INTEGER = 7
OPEN_BRACKET = 8
CLOSE_BRACKET = 9
ASSIGN = 10
COLON = 11
NOTE = 12
COMMENT = 13
PERCENT = 14
MINUS = 15
FUNCTION = 16
RETURN = 17
DOT = 18
class Token:
def __init__(self, type, value, pos):
self.type = type
self.value = value
self.pos = pos
def __str__(self):
return "Token(" + str(self.type) + ", '" + self.value + "', " + str(self.pos) + ")"
def __repr__(self):
return self.__str__()
def tokenizeOpenParen(input, current, line):
return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line)
def tokenizeChar(type, char, input, current, line):
if input[current] == char:
return (1, Token(type, input[current], (line, current)))
return (0, None)
def tokenizeCloseParen(input, current, line):
return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line)
def tokenizeAsterisk(input, current, line):
return tokenizeChar(TokenType.ASTERISK, '*', input, current, line)
def tokenizeString(input, current, line):
if input[current] == '"':
value = input[current]
char = ''
consumedChars = 1
while char != '"':
if char is None: #TODO!!!
print("String not terminated")
char = input[current + consumedChars]
value += char
consumedChars += 1
return (consumedChars, Token(TokenType.STRING, value, (line, current)))
return (0, None)
def tokenizeRegexPattern(type, pattern, input, current, line):
consumedChars = 0
value = ''
while current+consumedChars < len(input) and re.match(pattern, input[current+consumedChars]):
value += input[current+consumedChars]
consumedChars += 1
return (consumedChars, Token(type, value, (line, current)) if consumedChars > 0 else None)
def tokenizeWhitespaces(input, current, line):
return tokenizeRegexPattern(None, r'\s', input, current, line)
def tokenizeIdentifier(input, current, line):
return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line)
def tokenizeComma(input, current, line):
return tokenizeChar(TokenType.COMMA, ',', input, current, line)
def tokenizeInteger(input, current, line):
return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line)
def tokenizeOpenBracket(input, current, line):
return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line)
def tokenizeCloseBracket(input, current, line):
return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line)
def tokenizeAssign(input, current, line):
return tokenizeChar(TokenType.ASSIGN, '=', input, current, line)
def tokenizeColon(input, current, line):
return tokenizeChar(TokenType.COLON, ':', input, current, line)
def tokenizeComment(input, current, line):
if input[current] == '#':
consumedChars = 0
value = ''
while current+consumedChars < len(input):
value += input[current+consumedChars]
consumedChars += 1
pass
return (consumedChars, Token(TokenType.COMMENT, value, (line, current)))
return (0, None)
def tokenizeNote(input, current, line):
consumedChars = 0
value = ''
if input[current] == '@':
consumedChars += 1
value += input[current]
if input[current+consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'):
value += input[current+consumedChars]
consumedChars += 1
if current+consumedChars < len(input) and input[current+consumedChars] in ('b', '#'):
value += input[current+consumedChars]
consumedChars += 1
if current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
value += input[current+consumedChars]
consumedChars += 1
if current+consumedChars < len(input) and input[current+consumedChars] == '.':
duration = input[current+consumedChars]
consumedChars += 1
while current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
duration += input[current+consumedChars]
consumedChars += 1
if current+consumedChars < len(input) and input[current+consumedChars] == 'd':
duration += input[current+consumedChars]
consumedChars += 1
if len(duration) > 1:
value += duration
else:
consumedChars -= 1
return (consumedChars, Token(TokenType.NOTE, value, (line, current)))
return (0, None)
def tokenizePercent(input, current, line):
return tokenizeChar(TokenType.PERCENT, '%', input, current, line)
def tokenizeMinus(input, current, line):
return tokenizeChar(TokenType.MINUS, '-', input, current, line)
def tokenizeFunction(input, current, line):
return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line)
def tokenizeKeyword(type, keyword, input, current, line):
if len(input) >= current+len(keyword) and input[current:current+len(keyword)] == keyword:
return (len(keyword), Token(type, keyword, (line, current)))
return (0, None)
def tokenizeReturn(input, current, line):
return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line)
def tokenizeDot(input, current, line):
return tokenizeChar(TokenType.DOT, '.', input, current, line)
tokenizers = (
tokenizeOpenParen,
tokenizeCloseParen,
tokenizeAsterisk,
tokenizeString,
tokenizeFunction,
tokenizeReturn,
tokenizeInteger,
tokenizeNote,
tokenizeIdentifier,
tokenizeComma,
tokenizeOpenBracket,
tokenizeCloseBracket,
tokenizeAssign,
tokenizeColon,
tokenizePercent,
tokenizeMinus,
tokenizeDot,
tokenizeComment,
tokenizeWhitespaces,
)
def doTokenize(lines):
tokens = []
for lineNumber, line in enumerate(lines):
current = 0
while current < len(line):
tokenized = False
for tokenizer in tokenizers:
consumedChars, value = tokenizer(line, current, lineNumber)
if consumedChars > 0:
tokens.append(value)
current += consumedChars
tokenized = True
break
if not tokenized:
raise SyntaxException((lineNumber, current), f"Unknown symbol '{line[current]}'")
return [token for token in tokens if token.type is not None]
def tokenize(lines):
tokens = doTokenize(lines)
return Tokens([ token for token in tokens if token.type != TokenType.COMMENT])

26 main.py

@@ -1,26 +0,0 @@
from Tokenizer import tokenize
from Parser import parse
from Evaluator import evaluate
from Environment import createEnvironment
from Error import SyntaxException, RuntimeException
import sys
if __name__ == "__main__":
try:
with open(sys.argv[1], 'r') as source:
lines = [line.rstrip('\n') for line in source.readlines()]
env = createEnvironment()
tokens = tokenize(lines)
ast = parse(tokens)
evaluate(ast, env)
except SyntaxException as e:
print(e.msg)
except RuntimeException as e:
print(e.msg)
except KeyboardInterrupt:
print("Program interrupted")


230 smnp/OldParser.py Normal file

@@ -0,0 +1,230 @@
from Tokenizer import *
from Note import *
from AST import *
from Error import SyntaxException
def expectedFound(expected, found):
raise SyntaxException(None, f"Expected: {expected}, found: {found}")
def assertType(expected, found):
if expected != found:
raise SyntaxException(None, f"Expected: {expected}, found: {found}")
def parseInteger(input, parent):
token = input.pop(0)
return IntegerLiteralNode(int(token.value), parent, token.pos)
def parseString(input, parent):
token = input.pop(0)
return StringLiteralNode(token.value[1:-1], parent, token.pos)
def parseNote(input, parent):
token = input.pop(0)
value = token.value
consumedChars = 1
notePitch = value[consumedChars]
consumedChars += 1
octave = 4
duration = 4
dot = False
if consumedChars < len(value) and value[consumedChars] in ('b', '#'):
notePitch += value[consumedChars]
consumedChars += 1
if consumedChars < len(value) and re.match(r'\d', value[consumedChars]):
octave = int(value[consumedChars])
consumedChars += 1
if consumedChars < len(value) and value[consumedChars] == '.':
consumedChars += 1
durationString = ''
while consumedChars < len(value) and re.match(r'\d', value[consumedChars]):
durationString += value[consumedChars]
consumedChars += 1
duration = int(durationString)
if consumedChars < len(value) and value[consumedChars] == '.':
dot = True
consumedChars += 1
return NoteLiteralNode(Note(notePitch, octave, duration, dot), parent, token.pos)
def parseComma(input, parent):
token = input.pop(0)
return CommaNode(parent, token.pos)
def parseList(input, parent):
token = input.pop(0)
node = ListNode(parent, token.pos)
while input[0].type != TokenType.CLOSE_PAREN:
element = parseArrayElement(input, node)
if element is None:
            raise SyntaxException(input[0].pos, f"Invalid element '{input[0].value}'")
node.append(element)
if input[0].type != TokenType.CLOSE_PAREN:
expectedFound(TokenType.CLOSE_PAREN, input[0].type)
input.pop(0)
return node
def parseBlock(input, parent):
token = input.pop(0)
block = BlockNode(parent, token.pos)
while input[0].type != TokenType.CLOSE_BRACKET:
block.append(parseToken(input, block))
if input[0].type != TokenType.CLOSE_BRACKET:
expectedFound(TokenType.CLOSE_BRACKET, input[0].type)
input.pop(0)
return block
def parseAsterisk(input, parent):
token = input.pop(0)
iterator = parent.pop(-1)
value = parseStatement(input, parent)
asterisk = AsteriskStatementNode(iterator, value, parent, token.pos)
iterator.parent = asterisk
value.parent = asterisk
return asterisk
def parseNoteOrColon(input, parent):
note = parseNote(input, parent)
if len(input) > 1 and input[0].type == TokenType.COLON:
token = input.pop(0)
b = parseNote(input, parent)
if b is None:
raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'")
colon = ColonNode(note, b, parent, token.pos)
note.parent = colon
b.parent = colon
return colon
return note
def parseIntegerOrColonOrPercent(input, parent):
integer = parseInteger(input, parent)
if len(input) > 1 and input[0].type == TokenType.COLON:
token = input.pop(0)
b = parseInteger(input, parent)
if b is None:
raise SyntaxException(input[0].pos, f"Invalid colon argument '{input[0].value}'")
colon = ColonNode(integer, b, parent, token.pos)
integer.parent = colon
b.parent = colon
return colon
if len(input) > 0 and input[0].type == TokenType.PERCENT:
input.pop(0)
percent = PercentNode(integer, parent, integer.pos)
integer.parent = percent
return percent
return integer
def parseFunctionCallOrAssignOrIdentifier(input, parent):
token = input.pop(0)
identifier = IdentifierNode(token.value, parent, token.pos)
# Function call
if len(input) > 0 and input[0].type == TokenType.OPEN_PAREN:
arguments = parseList(input, parent)
func = FunctionCallNode(identifier, arguments, parent, token.pos)
identifier.parent = func
arguments.parent = func
return func
# Assign
if len(input) > 1 and input[0].type == TokenType.ASSIGN:
token = input.pop(0)
        value = parseExpression(input, parent)
assign = AssignExpression(identifier, value, parent, token.pos)
identifier.parent = assign
value.parent = assign
return assign
return identifier
def parseMinus(input, parent):
token = input.pop(0)
value = parseInteger(input, parent)
return IntegerLiteralNode(-value.value, parent, token.pos)
def parseFunctionDefinition(input, parent):
input.pop(0)
assertType(TokenType.IDENTIFIER, input[0].type)
token = input.pop(0)
name = IdentifierNode(token.value, parent, token.pos)
assertType(TokenType.OPEN_PAREN, input[0].type)
parameters = parseList(input, parent)
assertType(TokenType.OPEN_BRACKET, input[0].type)
body = parseBlock(input, parent)
func = FunctionDefinitionNode(name, parameters, body, parent, token.pos)
name.parent = func
parameters.parent = func
body.parent = func
return func
def parseReturn(input, parent):
token = input.pop(0)
value = parseExpression(input, parent)
returnNode = ReturnNode(value, parent, token.pos)
value.parent = returnNode
return returnNode
def parseExpression(input, parent):
type = input[0].type
if type == TokenType.FUNCTION:
return parseFunctionDefinition(input, parent)
if type == TokenType.RETURN:
return parseReturn(input, parent)
if type == TokenType.MINUS:
return parseMinus(input, parent)
if type == TokenType.INTEGER:
return parseIntegerOrColonOrPercent(input, parent)
if type == TokenType.STRING:
return parseString(input, parent)
if type == TokenType.NOTE:
return parseNoteOrColon(input, parent)
if type == TokenType.IDENTIFIER:
return parseFunctionCallOrAssignOrIdentifier(input, parent)
if type == TokenType.OPEN_PAREN:
return parseList(input, parent)
raise SyntaxException(input[0].pos, f"Unexpected character '{input[0].value}'")
def parseArrayElement(input, parent):
type = input[0].type
if type == TokenType.COMMA:
return parseComma(input, parent)
return parseExpression(input, parent)
def parseStatement(input, parent):
type = input[0].type
if type == TokenType.OPEN_BRACKET:
return parseBlock(input, parent)
if type == TokenType.ASTERISK:
return parseAsterisk(input, parent)
return parseExpression(input, parent)
def parseToken(input, parent):
#import pdb; pdb.set_trace()
return parseStatement(input, parent)
def parse(input):
root = Program()
while len(input) > 0:
root.append(parseToken(input, root))
return root
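The legacy parser is plain recursive descent over a Python list: each parse function pops tokens off the front of the input and dispatches on the type of the token it sees next. A minimal, self-contained sketch of that pattern (token shapes and names here are illustrative, not part of the commit):

def parseIntegerSketch(tokens):
    # consume one INTEGER token from the front of the stream
    token = tokens.pop(0)
    return ("int", int(token[1]))

def parseExpressionSketch(tokens):
    # dispatch on the type of the next token
    if tokens[0][0] == "INTEGER":
        return parseIntegerSketch(tokens)
    raise SyntaxError(f"Unexpected token {tokens[0]}")

print(parseExpressionSketch([("INTEGER", "42")]))  # ('int', 42)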

0 smnp/__init__.py Normal file

4 smnp/__main__.py Normal file

@@ -0,0 +1,4 @@
from smnp.main import main
if __name__ == "__main__":
main()
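With the package entry point in place, the interpreter can presumably be launched as a module, e.g. python -m smnp song.mus (the file name here is hypothetical); smnp.main.main() then reads the source path from sys.argv[1].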

Environment.py

@@ -1,5 +1,5 @@
 import sys
-from Evaluator import objectString
+from parser.Environment import objectString
 from Note import *
 import random
 import Synth
@@ -46,19 +46,6 @@ class Environment():
             return scope
         else:
             return scope
-def sample(args, env):
-    if len(args) == 1 and isinstance(args[0], list):
-        return _sample(args[0])
-    elif len(args) == 0:
-        return _sample(Note.range(Note(NotePitch.C), Note(NotePitch.H)))
-    elif all(isinstance(x, Note) for x in args):
-        return _sample(args)
-    else:
-        pass # not valid signature
-def _sample(list):
-    return list[int(random.uniform(0, len(list)))]
 def doPrint(args, env):
     print("".join([objectString(arg) for arg in args]))

0 smnp/error/__init__.py Normal file

4 smnp/error/runtime.py Normal file

@@ -0,0 +1,4 @@
class RuntimeException(Exception):
def __init__(self, pos, msg):
posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
        self.msg = f"Runtime error {posStr}:\n{msg}"

Error.py → smnp/error/syntax.py

@@ -2,8 +2,3 @@ class SyntaxException(Exception):
     def __init__(self, pos, msg):
         posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
         self.msg = f"Syntax error {posStr}:\n{msg}"
-class RuntimeException(Exception):
-    def __init__(self, pos, msg):
-        posStr = "" if pos is None else f"[line {pos[0]+1}, col {pos[1]+1}]"
-        self.msg = f"Syntax error {posStr}:\n{msg}"

29 smnp/main.py Normal file

@@ -0,0 +1,29 @@
import sys
from smnp.error.syntax import SyntaxException
from smnp.error.runtime import RuntimeException
from smnp.token.tokenizer import tokenize
#from Tokenizer import tokenize
#from Parser import parse
#from Evaluator import evaluate
#from Environment import createEnvironment
#from Error import SyntaxException, RuntimeException
def main():
try:
with open(sys.argv[1], 'r') as source:
lines = [line.rstrip('\n') for line in source.readlines()]
#env = createEnvironment()
tokens = tokenize(lines)
print(tokens)
#ast = parse(tokens)
#evaluate(ast, env)
except SyntaxException as e:
print(e.msg)
except RuntimeException as e:
print(e.msg)
except KeyboardInterrupt:
print("Program interrupted")

1 smnp/token/__init__.py Normal file

@@ -0,0 +1 @@
__all__ = ["tokenize"]

54 smnp/token/model.py Normal file

@@ -0,0 +1,54 @@
class Token:
def __init__(self, type, value, pos):
self.type = type
self.value = value
self.pos = pos
def __str__(self):
return "Token(" + str(self.type) + ", '" + self.value + "', " + str(self.pos) + ")"
def __repr__(self):
return self.__str__()
class TokenList:
    def __init__(self, tokens=None):
        # default to a fresh list: a mutable default argument would be shared
        # between every TokenList instance
        self.tokens = tokens if tokens is not None else []
self.cursor = 0
self.snap = 0
def append(self, token):
self.tokens.append(token)
def __getitem__(self, index):
return self.tokens[index]
def current(self):
if self.cursor >= len(self.tokens):
            raise RuntimeError(f"Cursor points to a non-existing token! Cursor = {self.cursor}, len = {len(self.tokens)}")
return self.tokens[self.cursor]
def next(self, number=1):
return self.tokens[self.cursor + number]
def prev(self, number=1):
return self.tokens[self.cursor - number]
def hasMore(self, count=1):
return self.cursor + count < len(self.tokens)
def hasCurrent(self):
return self.cursor < len(self.tokens)
def ahead(self):
self.cursor += 1
    def snapshot(self):
        # store the position in 'snap'; assigning to 'self.snapshot' would
        # shadow this method
        self.snap = self.cursor
    def reset(self):
        self.cursor = self.snap
        return self.tokens[self.cursor]
def __str__(self):
return f"[Cursor: {self.cursor}\n{', '.join([str(token) for token in self.tokens])}]"
def __repr__(self):
return self.__str__()
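A short usage sketch of the cursor API (the tokens are made up for illustration; assumes Token and TokenType imports as elsewhere in this commit):

tokens = TokenList([Token(TokenType.IDENTIFIER, "x", (0, 0)),
                    Token(TokenType.ASSIGN, "=", (0, 2)),
                    Token(TokenType.INTEGER, "2", (0, 4))])
tokens.snapshot()              # remember cursor position 0
tokens.ahead()                 # advance to the ASSIGN token
print(tokens.current().value)  # '='
print(tokens.reset().value)    # rewind to the snapshot: 'x'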

81 smnp/token/tokenizer.py Normal file

@@ -0,0 +1,81 @@
from smnp.error.syntax import SyntaxException
from smnp.token.type import TokenType
from smnp.token.model import TokenList
from smnp.token.tokenizers.paren import tokenizeOpenParen, tokenizeCloseParen
from smnp.token.tokenizers.asterisk import tokenizeAsterisk
from smnp.token.tokenizers.whitespace import tokenizeWhitespaces
from smnp.token.tokenizers.identifier import tokenizeIdentifier
from smnp.token.tokenizers.comma import tokenizeComma
from smnp.token.tokenizers.string import tokenizeString
from smnp.token.tokenizers.integer import tokenizeInteger
from smnp.token.tokenizers.bracket import tokenizeOpenBracket, tokenizeCloseBracket
from smnp.token.tokenizers.assign import tokenizeAssign
from smnp.token.tokenizers.colon import tokenizeColon
from smnp.token.tokenizers.comment import tokenizeComment
from smnp.token.tokenizers.note import tokenizeNote
from smnp.token.tokenizers.function import tokenizeFunction
from smnp.token.tokenizers.ret import tokenizeReturn
from smnp.token.tokenizers.percent import tokenizePercent
from smnp.token.tokenizers.minus import tokenizeMinus
from smnp.token.tokenizers.dot import tokenizeDot
tokenizers = (
tokenizeOpenParen,
tokenizeCloseParen,
tokenizeAsterisk,
tokenizeString,
tokenizeFunction,
tokenizeReturn,
tokenizeInteger,
tokenizeNote,
tokenizeIdentifier,
tokenizeComma,
tokenizeOpenBracket,
tokenizeCloseBracket,
tokenizeAssign,
tokenizeColon,
tokenizePercent,
tokenizeMinus,
tokenizeDot,
tokenizeComment,
tokenizeWhitespaces,
)
filters = [
lambda token: token.type is not None,
lambda token: token.type != TokenType.COMMENT
]
def tokenize(lines):
tokens = []
for lineNumber, line in enumerate(lines):
current = 0
while current < len(line):
consumedChars, token = combinedTokenizer(line, current, lineNumber)
if consumedChars == 0:
raise SyntaxException((lineNumber, current), f"Unknown symbol '{line[current]}'")
current += consumedChars
tokens.append(token)
    # filterTokens yields a generator after the first recursive pass;
    # materialize it so TokenList can use len() and indexing
    return TokenList(list(filterTokens(filters, tokens)))
def combinedTokenizer(line, current, lineNumber):
for tokenizer in tokenizers:
consumedChars, token = tokenizer(line, current, lineNumber)
if consumedChars > 0:
return (consumedChars, token)
return (0, None)
def filterTokens(filters, tokens):
if not filters:
return tokens
return filterTokens(filters[1:], (token for token in tokens if filters[0](token)))
__all__ = ["tokenize"]
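End to end, a hypothetical source line and what the pipeline should produce (whitespace tokens are emitted with type None, so the filters drop both them and COMMENT tokens):

tokens = tokenize(['x = 12 # set x'])
# -> IDENTIFIER 'x', ASSIGN '=', INTEGER '12'
# the whitespace (type None) and trailing COMMENT tokens are filtered out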

0 smnp/token/tokenizers/__init__.py Normal file

5 smnp/token/tokenizers/assign.py Normal file

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType
def tokenizeAssign(input, current, line):
return tokenizeChar(TokenType.ASSIGN, '=', input, current, line)

5 smnp/token/tokenizers/asterisk.py Normal file

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType
def tokenizeAsterisk(input, current, line):
return tokenizeChar(TokenType.ASTERISK, '*', input, current, line)

8 smnp/token/tokenizers/bracket.py Normal file

@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType
def tokenizeOpenBracket(input, current, line):
return tokenizeChar(TokenType.OPEN_BRACKET, '{', input, current, line)
def tokenizeCloseBracket(input, current, line):
return tokenizeChar(TokenType.CLOSE_BRACKET, '}', input, current, line)

5 smnp/token/tokenizers/colon.py Normal file

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType
def tokenizeColon(input, current, line):
return tokenizeChar(TokenType.COLON, ':', input, current, line)

5 smnp/token/tokenizers/comma.py Normal file

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType
def tokenizeComma(input, current, line):
return tokenizeChar(TokenType.COMMA, ',', input, current, line)

13 smnp/token/tokenizers/comment.py Normal file

@@ -0,0 +1,13 @@
from smnp.token.type import TokenType
from smnp.token.model import Token
def tokenizeComment(input, current, line):
if input[current] == '#':
consumedChars = 0
value = ''
        # consume the rest of the line as the comment body
        while current + consumedChars < len(input):
            value += input[current + consumedChars]
            consumedChars += 1
return (consumedChars, Token(TokenType.COMMENT, value, (line, current)))
return (0, None)

5 smnp/token/tokenizers/dot.py Normal file

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType
def tokenizeDot(input, current, line):
return tokenizeChar(TokenType.DOT, '.', input, current, line)

5 smnp/token/tokenizers/function.py Normal file

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType
def tokenizeFunction(input, current, line):
return tokenizeKeyword(TokenType.FUNCTION, 'function', input, current, line)

5 smnp/token/tokenizers/identifier.py Normal file

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType
def tokenizeIdentifier(input, current, line):
return tokenizeRegexPattern(TokenType.IDENTIFIER, r'\w', input, current, line)

5 smnp/token/tokenizers/integer.py Normal file

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeRegexPattern
from smnp.token.type import TokenType
def tokenizeInteger(input, current, line):
return tokenizeRegexPattern(TokenType.INTEGER, r'\d', input, current, line)

5 smnp/token/tokenizers/minus.py Normal file

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType
def tokenizeMinus(input, current, line):
return tokenizeChar(TokenType.MINUS, '-', input, current, line)

37 smnp/token/tokenizers/note.py Normal file

@@ -0,0 +1,37 @@
import re
from smnp.token.type import TokenType
from smnp.token.model import Token
def tokenizeNote(input, current, line):
consumedChars = 0
value = ''
if input[current] == '@':
consumedChars += 1
value += input[current]
if input[current+consumedChars] in ('C', 'c', 'D', 'd', 'E', 'e', 'F', 'f', 'G', 'g', 'A', 'a', 'H', 'h', 'B', 'b'):
value += input[current+consumedChars]
consumedChars += 1
if current+consumedChars < len(input) and input[current+consumedChars] in ('b', '#'):
value += input[current+consumedChars]
consumedChars += 1
if current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
value += input[current+consumedChars]
consumedChars += 1
if current+consumedChars < len(input) and input[current+consumedChars] == '.':
duration = input[current+consumedChars]
consumedChars += 1
while current+consumedChars < len(input) and re.match(r'\d', input[current+consumedChars]):
duration += input[current+consumedChars]
consumedChars += 1
if current+consumedChars < len(input) and input[current+consumedChars] == 'd':
duration += input[current+consumedChars]
consumedChars += 1
if len(duration) > 1:
value += duration
else:
consumedChars -= 1
return (consumedChars, Token(TokenType.NOTE, value, (line, current)))
return (0, None)
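In other words, a note literal is '@', a pitch letter (C-H or A-B, either case), then optionally an accidental 'b' or '#', an octave digit, and a '.'-prefixed duration with an optional trailing 'd' for a dotted value; a lone '.' with no digits is backtracked. An illustrative check (the input string is hypothetical):

consumed, token = tokenizeNote('@a4.8', 0, 0)
assert consumed == 5 and token.value == '@a4.8'  # pitch a, octave 4, duration 8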

8 smnp/token/tokenizers/paren.py Normal file

@@ -0,0 +1,8 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType
def tokenizeOpenParen(input, current, line):
return tokenizeChar(TokenType.OPEN_PAREN, '(', input, current, line)
def tokenizeCloseParen(input, current, line):
return tokenizeChar(TokenType.CLOSE_PAREN, ')', input, current, line)

5 smnp/token/tokenizers/percent.py Normal file

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeChar
from smnp.token.type import TokenType
def tokenizePercent(input, current, line):
return tokenizeChar(TokenType.PERCENT, '%', input, current, line)

5 smnp/token/tokenizers/ret.py Normal file

@@ -0,0 +1,5 @@
from smnp.token.tools import tokenizeKeyword
from smnp.token.type import TokenType
def tokenizeReturn(input, current, line):
return tokenizeKeyword(TokenType.RETURN, 'return', input, current, line)

16 smnp/token/tokenizers/string.py Normal file

@@ -0,0 +1,16 @@
from smnp.token.type import TokenType
from smnp.token.model import Token
from smnp.error.syntax import SyntaxException
def tokenizeString(input, current, line):
if input[current] == '"':
value = input[current]
char = ''
consumedChars = 1
        while char != '"':
            if current + consumedChars >= len(input):
                # no closing quote before the end of the line
                raise SyntaxException((line, current), "String not terminated")
char = input[current + consumedChars]
value += char
consumedChars += 1
return (consumedChars, Token(TokenType.STRING, value, (line, current)))
return (0, None)

4 smnp/token/tokenizers/whitespace.py Normal file

@@ -0,0 +1,4 @@
from smnp.token.tools import tokenizeRegexPattern
def tokenizeWhitespaces(input, current, line):
return tokenizeRegexPattern(None, r'\s', input, current, line)

21 smnp/token/tools.py Normal file

@@ -0,0 +1,21 @@
import re
from smnp.token.model import Token
def tokenizeChar(type, char, input, current, line):
if input[current] == char:
return (1, Token(type, input[current], (line, current)))
return (0, None)
def tokenizeRegexPattern(type, pattern, input, current, line):
consumedChars = 0
value = ''
while current+consumedChars < len(input) and re.match(pattern, input[current+consumedChars]):
value += input[current+consumedChars]
consumedChars += 1
return (consumedChars, Token(type, value, (line, current)) if consumedChars > 0 else None)
def tokenizeKeyword(type, keyword, input, current, line):
if len(input) >= current+len(keyword) and input[current:current+len(keyword)] == keyword:
return (len(keyword), Token(type, keyword, (line, current)))
return (0, None)
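The three helpers cover single characters, greedy regex runs, and fixed keywords; hypothetical calls, assuming the definitions above plus a TokenType import are in scope:

tokenizeChar(TokenType.COMMA, ',', ',x', 0, 0)                 # (1, Token(COMMA, ',', (0, 0)))
tokenizeRegexPattern(TokenType.INTEGER, r'\d', '42a', 0, 0)    # (2, Token(INTEGER, '42', (0, 0)))
tokenizeKeyword(TokenType.RETURN, 'return', 'return x', 0, 0)  # (6, Token(RETURN, 'return', (0, 0)))
# caveat: tokenizeKeyword matches prefixes, so 'returned' would come out
# as RETURN followed by IDENTIFIER 'ed'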

21 smnp/token/type.py Normal file

@@ -0,0 +1,21 @@
from enum import Enum
class TokenType(Enum):
OPEN_PAREN = 1
CLOSE_PAREN = 2
ASTERISK = 3
STRING = 4
IDENTIFIER = 5
COMMA = 6
INTEGER = 7
OPEN_BRACKET = 8
CLOSE_BRACKET = 9
ASSIGN = 10
COLON = 11
NOTE = 12
COMMENT = 13
PERCENT = 14
MINUS = 15
FUNCTION = 16
RETURN = 17
DOT = 18