Migrate tokenizer to Kotlin

This commit is contained in:
2020-03-04 18:11:35 +01:00
parent 9e532079ca
commit 4697411cc9
20 changed files with 953 additions and 0 deletions

25
build.gradle Normal file
View File

@@ -0,0 +1,25 @@
// Gradle build for the SMNP tokenizer (Kotlin/JVM project).
plugins {
    id 'java'
    // Kotlin 1.3.61 — keep jvmTarget settings below in sync with sourceCompatibility.
    id 'org.jetbrains.kotlin.jvm' version '1.3.61'
}
group 'io.bartek'
version '1.0-SNAPSHOT'
sourceCompatibility = 1.8
repositories {
    mavenCentral()
}
dependencies {
    implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk8"
    testCompile group: 'junit', name: 'junit', version: '4.12'
}
// Compile main and test Kotlin sources against the same JVM 1.8 bytecode target.
compileKotlin {
    kotlinOptions.jvmTarget = "1.8"
}
compileTestKotlin {
    kotlinOptions.jvmTarget = "1.8"
}

1
gradle.properties Normal file
View File

@@ -0,0 +1 @@
kotlin.code.style=official

172
gradlew vendored Executable file
View File

@@ -0,0 +1,172 @@
#!/usr/bin/env sh
# NOTE(review): vendored Gradle wrapper — generated file; upgrade via the Gradle
# "wrapper" task rather than editing by hand.
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m"'
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
warn () {
echo "$*"
}
die () {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Escape application args
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=$(save "$@")
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi
exec "$JAVACMD" "$@"

84
gradlew.bat vendored Normal file
View File

@@ -0,0 +1,84 @@
@if "%DEBUG%" == "" @echo off
@rem NOTE(review): vendored Gradle wrapper — generated file; upgrade via the
@rem Gradle "wrapper" task rather than editing by hand.
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
if not "%OS%" == "Windows_NT" goto win9xME_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

2
settings.gradle Normal file
View File

@@ -0,0 +1,2 @@
// Single-module build; the project name determines the default artifact name.
rootProject.name = 'smnp'

8
src/main/kotlin/SMNP.kt Normal file
View File

@@ -0,0 +1,8 @@
import dsl.token.tokenizer.DefaultTokenizer
import interpreter.Interpreter
import java.io.File
/**
 * Entry point: interprets an SMNP script file.
 *
 * The script path may now be passed as the first command-line argument;
 * the previous hard-coded developer path is kept as a fallback so existing
 * invocations without arguments behave exactly as before.
 */
fun main(args: Array<String>) {
    // "Developent" is kept verbatim — it matches the directory on the author's machine.
    val path = args.firstOrNull() ?: "/home/bartek/Developent/SMNP-Kotlin/examples/adeste.mus"
    val interpreter = Interpreter()
    interpreter.run(File(path))
}

View File

@@ -0,0 +1,17 @@
package data.model.entity
import data.model.enumeration.Pitch
/**
 * Immutable musical note: a pitch, an octave, a duration denominator and an
 * optional dot. Instances are created exclusively through [Builder].
 */
class Note private constructor(val pitch: Pitch, val octave: Int, val duration: Int, val dot: Boolean) {

    /** Mutable builder with sensible defaults (A4, quarter note, no dot). */
    data class Builder(var pitch: Pitch = Pitch.A, var octave: Int = 4, var duration: Int = 4, var dot: Boolean = false) {
        fun pitch(pitch: Pitch): Builder = apply { this.pitch = pitch }
        fun octave(octave: Int): Builder = apply { this.octave = octave }
        fun duration(duration: Int): Builder = apply { this.duration = duration }
        fun dot(dot: Boolean): Builder = apply { this.dot = dot }
        fun build(): Note = Note(pitch, octave, duration, dot)
    }

    // e.g. "A4:4" or "C#5:8d" when dotted.
    override fun toString(): String {
        val dotMark = if (dot) "d" else ""
        return "$pitch$octave:$duration$dotMark"
    }
}

View File

@@ -0,0 +1,53 @@
package data.model.enumeration
import java.lang.RuntimeException
/**
 * Chromatic pitches using German note naming (H = the English B; B = the
 * English B-flat, hence "b" parses to A_S below).
 */
enum class Pitch {
    C, C_S, D, D_S, E, F, F_S, G, G_S, A, A_S, H;

    // Display name: "_S" suffix rendered as a sharp sign.
    override fun toString(): String {
        return when (this) {
            C -> "C"
            C_S -> "C#"
            D -> "D"
            D_S -> "D#"
            E -> "E"
            F -> "F"
            F_S -> "F#"
            G -> "G"
            G_S -> "G#"
            A -> "A"
            A_S -> "A#"
            H -> "H"
        }
    }

    companion object {
        /**
         * Parses a case-insensitive pitch symbol, resolving flats ("b" suffix)
         * and sharps ("#" suffix) to their enharmonic equivalents.
         *
         * @throws IllegalArgumentException for an unknown symbol; the message
         *         now includes the offending input (previously it was dropped).
         */
        fun parse(symbol: String): Pitch {
            return when (symbol.toLowerCase()) {
                "cb" -> H
                "c" -> C
                "c#" -> C_S
                "db" -> C_S
                "d" -> D
                "d#" -> D_S
                "eb" -> D_S
                "e" -> E
                "e#" -> F
                "fb" -> E
                "f" -> F
                "f#" -> F_S
                "gb" -> F_S
                "g" -> G
                "g#" -> G_S
                "ab" -> G_S
                "a" -> A
                "a#" -> A_S
                "b" -> A_S
                "h" -> H
                "h#" -> C
                // IllegalArgumentException is a RuntimeException, so existing
                // catch sites remain compatible.
                else -> throw IllegalArgumentException("Unknown pitch symbol: $symbol")
            }
        }
    }
}

View File

@@ -0,0 +1,19 @@
package dsl.token.model.entity
import dsl.token.model.enumeration.TokenType
/**
 * A single lexed token: its [type], parsed [value], the exact source text
 * [rawValue], and where it occurred ([position]).
 */
data class Token(val type: TokenType, val value: Any, val rawValue: String, val position: TokenPosition) {

    /** Convenience constructor for tokens whose value equals their raw text. */
    constructor(type: TokenType, value: String, position: TokenPosition) : this(type, value, value, position)

    /** Returns a copy with the value transformed; raw text and position are kept. */
    fun mapValue(mapper: (Any) -> Any): Token = copy(value = mapper(value))

    override fun toString(): String = "(${type.name.toLowerCase()}, »$rawValue«, ${position.short()})"

    companion object {
        /** Sentinel token used where no real token exists. */
        val NONE = Token(TokenType.NONE, "", TokenPosition.NONE)
    }
}

View File

@@ -0,0 +1,58 @@
package dsl.token.model.entity
import java.lang.RuntimeException
/**
 * Cursor-based view over a list of tokens, used by the parser to walk,
 * peek, snapshot and backtrack. Also carries the original source [lines]
 * for error reporting.
 *
 * Fix: removed the private field `snap`, which was never read or written —
 * snapshots are plain cursor values returned by [snapshot].
 */
class TokenList(val tokens: List<Token>, val lines: List<String>) {
    // Index of the token currently under inspection.
    private var cursor = 0

    /** Random access by absolute index. */
    operator fun get(index: Int): Token {
        return tokens[index]
    }

    /** Token under the cursor. @throws RuntimeException when past the end. */
    fun current(): Token {
        if(!hasCurrent()) {
            throw RuntimeException("Cursor points to not existing token! Cursor = ${cursor}, length = ${tokens.size}")
        }
        return tokens[cursor]
    }

    /** Source position of the current token. */
    fun currentPos(): TokenPosition {
        return current().position
    }

    /** True while the cursor has not run past the last token. */
    fun hasCurrent(): Boolean {
        return cursor < tokens.size
    }

    /** True when at least [count] tokens exist beyond the current one. */
    fun hasMore(count: Int = 1): Boolean {
        return cursor + count < tokens.size
    }

    /** Peek [number] tokens ahead without moving the cursor. */
    fun next(number: Int = 1): Token {
        return tokens[cursor + number]
    }

    /** Peek [number] tokens behind without moving the cursor. */
    fun prev(number: Int = 1): Token {
        return tokens[cursor - number]
    }

    /** Advance the cursor by one token. */
    fun ahead() {
        cursor += 1
    }

    /** Capture the cursor for later [restore] (parser backtracking). */
    fun snapshot(): Int {
        return cursor
    }

    /** Rewind to a previously captured [snapshot]. */
    fun restore(snapshot: Int) {
        cursor = snapshot
    }

    /** Rewind to the first token. */
    fun reset() {
        cursor = 0
    }
}

View File

@@ -0,0 +1,15 @@
package dsl.token.model.entity
/**
 * Location of a token in the source: zero-based [line] and the column range
 * [beginCol] (inclusive) .. [endCol] (exclusive). Rendered one-based for humans.
 */
data class TokenPosition(val line: Int, val beginCol: Int, val endCol: Int) {

    override fun toString(): String {
        return "[line ${line+1}, col ${beginCol}]"
    }

    /** Compact "line:col" form used inside Token.toString(). */
    fun short(): String {
        // Fix: removed stray '}' that the original interpolated into the output ("1:5}").
        return "${line+1}:${beginCol}"
    }

    companion object {
        /** Sentinel for tokens with no real source location. */
        val NONE = TokenPosition(-1, -1, -1)
    }
}

View File

@@ -0,0 +1,29 @@
package dsl.token.model.entity
import dsl.token.model.enumeration.TokenType
/**
 * Result of one tokenizer attempt: how many characters were consumed and the
 * produced [token]. [NONE] (zero consumed) signals "no match here".
 */
data class TokenizerOutput(val consumedChars: Int, val token: Token) {

    /** True when this output represents a successful match. */
    fun consumed(): Boolean = consumedChars > 0

    /** Returns a copy with the token transformed; the consumed count is kept. */
    fun mapToken(mapper: (Token) -> Token): TokenizerOutput = copy(token = mapper(token))

    companion object {
        /** Sentinel meaning "nothing matched". */
        val NONE = TokenizerOutput(0, Token.NONE)

        /** Wraps a token, collapsing zero-length matches to [NONE]. */
        fun produce(consumedChars: Int, token: Token): TokenizerOutput = when {
            consumedChars > 0 -> TokenizerOutput(consumedChars, token)
            else -> NONE
        }

        /** Convenience overload: parsed value and raw text are the same string. */
        fun produce(consumedChars: Int, value: String, tokenType: TokenType, line: Int, beginCol: Int): TokenizerOutput =
            produce(consumedChars, Token(tokenType, value, TokenPosition(line, beginCol, beginCol + consumedChars)))

        /** Convenience overload with distinct parsed value and raw source text. */
        fun produce(consumedChars: Int, value: Any, rawValue: String, tokenType: TokenType, line: Int, beginCol: Int): TokenizerOutput =
            produce(consumedChars, Token(tokenType, value, rawValue, TokenPosition(line, beginCol, beginCol + consumedChars)))
    }
}

View File

@@ -0,0 +1,49 @@
package dsl.token.model.enumeration
/**
 * All token kinds the tokenizer can emit. [token] is the literal source text
 * for operators/keywords; for value-carrying kinds (INTEGER, STRING, ...) it
 * is only a descriptive label.
 *
 * NOTE(review): Tokenizer.default(type) matches `type.token` literally, so for
 * operator/keyword entries the string must be the exact source spelling.
 */
enum class TokenType(val token: String) {
    // Sentinel (whitespace and "no match" results).
    NONE("<NONE>"),
    // RELATION's string is documentation only — the actual alternatives are
    // listed where keywords(RELATION, ...) is constructed.
    RELATION("==, !=, >=, <="),
    // Multi-character operators.
    DOUBLE_ASTERISK("**"),
    // Brackets and punctuation.
    OPEN_CURLY("{"),
    CLOSE_CURLY("}"),
    OPEN_PAREN("("),
    CLOSE_PAREN(")"),
    OPEN_SQUARE("["),
    CLOSE_SQUARE("]"),
    OPEN_ANGLE("<"),
    CLOSE_ANGLE(">"),
    SEMICOLON(";"),
    ASTERISK("*"),
    PERCENT("%"),
    ASSIGN("="),
    ARROW("->"),
    COMMA(","),
    SLASH("/"),
    MINUS("-"),
    PLUS("+"),
    CARET("^"),
    DOTS("..."),
    AMP("&"),
    DOT("."),
    // Word operators.
    AND("and"),
    OR("or"),
    NOT("not"),
    // Literal value kinds.
    INTEGER("integer"),
    STRING("string"),
    FLOAT("float"),
    NOTE("note"),
    BOOL("bool"),
    TYPE("type"),
    // Language keywords.
    FUNCTION("function"),
    RETURN("return"),
    EXTEND("extend"),
    IMPORT("import"),
    THROW("throw"),
    FROM("from"),
    WITH("with"),
    ELSE("else"),
    IF("if"),
    AS("as"),
    // Names and comments.
    IDENTIFIER("identifier"),
    COMMENT("comment")
}

View File

@@ -0,0 +1,21 @@
package dsl.token.tokenizer
import dsl.token.model.entity.TokenizerOutput
import dsl.token.model.enumeration.TokenType
/**
 * Matches a '#' line comment, consuming every remaining character of the line.
 * The token value is the comment text without the leading '#', trimmed; the
 * raw value keeps the full source text.
 */
class CommentTokenizer : Tokenizer {
    override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
        if (input[current] != '#') {
            return TokenizerOutput.NONE
        }
        // A comment always runs to the end of the line.
        val raw = input.substring(current)
        return TokenizerOutput.produce(raw.length, raw.substring(1).trim(), raw, TokenType.COMMENT, line, current)
    }
}

View File

@@ -0,0 +1,110 @@
package dsl.token.tokenizer
import dsl.token.model.entity.Token
import dsl.token.model.entity.TokenList
import dsl.token.model.entity.TokenizerOutput
import dsl.token.model.enumeration.TokenType
import dsl.token.tokenizer.Tokenizer.Companion.default
import dsl.token.tokenizer.Tokenizer.Companion.keywords
import dsl.token.tokenizer.Tokenizer.Companion.mapValue
import dsl.token.tokenizer.Tokenizer.Companion.regex
import dsl.token.tokenizer.Tokenizer.Companion.separated
/**
 * Top-level tokenizer: tries each sub-tokenizer in order (first match wins),
 * then filters out whitespace (NONE) and COMMENT tokens.
 *
 * Fix: the "unknown symbol" error now reports the line and column instead of
 * discarding the position information that was already at hand.
 *
 * NOTE(review): the list order is significant — longer operators before their
 * prefixes (ARROW before MINUS, DOTS before DOT), keywords before IDENTIFIER.
 * Do not reorder.
 */
class DefaultTokenizer : Tokenizer {
    private val tokenizers = listOf(
        default(TokenType.ARROW),
        // Double-character operators
        keywords(TokenType.RELATION, "==", "!=", "<=", ">="),
        default(TokenType.DOUBLE_ASTERISK),
        // Characters
        default(TokenType.OPEN_CURLY),
        default(TokenType.CLOSE_CURLY),
        default(TokenType.OPEN_PAREN),
        default(TokenType.CLOSE_PAREN),
        default(TokenType.OPEN_SQUARE),
        default(TokenType.CLOSE_SQUARE),
        default(TokenType.OPEN_ANGLE),
        default(TokenType.CLOSE_ANGLE),
        default(TokenType.SEMICOLON),
        default(TokenType.ASTERISK),
        default(TokenType.PERCENT),
        default(TokenType.ASSIGN),
        default(TokenType.COMMA),
        default(TokenType.SLASH),
        default(TokenType.MINUS),
        default(TokenType.PLUS),
        default(TokenType.CARET),
        default(TokenType.DOTS),
        default(TokenType.AMP),
        default(TokenType.DOT),
        // Types
        mapValue(separated(FloatTokenizer())) { (it as String).toFloat() },
        mapValue(separated(regex(TokenType.INTEGER, "\\d"))) { (it as String).toInt() },
        StringTokenizer(),
        separated(NoteTokenizer()),
        mapValue(separated(keywords(TokenType.BOOL, "true", "false"))) { it == "true" },
        // Keywords
        separated(default(TokenType.FUNCTION)),
        separated(default(TokenType.RETURN)),
        separated(default(TokenType.EXTEND)),
        separated(default(TokenType.IMPORT)),
        separated(default(TokenType.THROW)),
        separated(default(TokenType.FROM)),
        separated(default(TokenType.WITH)),
        separated(default(TokenType.ELSE)),
        separated(default(TokenType.AND)),
        separated(default(TokenType.NOT)),
        separated(default(TokenType.AS)),
        separated(default(TokenType.IF)),
        separated(default(TokenType.OR)),
        // Identifier (couldn't be before keywords!)
        regex(TokenType.IDENTIFIER, "\\w"),
        // Whitespaces
        regex(TokenType.NONE, "\\s"),
        // Comments
        CommentTokenizer()
    )

    // Tokens failing any predicate are dropped from the final TokenList.
    private val filters: List<(Token) -> Boolean> = listOf(
        { token -> token.type != TokenType.NONE },
        { token -> token.type != TokenType.COMMENT }
    )

    private val tokenizer = Tokenizer.firstOf(tokenizers)

    /**
     * Tokenizes whole source [lines] into a filtered [TokenList].
     * @throws RuntimeException when a character matches no tokenizer.
     */
    fun tokenize(lines: List<String>): TokenList {
        val tokens: MutableList<Token> = mutableListOf()
        for ((index, line) in lines.withIndex()) {
            var current = 0
            while (current < line.length) {
                val output = tokenize(line, current, index)
                if (!output.consumed()) {
                    // Report the position (1-based line) instead of just the character.
                    throw RuntimeException("Unknown symbol '${line[current]}' at line ${index + 1}, col $current")
                }
                current += output.consumedChars
                tokens.add(output.token)
            }
        }
        val filteredTokens = filterTokens(tokens.toList())
        return TokenList(filteredTokens, lines)
    }

    // Keeps only tokens accepted by every filter predicate.
    private fun filterTokens(tokens: List<Token>): List<Token> {
        return tokens.filter { token -> filters.all { filter -> filter(token) } }
    }

    override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
        return tokenizer.tokenize(input, current, line)
    }
}

View File

@@ -0,0 +1,16 @@
package dsl.token.tokenizer
import dsl.token.model.entity.Token
import dsl.token.model.entity.TokenizerOutput
import dsl.token.model.enumeration.TokenType
/**
 * Matches a float literal of the form digits '.' digits (e.g. "3.14").
 * The token value is the concatenated raw text; numeric conversion happens
 * later via mapValue in the tokenizer pipeline.
 */
class FloatTokenizer : Tokenizer {
    // Build the composite matcher once instead of on every call.
    private val delegate = Tokenizer.combined(
        { values, tokenPosition -> Token(TokenType.FLOAT, values.joinToString(""), tokenPosition) },
        Tokenizer.regex(TokenType.NONE, "\\d"),
        Tokenizer.keyword(TokenType.NONE, "."),
        Tokenizer.regex(TokenType.NONE, "\\d")
    )

    override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput =
        delegate.tokenize(input, current, line)
}

View File

@@ -0,0 +1,76 @@
package dsl.token.tokenizer
import data.model.entity.Note
import data.model.enumeration.Pitch
import dsl.token.model.entity.TokenizerOutput
import dsl.token.model.enumeration.TokenType
/**
 * Matches a note literal: '@' pitch ['b'|'#'] [octave-digit] [':' duration-digits ['d']],
 * e.g. "@c", "@d#4", "@a4:8d". Octave and duration are optional and fall back
 * to the Note.Builder defaults when absent.
 *
 * The scan is strictly positional: each section advances consumedChars only
 * after appending to rawValue, so the produced TokenPosition spans exactly
 * the consumed text.
 */
class NoteTokenizer : Tokenizer {
    override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
        var consumedChars = 0
        var notePitch: String
        var octave: Int? = null        // null -> keep Builder default octave
        var duration: String? = null   // null -> keep Builder default duration
        var dot = false
        var rawValue = ""
        // Note literal start symbol
        if(input[current] == '@') {
            rawValue += input[current+consumedChars]
            consumedChars += 1
            // Note basic pitch
            if(listOf('c', 'd', 'e', 'f', 'g', 'a', 'h').contains(input[current+consumedChars].toLowerCase())) {
                rawValue += input[current+consumedChars]
                notePitch = input[current+consumedChars].toString()
                consumedChars += 1
                // Flat or sharp
                if(current+consumedChars < input.length && listOf('b', '#').contains(input[current+consumedChars])) {
                    rawValue += input[current+consumedChars]
                    notePitch += input[current+consumedChars]
                    consumedChars += 1
                }
                // Octave
                if(current+consumedChars < input.length && "\\d".toRegex().matches(input[current+consumedChars].toString())) {
                    rawValue += input[current+consumedChars]
                    octave = input[current+consumedChars].toString().toInt()
                    consumedChars += 1
                }
                // Duration start symbol
                if(current+consumedChars < input.length && input[current+consumedChars] == ':') {
                    rawValue += input[current+consumedChars]
                    duration = ""
                    consumedChars += 1
                    while(current+consumedChars < input.length && "\\d".toRegex().matches(input[current+consumedChars].toString())) {
                        rawValue += input[current+consumedChars]
                        duration += input[current+consumedChars]
                        consumedChars += 1
                    }
                    // ':' with no digits after it is not a valid note literal.
                    if(duration.isEmpty()) {
                        return TokenizerOutput.NONE
                    }
                    // Optional dotted-duration marker.
                    dot = (current+consumedChars < input.length && input[current+consumedChars] == 'd')
                    if(dot) {
                        rawValue += input[current+consumedChars]
                        consumedChars += 1
                    }
                }
                // Only explicitly parsed fields override the Builder defaults.
                val note = Note.Builder()
                    .pitch(Pitch.parse(notePitch))
                    .dot(dot)
                octave?.let { note.octave(it) }
                duration?.let { note.duration(it.toInt()) }
                return TokenizerOutput.produce(consumedChars, note.build(), rawValue, TokenType.NOTE, line, current)
            }
        }
        return TokenizerOutput.NONE
    }
}

View File

@@ -0,0 +1,24 @@
package dsl.token.tokenizer
import dsl.token.model.entity.TokenizerOutput
import dsl.token.model.enumeration.TokenType
/**
 * Matches a double-quoted string literal. The token value is the text between
 * the quotes; the raw value includes both quotes. No escape sequences are
 * supported (a '"' always terminates the literal).
 *
 * Fix: an unterminated literal (opening quote with no closing quote before the
 * end of the line) previously indexed one past the end of the input and threw
 * StringIndexOutOfBoundsException; it now yields NONE so the caller reports a
 * proper tokenization error.
 */
class StringTokenizer : Tokenizer {
    override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
        if(input[current] == '"') {
            var value = input[current].toString()
            var consumedChars = 1
            while(input.length > current + consumedChars && input[current+consumedChars] != '"') {
                value += input[current + consumedChars]
                consumedChars += 1
            }
            // Loop ended either on the closing quote or by running out of input.
            if (current + consumedChars >= input.length) {
                return TokenizerOutput.NONE
            }
            value += input[current + consumedChars]  // consume the closing quote
            consumedChars += 1
            return TokenizerOutput.produce(consumedChars, value.substring(1, value.length-1), value, TokenType.STRING, line, current)
        }
        return TokenizerOutput.NONE
    }
}

View File

@@ -0,0 +1,152 @@
package dsl.token.tokenizer
import dsl.token.model.entity.Token
import dsl.token.model.entity.TokenPosition
import dsl.token.model.entity.TokenizerOutput
import dsl.token.model.enumeration.TokenType
/**
 * A tokenizer attempts to match the text of [input] starting at index
 * [current] on source line [line]; it returns TokenizerOutput.NONE when it
 * does not match. The companion object provides combinators for building
 * tokenizers from smaller pieces.
 *
 * Perf fix: regex patterns are now compiled once when a tokenizer is
 * constructed, instead of once per scanned character (regex()) or once per
 * call (separated()). Behavior is unchanged.
 */
interface Tokenizer {
    fun tokenize(input: String, current: Int, line: Int): TokenizerOutput

    companion object {
        /** Consumes the longest run of single characters matching [pattern]. */
        fun regex(type: TokenType, pattern: String): Tokenizer {
            // Hoisted: previously pattern.toRegex() ran inside the per-character loop.
            val compiled = pattern.toRegex()
            return object : Tokenizer {
                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
                    var consumedChars = 0
                    var value = ""
                    while (current + consumedChars < input.length && compiled.matches(input[current + consumedChars].toString())) {
                        value += input[current + consumedChars]
                        consumedChars += 1
                    }
                    return TokenizerOutput.produce(consumedChars, value, type, line, current)
                }
            }
        }

        /** Matches a literal keyword ("function", "or", "."). */
        fun keyword(type: TokenType, keyword: String): Tokenizer {
            return object : Tokenizer {
                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
                    if ((input.length >= current + keyword.length) && (input.substring(
                            current,
                            current + keyword.length
                        ) == keyword)
                    ) {
                        return TokenizerOutput.produce(keyword.length, keyword, type, line, current)
                    }
                    return TokenizerOutput.NONE
                }
            }
        }

        /** Matches the first of several literal keywords, in the given order. */
        fun keywords(type: TokenType, vararg keywords: String): Tokenizer {
            return object : Tokenizer {
                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
                    for (keyword in keywords) {
                        val output = keyword(type, keyword).tokenize(input, current, line)
                        if (output.consumed()) {
                            return output
                        }
                    }
                    return TokenizerOutput.NONE
                }
            }
        }

        /** Matches the TokenType's own literal text (type.token). */
        fun default(type: TokenType): Tokenizer {
            return object : Tokenizer {
                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
                    return keyword(type, type.token).tokenize(input, current, line)
                }
            }
        }

        /**
         * Accepts a match only when it is followed by a character matching
         * [end] or by end-of-input — isolates tokens so "function" does not
         * match inside "functions".
         */
        fun separated(tokenizer: Tokenizer, end: String = "\\W"): Tokenizer {
            // Hoisted: previously end.toRegex() ran on every tokenize() call.
            val boundary = end.toRegex()
            return object : Tokenizer {
                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
                    val output = tokenizer.tokenize(input, current, line)
                    if (output.consumed()) {
                        if ((input.length > current + output.consumedChars) && boundary.matches(input[current + output.consumedChars].toString())) {
                            return output
                        }
                        if ((input.length == current + output.consumedChars)) {
                            return output
                        }
                    }
                    return TokenizerOutput.NONE
                }
            }
        }

        /** Transforms the matched token's value (rawValue is kept intact). */
        fun mapValue(tokenizer: Tokenizer, mapper: (Any) -> Any): Tokenizer {
            return object : Tokenizer {
                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
                    val output = tokenizer.tokenize(input, current, line)
                    if(output.consumed()) {
                        return output.mapToken { output.token.mapValue(mapper) }
                    }
                    return TokenizerOutput.NONE
                }
            }
        }

        /**
         * Runs [tokenizers] back-to-back; all must match, and [createToken]
         * merges their raw values into one token spanning the whole match
         * (e.g. "3.14" = regex(\d) + keyword(.) + regex(\d)).
         */
        fun combined(createToken: (List<String>, TokenPosition) -> Token, vararg tokenizers: Tokenizer): Tokenizer {
            return object : Tokenizer {
                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
                    var consumedChars = 0
                    val tokens: MutableList<Token> = mutableListOf()
                    for(tokenizer in tokenizers) {
                        val output = tokenizer.tokenize(input, current + consumedChars, line)
                        if(output.consumed()) {
                            consumedChars += output.consumedChars
                            tokens.add(output.token)
                        } else {
                            return TokenizerOutput.NONE
                        }
                    }
                    if(consumedChars == 0) {
                        return TokenizerOutput.NONE
                    }
                    return TokenizerOutput.produce(consumedChars, createToken(tokens.map { it.rawValue }, TokenPosition(line, tokens.first().position.beginCol, tokens.last().position.endCol)))
                }
            }
        }

        /** Returns the output of the first tokenizer in [tokenizers] that matches. */
        fun firstOf(tokenizers: List<Tokenizer>): Tokenizer {
            return object : Tokenizer {
                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
                    for (tokenizer in tokenizers) {
                        val output = tokenizer.tokenize(input, current, line)
                        if(output.consumed()) {
                            return output
                        }
                    }
                    return TokenizerOutput.NONE
                }
            }
        }
    }
}

View File

@@ -0,0 +1,22 @@
package interpreter
import dsl.token.tokenizer.DefaultTokenizer
import java.io.BufferedInputStream
import java.io.File
import java.io.FileInputStream
/**
 * Front end of the interpreter: tokenizes source code and (for now) prints
 * the resulting token list.
 *
 * Refactor: both overloads previously duplicated the tokenize-and-print
 * pipeline; it now lives in one private helper. Line splitting is unchanged:
 * the String overload splits on "\n", the File overload uses readLines().
 */
class Interpreter {
    /** Runs raw source code supplied as a single string. */
    fun run(code: String) {
        execute(code.split("\n"))
    }

    /** Runs the source contained in [file]. */
    fun run(file: File) {
        execute(file.readLines())
    }

    // Shared pipeline: tokenize the lines and dump the tokens.
    private fun execute(lines: List<String>) {
        val tokens = DefaultTokenizer().tokenize(lines)
        println(tokens)
    }
}