From 4697411cc933ab4cc4954f7a43a1268e412a21a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=C5=82omiej=20Przemys=C5=82aw=20Pluta?= Date: Wed, 4 Mar 2020 18:11:35 +0100 Subject: [PATCH] Migrate tokenizer to Kotlin --- build.gradle | 25 +++ gradle.properties | 1 + gradlew | 172 ++++++++++++++++++ gradlew.bat | 84 +++++++++ settings.gradle | 2 + src/main/kotlin/SMNP.kt | 8 + src/main/kotlin/data/model/entity/Note.kt | 17 ++ .../kotlin/data/model/enumeration/Pitch.kt | 53 ++++++ .../kotlin/dsl/token/model/entity/Token.kt | 19 ++ .../dsl/token/model/entity/TokenList.kt | 58 ++++++ .../dsl/token/model/entity/TokenPosition.kt | 15 ++ .../dsl/token/model/entity/TokenizerOutput.kt | 29 +++ .../dsl/token/model/enumeration/TokenType.kt | 49 +++++ .../dsl/token/tokenizer/CommentTokenizer.kt | 21 +++ .../dsl/token/tokenizer/DefaultTokenizer.kt | 110 +++++++++++ .../dsl/token/tokenizer/FloatTokenizer.kt | 16 ++ .../dsl/token/tokenizer/NoteTokenizer.kt | 76 ++++++++ .../dsl/token/tokenizer/StringTokenizer.kt | 24 +++ .../kotlin/dsl/token/tokenizer/Tokenizer.kt | 152 ++++++++++++++++ src/main/kotlin/interpreter/Interpreter.kt | 22 +++ 20 files changed, 953 insertions(+) create mode 100644 build.gradle create mode 100644 gradle.properties create mode 100755 gradlew create mode 100644 gradlew.bat create mode 100644 settings.gradle create mode 100644 src/main/kotlin/SMNP.kt create mode 100644 src/main/kotlin/data/model/entity/Note.kt create mode 100644 src/main/kotlin/data/model/enumeration/Pitch.kt create mode 100644 src/main/kotlin/dsl/token/model/entity/Token.kt create mode 100644 src/main/kotlin/dsl/token/model/entity/TokenList.kt create mode 100644 src/main/kotlin/dsl/token/model/entity/TokenPosition.kt create mode 100644 src/main/kotlin/dsl/token/model/entity/TokenizerOutput.kt create mode 100644 src/main/kotlin/dsl/token/model/enumeration/TokenType.kt create mode 100644 src/main/kotlin/dsl/token/tokenizer/CommentTokenizer.kt create mode 100644 
src/main/kotlin/dsl/token/tokenizer/DefaultTokenizer.kt create mode 100644 src/main/kotlin/dsl/token/tokenizer/FloatTokenizer.kt create mode 100644 src/main/kotlin/dsl/token/tokenizer/NoteTokenizer.kt create mode 100644 src/main/kotlin/dsl/token/tokenizer/StringTokenizer.kt create mode 100644 src/main/kotlin/dsl/token/tokenizer/Tokenizer.kt create mode 100644 src/main/kotlin/interpreter/Interpreter.kt diff --git a/build.gradle b/build.gradle new file mode 100644 index 0000000..05d4341 --- /dev/null +++ b/build.gradle @@ -0,0 +1,25 @@ +plugins { + id 'java' + id 'org.jetbrains.kotlin.jvm' version '1.3.61' +} + +group 'io.bartek' +version '1.0-SNAPSHOT' + +sourceCompatibility = 1.8 + +repositories { + mavenCentral() +} + +dependencies { + implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk8" + testCompile group: 'junit', name: 'junit', version: '4.12' +} + +compileKotlin { + kotlinOptions.jvmTarget = "1.8" +} +compileTestKotlin { + kotlinOptions.jvmTarget = "1.8" +} \ No newline at end of file diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 0000000..29e08e8 --- /dev/null +++ b/gradle.properties @@ -0,0 +1 @@ +kotlin.code.style=official \ No newline at end of file diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..af6708f --- /dev/null +++ b/gradlew @@ -0,0 +1,172 @@ +#!/usr/bin/env sh + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. 
+while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? 
-eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" "$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=$(save "$@") + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong +if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then + cd "$(dirname "$0")" +fi + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..6d57edc --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,84 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. 
+ +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/settings.gradle b/settings.gradle
new file mode 100644
index 0000000..e1835d6
--- /dev/null
+++ b/settings.gradle
@@ -0,0 +1,2 @@
+rootProject.name = 'smnp'
+
diff --git a/src/main/kotlin/SMNP.kt b/src/main/kotlin/SMNP.kt
new file mode 100644
index 0000000..c347f37
--- /dev/null
+++ b/src/main/kotlin/SMNP.kt
@@ -0,0 +1,14 @@
+import dsl.token.tokenizer.DefaultTokenizer
+import interpreter.Interpreter
+import java.io.File
+
+/**
+ * Entry point: runs the interpreter on the SMNP source file named by the first argument.
+ *
+ * When no argument is given, the developer-local sample path that used to be
+ * hard-coded here is used, so existing invocations keep working.
+ */
+fun main(args: Array<String>) {
+    val path = args.firstOrNull() ?: "/home/bartek/Developent/SMNP-Kotlin/examples/adeste.mus"
+    Interpreter().run(File(path))
+}
\ No newline at end of file
diff --git a/src/main/kotlin/data/model/entity/Note.kt b/src/main/kotlin/data/model/entity/Note.kt
new file mode 100644
index 0000000..dd7102d
--- /dev/null
+++ b/src/main/kotlin/data/model/entity/Note.kt
@@ -0,0 +1,24 @@
+package data.model.entity
+
+import data.model.enumeration.Pitch
+
+/**
+ * A note value: [pitch], [octave], [duration] and an optional [dot] flag.
+ *
+ * All properties are read-only; instances are created through [Builder].
+ */
+class Note private constructor(val pitch: Pitch, val octave: Int, val duration: Int, val dot: Boolean) {
+    /** Mutable builder; the defaults are pitch A, octave 4, duration 4 and no dot. */
+    data class Builder(var pitch: Pitch = Pitch.A, var octave: Int = 4, var duration: Int = 4, var dot: Boolean = false) {
+        fun pitch(pitch: Pitch) = apply { this.pitch = pitch }
+        fun octave(octave: Int) = apply { this.octave = octave }
+        fun duration(duration: Int) = apply { this.duration = duration }
+        fun dot(dot: Boolean) = apply { this.dot = dot }
+        fun build() = Note(pitch, octave, duration, dot)
+    }
+
+    /** Renders the note as `<pitch><octave>:<duration>`, followed by `d` when dotted. */
+    override fun toString(): String {
+        return "${pitch}${octave}:${duration}${if (dot) "d" else ""}"
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/data/model/enumeration/Pitch.kt b/src/main/kotlin/data/model/enumeration/Pitch.kt
new file mode 100644
index 0000000..a7af688
--- /dev/null
+++ b/src/main/kotlin/data/model/enumeration/Pitch.kt
@@ -0,0 +1,53 @@
+package data.model.enumeration
+
+import java.lang.RuntimeException
+
+enum class Pitch {
+
C, C_S, D, D_S, E, F, F_S, G, G_S, A, A_S, H;
+
+    override fun toString(): String { // display name; the *_S constants are written with a trailing '#'
+        return when(this) {
+            C -> "C"
+            C_S -> "C#"
+            D -> "D"
+            D_S -> "D#"
+            E -> "E"
+            F -> "F"
+            F_S -> "F#"
+            G -> "G"
+            G_S -> "G#"
+            A -> "A"
+            A_S -> "A#"
+            H -> "H"
+        }
+    }
+
+    companion object {
+        fun parse(symbol: String): Pitch { // case-insensitive; several spellings map to one constant (e.g. "db" and "c#")
+            return when(symbol.toLowerCase()) {
+                "cb" -> H
+                "c" -> C
+                "c#" -> C_S
+                "db" -> C_S
+                "d" -> D
+                "d#" -> D_S
+                "eb" -> D_S
+                "e" -> E
+                "e#" -> F
+                "fb" -> E
+                "f" -> F
+                "f#" -> F_S
+                "gb" -> F_S
+                "g" -> G
+                "g#" -> G_S
+                "ab" -> G_S
+                "a" -> A
+                "a#" -> A_S
+                "b" -> A_S
+                "h" -> H
+                "h#" -> C
+                else -> throw RuntimeException("Unknown pitch symbol") // every other spelling is rejected
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/dsl/token/model/entity/Token.kt b/src/main/kotlin/dsl/token/model/entity/Token.kt
new file mode 100644
index 0000000..ccf1eed
--- /dev/null
+++ b/src/main/kotlin/dsl/token/model/entity/Token.kt
@@ -0,0 +1,26 @@
+package dsl.token.model.entity
+
+import dsl.token.model.enumeration.TokenType
+
+/**
+ * A lexical token: its [type], the parsed [value], the matched source text [rawValue] and its [position].
+ */
+data class Token(val type: TokenType, val value: Any, val rawValue: String, val position: TokenPosition) {
+    /** Creates a token whose parsed value is the raw text itself. */
+    constructor(type: TokenType, value: String, position: TokenPosition): this(type, value, value, position)
+
+    /** Returns a new token with [value] passed through [mapper]; [type], [rawValue] and [position] are kept. */
+    fun mapValue(mapper: (Any) -> Any): Token {
+        return Token(type, mapper(value), rawValue, position)
+    }
+
+    companion object {
+        /** Placeholder token: type [TokenType.NONE], empty text, [TokenPosition.NONE]. */
+        val NONE = Token(TokenType.NONE, "", TokenPosition.NONE)
+    }
+
+    /** Debug form: lower-cased type name, raw text in » « and the short position. */
+    override fun toString(): String {
+        return "(${type.name.toLowerCase()}, »${rawValue}«, ${position.short()})"
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/dsl/token/model/entity/TokenList.kt b/src/main/kotlin/dsl/token/model/entity/TokenList.kt
new file mode 100644
index 0000000..e19205d
--- /dev/null
+++ b/src/main/kotlin/dsl/token/model/entity/TokenList.kt
@@ -0,0 +1,58 @@
+package dsl.token.model.entity
+
+import java.lang.RuntimeException
+
+class TokenList(val tokens: List<Token>, val lines: List<String>) { // tokens plus the source lines handed over by the tokenizer
+    private var
cursor = 0
+    private var snap = 0 // NOTE(review): never read or written in the visible code
+
+    operator fun get(index: Int): Token { // random access, independent of the cursor
+        return tokens[index]
+    }
+
+    fun current(): Token { // token under the cursor; throws when the cursor is past the end
+        if(!hasCurrent()) {
+            throw RuntimeException("Cursor points to not existing token! Cursor = ${cursor}, length = ${tokens.size}")
+        }
+
+        return tokens[cursor]
+    }
+
+    fun currentPos(): TokenPosition {
+        return current().position
+    }
+
+    fun hasCurrent(): Boolean {
+        return cursor < tokens.size
+    }
+
+    fun hasMore(count: Int = 1): Boolean { // true when a token exists `count` positions after the cursor
+        return cursor + count < tokens.size
+    }
+
+    fun next(number: Int = 1): Token { // look-ahead without moving the cursor; not bounds-checked
+        return tokens[cursor + number]
+    }
+
+    fun prev(number: Int = 1): Token { // look-behind without moving the cursor; not bounds-checked
+        return tokens[cursor - number]
+    }
+
+    fun ahead() { // moves the cursor one token forward
+        cursor += 1
+    }
+
+    fun snapshot(): Int { // current cursor value, to be passed to restore() later
+        return cursor
+    }
+
+    fun restore(snapshot: Int) {
+        cursor = snapshot
+    }
+
+    fun reset() {
+        cursor = 0
+    }
+
+    override fun toString(): String = tokens.joinToString("\n") // one token per line, so printing the list is readable
+}
\ No newline at end of file
diff --git a/src/main/kotlin/dsl/token/model/entity/TokenPosition.kt b/src/main/kotlin/dsl/token/model/entity/TokenPosition.kt
new file mode 100644
index 0000000..7e5a5b5
--- /dev/null
+++ b/src/main/kotlin/dsl/token/model/entity/TokenPosition.kt
@@ -0,0 +1,21 @@
+package dsl.token.model.entity
+
+/**
+ * Location of a token: [line] (printed as `line + 1`) and the [beginCol]..[endCol] column span.
+ */
+data class TokenPosition(val line: Int, val beginCol: Int, val endCol: Int) {
+    companion object {
+        /** Sentinel for tokens without a source location. */
+        val NONE = TokenPosition(-1, -1, -1)
+    }
+
+    /** Long form, e.g. `[line 3, col 7]` for `line = 2`, `beginCol = 7`. */
+    override fun toString(): String {
+        return "[line ${line+1}, col ${beginCol}]"
+    }
+
+    /** Compact `line:col` form, e.g. `3:7` for `line = 2`, `beginCol = 7`. */
+    fun short(): String {
+        return "${line + 1}:$beginCol"
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/dsl/token/model/entity/TokenizerOutput.kt b/src/main/kotlin/dsl/token/model/entity/TokenizerOutput.kt
new file mode 100644
index 0000000..f8c02eb
--- /dev/null
+++ b/src/main/kotlin/dsl/token/model/entity/TokenizerOutput.kt
@@ -0,0 +1,29 @@
+package dsl.token.model.entity
+
+import dsl.token.model.enumeration.TokenType
+
+data class TokenizerOutput(val consumedChars: Int, val token: Token) {
+    fun consumed(): Boolean { // true when at least one character was matched
+        return consumedChars > 0
+    }
+
+    fun
mapToken(mapper: (Token) -> Token): TokenizerOutput { // same consumedChars, token replaced by mapper(token)
+        return TokenizerOutput(consumedChars, mapper(token) )
+    }
+
+    companion object {
+        val NONE = TokenizerOutput(0, Token.NONE) // "nothing matched": zero characters and the placeholder token
+
+        fun produce(consumedChars: Int, token: Token): TokenizerOutput {
+            return if (consumedChars > 0) TokenizerOutput(consumedChars, token) else NONE // non-positive counts collapse to NONE
+        }
+
+        fun produce(consumedChars: Int, value: String, tokenType: TokenType, line: Int, beginCol: Int): TokenizerOutput { // value doubles as the raw text
+            return produce(consumedChars, Token(tokenType, value, TokenPosition(line, beginCol, beginCol + consumedChars)))
+        }
+
+        fun produce(consumedChars: Int, value: Any, rawValue: String, tokenType: TokenType, line: Int, beginCol: Int): TokenizerOutput { // endCol is beginCol + consumedChars
+            return produce(consumedChars, Token(tokenType, value, rawValue, TokenPosition(line, beginCol, beginCol + consumedChars)))
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/dsl/token/model/enumeration/TokenType.kt b/src/main/kotlin/dsl/token/model/enumeration/TokenType.kt
new file mode 100644
index 0000000..2e2412f
--- /dev/null
+++ b/src/main/kotlin/dsl/token/model/enumeration/TokenType.kt
@@ -0,0 +1,49 @@
+package dsl.token.model.enumeration
+
+enum class TokenType(val token: String) { // token: the literal text for operators/keywords, a plain label for the other kinds
+    NONE(""),
+    RELATION("==, !=, >=, <="), // label listing the operators; DefaultTokenizer matches them one by one via keywords()
+    DOUBLE_ASTERISK("**"),
+    OPEN_CURLY("{"),
+    CLOSE_CURLY("}"),
+    OPEN_PAREN("("),
+    CLOSE_PAREN(")"),
+    OPEN_SQUARE("["),
+    CLOSE_SQUARE("]"),
+    OPEN_ANGLE("<"),
+    CLOSE_ANGLE(">"),
+    SEMICOLON(";"),
+    ASTERISK("*"),
+    PERCENT("%"),
+    ASSIGN("="),
+    ARROW("->"),
+    COMMA(","),
+    SLASH("/"),
+    MINUS("-"),
+    PLUS("+"),
+    CARET("^"),
+    DOTS("..."),
+    AMP("&"),
+    DOT("."),
+    AND("and"),
+    OR("or"),
+    NOT("not"),
+    INTEGER("integer"), // INTEGER..BOOL: produced by dedicated tokenizers in DefaultTokenizer, not by matching this text
+    STRING("string"),
+    FLOAT("float"),
+    NOTE("note"),
+    BOOL("bool"),
+    TYPE("type"), // NOTE(review): DefaultTokenizer registers no tokenizer for TYPE
+    FUNCTION("function"),
+    RETURN("return"),
+    EXTEND("extend"),
+    IMPORT("import"),
+    THROW("throw"),
+    FROM("from"),
+    WITH("with"),
+    ELSE("else"),
+    IF("if"),
+    AS("as"),
+    IDENTIFIER("identifier"), // matched by a \w character regex in DefaultTokenizer
+    COMMENT("comment") // produced by CommentTokenizer and filtered out of the final token list
+}
\ No newline at end of file
diff --git a/src/main/kotlin/dsl/token/tokenizer/CommentTokenizer.kt b/src/main/kotlin/dsl/token/tokenizer/CommentTokenizer.kt
new file mode 100644
index 0000000..7aef5a7
--- /dev/null
+++ b/src/main/kotlin/dsl/token/tokenizer/CommentTokenizer.kt
@@ -0,0 +1,20 @@
+package dsl.token.tokenizer
+
+import dsl.token.model.entity.TokenizerOutput
+import dsl.token.model.enumeration.TokenType
+
+/**
+ * Recognizes `#` comments, which run from the `#` to the end of the input line.
+ *
+ * The token value is the comment text without the leading `#`, trimmed; the raw value keeps it verbatim.
+ */
+class CommentTokenizer : Tokenizer {
+    override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+        if (input[current] != '#') {
+            return TokenizerOutput.NONE
+        }
+
+        val rawValue = input.substring(current)
+        return TokenizerOutput.produce(rawValue.length, rawValue.substring(1).trim(), rawValue, TokenType.COMMENT, line, current)
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/dsl/token/tokenizer/DefaultTokenizer.kt b/src/main/kotlin/dsl/token/tokenizer/DefaultTokenizer.kt
new file mode 100644
index 0000000..d4f44d6
--- /dev/null
+++ b/src/main/kotlin/dsl/token/tokenizer/DefaultTokenizer.kt
@@ -0,0 +1,110 @@
+package dsl.token.tokenizer
+
+import dsl.token.model.entity.Token
+import dsl.token.model.entity.TokenList
+import dsl.token.model.entity.TokenizerOutput
+import dsl.token.model.enumeration.TokenType
+import dsl.token.tokenizer.Tokenizer.Companion.default
+import dsl.token.tokenizer.Tokenizer.Companion.keywords
+import dsl.token.tokenizer.Tokenizer.Companion.mapValue
+import dsl.token.tokenizer.Tokenizer.Companion.regex
+import dsl.token.tokenizer.Tokenizer.Companion.separated
+
+class DefaultTokenizer : Tokenizer {
+    private val tokenizers = listOf( // order matters: the list is handed to Tokenizer.firstOf, so the first match wins
+        default(TokenType.ARROW), // "->" is tried before MINUS and CLOSE_ANGLE
+
+        // Double-character operators
+        keywords(TokenType.RELATION, "==", "!=", "<=", ">="),
+        default(TokenType.DOUBLE_ASTERISK),
+
+        // Characters
+        default(TokenType.OPEN_CURLY),
+        default(TokenType.CLOSE_CURLY),
+
default(TokenType.OPEN_PAREN),
+        default(TokenType.CLOSE_PAREN),
+        default(TokenType.OPEN_SQUARE),
+        default(TokenType.CLOSE_SQUARE),
+        default(TokenType.OPEN_ANGLE), // listed after RELATION, so "<=" is not split into "<" and "="
+        default(TokenType.CLOSE_ANGLE),
+        default(TokenType.SEMICOLON),
+        default(TokenType.ASTERISK), // listed after DOUBLE_ASTERISK, so "**" is not split
+        default(TokenType.PERCENT),
+        default(TokenType.ASSIGN), // listed after RELATION, so "==" is not split
+        default(TokenType.COMMA),
+        default(TokenType.SLASH),
+        default(TokenType.MINUS), // listed after ARROW, so "->" is not split
+        default(TokenType.PLUS),
+        default(TokenType.CARET),
+        default(TokenType.DOTS), // "..." has to be tried before the single "."
+        default(TokenType.AMP),
+        default(TokenType.DOT),
+
+        // Types
+        mapValue(separated(FloatTokenizer())) { (it as String).toFloat() }, // tried before INTEGER, otherwise "3.14" would start with integer 3
+        mapValue(separated(regex(TokenType.INTEGER, "\\d"))) { (it as String).toInt() },
+        StringTokenizer(),
+        separated(NoteTokenizer()),
+        mapValue(separated(keywords(TokenType.BOOL, "true", "false"))) { it == "true" }, // token value becomes a Boolean
+
+        // Keywords (separated: must be followed by a non-word character or the end of the line)
+        separated(default(TokenType.FUNCTION)),
+        separated(default(TokenType.RETURN)),
+        separated(default(TokenType.EXTEND)),
+        separated(default(TokenType.IMPORT)),
+        separated(default(TokenType.THROW)),
+        separated(default(TokenType.FROM)),
+        separated(default(TokenType.WITH)),
+        separated(default(TokenType.ELSE)),
+        separated(default(TokenType.AND)),
+        separated(default(TokenType.NOT)),
+        separated(default(TokenType.AS)),
+        separated(default(TokenType.IF)),
+        separated(default(TokenType.OR)),
+
+        // Identifier (must come after the keywords, otherwise it would swallow them)
+        regex(TokenType.IDENTIFIER, "\\w"),
+
+        // Whitespace (typed NONE, removed again by the filters below)
+        regex(TokenType.NONE, "\\s"),
+
+        // Comments
+        CommentTokenizer()
+    )
+
+    private val filters: List<(Token) -> Boolean> = listOf( // a token is kept only if every filter accepts it
+        { token -> token.type != TokenType.NONE },
+        { token -> token.type != TokenType.COMMENT }
+    )
+
+    private val tokenizer = Tokenizer.firstOf(tokenizers)
+
+    fun tokenize(lines: List<String>): TokenList { // tokenizes line by line; the list index becomes the token's line number
+        val tokens: MutableList<Token> = mutableListOf()
+
+        for ((index, line) in lines.withIndex()) {
+            var current = 0
+            while (current < line.length) {
+                val output = tokenize(line, current, index)
+
+                if (!output.consumed()) { // no tokenizer matched at this column
+                    throw RuntimeException("Unknown symbol ${line[current]}")
+                }
+
+                current += output.consumedChars
+                tokens.add(output.token)
+            }
+        }
+
+        val filteredTokens = filterTokens(tokens.toList())
+        return TokenList(filteredTokens, lines)
+    }
+
+    private fun filterTokens(tokens: List<Token>): List<Token> {
+        return tokens.filter { token -> filters.all { filter -> filter(token) } }
+    }
+
+    override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput { // single-token step: delegates to the first matching tokenizer
+        return tokenizer.tokenize(input, current, line)
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/dsl/token/tokenizer/FloatTokenizer.kt b/src/main/kotlin/dsl/token/tokenizer/FloatTokenizer.kt
new file mode 100644
index 0000000..9786581
--- /dev/null
+++ b/src/main/kotlin/dsl/token/tokenizer/FloatTokenizer.kt
@@ -0,0 +1,22 @@
+package dsl.token.tokenizer
+
+import dsl.token.model.entity.Token
+import dsl.token.model.entity.TokenizerOutput
+import dsl.token.model.enumeration.TokenType
+
+/**
+ * Recognizes float literals of the form `digits.digits` (for example `3.14`).
+ */
+class FloatTokenizer : Tokenizer {
+    // Built once per instance instead of on every tokenize() call.
+    private val delegate = Tokenizer.combined(
+        { values, tokenPosition -> Token(TokenType.FLOAT, values.joinToString(""), tokenPosition) },
+        Tokenizer.regex(TokenType.NONE, "\\d"),
+        Tokenizer.keyword(TokenType.NONE, "."),
+        Tokenizer.regex(TokenType.NONE, "\\d")
+    )
+
+    override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+        return delegate.tokenize(input, current, line)
+    }
+}
\ No newline at end of
file
diff --git a/src/main/kotlin/dsl/token/tokenizer/NoteTokenizer.kt b/src/main/kotlin/dsl/token/tokenizer/NoteTokenizer.kt
new file mode 100644
index 0000000..f269cdb
--- /dev/null
+++ b/src/main/kotlin/dsl/token/tokenizer/NoteTokenizer.kt
@@ -0,0 +1,76 @@
+package dsl.token.tokenizer
+
+import data.model.entity.Note
+import data.model.enumeration.Pitch
+import dsl.token.model.entity.TokenizerOutput
+import dsl.token.model.enumeration.TokenType
+/** Recognizes note literals: `@`, a pitch letter, then optional `b`/`#`, one octave digit and `:duration` with optional `d`. */
+class NoteTokenizer : Tokenizer {
+    override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+        var consumedChars = 0
+        var notePitch: String
+        var octave: Int? = null
+        var duration: String? = null
+        var dot = false
+        var rawValue = ""
+
+        // Note literal start symbol
+        if(input[current] == '@') {
+            rawValue += input[current+consumedChars]
+            consumedChars += 1
+
+            // Note basic pitch (bounds-checked: a lone '@' at the end of the line is not a note)
+            if(current+consumedChars < input.length && listOf('c', 'd', 'e', 'f', 'g', 'a', 'h').contains(input[current+consumedChars].toLowerCase())) {
+                rawValue += input[current+consumedChars]
+                notePitch = input[current+consumedChars].toString()
+                consumedChars += 1
+
+                // Flat or sharp
+                if(current+consumedChars < input.length && listOf('b', '#').contains(input[current+consumedChars])) {
+                    rawValue += input[current+consumedChars]
+                    notePitch += input[current+consumedChars]
+                    consumedChars += 1
+                }
+
+                // Octave (a single ASCII digit)
+                if(current+consumedChars < input.length && input[current+consumedChars] in '0'..'9') {
+                    rawValue += input[current+consumedChars]
+                    octave = input[current+consumedChars].toString().toInt()
+                    consumedChars += 1
+                }
+
+                // Duration start symbol
+                if(current+consumedChars < input.length && input[current+consumedChars] == ':') {
+                    rawValue += input[current+consumedChars]
+                    duration = ""
+                    consumedChars += 1
+                    while(current+consumedChars < input.length && input[current+consumedChars] in '0'..'9') {
+                        rawValue += input[current+consumedChars]
+                        duration += input[current+consumedChars]
+                        consumedChars += 1
+                    }
+
+                    if(duration.isEmpty()) { // ':' without any digits is not a valid note literal
+                        return TokenizerOutput.NONE
+                    }
+                    dot = (current+consumedChars < input.length && input[current+consumedChars] == 'd') // trailing 'd' sets the dot flag
+
+                    if(dot) {
+                        rawValue += input[current+consumedChars]
+                        consumedChars += 1
+                    }
+                }
+
+                val note = Note.Builder()
+                    .pitch(Pitch.parse(notePitch)) // NOTE(review): Pitch.parse has no "hb" entry, so "@hb" throws here
+                    .dot(dot)
+                octave?.let { note.octave(it) } // builder defaults apply when octave or duration were omitted
+                duration?.let { note.duration(it.toInt()) }
+
+                return TokenizerOutput.produce(consumedChars, note.build(), rawValue, TokenType.NOTE, line, current)
+            }
+        }
+
+        return TokenizerOutput.NONE
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/dsl/token/tokenizer/StringTokenizer.kt b/src/main/kotlin/dsl/token/tokenizer/StringTokenizer.kt
new file mode 100644
index 0000000..32cd6d0
--- /dev/null
+++ b/src/main/kotlin/dsl/token/tokenizer/StringTokenizer.kt
@@ -0,0 +1,26 @@
+package dsl.token.tokenizer
+
+import dsl.token.model.entity.TokenizerOutput
+import dsl.token.model.enumeration.TokenType
+
+/**
+ * Recognizes double-quoted string literals within one input line; there is no escape handling.
+ *
+ * The token value is the text between the quotes; the raw value includes both quotes.
+ */
+class StringTokenizer : Tokenizer {
+    override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+        if (input[current] != '"') {
+            return TokenizerOutput.NONE
+        }
+
+        // An unterminated literal is reported as "no match" instead of indexing past the end of the input.
+        val closing = input.indexOf('"', current + 1)
+        if (closing < 0) {
+            return TokenizerOutput.NONE
+        }
+
+        val rawValue = input.substring(current, closing + 1)
+        return TokenizerOutput.produce(rawValue.length, rawValue.substring(1, rawValue.length - 1), rawValue, TokenType.STRING, line, current)
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/dsl/token/tokenizer/Tokenizer.kt b/src/main/kotlin/dsl/token/tokenizer/Tokenizer.kt
new file mode 100644
index 0000000..e23e53b
--- /dev/null
+++ b/src/main/kotlin/dsl/token/tokenizer/Tokenizer.kt
@@ -0,0 +1,152 @@
+package dsl.token.tokenizer
+
+import dsl.token.model.entity.Token
+import dsl.token.model.entity.TokenPosition
+import dsl.token.model.entity.TokenizerOutput
+import
dsl.token.model.enumeration.TokenType
+/** Matches a single token starting at offset `current` of `input`; the companion holds reusable combinators. */
+interface Tokenizer {
+    fun tokenize(input: String, current: Int, line: Int): TokenizerOutput // TokenizerOutput.NONE means "no match here"
+
+    companion object {
+        // Char regex: consumes the longest run of characters that each match the pattern
+        fun regex(type: TokenType, pattern: String): Tokenizer {
+            return object : Tokenizer {
+                private val charPattern = pattern.toRegex() // compiled once instead of once per character
+                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+                    var consumedChars = 0
+                    var value = ""
+                    while (current + consumedChars < input.length && charPattern.matches(input[current + consumedChars].toString())) {
+                        value += input[current + consumedChars]
+                        consumedChars += 1
+                    }
+
+                    return TokenizerOutput.produce(consumedChars, value, type, line, current)
+                }
+            }
+        }
+
+        // Literal keyword ("function", "or", ".")
+        fun keyword(type: TokenType, keyword: String): Tokenizer {
+            return object : Tokenizer {
+                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+                    if ((input.length >= current + keyword.length) && (input.substring(
+                            current,
+                            current + keyword.length
+                        ) == keyword)
+                    ) {
+                        return TokenizerOutput.produce(keyword.length, keyword, type, line, current)
+                    }
+
+                    return TokenizerOutput.NONE
+                }
+            }
+        }
+
+        // One of keywords (tried in the given order, first match wins)
+        fun keywords(type: TokenType, vararg keywords: String): Tokenizer {
+            return object : Tokenizer {
+                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+                    for (keyword in keywords) {
+                        val output = keyword(type, keyword).tokenize(input, current, line)
+                        if (output.consumed()) {
+                            return output
+                        }
+                    }
+
+                    return TokenizerOutput.NONE
+                }
+
+            }
+        }
+
+        // Token for regular TokenType (matches the literal text stored in type.token)
+        fun default(type: TokenType): Tokenizer {
+            return object : Tokenizer {
+                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+                    return keyword(type, type.token).tokenize(input, current, line)
+                }
+
+            }
+        }
+
+        // Isolate dsl.token (for example "function" | "functions" | "function s")
+        fun separated(tokenizer: Tokenizer, end: String = "\\W"): Tokenizer {
+            return object : Tokenizer
{
+                private val endPattern = end.toRegex() // compiled once per separated() tokenizer, not on every call
+                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+                    val output = tokenizer.tokenize(input, current, line)
+                    if (output.consumed()) {
+                        if ((input.length > current + output.consumedChars) && endPattern.matches(input[current + output.consumedChars].toString())) {
+                            return output
+                        }
+                        if ((input.length == current + output.consumedChars)) { // the match ends exactly at the end of the input
+                            return output
+                        }
+                    }
+
+                    return TokenizerOutput.NONE
+                }
+
+            }
+        }
+
+        // Change dsl.token value (rawValue will be kept)
+        fun mapValue(tokenizer: Tokenizer, mapper: (Any) -> Any): Tokenizer {
+            return object : Tokenizer {
+                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+                    val output = tokenizer.tokenize(input, current, line)
+                    if(output.consumed()) {
+                        return output.mapToken { output.token.mapValue(mapper) }
+                    }
+
+                    return TokenizerOutput.NONE
+                }
+            }
+        }
+
+        // Complex tokenizer consisting of smaller ones (like "3.14" = regex(\d) + keyword(.) + regex(\d))
+        fun combined(createToken: (List<String>, TokenPosition) -> Token, vararg tokenizers: Tokenizer): Tokenizer {
+            return object : Tokenizer {
+                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+                    var consumedChars = 0
+                    val tokens: MutableList<Token> = mutableListOf()
+
+                    for(tokenizer in tokenizers) { // every part has to match in sequence, otherwise the whole match fails
+                        val output = tokenizer.tokenize(input, current + consumedChars, line)
+                        if(output.consumed()) {
+                            consumedChars += output.consumedChars
+                            tokens.add(output.token)
+                        } else {
+                            return TokenizerOutput.NONE
+                        }
+                    }
+
+                    if(consumedChars == 0) { // only possible when no tokenizers were passed; also guards tokens.first() below
+                        return TokenizerOutput.NONE
+                    }
+
+                    return TokenizerOutput.produce(consumedChars, createToken(tokens.map { it.rawValue }, TokenPosition(line, tokens.first().position.beginCol, tokens.last().position.endCol)))
+                }
+
+            }
+        }
+
+        // First matched tokenizer
+        fun firstOf(tokenizers: List<Tokenizer>): Tokenizer {
+            return object : Tokenizer {
+                override fun tokenize(input: String, current: Int, line: Int): TokenizerOutput {
+                    for (tokenizer in tokenizers) {
+                        val
output = tokenizer.tokenize(input, current, line)
+                        if(output.consumed()) {
+                            return output
+                        }
+                    }
+
+                    return TokenizerOutput.NONE
+                }
+
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/interpreter/Interpreter.kt b/src/main/kotlin/interpreter/Interpreter.kt
new file mode 100644
index 0000000..19ac35c
--- /dev/null
+++ b/src/main/kotlin/interpreter/Interpreter.kt
@@ -0,0 +1,27 @@
+package interpreter
+
+import dsl.token.tokenizer.DefaultTokenizer
+import java.io.BufferedInputStream
+import java.io.File
+import java.io.FileInputStream
+
+/**
+ * Front end of the interpreter; at this stage it only tokenizes the input and prints the result.
+ */
+class Interpreter {
+    /** Tokenizes [code], given as one string with `\n`-separated lines, and prints the tokens. */
+    fun run(code: String) {
+        run(code.split("\n"))
+    }
+
+    /** Tokenizes the lines of [file] and prints the tokens. */
+    fun run(file: File) {
+        run(file.readLines())
+    }
+
+    // Shared implementation behind both public overloads.
+    private fun run(lines: List<String>) {
+        val tokens = DefaultTokenizer().tokenize(lines)
+        println(tokens)
+    }
+}
\ No newline at end of file