Create string tokenizer

This commit is contained in:
2021-11-03 18:06:53 +01:00
parent ac030715ad
commit 8ae464047e
2 changed files with 51 additions and 18 deletions

View File

@@ -10,6 +10,7 @@ import qualified Util as U
data Token = Operator VM.Op data Token = Operator VM.Op
| KeywordLiteral String | KeywordLiteral String
| IntLiteral Int | IntLiteral Int
| StringLiteral String
| WhiteSpace | WhiteSpace
| Comment String | Comment String
deriving (Eq, Show) deriving (Eq, Show)
@@ -68,23 +69,23 @@ tokenizeHex input = if isPrefix && len > 0
numberStr = takeWhile Char.isHexDigit (drop 2 input) numberStr = takeWhile Char.isHexDigit (drop 2 input)
tokenizeChar :: Tokenizer tokenizeChar :: Tokenizer
tokenizeChar ('\'':'\\':x:'\'':_) = seq >>= (\s -> return $ TokenizeResult (IntLiteral s) 4) tokenizeChar ('\'':'\\':x:'\'':_) = U.controlChar x >>= (\s -> return $ TokenizeResult (IntLiteral s) 4)
where
seq = case x of
'n' -> Just 10
't' -> Just 9
'v' -> Just 11
'b' -> Just 8
'r' -> Just 13
'f' -> Just 12
'a' -> Just 7
'\\' -> Just 92
'\'' -> Just 39
'0' -> Just 0
_ -> Nothing
tokenizeChar ('\'':x:'\'':_) = Just $ TokenizeResult (IntLiteral . ord $ x) 3 tokenizeChar ('\'':x:'\'':_) = Just $ TokenizeResult (IntLiteral . ord $ x) 3
tokenizeChar _ = Nothing tokenizeChar _ = Nothing
-- | Tokenize a double-quoted string literal.
--
-- The input must start with @"@ and the literal must be closed by an
-- unescaped @"@ before any newline or the end of the input; otherwise the
-- result is 'Nothing'. The raw contents are passed through 'U.unescape', so
-- an escape sequence it rejects also makes the whole tokenizer fail.
--
-- The length stored in the result is the raw (still escaped) contents plus
-- the two surrounding quotes.
tokenizeString :: Tokenizer
tokenizeString ('"':xs) = do
  string <- extractString xs
  unescaped <- U.unescape string
  return $ TokenizeResult (StringLiteral unescaped) (length string + 2)
  where
    -- Collect the raw characters up to (not including) the closing quote.
    extractString [] = Nothing
    -- A backslash and the character after it are taken as a pair, so an
    -- escaped quote (\") does not terminate the literal and an escaped
    -- backslash (\\) cannot hide the closing quote that follows it.
    extractString ('\\':c:rest)
      | c == '\n' = Nothing
      | otherwise = (\r -> '\\' : c : r) <$> extractString rest
    extractString (c:rest)
      | c == '"' = Just []
      | c == '\n' = Nothing
      | otherwise = (c :) <$> extractString rest
tokenizeString _ = Nothing
tokenizeComment :: Tokenizer tokenizeComment :: Tokenizer
tokenizeComment [] = Nothing tokenizeComment [] = Nothing
tokenizeComment (x:xs) = if x == ';' tokenizeComment (x:xs) = if x == ';'
@@ -124,5 +125,6 @@ tokenizers = anyTokenizer
, sepTokenizer Char.isSpace tokenizeOperators , sepTokenizer Char.isSpace tokenizeOperators
, sepTokenizer Char.isSpace tokenizeHex , sepTokenizer Char.isSpace tokenizeHex
, sepTokenizer Char.isSpace tokenizeDecimal , sepTokenizer Char.isSpace tokenizeDecimal
, sepTokenizer Char.isSpace tokenizeChar , tokenizeChar
, tokenizeString
] ]

View File

@@ -1,10 +1,14 @@
module Util ( module Util (
toLowerCase, toLowerCase,
byteStr, byteStr,
bytesStr bytesStr,
head,
unescape,
controlChar
) where ) where
import Data.List import Prelude hiding (head)
import Data.List hiding (head)
import Data.Word import Data.Word
import Numeric (showHex) import Numeric (showHex)
import qualified Data.Char as Char import qualified Data.Char as Char
@@ -27,3 +31,30 @@ insertAtN c n xs = insertAtN' n xs
pad :: Char -> Int -> String -> String pad :: Char -> Int -> String -> String
pad char width string = replicate (width - length string) char ++ string pad char width string = replicate (width - length string) char ++ string
-- | Total replacement for the Prelude's @head@: the first element of the
-- list wrapped in 'Just', or 'Nothing' when the list is empty.
head :: [a] -> Maybe a
head xs = case xs of
  first : _ -> Just first
  []        -> Nothing
-- | Replace every backslash escape sequence in a string with the character
-- it denotes.
--
-- Each @\\c@ pair is translated through 'controlChar'; all other characters
-- are copied unchanged. Returns 'Nothing' when the string is malformed:
-- either an escape character that 'controlChar' does not recognise, or a
-- backslash at the very end of the string with nothing left to escape.
unescape :: String -> Maybe String
unescape [] = Just []
-- A dangling backslash is an incomplete escape; reject it like any other
-- invalid escape instead of silently keeping it as a literal backslash.
unescape ['\\'] = Nothing
unescape ('\\':code:rest) =
  (:) <$> (Char.chr <$> controlChar code) <*> unescape rest
unescape (c:rest) = (c :) <$> unescape rest
-- | Map the character that follows a backslash in an escape sequence to the
-- character code it stands for, or 'Nothing' if it is not a known escape.
controlChar :: Char -> Maybe Int
controlChar x = lookup x escapeCodes
  where
    -- Escape character paired with the code it produces.
    escapeCodes =
      [ ('n', 10)
      , ('t', 9)
      , ('v', 11)
      , ('b', 8)
      , ('r', 13)
      , ('f', 12)
      , ('a', 7)
      , ('\\', 92)
      , ('"', 34)
      , ('\'', 39)
      , ('0', 0)
      ]