Create string tokenizer

This commit is contained in:
2021-11-03 18:06:53 +01:00
parent ac030715ad
commit 8ae464047e
2 changed files with 51 additions and 18 deletions

View File

@@ -10,6 +10,7 @@ import qualified Util as U
-- | A lexical token produced by the tokenizers in this module.
data Token = Operator VM.Op
-- ^ Wraps a 'VM.Op' value.
| KeywordLiteral String
-- ^ Carries a 'String'; presumably the keyword/identifier text — TODO confirm
-- against the tokenizer that builds it (not visible here).
| IntLiteral Int
-- ^ An integer value; 'tokenizeChar' also uses it for character literals,
-- storing the character's code.
| StringLiteral String
-- ^ A string literal; 'tokenizeString' stores the already unescaped contents.
| WhiteSpace
-- ^ Carries no payload.
| Comment String
-- ^ Carries a 'String'; presumably the comment text — TODO confirm against
-- 'tokenizeComment'.
deriving (Eq, Show)
@@ -68,23 +69,23 @@ tokenizeHex input = if isPrefix && len > 0
numberStr = takeWhile Char.isHexDigit (drop 2 input)
-- | Tokenize a character literal into an 'IntLiteral' holding its code.
--
-- Two forms are recognised at the start of the input:
--
-- * quote, backslash, escape character, quote (4 characters consumed);
--   the escape character is resolved by 'U.controlChar', and an unknown
--   escape makes the whole tokenizer return 'Nothing';
-- * quote, any single character, quote (3 characters consumed).
--
-- Any other input yields 'Nothing'.
tokenizeChar :: Tokenizer
-- The escape table is deliberately not repeated here: 'U.controlChar' is the
-- single source of truth, which is also what 'U.unescape' relies on.
tokenizeChar ('\'':'\\':x:'\'':_) =
  (\code -> TokenizeResult (IntLiteral code) 4) <$> U.controlChar x
tokenizeChar ('\'':x:'\'':_) = Just $ TokenizeResult (IntLiteral . ord $ x) 3
tokenizeChar _ = Nothing
-- | Tokenize a double-quoted string literal into a 'StringLiteral'.
--
-- The input must start with a double quote. The literal extends to the next
-- unescaped double quote on the same line; its raw text is then passed
-- through 'U.unescape'. Returns 'Nothing' when the input does not start with
-- a double quote, when the literal is not terminated before a newline or the
-- end of input, or when 'U.unescape' rejects the raw text.
--
-- The reported length counts the raw (still escaped) text plus both quotes.
tokenizeString :: Tokenizer
tokenizeString ('"':rest) = do
  raw <- extractString rest
  unescaped <- U.unescape raw
  return $ TokenizeResult (StringLiteral unescaped) (length raw + 2)
  where
    -- Collect the raw text up to (not including) the closing quote.
    extractString [] = Nothing
    -- A backslash always travels with the character it escapes, so an
    -- escaped double quote does not terminate the literal.
    extractString ('\\':c:cs)
      | c == '\n' = Nothing
      | otherwise = (\r -> '\\' : c : r) <$> extractString cs
    extractString (c:cs)
      | c == '"' = Just []
      | c == '\n' = Nothing
      | otherwise = (c :) <$> extractString cs
tokenizeString _ = Nothing
tokenizeComment :: Tokenizer
tokenizeComment [] = Nothing
tokenizeComment (x:xs) = if x == ';'
@@ -124,5 +125,6 @@ tokenizers = anyTokenizer
, sepTokenizer Char.isSpace tokenizeOperators
, sepTokenizer Char.isSpace tokenizeHex
, sepTokenizer Char.isSpace tokenizeDecimal
, sepTokenizer Char.isSpace tokenizeChar
, tokenizeChar
, tokenizeString
]

View File

@@ -1,10 +1,14 @@
module Util (
toLowerCase,
byteStr,
bytesStr
bytesStr,
head,
unescape,
controlChar
) where
import Data.List
import Prelude hiding (head)
import Data.List hiding (head)
import Data.Word
import Numeric (showHex)
import qualified Data.Char as Char
@@ -26,4 +30,31 @@ insertAtN c n xs = insertAtN' n xs
insertAtN' m (x:xs) = x : insertAtN' (m-1) xs
-- | Left-pad @string@ with copies of @char@ until it is @width@ characters
-- long.
--
-- A string that is already @width@ characters or longer is returned
-- unchanged, because 'replicate' produces an empty list for a non-positive
-- count.
pad :: Char -> Int -> String -> String
pad char width string = replicate (width - length string) char ++ string
-- | Total replacement for the Prelude's @head@: the first element of a list,
-- or 'Nothing' when the list is empty.
head :: [a] -> Maybe a
head = foldr keepFirst Nothing
  where
    -- The accumulated result for the tail is ignored, so only the first
    -- cons cell of the list is ever inspected.
    keepFirst first _ = Just first
-- | Replace every backslash escape in the input with the character it
-- denotes.
--
-- Each backslash is resolved together with the character that follows it via
-- 'controlChar'; all other characters are copied unchanged. Returns
-- 'Nothing' if any escape is unknown to 'controlChar' or if the input ends
-- with a dangling backslash that has nothing left to escape.
unescape :: String -> Maybe String
unescape [] = Just []
-- An incomplete escape is malformed, just like an unknown one.
unescape ['\\'] = Nothing
unescape ('\\':x:xs) = (:) <$> (Char.chr <$> controlChar x) <*> unescape xs
unescape (x:xs) = (x :) <$> unescape xs
-- | Map the character following a backslash in an escape sequence to the
-- character code it denotes, or 'Nothing' for an unsupported escape.
controlChar :: Char -> Maybe Int
controlChar = flip lookup escapeCodes
  where
    -- (escape character, resulting character code)
    escapeCodes =
      [ ('n', 10)
      , ('t', 9)
      , ('v', 11)
      , ('b', 8)
      , ('r', 13)
      , ('f', 12)
      , ('a', 7)
      , ('\\', 92)
      , ('"', 34)
      , ('\'', 39)
      , ('0', 0)
      ]