Create string tokenizer
This commit is contained in:
@@ -10,6 +10,7 @@ import qualified Util as U
|
||||
-- | Lexical tokens produced by the tokenizers in this module.
data Token
  = Operator VM.Op
    -- ^ An operator, carrying its 'VM.Op'.
  | KeywordLiteral String
    -- ^ A keyword, carrying its text.
  | IntLiteral Int
    -- ^ An integer value; character literals are also stored this way,
    -- as their numeric code.
  | StringLiteral String
    -- ^ The contents of a string literal.
  | WhiteSpace
  | Comment String
    -- ^ A comment, carrying a 'String' payload.
  deriving (Eq, Show)
|
||||
@@ -68,23 +69,23 @@ tokenizeHex input = if isPrefix && len > 0
|
||||
numberStr = takeWhile Char.isHexDigit (drop 2 input)
|
||||
|
||||
-- | Tokenize a character literal into an 'IntLiteral' holding its code.
--
-- Two forms are recognised at the start of the input:
--
-- * an escaped character such as @'\\n'@ (four input characters), whose
--   escape letter is resolved through 'U.controlChar'; an unknown escape
--   letter makes the whole tokenizer return 'Nothing';
-- * a plain character such as @'a'@ (three input characters), converted
--   with 'ord'.
--
-- The escape table lives only in 'U.controlChar', so character and string
-- literals accept exactly the same set of escapes.
tokenizeChar :: Tokenizer
tokenizeChar ('\'':'\\':x:'\'':_) =
  -- 4 = opening quote + backslash + escape letter + closing quote
  (\code -> TokenizeResult (IntLiteral code) 4) <$> U.controlChar x
tokenizeChar ('\'':x:'\'':_) =
  -- 3 = opening quote + character + closing quote
  Just $ TokenizeResult (IntLiteral . ord $ x) 3
tokenizeChar _ = Nothing
|
||||
|
||||
-- | Tokenize a double-quoted string literal into a 'StringLiteral'.
--
-- The literal must start at the beginning of the input and be closed on the
-- same line. Escape sequences are decoded with 'U.unescape'; if that fails
-- the tokenizer returns 'Nothing'. The reported length counts the raw
-- (still escaped) text plus the two surrounding quotes.
tokenizeString :: Tokenizer
tokenizeString ('"':xs) = do
  raw <- extractString xs
  unescaped <- U.unescape raw
  return $ TokenizeResult (StringLiteral unescaped) (length raw + 2)
  where
    -- Collect the raw text up to (not including) the closing quote.
    -- Returns Nothing when the input or the line ends first.
    extractString [] = Nothing
    -- A backslash and the character after it are kept together, so an
    -- escaped quote (\") does not terminate the literal.
    extractString ('\\':c:rest)
      | c == '\n' = Nothing
      | otherwise = (\r -> '\\' : c : r) <$> extractString rest
    extractString (c:rest)
      | c == '"' = Just []
      | c == '\n' = Nothing
      | otherwise = (c :) <$> extractString rest
tokenizeString _ = Nothing
|
||||
|
||||
tokenizeComment :: Tokenizer
|
||||
tokenizeComment [] = Nothing
|
||||
tokenizeComment (x:xs) = if x == ';'
|
||||
@@ -124,5 +125,6 @@ tokenizers = anyTokenizer
|
||||
, sepTokenizer Char.isSpace tokenizeOperators
|
||||
, sepTokenizer Char.isSpace tokenizeHex
|
||||
, sepTokenizer Char.isSpace tokenizeDecimal
|
||||
, sepTokenizer Char.isSpace tokenizeChar
|
||||
, tokenizeChar
|
||||
, tokenizeString
|
||||
]
|
||||
37
app/Util.hs
37
app/Util.hs
@@ -1,10 +1,14 @@
|
||||
module Util (
|
||||
toLowerCase,
|
||||
byteStr,
|
||||
bytesStr
|
||||
bytesStr,
|
||||
head,
|
||||
unescape,
|
||||
controlChar
|
||||
) where
|
||||
|
||||
import Data.List
|
||||
import Prelude hiding (head)
|
||||
import Data.List hiding (head)
|
||||
import Data.Word
|
||||
import Numeric (showHex)
|
||||
import qualified Data.Char as Char
|
||||
@@ -26,4 +30,31 @@ insertAtN c n xs = insertAtN' n xs
|
||||
insertAtN' m (x:xs) = x : insertAtN' (m-1) xs
|
||||
|
||||
-- | Left-pad @string@ with copies of @char@ until it is @width@ characters
-- long.
--
-- A string that is already @width@ characters or longer is returned
-- unchanged, because 'replicate' yields an empty list for a non-positive
-- count.
pad :: Char -> Int -> String -> String
pad char width string = replicate (width - length string) char ++ string
|
||||
|
||||
-- | Total replacement for the Prelude's @head@: the first element of the
-- list, or 'Nothing' when the list is empty.
head :: [a] -> Maybe a
head = foldr (\first _ -> Just first) Nothing
|
||||
|
||||
-- | Decode backslash escape sequences in a string.
--
-- Each backslash followed by a character is replaced by the character whose
-- code 'controlChar' gives for that character. If 'controlChar' rejects any
-- escape the whole result is 'Nothing'. Every other character, including a
-- backslash that is the last character of the input, is copied through
-- unchanged.
unescape :: String -> Maybe String
unescape input = case input of
  [] -> Just []
  '\\' : code : remainder ->
    (:) <$> (Char.chr <$> controlChar code) <*> unescape remainder
  plain : remainder ->
    (plain :) <$> unescape remainder
|
||||
|
||||
-- | Map the character that follows a backslash in an escape sequence to the
-- numeric code of the character it denotes.
--
-- Returns 'Nothing' for any character that is not in the table below.
controlChar :: Char -> Maybe Int
controlChar escape = lookup escape escapeCodes
  where
    escapeCodes :: [(Char, Int)]
    escapeCodes =
      [ ('n', 10)
      , ('t', 9)
      , ('v', 11)
      , ('b', 8)
      , ('r', 13)
      , ('f', 12)
      , ('a', 7)
      , ('\\', 92)
      , ('"', 34)
      , ('\'', 39)
      , ('0', 0)
      ]
|
||||
Reference in New Issue
Block a user