Create string tokenizer
This commit is contained in:
@@ -10,6 +10,7 @@ import qualified Util as U
|
|||||||
data Token = Operator VM.Op
|
data Token = Operator VM.Op
|
||||||
| KeywordLiteral String
|
| KeywordLiteral String
|
||||||
| IntLiteral Int
|
| IntLiteral Int
|
||||||
|
| StringLiteral String
|
||||||
| WhiteSpace
|
| WhiteSpace
|
||||||
| Comment String
|
| Comment String
|
||||||
deriving (Eq, Show)
|
deriving (Eq, Show)
|
||||||
@@ -68,23 +69,23 @@ tokenizeHex input = if isPrefix && len > 0
|
|||||||
numberStr = takeWhile Char.isHexDigit (drop 2 input)
|
numberStr = takeWhile Char.isHexDigit (drop 2 input)
|
||||||
|
|
||||||
tokenizeChar :: Tokenizer
|
tokenizeChar :: Tokenizer
|
||||||
tokenizeChar ('\'':'\\':x:'\'':_) = seq >>= (\s -> return $ TokenizeResult (IntLiteral s) 4)
|
tokenizeChar ('\'':'\\':x:'\'':_) = U.controlChar x >>= (\s -> return $ TokenizeResult (IntLiteral s) 4)
|
||||||
where
|
|
||||||
seq = case x of
|
|
||||||
'n' -> Just 10
|
|
||||||
't' -> Just 9
|
|
||||||
'v' -> Just 11
|
|
||||||
'b' -> Just 8
|
|
||||||
'r' -> Just 13
|
|
||||||
'f' -> Just 12
|
|
||||||
'a' -> Just 7
|
|
||||||
'\\' -> Just 92
|
|
||||||
'\'' -> Just 39
|
|
||||||
'0' -> Just 0
|
|
||||||
_ -> Nothing
|
|
||||||
tokenizeChar ('\'':x:'\'':_) = Just $ TokenizeResult (IntLiteral . ord $ x) 3
|
tokenizeChar ('\'':x:'\'':_) = Just $ TokenizeResult (IntLiteral . ord $ x) 3
|
||||||
tokenizeChar _ = Nothing
|
tokenizeChar _ = Nothing
|
||||||
|
|
||||||
|
-- | Tokenize a double-quoted string literal at the start of the input.
--
-- The literal must open with a double quote and be closed by an unescaped
-- double quote before any line break. The raw body is decoded with
-- 'U.unescape', so an escape sequence that 'U.unescape' rejects makes the
-- whole literal fail. The second field of the result is the length of the
-- raw (still escaped) body plus the two delimiting quotes.
--
-- Returns 'Nothing' when the input does not start with a double quote, when
-- the literal is unterminated or contains a line break, or when unescaping
-- fails.
tokenizeString :: Tokenizer
tokenizeString ('"':xs) = do
  raw <- extractString xs
  unescaped <- U.unescape raw
  return $ TokenizeResult (StringLiteral unescaped) (length raw + 2)
  where
    -- Collect the raw body up to (not including) the closing quote.
    extractString [] = Nothing
    -- A backslash escapes the character after it, so an escaped quote must
    -- not terminate the literal. The pair is kept intact so that
    -- 'U.unescape' can decode it afterwards.
    extractString ('\\':c:rest)
      | c == '\n' = Nothing
      | otherwise = fmap (\r -> '\\' : c : r) (extractString rest)
    extractString (c:rest)
      | c == '"' = Just []
      | c == '\n' = Nothing
      | otherwise = fmap (c :) (extractString rest)
tokenizeString _ = Nothing
|
||||||
|
|
||||||
tokenizeComment :: Tokenizer
|
tokenizeComment :: Tokenizer
|
||||||
tokenizeComment [] = Nothing
|
tokenizeComment [] = Nothing
|
||||||
tokenizeComment (x:xs) = if x == ';'
|
tokenizeComment (x:xs) = if x == ';'
|
||||||
@@ -124,5 +125,6 @@ tokenizers = anyTokenizer
|
|||||||
, sepTokenizer Char.isSpace tokenizeOperators
|
, sepTokenizer Char.isSpace tokenizeOperators
|
||||||
, sepTokenizer Char.isSpace tokenizeHex
|
, sepTokenizer Char.isSpace tokenizeHex
|
||||||
, sepTokenizer Char.isSpace tokenizeDecimal
|
, sepTokenizer Char.isSpace tokenizeDecimal
|
||||||
, sepTokenizer Char.isSpace tokenizeChar
|
, tokenizeChar
|
||||||
|
, tokenizeString
|
||||||
]
|
]
|
||||||
35
app/Util.hs
35
app/Util.hs
@@ -1,10 +1,14 @@
|
|||||||
module Util (
|
module Util (
|
||||||
toLowerCase,
|
toLowerCase,
|
||||||
byteStr,
|
byteStr,
|
||||||
bytesStr
|
bytesStr,
|
||||||
|
head,
|
||||||
|
unescape,
|
||||||
|
controlChar
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Data.List
|
import Prelude hiding (head)
|
||||||
|
import Data.List hiding (head)
|
||||||
import Data.Word
|
import Data.Word
|
||||||
import Numeric (showHex)
|
import Numeric (showHex)
|
||||||
import qualified Data.Char as Char
|
import qualified Data.Char as Char
|
||||||
@@ -27,3 +31,30 @@ insertAtN c n xs = insertAtN' n xs
|
|||||||
|
|
||||||
pad :: Char -> Int -> String -> String
|
pad :: Char -> Int -> String -> String
|
||||||
pad char width string = replicate (width - length string) char ++ string
|
pad char width string = replicate (width - length string) char ++ string
|
||||||
|
|
||||||
|
-- | Total variant of the Prelude's @head@: the first element of a list,
-- or 'Nothing' when the list is empty.
head :: [a] -> Maybe a
head xs = case xs of
  (first : _) -> Just first
  []          -> Nothing
|
||||||
|
|
||||||
|
-- | Replace backslash escape sequences in a string with the characters they
-- denote.
--
-- A backslash together with the character after it is decoded through
-- 'controlChar'; every other character is copied through unchanged.
--
-- Returns 'Nothing' if an escape is not recognised by 'controlChar', or if
-- the string ends with a lone backslash (an incomplete escape sequence).
unescape :: String -> Maybe String
unescape [] = Just []
-- A trailing backslash has nothing to escape; reject it like any other
-- malformed escape instead of silently keeping it as a literal character.
unescape ['\\'] = Nothing
unescape ('\\':x:xs) = do
  cc <- fmap Char.chr $ controlChar x
  rest <- unescape xs
  return $ cc : rest
unescape (x:xs) = fmap (x :) (unescape xs)
|
||||||
|
|
||||||
|
-- | Map the character that follows a backslash in an escape sequence to the
-- character code it stands for.
--
-- Recognised escape characters are @n@, @t@, @v@, @b@, @r@, @f@, @a@, the
-- backslash, the double quote, the single quote and @0@. Any other character
-- yields 'Nothing'.
controlChar :: Char -> Maybe Int
controlChar x = lookup x escapeCodes
  where
    -- Escape character paired with the code it denotes.
    escapeCodes =
      [ ('n', 10)
      , ('t', 9)
      , ('v', 11)
      , ('b', 8)
      , ('r', 13)
      , ('f', 12)
      , ('a', 7)
      , ('\\', 92)
      , ('"', 34)
      , ('\'', 39)
      , ('0', 0)
      ]
|
||||||
Reference in New Issue
Block a user