187 lines
6.0 KiB
Haskell
187 lines
6.0 KiB
Haskell
module Assembler (
|
|
tokenize,
|
|
parse
|
|
) where
|
|
|
|
import qualified Data.Char as Char
|
|
import qualified Data.Monoid as Monoid
|
|
import qualified VirtualMachine as VM (Op(..), Instruction, Command, instructionByOp)
|
|
import qualified Util as U
|
|
|
|
data Token = Operator VM.Op
|
|
| IntLiteral Int
|
|
| StringLiteral String
|
|
| Identifier String
|
|
| Colon
|
|
| Ampersand
|
|
| NewLine
|
|
| WhiteSpace
|
|
| Comment String
|
|
deriving (Eq, Show)
|
|
|
|
type ConsumedChars = Int
|
|
data TokenizeResult = TokenizeResult Token ConsumedChars deriving (Eq, Show)
|
|
|
|
type Tokenizer = String -> Maybe TokenizeResult
|
|
|
|
type CaseSensitive = Bool
|
|
keywordTokenizer :: CaseSensitive -> String -> Token -> Tokenizer
|
|
keywordTokenizer _ _ _ [] = Nothing
|
|
keywordTokenizer cs kwd token input
|
|
| matches = Just $ TokenizeResult token len
|
|
| otherwise = Nothing
|
|
where
|
|
len = length kwd
|
|
mapper = if cs then id else U.toLowerCase
|
|
zipped = zipWith (==) (mapper kwd) (mapper . take len $ input)
|
|
matches = and zipped && len == length zipped
|
|
|
|
operatorTokenizer :: VM.Op -> Tokenizer
|
|
operatorTokenizer op input = keywordTokenizer False (U.toLowerCase . show $ op) (Operator op) input
|
|
|
|
tokenizeOperators :: Tokenizer
|
|
tokenizeOperators = anyTokenizer $ map operatorTokenizer [VM.Push ..]
|
|
|
|
tokenizeIdentifier :: Tokenizer
|
|
tokenizeIdentifier input@(x:_) = if null identifier || (not . Char.isAlpha) x
|
|
then Nothing
|
|
else Just $ TokenizeResult (Identifier identifier) (length identifier)
|
|
where
|
|
identifier = takeWhile (or . sequenceA [Char.isAlphaNum, (=='_')]) input
|
|
|
|
tokenizeWhitespace :: Tokenizer
|
|
tokenizeWhitespace [] = Nothing
|
|
tokenizeWhitespace (x:_)
|
|
| Char.isSpace x = Just $ TokenizeResult WhiteSpace 1
|
|
| otherwise = Nothing
|
|
|
|
tokenizeDecimal :: Tokenizer
|
|
tokenizeDecimal [] = Nothing
|
|
tokenizeDecimal input = if null numberStr
|
|
then Nothing
|
|
else Just $ TokenizeResult (IntLiteral number) len
|
|
where
|
|
number = read numberStr
|
|
len = length numberStr
|
|
numberStr = takeWhile Char.isDigit input
|
|
|
|
tokenizeHex :: Tokenizer
|
|
tokenizeHex [] = Nothing
|
|
tokenizeHex input = if isPrefix && len > 0
|
|
then Just $ TokenizeResult (IntLiteral number) (len + 2)
|
|
else Nothing
|
|
where
|
|
isPrefix = take 2 input == "0x"
|
|
number = read . ("0x"++) $ numberStr
|
|
len = length numberStr
|
|
numberStr = takeWhile Char.isHexDigit (drop 2 input)
|
|
|
|
tokenizeChar :: Tokenizer
|
|
tokenizeChar ('\'':'\\':x:'\'':_) = U.controlChar x >>= (\s -> return $ TokenizeResult (IntLiteral s) 4)
|
|
tokenizeChar ('\'':x:'\'':_) = Just $ TokenizeResult (IntLiteral . Char.ord $ x) 3
|
|
tokenizeChar _ = Nothing
|
|
|
|
tokenizeString :: Tokenizer
|
|
tokenizeString ('"':xs) = do
|
|
string <- extractString xs
|
|
unescaped <- U.unescape string
|
|
return $ TokenizeResult (StringLiteral unescaped) (length string + 2)
|
|
where
|
|
extractString [] = Nothing
|
|
extractString (x:xs)
|
|
| x == '"' = Just []
|
|
| x == '\n' = Nothing
|
|
| otherwise = extractString xs >>= (\r -> return $ x : r)
|
|
tokenizeString _ = Nothing
|
|
|
|
tokenizeComment :: Tokenizer
|
|
tokenizeComment [] = Nothing
|
|
tokenizeComment (x:xs) = if x == ';'
|
|
then Just $ TokenizeResult (Comment comment) (len + 1)
|
|
else Nothing
|
|
where
|
|
len = length comment
|
|
comment = takeWhile (/='\n') xs
|
|
|
|
type SeparatorPredicate = Char -> Bool
|
|
sepTokenizer :: SeparatorPredicate -> Tokenizer -> Tokenizer
|
|
sepTokenizer pred tokenizer input = do
|
|
(TokenizeResult token consumed) <- tokenizer input
|
|
let next = drop consumed input
|
|
let (isSep, consumed') = if null next
|
|
then (True, 0)
|
|
else if pred . head $ next
|
|
then (True, 1)
|
|
else (False, 0)
|
|
if isSep
|
|
then return $ TokenizeResult token (consumed + consumed')
|
|
else Nothing
|
|
|
|
anyTokenizer :: [Tokenizer] -> Tokenizer
|
|
anyTokenizer tokenizers input = Monoid.getFirst . Monoid.mconcat . map Monoid.First $ sequenceA tokenizers input
|
|
|
|
tokenize :: String -> Either String [Token]
|
|
tokenize [] = Right []
|
|
tokenize input = tokens >>= (\t -> Right $ filter tokenFilter t)
|
|
where
|
|
tokens = case tokenizers input of
|
|
(Just (TokenizeResult token chars)) -> tokenize (drop chars input) >>= (\rest -> return $ token : rest)
|
|
Nothing -> Left $ "Unknown token: " ++ take 20 input
|
|
|
|
tokenFilter :: Token -> Bool
|
|
tokenFilter (WhiteSpace) = False
|
|
tokenFilter (Comment _) = False
|
|
tokenFilter _ = True
|
|
|
|
tokenizers :: Tokenizer
|
|
tokenizers = anyTokenizer
|
|
[ keywordTokenizer False "\n" NewLine
|
|
, tokenizeWhitespace
|
|
, tokenizeComment
|
|
, sepTokenizer Char.isSpace tokenizeOperators
|
|
, sepTokenizer Char.isSpace tokenizeHex
|
|
, sepTokenizer Char.isSpace tokenizeDecimal
|
|
, tokenizeIdentifier
|
|
, keywordTokenizer False ":" Colon
|
|
, keywordTokenizer False "&" Ampersand
|
|
, tokenizeChar
|
|
, tokenizeString
|
|
]
|
|
|
|
data AST = OperatorNode VM.Op
|
|
| IntegerNode Int
|
|
| IdentifierNode String
|
|
deriving (Eq, Show)
|
|
|
|
type ConsumedTokens = Int
|
|
data ParseResult = ParseResult AST ConsumedTokens deriving (Eq, Show)
|
|
|
|
type Parser = [Token] -> Maybe ParseResult
|
|
|
|
parseOperator :: [Token] -> Maybe ParseResult
|
|
parseOperator ((Operator op):_) = Just $ ParseResult (OperatorNode op) 1
|
|
parseOperator _ = Nothing
|
|
|
|
parseInt :: [Token] -> Maybe ParseResult
|
|
parseInt ((IntLiteral int):_) = Just $ ParseResult (IntegerNode int) 1
|
|
parseInt _ = Nothing
|
|
|
|
parseIdentifier :: [Token] -> Maybe ParseResult
|
|
parseIdentifier ((Identifier id):_) = Just $ ParseResult (IdentifierNode id) 1
|
|
parseIdentifier _ = Nothing
|
|
|
|
parseAny :: [Parser] -> Parser
|
|
parseAny parsers tokens = Monoid.getFirst . Monoid.mconcat . map Monoid.First $ sequenceA parsers tokens
|
|
|
|
parse :: [Token] -> Either String [AST]
|
|
parse [] = Right []
|
|
parse tokens = case parsers tokens of
|
|
(Just (ParseResult ast consumed)) -> parse (drop consumed tokens) >>= (\rest -> return $ ast : rest)
|
|
Nothing -> Left "Unexpected token"
|
|
|
|
parsers :: Parser
|
|
parsers = parseAny
|
|
[ parseOperator
|
|
, parseInt
|
|
, parseIdentifier
|
|
] |