Files
mvm/app/Assembler/Parser.hs

170 lines
5.3 KiB
Haskell

{-# OPTIONS_GHC -Wno-incomplete-patterns #-}
module Assembler.Parser where
import Data.List (intercalate)
import Data.Monoid (First(..))
import qualified Assembler.Tokenizer as T (Token(..))
import VirtualMachine.VM (Op)
import Util (explode)
data Scope = Local | Global deriving (Eq, Show, Enum, Bounded)
data AST = Empty
| Operator Op
| Integer Int
| Identifier String
| Colon
| Ampersand
| Dot
| LabelDef Scope String
| LabelRef Scope String
| Param AST
| Params [AST]
| Instruction AST AST
| Line AST AST
| Program [AST]
deriving (Eq, Show)
type ConsumedTokens = Int
data ParseResult = ParseResult AST ConsumedTokens deriving (Eq, Show)
type Parser = [T.Token] -> Maybe ParseResult
-- OP := push | pop ...
parseOperator :: Parser
parseOperator ((T.Operator op):_) = Just $ ParseResult (Operator op) 1
parseOperator _ = Nothing
-- INT := 0 | 1 | ... | 0x00 | 0x01 | ... | 'a' | 'b' | ...
parseInt :: Parser
parseInt ((T.IntLiteral int):_) = Just $ ParseResult (Integer int) 1
parseInt _ = Nothing
-- ID := [alnum, '_']+
parseIdentifier :: Parser
parseIdentifier ((T.Identifier iden):_) = Just $ ParseResult (Identifier iden) 1
parseIdentifier _ = Nothing
-- ':'
parseColon :: Parser
parseColon (T.Colon:_) = Just $ ParseResult Colon 1
parseColon _ = Nothing
-- '&'
parseAmpersand :: Parser
parseAmpersand (T.Ampersand:_) = Just $ ParseResult Ampersand 1
parseAmpersand _ = Nothing
-- '.'
parseDot :: Parser
parseDot (T.Dot:_) = Just $ ParseResult Dot 1
parseDot _ = Nothing
-- label_def := '.'? ID ':'
parseLabelDef :: Parser
parseLabelDef = parseSeq [parseOptionally parseDot, parseIdentifier, parseColon] combine
where
combine [Dot, Identifier iden, _] = LabelDef Local iden
combine [_, Identifier iden, _] = LabelDef Global iden
-- label_ref := '&' ID
parseLabelRef :: Parser
parseLabelRef = parseSeq [parseAmpersand, parseOptionally parseDot, parseIdentifier] combine
where
combine [_, Dot, Identifier iden] = LabelRef Local iden
combine [_, _, Identifier iden] = LabelRef Global iden
-- param := INT | label_ref
parseParam :: Parser
parseParam = parseAlt [parseInt, parseLabelRef] Param
-- instr := OP param*
parseInstr :: Parser
parseInstr = parseSeq [parseOperator, parseMany0 parseParam Params] (\[op, ps] -> Instruction op ps)
-- line := label_def? instr?
parseLine :: Parser
parseLine = parseSeq [parseOptionally parseLabelDef, parseOptionally parseInstr] (\[label, instr] -> Line label instr)
mapAST :: Parser -> (AST -> AST) -> Parser
mapAST _ _ [] = Nothing
mapAST parser mapper tokens = do
(ParseResult ast consumed) <- parser tokens
return $ ParseResult (mapper ast) consumed
-- a?
parseOptionally :: Parser -> Parser
parseOptionally parser input = case parser input of
Nothing -> Just $ ParseResult Empty 0
result -> result
-- a*
parseMany0 :: Parser -> ([AST] -> AST) -> Parser
parseMany0 parser combiner = parseOptionally $ parseMany parser combiner
-- a+
parseMany :: Parser -> ([AST] -> AST) -> Parser
parseMany parser combiner tokens = if null asts
then Nothing
else Just $ ParseResult ast consumed
where
results = parseGreedy parser tokens
consumed = sum $ map (\(ParseResult _ c) -> c) results
asts = map (\(ParseResult a _) -> a) results
ast = combiner asts
-- a a a a a a a...
parseGreedy :: Parser -> [T.Token] -> [ParseResult]
parseGreedy parser tokens = case parser tokens of
(Just r@(ParseResult _ consumed)) -> r : parseGreedy parser (drop consumed tokens)
Nothing -> []
-- a | b | c
parseAlt :: [Parser] -> (AST -> AST) -> Parser
parseAlt parsers mapper tokens = do
(ParseResult ast consumed) <- parseAny parsers tokens
return $ ParseResult (mapper ast) consumed
-- a | b | c
parseAny :: [Parser] -> Parser
parseAny _ [] = Nothing
parseAny parsers tokens = getFirst . mconcat . map First $ sequenceA parsers tokens
-- a b c
parseSeq :: [Parser] -> ([AST] -> AST) -> Parser
parseSeq _ _ [] = Nothing
parseSeq parsers combiner tokens = do
results <- parseAll parsers tokens
let consumed = sum $ map (\(ParseResult _ c) -> c) results
let asts = map (\(ParseResult a _) -> a) results
if length asts == length parsers
then return $ ParseResult (combiner asts) consumed
else Nothing
-- a b c
parseAll :: [Parser] -> [T.Token] -> Maybe [ParseResult]
parseAll [] _ = Just []
parseAll (p:ps) tokens = do
(ParseResult ast consumed) <- p tokens
rest <- parseAll ps (drop consumed tokens)
return $ ParseResult ast consumed : rest
-- 'Nothing' if not consumed tokens exist
assertConsumed :: Parser -> Parser
assertConsumed _ [] = Nothing
assertConsumed parser tokens = do
r@(ParseResult _ consumed) <- parser tokens
if null (drop consumed tokens)
then return r
else Nothing
parse :: [T.Token] -> Either String AST
parse tokens = do
let codeLines = explode (==T.NewLine) tokens
let results = map (assertConsumed parseLine) codeLines
let errors = filter ((==Nothing) . snd) $ zip codeLines results
let errorMsg = "Parse error(s):\n" ++ intercalate "\n" (map (show . fst) errors)
case sequenceA results of
(Just r) -> return $ Program $ map (\(ParseResult ast _) -> ast) r
Nothing -> Left errorMsg