Enable parsing line by line

This commit is contained in:
2021-11-04 11:11:43 +01:00
parent 759f0b5c00
commit 324336a897

View File

@@ -131,7 +131,6 @@ tokenize input = tokens >>= (\t -> Right $ filter tokenFilter t)
tokenFilter :: Token -> Bool tokenFilter :: Token -> Bool
tokenFilter (WhiteSpace) = False tokenFilter (WhiteSpace) = False
tokenFilter (Comment _) = False tokenFilter (Comment _) = False
tokenFilter (NewLine) = False
tokenFilter _ = True tokenFilter _ = True
tokenizers :: Tokenizer tokenizers :: Tokenizer
@@ -139,9 +138,9 @@ tokenizers = anyTokenizer
[ keywordTokenizer False "\n" NewLine [ keywordTokenizer False "\n" NewLine
, tokenizeWhitespace , tokenizeWhitespace
, tokenizeComment , tokenizeComment
, sepTokenizer Char.isSpace tokenizeOperators , tokenizeOperators
, sepTokenizer Char.isSpace tokenizeHex , tokenizeHex
, sepTokenizer Char.isSpace tokenizeDecimal , tokenizeDecimal
, tokenizeIdentifier , tokenizeIdentifier
, keywordTokenizer False ":" Colon , keywordTokenizer False ":" Colon
, keywordTokenizer False "&" Ampersand , keywordTokenizer False "&" Ampersand
@@ -160,7 +159,8 @@ data AST = EmptyNode
| ParamNode AST | ParamNode AST
| ParamsNode [AST] | ParamsNode [AST]
| InstructionNode AST AST | InstructionNode AST AST
| LineNode AST AST | LineNode AST AST
| ProgramNode [AST]
deriving (Eq, Show) deriving (Eq, Show)
type ConsumedTokens = Int type ConsumedTokens = Int
@@ -168,40 +168,50 @@ data ParseResult = ParseResult AST ConsumedTokens deriving (Eq, Show)
type Parser = [Token] -> Maybe ParseResult type Parser = [Token] -> Maybe ParseResult
-- OP := push | pop ...
parseOperator :: Parser parseOperator :: Parser
parseOperator ((Operator op):_) = Just $ ParseResult (OperatorNode op) 1 parseOperator ((Operator op):_) = Just $ ParseResult (OperatorNode op) 1
parseOperator _ = Nothing parseOperator _ = Nothing
-- INT := 0 | 1 | ... | 0x00 | 0x01 | ... | 'a' | 'b' | ...
parseInt :: Parser parseInt :: Parser
parseInt ((IntLiteral int):_) = Just $ ParseResult (IntegerNode int) 1 parseInt ((IntLiteral int):_) = Just $ ParseResult (IntegerNode int) 1
parseInt _ = Nothing parseInt _ = Nothing
-- ID := [alnum, '_']+
parseIdentifier :: Parser parseIdentifier :: Parser
parseIdentifier ((Identifier id):_) = Just $ ParseResult (IdentifierNode id) 1 parseIdentifier ((Identifier id):_) = Just $ ParseResult (IdentifierNode id) 1
parseIdentifier _ = Nothing parseIdentifier _ = Nothing
-- ':'
parseColon :: Parser parseColon :: Parser
parseColon ((Colon):_) = Just $ ParseResult ColonNode 1 parseColon ((Colon):_) = Just $ ParseResult ColonNode 1
parseColon _ = Nothing parseColon _ = Nothing
-- '&'
parseAmpersand :: Parser parseAmpersand :: Parser
parseAmpersand ((Ampersand):_) = Just $ ParseResult AmpersandNode 1 parseAmpersand ((Ampersand):_) = Just $ ParseResult AmpersandNode 1
parseAmpersand _ = Nothing parseAmpersand _ = Nothing
-- label_def := ID ':'
parseLabelDef :: Parser parseLabelDef :: Parser
parseLabelDef = parseSeq [parseIdentifier, parseColon] combine parseLabelDef = parseSeq [parseIdentifier, parseColon] combine
where combine = (\[(IdentifierNode id), _] -> LabelDefNode id) where combine = (\[(IdentifierNode id), _] -> LabelDefNode id)
-- label_ref := '&' ID
parseLabelRef :: Parser parseLabelRef :: Parser
parseLabelRef = parseSeq [parseAmpersand, parseIdentifier] combine parseLabelRef = parseSeq [parseAmpersand, parseIdentifier] combine
where combine = (\[_, (IdentifierNode id)] -> LabelRefNode id) where combine = (\[_, (IdentifierNode id)] -> LabelRefNode id)
-- param := INT | label_ref
parseParam :: Parser parseParam :: Parser
parseParam = parseAlt [parseInt, parseLabelRef] ParamNode parseParam = parseAlt [parseInt, parseLabelRef] ParamNode
-- instr := OP param*
parseInstr :: Parser parseInstr :: Parser
parseInstr = parseSeq [parseOperator, parseMany0 parseParam ParamsNode] (\[op, ps] -> InstructionNode op ps) parseInstr = parseSeq [parseOperator, parseMany0 parseParam ParamsNode] (\[op, ps] -> InstructionNode op ps)
-- line := label_def? instr?
parseLine :: Parser parseLine :: Parser
parseLine = parseSeq [parseOptionally parseLabelDef, parseOptionally parseInstr] (\[label, instr] -> LineNode label instr) parseLine = parseSeq [parseOptionally parseLabelDef, parseOptionally parseInstr] (\[label, instr] -> LineNode label instr)
@@ -210,14 +220,17 @@ mapAST parser mapper tokens = do
(ParseResult ast consumed) <- parser tokens (ParseResult ast consumed) <- parser tokens
return $ ParseResult (mapper ast) consumed return $ ParseResult (mapper ast) consumed
-- a?
parseOptionally :: Parser -> Parser parseOptionally :: Parser -> Parser
parseOptionally parser input = case parser input of parseOptionally parser input = case parser input of
Nothing -> Just $ ParseResult EmptyNode 0 Nothing -> Just $ ParseResult EmptyNode 0
result -> result result -> result
-- a*
parseMany0 :: Parser -> ([AST] -> AST) -> Parser parseMany0 :: Parser -> ([AST] -> AST) -> Parser
parseMany0 parser combiner = parseOptionally $ parseMany parser combiner parseMany0 parser combiner = parseOptionally $ parseMany parser combiner
-- a+
parseMany :: Parser -> ([AST] -> AST) -> Parser parseMany :: Parser -> ([AST] -> AST) -> Parser
parseMany parser combiner tokens = if null asts parseMany parser combiner tokens = if null asts
then Nothing then Nothing
@@ -228,19 +241,23 @@ parseMany parser combiner tokens = if null asts
asts = map (\(ParseResult a _) -> a) results asts = map (\(ParseResult a _) -> a) results
ast = combiner asts ast = combiner asts
-- a a a a a a a...
parseGreedy :: Parser -> [Token] -> [ParseResult] parseGreedy :: Parser -> [Token] -> [ParseResult]
parseGreedy parser tokens = case parser tokens of parseGreedy parser tokens = case parser tokens of
(Just r@(ParseResult ast consumed)) -> r : parseGreedy parser (drop consumed tokens) (Just r@(ParseResult ast consumed)) -> r : parseGreedy parser (drop consumed tokens)
Nothing -> [] Nothing -> []
-- a | b | c
parseAlt :: [Parser] -> (AST -> AST) -> Parser parseAlt :: [Parser] -> (AST -> AST) -> Parser
parseAlt parsers mapper tokens = do parseAlt parsers mapper tokens = do
(ParseResult ast consumed) <- parseAny parsers tokens (ParseResult ast consumed) <- parseAny parsers tokens
return $ ParseResult (mapper ast) consumed return $ ParseResult (mapper ast) consumed
-- a | b | c
parseAny :: [Parser] -> Parser parseAny :: [Parser] -> Parser
parseAny parsers tokens = Monoid.getFirst . Monoid.mconcat . map Monoid.First $ sequenceA parsers tokens parseAny parsers tokens = Monoid.getFirst . Monoid.mconcat . map Monoid.First $ sequenceA parsers tokens
-- a b c
parseSeq :: [Parser] -> ([AST] -> AST) -> Parser parseSeq :: [Parser] -> ([AST] -> AST) -> Parser
parseSeq parsers combiner tokens = do parseSeq parsers combiner tokens = do
results <- parseAll parsers tokens results <- parseAll parsers tokens
@@ -250,6 +267,7 @@ parseSeq parsers combiner tokens = do
then return $ ParseResult (combiner asts) consumed then return $ ParseResult (combiner asts) consumed
else Nothing else Nothing
-- a b c
parseAll :: [Parser] -> [Token] -> Maybe [ParseResult] parseAll :: [Parser] -> [Token] -> Maybe [ParseResult]
parseAll [] _ = Just [] parseAll [] _ = Just []
parseAll (p:ps) tokens = do parseAll (p:ps) tokens = do
@@ -257,8 +275,18 @@ parseAll (p:ps) tokens = do
rest <- parseAll ps (drop consumed tokens) rest <- parseAll ps (drop consumed tokens)
return $ (ParseResult ast consumed) : rest return $ (ParseResult ast consumed) : rest
parse :: [Token] -> Either String [AST] -- 'Nothing' if not consumed tokens exist
parse [] = Right [] assertConsumed :: Parser -> Parser
parse tokens = case parseLine tokens of assertConsumed parser tokens = do
(Just (ParseResult ast consumed)) -> parse (drop consumed tokens) >>= (\rest -> return $ ast : rest) r@(ParseResult _ consumed) <- parser tokens
Nothing -> Left $ "Unexpected token: " ++ (show . head) tokens if null (drop consumed tokens)
then return r
else Nothing
parse :: [Token] -> Either String AST
parse tokens = case sequenceA results of
(Just r) -> Right $ ProgramNode $ map (\(ParseResult ast _) -> ast) r
Nothing -> Left "Unexpected token"
where
lines = U.explode (==NewLine) tokens
results = map (assertConsumed parseLine) lines