From aae14eb3f4cd602f7c9bb4651765fd7f0a948cde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=C5=82omiej=20Przemys=C5=82aw=20Pluta?= Date: Wed, 3 Nov 2021 11:30:49 +0100 Subject: [PATCH] Create whitespace tokenizer and common tokenizer function --- app/Assembler.hs | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/app/Assembler.hs b/app/Assembler.hs index eadaea4..16477cf 100644 --- a/app/Assembler.hs +++ b/app/Assembler.hs @@ -2,11 +2,16 @@ module Assembler ( tokenize ) where +import Data.Char as Char import Data.Monoid as Monoid import qualified VirtualMachine as VM (Op(..), Instruction, Command, instructionByOp) import qualified Util as U -data Token = Operator VM.Op | KeywordLiteral String | IntLiteral Int deriving (Eq, Show) +data Token = Operator VM.Op + | KeywordLiteral String + | IntLiteral Int + | WhiteSpace + deriving (Eq, Show) type ConsumedChars = Int data TokenizeResult = TokenizeResult Token ConsumedChars deriving (Eq, Show) @@ -31,11 +36,21 @@ tokenizeOperator op input = case keywordToken of Nothing -> Nothing where keywordToken = tokenizeKeyword False (U.toLowerCase . show $ op) input -anyTokenizer :: [Tokenizer] -> Tokenizer -anyTokenizer tokenizers input = Monoid.getFirst . Monoid.mconcat . map Monoid.First $ sequenceA tokenizers input - tokenizeOperators :: Tokenizer tokenizeOperators = anyTokenizer $ map tokenizeOperator [VM.Push ..] -tokenize :: Tokenizer -tokenize = tokenizeOperators +whitespaceTokenizer :: Tokenizer +whitespaceTokenizer [] = Nothing +whitespaceTokenizer (x:_) + | Char.isSpace x = Just $ TokenizeResult WhiteSpace 1 + | otherwise = Nothing + +anyTokenizer :: [Tokenizer] -> Tokenizer +anyTokenizer tokenizers input = Monoid.getFirst . Monoid.mconcat . map Monoid.First $ sequenceA tokenizers input + +tokenize :: String -> Either String [Token] +tokenize [] = Right [] +tokenize input = case runTokenizer input of + (Just (TokenizeResult token chars)) -> tokenize (drop chars input) >>= (\rest -> return $ token : rest) + Nothing -> Left $ "Unknown token: " ++ take 20 input + where runTokenizer = anyTokenizer [tokenizeOperators, whitespaceTokenizer] \ No newline at end of file