Create whitespace tokenizer and common tokenizer function

2021-11-03 11:30:49 +01:00
parent a73ab7fc89
commit aae14eb3f4


@@ -2,11 +2,16 @@ module Assembler (
   tokenize
 ) where
 
+import Data.Char as Char
 import Data.Monoid as Monoid
 import qualified VirtualMachine as VM (Op(..), Instruction, Command, instructionByOp)
 import qualified Util as U
 
-data Token = Operator VM.Op | KeywordLiteral String | IntLiteral Int deriving (Eq, Show)
+data Token = Operator VM.Op
+           | KeywordLiteral String
+           | IntLiteral Int
+           | WhiteSpace
+           deriving (Eq, Show)
 
 type ConsumedChars = Int
 data TokenizeResult = TokenizeResult Token ConsumedChars deriving (Eq, Show)
@@ -31,11 +36,21 @@ tokenizeOperator op input = case keywordToken of
   Nothing -> Nothing
   where keywordToken = tokenizeKeyword False (U.toLowerCase . show $ op) input
 
-anyTokenizer :: [Tokenizer] -> Tokenizer
-anyTokenizer tokenizers input = Monoid.getFirst . Monoid.mconcat . map Monoid.First $ sequenceA tokenizers input
-
 tokenizeOperators :: Tokenizer
 tokenizeOperators = anyTokenizer $ map tokenizeOperator [VM.Push ..]
 
-tokenize :: Tokenizer
-tokenize = tokenizeOperators
+whitespaceTokenizer :: Tokenizer
+whitespaceTokenizer [] = Nothing
+whitespaceTokenizer (x:_)
+  | Char.isSpace x = Just $ TokenizeResult WhiteSpace 1
+  | otherwise = Nothing
+
+anyTokenizer :: [Tokenizer] -> Tokenizer
+anyTokenizer tokenizers input = Monoid.getFirst . Monoid.mconcat . map Monoid.First $ sequenceA tokenizers input
+
+tokenize :: String -> Either String [Token]
+tokenize [] = Right []
+tokenize input = case runTokenizer input of
+  (Just (TokenizeResult token chars)) -> tokenize (drop chars input) >>= (\rest -> return $ token : rest)
+  Nothing -> Left $ "Unknown token: " ++ take 20 input
+  where runTokenizer = anyTokenizer [tokenizeOperators, whitespaceTokenizer]
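With this change, anyTokenizer combines a list of tokenizers through Data.Monoid's First, so the result of the first tokenizer that returns a Just wins, and tokenize repeatedly drops the consumed characters until the input is exhausted or no tokenizer matches. A rough GHCi sketch of the expected behaviour (not output from the repository; it assumes VM.Op has a Push constructor, as the [VM.Push ..] range suggests, and that tokenizeKeyword matches the operator name at the start of the input):

ghci> tokenize "push push"
Right [Operator Push,WhiteSpace,Operator Push]
ghci> tokenize "push ???"
Left "Unknown token: ???"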