Create whitespace tokenizer and common tokenizer function
@@ -2,11 +2,16 @@ module Assembler (
   tokenize
 ) where
 
+import Data.Char as Char
 import Data.Monoid as Monoid
 import qualified VirtualMachine as VM (Op(..), Instruction, Command, instructionByOp)
 import qualified Util as U
 
-data Token = Operator VM.Op | KeywordLiteral String | IntLiteral Int deriving (Eq, Show)
+data Token = Operator VM.Op
+           | KeywordLiteral String
+           | IntLiteral Int
+           | WhiteSpace
+           deriving (Eq, Show)
 
 type ConsumedChars = Int
 data TokenizeResult = TokenizeResult Token ConsumedChars deriving (Eq, Show)
@@ -31,11 +36,21 @@ tokenizeOperator op input = case keywordToken of
   Nothing -> Nothing
   where keywordToken = tokenizeKeyword False (U.toLowerCase . show $ op) input
 
-anyTokenizer :: [Tokenizer] -> Tokenizer
-anyTokenizer tokenizers input = Monoid.getFirst . Monoid.mconcat . map Monoid.First $ sequenceA tokenizers input
-
 tokenizeOperators :: Tokenizer
 tokenizeOperators = anyTokenizer $ map tokenizeOperator [VM.Push ..]
 
-tokenize :: Tokenizer
-tokenize = tokenizeOperators
+whitespaceTokenizer :: Tokenizer
+whitespaceTokenizer [] = Nothing
+whitespaceTokenizer (x:_)
+  | Char.isSpace x = Just $ TokenizeResult WhiteSpace 1
+  | otherwise = Nothing
+
+anyTokenizer :: [Tokenizer] -> Tokenizer
+anyTokenizer tokenizers input = Monoid.getFirst . Monoid.mconcat . map Monoid.First $ sequenceA tokenizers input
+
+tokenize :: String -> Either String [Token]
+tokenize [] = Right []
+tokenize input = case runTokenizer input of
+  (Just (TokenizeResult token chars)) -> tokenize (drop chars input) >>= (\rest -> return $ token : rest)
+  Nothing -> Left $ "Unknown token: " ++ take 20 input
+  where runTokenizer = anyTokenizer [tokenizeOperators, whitespaceTokenizer]
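
A note on the shared combinator: anyTokenizer leans on the function Applicative, so sequenceA tokenizers input applies every tokenizer to the same input string, and wrapping the results in Monoid.First before mconcat keeps the leftmost Just, meaning earlier tokenizers in the list win. A self-contained sketch of the same trick (illustrative names only, not part of this module):

    import Data.Monoid (First (..), getFirst)

    -- Apply each matcher to the same input and keep the first hit.
    firstMatch :: [String -> Maybe Int] -> (String -> Maybe Int)
    firstMatch matchers input = getFirst . mconcat . map First $ sequenceA matchers input

    -- firstMatch [const Nothing, Just . length] "abc"  ==>  Just 3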
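
And a quick behavioural check of the new entry point (a sketch, assuming the module compiles as shown, that tokenizeKeyword matches the lowercased constructor name, that tokenizeOperator's success branch, unchanged above this hunk, yields an Operator token, and that no operator keyword matches the garbage input; Push is the only VM.Op constructor visible in this diff):

    -- GHCi:
    whitespaceTokenizer "  push"   -- Just (TokenizeResult WhiteSpace 1)
    tokenize " \n"                 -- Right [WhiteSpace,WhiteSpace]
    tokenize "???"                 -- Left "Unknown token: ???"
    tokenize "push "               -- Right [Operator Push,WhiteSpace]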