Refactor code
@@ -7,4 +7,4 @@ import Assembler.Parser (parse)
 import Assembler.Emitter (emit)
 
 compile :: String -> Either String [Word8]
-compile input = return input >>= tokenize >>= parse >>= emit
+compile input = tokenize input >>= parse >>= emit
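Note on the compile change: it relies on the monad left-identity law (return a >>= f behaves as f a), so dropping the leading return input cannot change the result. A minimal standalone sketch of the equivalence, illustrative only and not part of the commit; compileOld and compileNew are hypothetical names, and the Assembler.Tokenizer import is assumed from the surrounding code:

    -- Sketch only (not part of the commit): both pipelines agree by the
    -- monad left-identity law, return a >>= f == f a.
    import Data.Word (Word8)
    import Assembler.Tokenizer (tokenize)   -- assumed export, mirroring the diff
    import Assembler.Parser (parse)
    import Assembler.Emitter (emit)

    compileOld, compileNew :: String -> Either String [Word8]
    compileOld input = return input >>= tokenize >>= parse >>= emit
    compileNew input = tokenize input >>= parse >>= emit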
@@ -12,18 +12,18 @@ import qualified Data.Map as M
 import Assembler.Parser (AST(..), Scope(..))
 
 
 data Bean = Byte Word8
           | Reference String
           deriving (Show, Eq)
 
 data Context = Context { _beans :: [Bean]
                        , _labels :: M.Map String Int
                        , _currentLabel :: Maybe String
                        } deriving (Show, Eq)
 type Emitter = AST -> ExceptT String (State Context) ()
 
 empty :: Context
-empty = Context { _beans = [], _labels = M.fromList [], _currentLabel = Nothing }
+empty = Context { _beans = [], _labels = M.empty, _currentLabel = Nothing }
 
 emitBean :: Bean -> ExceptT String (State Context) ()
 emitBean bean = lift $ do
@@ -32,17 +32,17 @@ emitBean bean = lift $ do
   return ()
 
 emitByte :: Word8 -> ExceptT String (State Context) ()
-emitByte byte = emitBean $ Byte $ byte
+emitByte byte = emitBean $ Byte byte
 
 emitParam :: Emitter
-emitParam (Param (Integer x)) = emitByte $ fromIntegral $ x
-emitParam (Param (LabelRef Global l)) = emitBean $ Reference $ l
+emitParam (Param (Integer x)) = emitByte $ fromIntegral x
+emitParam (Param (LabelRef Global l)) = emitBean $ Reference l
 emitParam (Param (LabelRef Local l)) = do
   ctx <- lift get
   scope <- case _currentLabel ctx of
     (Just s) -> return s
     Nothing -> throwError $ "Local label ('." ++ l ++ "') reference is allowed only in the global label scope"
-  emitBean $ Reference $ (scope ++ "." ++ l)
+  emitBean $ Reference (scope ++ "." ++ l)
 emitParam _ = throwError "Number or label reference expected"
 
 emitLabelDef :: Emitter
@@ -50,7 +50,7 @@ emitLabelDef (LabelDef Global label) = do
   ctx <- lift get
   let labels = _labels ctx
   let current = length (_beans ctx)
-  when (label `M.member` labels) (throwError $ "Label '" ++ (label) ++ "' is already defined")
+  when (label `M.member` labels) (throwError $ "Label '" ++ label ++ "' is already defined")
   put ctx { _labels = M.insert label current labels, _currentLabel = Just label }
   return ()
 emitLabelDef (LabelDef Local label) = do
@@ -61,9 +61,9 @@ emitLabelDef (LabelDef Local label) = do
     Nothing -> throwError $ "Local label ('." ++ label ++ "') can be defined only in the global label scope"
   let canonicalLabel = scope ++ "." ++ label
   let current = length (_beans ctx)
-  when (canonicalLabel `M.member` labels) (throwError $ "Label '" ++ (label) ++ "' is already defined in the global label '" ++ scope ++ "' scope")
+  when (canonicalLabel `M.member` labels) (throwError $ "Label '" ++ label ++ "' is already defined in the global label '" ++ scope ++ "' scope")
   put ctx { _labels = M.insert canonicalLabel current labels }
   return ()
 emitLabelDef _ = throwError "Label definition expected"
 
 emitInstr :: Emitter
@@ -71,7 +71,6 @@ emitInstr (Instruction (Operator op) Empty) = emitByte $ fromIntegral . fromEnum
 emitInstr (Instruction (Operator op) (Params params)) = do
   emitByte $ fromIntegral $ fromEnum op
   mapM_ emitParam params
-  return ()
 emitInstr _ = throwError "Instruction expected"
 
 emitLine :: Emitter
@@ -81,26 +80,26 @@ emitLine (Line labelDef instr) = emitLabelDef labelDef >> emitInstr instr >> ret
 emitLine _ = throwError "Line of code expected"
 
 emitProgram :: Emitter
-emitProgram (Program progLines) = mapM emitLine progLines >> return ()
+emitProgram (Program progLines) = mapM_ emitLine progLines
 emitProgram _ = throwError "Program code expected"
 
 resolveLabels :: M.Map String Int -> [Bean] -> Either String [Bean]
 resolveLabels labels beans = sequence $ foldr folder [] beans
   where
-    folder b acc = (resolveLabel labels b) : acc
+    folder b acc = resolveLabel labels b : acc
 
 resolveLabel :: M.Map String Int -> Bean -> Either String Bean
 resolveLabel _ b@(Byte _) = Right b
 resolveLabel labels (Reference label) = case M.lookup label labels of
   (Just t) -> Right . Byte . fromIntegral $ t
   Nothing -> Left $ "Label '" ++ label ++ "' is not defined"
 
 emit :: AST -> Either String [Word8]
 emit root = do
   ctx <- flip evalState empty $ runExceptT $ emitProgram root >> lift get
   let labels = _labels ctx
   let beans = _beans ctx
   resolved <- resolveLabels labels beans
   return $ map (\(Byte b) -> b) resolved
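Note on the emitProgram change: mapM_ already discards each result, so it behaves like mapM followed by return () without building the intermediate list. A minimal standalone sketch, illustrative only and not from the commit; demoCollect and demoDiscard are hypothetical names:

    -- Sketch only: in any monad, mapM_ f xs behaves like mapM f xs >> return (),
    -- but it never materialises the intermediate result list.
    demoCollect, demoDiscard :: IO ()
    demoCollect = mapM print [1 :: Int, 2, 3] >> return ()  -- collects [(), (), ()], then drops it
    demoDiscard = mapM_ print [1 :: Int, 2, 3]              -- discards each () as it goes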
@@ -1,3 +1,4 @@
+{-# OPTIONS_GHC -Wno-incomplete-patterns #-}
 module Assembler.Parser where
 
 import Data.List (intercalate)
@@ -10,7 +11,7 @@ import Util (explode)
 data Scope = Local | Global deriving (Eq, Show, Enum, Bounded)
 
 data AST = Empty
          | Operator Op
          | Integer Int
          | Identifier String
          | Colon
@@ -21,8 +22,8 @@ data AST = Empty
          | Param AST
          | Params [AST]
          | Instruction AST AST
          | Line AST AST
          | Program [AST]
          deriving (Eq, Show)
 
 type ConsumedTokens = Int
@@ -43,36 +44,36 @@ parseInt _ = Nothing
 -- ID := [alnum, '_']+
 parseIdentifier :: Parser
 parseIdentifier ((T.Identifier iden):_) = Just $ ParseResult (Identifier iden) 1
 parseIdentifier _ = Nothing
 
 -- ':'
 parseColon :: Parser
-parseColon ((T.Colon):_) = Just $ ParseResult Colon 1
+parseColon (T.Colon:_) = Just $ ParseResult Colon 1
 parseColon _ = Nothing
 
 -- '&'
 parseAmpersand :: Parser
-parseAmpersand ((T.Ampersand):_) = Just $ ParseResult Ampersand 1
+parseAmpersand (T.Ampersand:_) = Just $ ParseResult Ampersand 1
 parseAmpersand _ = Nothing
 
 -- '.'
 parseDot :: Parser
-parseDot ((T.Dot):_) = Just $ ParseResult Dot 1
+parseDot (T.Dot:_) = Just $ ParseResult Dot 1
 parseDot _ = Nothing
 
 -- label_def := '.'? ID ':'
 parseLabelDef :: Parser
 parseLabelDef = parseSeq [parseOptionally parseDot, parseIdentifier, parseColon] combine
   where
-    combine [Dot, (Identifier iden), _] = LabelDef Local iden
-    combine [_, (Identifier iden), _] = LabelDef Global iden
+    combine [Dot, Identifier iden, _] = LabelDef Local iden
+    combine [_, Identifier iden, _] = LabelDef Global iden
 
 -- label_ref := '&' ID
 parseLabelRef :: Parser
 parseLabelRef = parseSeq [parseAmpersand, parseOptionally parseDot, parseIdentifier] combine
   where
-    combine [_, Dot, (Identifier iden)] = LabelRef Local iden
-    combine [_, _, (Identifier iden)] = LabelRef Global iden
+    combine [_, Dot, Identifier iden] = LabelRef Local iden
+    combine [_, _, Identifier iden] = LabelRef Global iden
 
 -- param := INT | label_ref
 parseParam :: Parser
@@ -107,17 +108,17 @@ parseMany :: Parser -> ([AST] -> AST) -> Parser
 parseMany parser combiner tokens = if null asts
   then Nothing
   else Just $ ParseResult ast consumed
   where
     results = parseGreedy parser tokens
     consumed = sum $ map (\(ParseResult _ c) -> c) results
     asts = map (\(ParseResult a _) -> a) results
     ast = combiner asts
 
 -- a a a a a a a...
 parseGreedy :: Parser -> [T.Token] -> [ParseResult]
 parseGreedy parser tokens = case parser tokens of
   (Just r@(ParseResult _ consumed)) -> r : parseGreedy parser (drop consumed tokens)
   Nothing -> []
 
 -- a | b | c
 parseAlt :: [Parser] -> (AST -> AST) -> Parser
@@ -137,7 +138,7 @@ parseSeq parsers combiner tokens = do
   results <- parseAll parsers tokens
   let consumed = sum $ map (\(ParseResult _ c) -> c) results
   let asts = map (\(ParseResult a _) -> a) results
-  if (length asts) == (length parsers)
+  if length asts == length parsers
     then return $ ParseResult (combiner asts) consumed
     else Nothing
 
@@ -147,7 +148,7 @@ parseAll [] _ = Just []
 parseAll (p:ps) tokens = do
   (ParseResult ast consumed) <- p tokens
   rest <- parseAll ps (drop consumed tokens)
-  return $ (ParseResult ast consumed) : rest
+  return $ ParseResult ast consumed : rest
 
 -- 'Nothing' if not consumed tokens exist
 assertConsumed :: Parser -> Parser
@@ -162,8 +163,8 @@ parse :: [T.Token] -> Either String AST
 parse tokens = do
   let codeLines = explode (==T.NewLine) tokens
   let results = map (assertConsumed parseLine) codeLines
-  let errors = filter ((==Nothing) . snd) $ zipWith (,) codeLines $ results
-  let errorMsg = "Parse error(s):\n" ++ (intercalate "\n" $ map (show . fst) errors)
+  let errors = filter ((==Nothing) . snd) $ zip codeLines results
+  let errorMsg = "Parse error(s):\n" ++ intercalate "\n" (map (show . fst) errors)
   case sequenceA results of
     (Just r) -> return $ Program $ map (\(ParseResult ast _) -> ast) r
     Nothing -> Left errorMsg
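Note on the Parser cleanups above: extra parentheses around a cons pattern head and a ($) applied to a single bare argument are both redundant, which is all these hunks change. A small standalone sketch with hypothetical names, not from the commit:

    -- Sketch only: each pair below is syntactically different but means the same thing.
    verbose, plain :: [Int] -> Maybe Int
    verbose ((x):_) = Just $ negate $ x   -- extra parens and a redundant ($)
    verbose _       = Nothing
    plain (x:_) = Just (negate x)
    plain _     = Nothing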
@@ -8,8 +8,8 @@ import VirtualMachine.VM (Op(..))
 import Util (toLowerCase, controlChar, unescape)
 
 
 data Token = Operator Op
            | IntLiteral Int
            | StringLiteral String
            | Identifier String
            | Colon
@@ -38,59 +38,52 @@ keywordTokenizer cs kwd token input
     matches = and zipped && len == length zipped
 
 operatorTokenizer :: Op -> Tokenizer
-operatorTokenizer op input = keywordTokenizer False (toLowerCase . show $ op) (Operator op) input
+operatorTokenizer op = keywordTokenizer False (toLowerCase . show $ op) (Operator op)
 
 tokenizeOperators :: Tokenizer
-tokenizeOperators = anyTokenizer $ map operatorTokenizer ops
-  where
-    ops = sortBy cmp [Nop ..]
-    cmp x y = (length . show) y `compare` (length . show) x
+tokenizeOperators = anyTokenizer $ map operatorTokenizer $ sortBy cmp [Nop ..]
+  where cmp x y = (length . show) y `compare` (length . show) x
 
 tokenizeIdentifier :: Tokenizer
 tokenizeIdentifier [] = Nothing
 tokenizeIdentifier input@(x:_) = if null identifier || (not . isAlpha) x
   then Nothing
   else Just $ TokenizeResult (Identifier identifier) (length identifier)
-  where
-    identifier = takeWhile (or . sequenceA [isAlphaNum, (=='_')]) input
+  where identifier = takeWhile (or . sequenceA [isAlphaNum, (=='_')]) input
 
 tokenizeWhitespace :: Tokenizer
 tokenizeWhitespace [] = Nothing
 tokenizeWhitespace (x:_)
   | isSpace x = Just $ TokenizeResult WhiteSpace 1
   | otherwise = Nothing
 
 tokenizeDecimal :: Tokenizer
 tokenizeDecimal [] = Nothing
 tokenizeDecimal input = if null numberStr
   then Nothing
   else Just $ TokenizeResult (IntLiteral number) len
   where
     number = read numberStr
     len = length numberStr
     numberStr = takeWhile isDigit input
 
 tokenizeHex :: Tokenizer
-tokenizeHex [] = Nothing
-tokenizeHex input = if isPrefix && len > 0
-  then Just $ TokenizeResult (IntLiteral number) (len + 2)
-  else Nothing
-  where
-    isPrefix = take 2 input == "0x"
-    number = read . ("0x"++) $ numberStr
-    len = length numberStr
-    numberStr = takeWhile isHexDigit (drop 2 input)
+tokenizeHex ('0':'x':input) = if null input
+  then Nothing
+  else Just $ TokenizeResult (IntLiteral $ read $ "0x" ++ numberStr) (length numberStr + 2)
+  where numberStr = takeWhile isHexDigit input
+tokenizeHex _ = Nothing
 
 tokenizeChar :: Tokenizer
 tokenizeChar ('\'':'\\':x:'\'':_) = controlChar x >>= (\s -> return $ TokenizeResult (IntLiteral s) 4)
 tokenizeChar ('\'':x:'\'':_) = Just $ TokenizeResult (IntLiteral . ord $ x) 3
 tokenizeChar _ = Nothing
 
 tokenizeString :: Tokenizer
 tokenizeString ('"':xs) = do
   string <- extractString xs
   unescaped <- unescape string
   return $ TokenizeResult (StringLiteral unescaped) (length string + 2)
   where
     extractString [] = Nothing
     extractString (y:ys)
@@ -100,13 +93,9 @@ tokenizeString ('"':xs) = do
 tokenizeString _ = Nothing
 
 tokenizeComment :: Tokenizer
-tokenizeComment [] = Nothing
-tokenizeComment (x:xs) = if x == ';'
-  then Just $ TokenizeResult (Comment comment) (len + 1)
-  else Nothing
-  where
-    len = length comment
-    comment = takeWhile (/='\n') xs
+tokenizeComment (';':xs) = Just $ TokenizeResult (Comment comment) (length comment + 1)
+  where comment = takeWhile (/='\n') xs
+tokenizeComment _ = Nothing
 
 type SeparatorPredicate = Char -> Bool
 sepTokenizer :: SeparatorPredicate -> Tokenizer -> Tokenizer
@@ -115,7 +104,7 @@ sepTokenizer predicate tokenizer input = do
   result@(TokenizeResult _ consumed) <- tokenizer input
   let next = drop consumed input
   if null next || (predicate . head $ next)
-    then return $ result
+    then return result
     else Nothing
 
 anyTokenizer :: [Tokenizer] -> Tokenizer
@@ -124,7 +113,7 @@ anyTokenizer tokenizers input = getFirst . mconcat . map First $ sequenceA token
 
 tokenize :: String -> Either String [Token]
 tokenize [] = Right []
-tokenize input = tokens >>= (\t -> Right $ filter tokenFilter t)
+tokenize input = tokens >>= (Right . filter tokenFilter)
   where
     tokens = case tokenizers input of
       (Just (TokenizeResult token chars)) -> tokenize (drop chars input) >>= (\rest -> return $ token : rest)
@@ -145,6 +134,6 @@ tokenize input = tokens >>= (\t -> Right $ filter tokenFilter t)
     ]
 
 tokenFilter :: Token -> Bool
-tokenFilter (WhiteSpace) = False
+tokenFilter WhiteSpace = False
 tokenFilter (Comment _) = False
 tokenFilter _ = True