Refactor code

This commit is contained in:
2021-11-18 17:20:27 +01:00
parent c656b8ca4e
commit 2c56582460
15 changed files with 454 additions and 659 deletions

View File

@@ -7,4 +7,4 @@ import Assembler.Parser (parse)
import Assembler.Emitter (emit)
compile :: String -> Either String [Word8]
compile input = return input >>= tokenize >>= parse >>= emit
compile input = tokenize input >>= parse >>= emit

View File

@@ -12,18 +12,18 @@ import qualified Data.Map as M
import Assembler.Parser (AST(..), Scope(..))
data Bean = Byte Word8
data Bean = Byte Word8
| Reference String
deriving (Show, Eq)
data Context = Context { _beans :: [Bean]
, _labels :: M.Map String Int
, _labels :: M.Map String Int
, _currentLabel :: Maybe String
} deriving (Show, Eq)
type Emitter = AST -> ExceptT String (State Context) ()
empty :: Context
empty = Context { _beans = [], _labels = M.fromList [], _currentLabel = Nothing }
empty = Context { _beans = [], _labels = M.empty, _currentLabel = Nothing }
emitBean :: Bean -> ExceptT String (State Context) ()
emitBean bean = lift $ do
@@ -32,17 +32,17 @@ emitBean bean = lift $ do
return ()
emitByte :: Word8 -> ExceptT String (State Context) ()
emitByte byte = emitBean $ Byte $ byte
emitByte byte = emitBean $ Byte byte
emitParam :: Emitter
emitParam (Param (Integer x)) = emitByte $ fromIntegral $ x
emitParam (Param (LabelRef Global l)) = emitBean $ Reference $ l
emitParam (Param (Integer x)) = emitByte $ fromIntegral x
emitParam (Param (LabelRef Global l)) = emitBean $ Reference l
emitParam (Param (LabelRef Local l)) = do
ctx <- lift get
scope <- case _currentLabel ctx of
(Just s) -> return s
Nothing -> throwError $ "Local label ('." ++ l ++ "') reference is allowed only in the global label scope"
emitBean $ Reference $ (scope ++ "." ++ l)
emitBean $ Reference (scope ++ "." ++ l)
emitParam _ = throwError "Number or label reference expected"
emitLabelDef :: Emitter
@@ -50,7 +50,7 @@ emitLabelDef (LabelDef Global label) = do
ctx <- lift get
let labels = _labels ctx
let current = length (_beans ctx)
when (label `M.member` labels) (throwError $ "Label '" ++ (label) ++ "' is already defined")
when (label `M.member` labels) (throwError $ "Label '" ++ label ++ "' is already defined")
put ctx { _labels = M.insert label current labels, _currentLabel = Just label }
return ()
emitLabelDef (LabelDef Local label) = do
@@ -61,9 +61,9 @@ emitLabelDef (LabelDef Local label) = do
Nothing -> throwError $ "Local label ('." ++ label ++ "') can be defined only in the global label scope"
let canonicalLabel = scope ++ "." ++ label
let current = length (_beans ctx)
when (canonicalLabel `M.member` labels) (throwError $ "Label '" ++ (label) ++ "' is already defined in the global label '" ++ scope ++ "' scope")
when (canonicalLabel `M.member` labels) (throwError $ "Label '" ++ label ++ "' is already defined in the global label '" ++ scope ++ "' scope")
put ctx { _labels = M.insert canonicalLabel current labels }
return ()
return ()
emitLabelDef _ = throwError "Label definition expected"
emitInstr :: Emitter
@@ -71,7 +71,6 @@ emitInstr (Instruction (Operator op) Empty) = emitByte $ fromIntegral . fromEnum
emitInstr (Instruction (Operator op) (Params params)) = do
emitByte $ fromIntegral $ fromEnum op
mapM_ emitParam params
return ()
emitInstr _ = throwError "Instruction expected"
emitLine :: Emitter
@@ -81,26 +80,26 @@ emitLine (Line labelDef instr) = emitLabelDef labelDef >> emitInstr instr >> ret
emitLine _ = throwError "Line of code expected"
emitProgram :: Emitter
emitProgram (Program progLines) = mapM emitLine progLines >> return ()
emitProgram (Program progLines) = mapM_ emitLine progLines
emitProgram _ = throwError "Program code expected"
resolveLabels :: M.Map String Int -> [Bean] -> Either String [Bean]
resolveLabels labels beans = sequence $ foldr folder [] beans
where
folder b acc = (resolveLabel labels b) : acc
folder b acc = resolveLabel labels b : acc
resolveLabel :: M.Map String Int -> Bean -> Either String Bean
resolveLabel _ b@(Byte _) = Right b
resolveLabel labels (Reference label) = case M.lookup label labels of
(Just t) -> Right . Byte . fromIntegral $ t
Nothing -> Left $ "Label '" ++ label ++ "' is not defined"
Nothing -> Left $ "Label '" ++ label ++ "' is not defined"
emit :: AST -> Either String [Word8]
emit root = do
ctx <- flip evalState empty $ runExceptT $ emitProgram root >> lift get
emit root = do
ctx <- flip evalState empty $ runExceptT $ emitProgram root >> lift get
let labels = _labels ctx
let beans = _beans ctx
resolved <- resolveLabels labels beans
return $ map (\(Byte b) -> b) resolved
return $ map (\(Byte b) -> b) resolved

View File

@@ -1,3 +1,4 @@
{-# OPTIONS_GHC -Wno-incomplete-patterns #-}
module Assembler.Parser where
import Data.List (intercalate)
@@ -10,7 +11,7 @@ import Util (explode)
data Scope = Local | Global deriving (Eq, Show, Enum, Bounded)
data AST = Empty
| Operator Op
| Operator Op
| Integer Int
| Identifier String
| Colon
@@ -21,8 +22,8 @@ data AST = Empty
| Param AST
| Params [AST]
| Instruction AST AST
| Line AST AST
| Program [AST]
| Line AST AST
| Program [AST]
deriving (Eq, Show)
type ConsumedTokens = Int
@@ -43,36 +44,36 @@ parseInt _ = Nothing
-- ID := [alnum, '_']+
parseIdentifier :: Parser
parseIdentifier ((T.Identifier iden):_) = Just $ ParseResult (Identifier iden) 1
parseIdentifier _ = Nothing
parseIdentifier _ = Nothing
-- ':'
parseColon :: Parser
parseColon ((T.Colon):_) = Just $ ParseResult Colon 1
parseColon _ = Nothing
parseColon (T.Colon:_) = Just $ ParseResult Colon 1
parseColon _ = Nothing
-- '&'
parseAmpersand :: Parser
parseAmpersand ((T.Ampersand):_) = Just $ ParseResult Ampersand 1
parseAmpersand _ = Nothing
parseAmpersand (T.Ampersand:_) = Just $ ParseResult Ampersand 1
parseAmpersand _ = Nothing
-- '.'
parseDot :: Parser
parseDot ((T.Dot):_) = Just $ ParseResult Dot 1
parseDot _ = Nothing
parseDot (T.Dot:_) = Just $ ParseResult Dot 1
parseDot _ = Nothing
-- label_def := '.'? ID ':'
parseLabelDef :: Parser
parseLabelDef = parseSeq [parseOptionally parseDot, parseIdentifier, parseColon] combine
parseLabelDef = parseSeq [parseOptionally parseDot, parseIdentifier, parseColon] combine
where
combine [Dot, (Identifier iden), _] = LabelDef Local iden
combine [_, (Identifier iden), _] = LabelDef Global iden
combine [Dot, Identifier iden, _] = LabelDef Local iden
combine [_, Identifier iden, _] = LabelDef Global iden
-- label_ref := '&' ID
parseLabelRef :: Parser
parseLabelRef = parseSeq [parseAmpersand, parseOptionally parseDot, parseIdentifier] combine
where
combine [_, Dot, (Identifier iden)] = LabelRef Local iden
combine [_, _, (Identifier iden)] = LabelRef Global iden
combine [_, Dot, Identifier iden] = LabelRef Local iden
combine [_, _, Identifier iden] = LabelRef Global iden
-- param := INT | label_ref
parseParam :: Parser
@@ -107,17 +108,17 @@ parseMany :: Parser -> ([AST] -> AST) -> Parser
parseMany parser combiner tokens = if null asts
then Nothing
else Just $ ParseResult ast consumed
where
where
results = parseGreedy parser tokens
consumed = sum $ map (\(ParseResult _ c) -> c) results
asts = map (\(ParseResult a _) -> a) results
asts = map (\(ParseResult a _) -> a) results
ast = combiner asts
-- a a a a a a a...
parseGreedy :: Parser -> [T.Token] -> [ParseResult]
parseGreedy parser tokens = case parser tokens of
(Just r@(ParseResult _ consumed)) -> r : parseGreedy parser (drop consumed tokens)
Nothing -> []
Nothing -> []
-- a | b | c
parseAlt :: [Parser] -> (AST -> AST) -> Parser
@@ -137,7 +138,7 @@ parseSeq parsers combiner tokens = do
results <- parseAll parsers tokens
let consumed = sum $ map (\(ParseResult _ c) -> c) results
let asts = map (\(ParseResult a _) -> a) results
if (length asts) == (length parsers)
if length asts == length parsers
then return $ ParseResult (combiner asts) consumed
else Nothing
@@ -147,7 +148,7 @@ parseAll [] _ = Just []
parseAll (p:ps) tokens = do
(ParseResult ast consumed) <- p tokens
rest <- parseAll ps (drop consumed tokens)
return $ (ParseResult ast consumed) : rest
return $ ParseResult ast consumed : rest
-- 'Nothing' if not consumed tokens exist
assertConsumed :: Parser -> Parser
@@ -162,8 +163,8 @@ parse :: [T.Token] -> Either String AST
parse tokens = do
let codeLines = explode (==T.NewLine) tokens
let results = map (assertConsumed parseLine) codeLines
let errors = filter ((==Nothing) . snd) $ zipWith (,) codeLines $ results
let errorMsg = "Parse error(s):\n" ++ (intercalate "\n" $ map (show . fst) errors)
let errors = filter ((==Nothing) . snd) $ zip codeLines results
let errorMsg = "Parse error(s):\n" ++ intercalate "\n" (map (show . fst) errors)
case sequenceA results of
(Just r) -> return $ Program $ map (\(ParseResult ast _) -> ast) r
Nothing -> Left errorMsg

View File

@@ -8,8 +8,8 @@ import VirtualMachine.VM (Op(..))
import Util (toLowerCase, controlChar, unescape)
data Token = Operator Op
| IntLiteral Int
data Token = Operator Op
| IntLiteral Int
| StringLiteral String
| Identifier String
| Colon
@@ -38,59 +38,52 @@ keywordTokenizer cs kwd token input
matches = and zipped && len == length zipped
operatorTokenizer :: Op -> Tokenizer
operatorTokenizer op input = keywordTokenizer False (toLowerCase . show $ op) (Operator op) input
operatorTokenizer op = keywordTokenizer False (toLowerCase . show $ op) (Operator op)
tokenizeOperators :: Tokenizer
tokenizeOperators = anyTokenizer $ map operatorTokenizer ops
where
ops = sortBy cmp [Nop ..]
cmp x y = (length . show) y `compare` (length . show) x
tokenizeOperators = anyTokenizer $ map operatorTokenizer $ sortBy cmp [Nop ..]
where cmp x y = (length . show) y `compare` (length . show) x
tokenizeIdentifier :: Tokenizer
tokenizeIdentifier [] = Nothing
tokenizeIdentifier input@(x:_) = if null identifier || (not . isAlpha) x
then Nothing
else Just $ TokenizeResult (Identifier identifier) (length identifier)
where
identifier = takeWhile (or . sequenceA [isAlphaNum, (=='_')]) input
where identifier = takeWhile (or . sequenceA [isAlphaNum, (=='_')]) input
tokenizeWhitespace :: Tokenizer
tokenizeWhitespace [] = Nothing
tokenizeWhitespace (x:_)
| isSpace x = Just $ TokenizeResult WhiteSpace 1
| otherwise = Nothing
| otherwise = Nothing
tokenizeDecimal :: Tokenizer
tokenizeDecimal [] = Nothing
tokenizeDecimal input = if null numberStr
then Nothing
else Just $ TokenizeResult (IntLiteral number) len
where
where
number = read numberStr
len = length numberStr
numberStr = takeWhile isDigit input
tokenizeHex :: Tokenizer
tokenizeHex [] = Nothing
tokenizeHex input = if isPrefix && len > 0
then Just $ TokenizeResult (IntLiteral number) (len + 2)
else Nothing
where
isPrefix = take 2 input == "0x"
number = read . ("0x"++) $ numberStr
len = length numberStr
numberStr = takeWhile isHexDigit (drop 2 input)
tokenizeHex ('0':'x':input) = if null input
then Nothing
else Just $ TokenizeResult (IntLiteral $ read $ "0x" ++ numberStr) (length numberStr + 2)
where numberStr = takeWhile isHexDigit input
tokenizeHex _ = Nothing
tokenizeChar :: Tokenizer
tokenizeChar ('\'':'\\':x:'\'':_) = controlChar x >>= (\s -> return $ TokenizeResult (IntLiteral s) 4)
tokenizeChar ('\'':x:'\'':_) = Just $ TokenizeResult (IntLiteral . ord $ x) 3
tokenizeChar ('\'':x:'\'':_) = Just $ TokenizeResult (IntLiteral . ord $ x) 3
tokenizeChar _ = Nothing
tokenizeString :: Tokenizer
tokenizeString ('"':xs) = do
string <- extractString xs
unescaped <- unescape string
return $ TokenizeResult (StringLiteral unescaped) (length string + 2)
return $ TokenizeResult (StringLiteral unescaped) (length string + 2)
where
extractString [] = Nothing
extractString (y:ys)
@@ -100,13 +93,9 @@ tokenizeString ('"':xs) = do
tokenizeString _ = Nothing
tokenizeComment :: Tokenizer
tokenizeComment [] = Nothing
tokenizeComment (x:xs) = if x == ';'
then Just $ TokenizeResult (Comment comment) (len + 1)
else Nothing
where
len = length comment
comment = takeWhile (/='\n') xs
tokenizeComment (';':xs) = Just $ TokenizeResult (Comment comment) (length comment + 1)
where comment = takeWhile (/='\n') xs
tokenizeComment _ = Nothing
type SeparatorPredicate = Char -> Bool
sepTokenizer :: SeparatorPredicate -> Tokenizer -> Tokenizer
@@ -115,7 +104,7 @@ sepTokenizer predicate tokenizer input = do
result@(TokenizeResult _ consumed) <- tokenizer input
let next = drop consumed input
if null next || (predicate . head $ next)
then return $ result
then return result
else Nothing
anyTokenizer :: [Tokenizer] -> Tokenizer
@@ -124,7 +113,7 @@ anyTokenizer tokenizers input = getFirst . mconcat . map First $ sequenceA token
tokenize :: String -> Either String [Token]
tokenize [] = Right []
tokenize input = tokens >>= (\t -> Right $ filter tokenFilter t)
tokenize input = tokens >>= (Right . filter tokenFilter)
where
tokens = case tokenizers input of
(Just (TokenizeResult token chars)) -> tokenize (drop chars input) >>= (\rest -> return $ token : rest)
@@ -145,6 +134,6 @@ tokenize input = tokens >>= (\t -> Right $ filter tokenFilter t)
]
tokenFilter :: Token -> Bool
tokenFilter (WhiteSpace) = False
tokenFilter WhiteSpace = False
tokenFilter (Comment _) = False
tokenFilter _ = True