Hello. I am reading Martijn's MSc Thesis "Generic Selections of Subexpressions", where one can found some discussions about annotating abstract syntax trees (AST).
In order to follow the discussion I wrote the attached Haskell program, which is an interpreter for an simple typed expression language. The Annotations package is used. The expression pattern is represented by a single recursive data type. Annotations are used for positions in the input source, and also for the type of expressions and subexpressions. I would like somebody to review the code and comment on it, as I am not sure I am using the concepts right. Also I would like the type checker to produce an expression annotated with both positions and calculated types. Currently it discards the position annotations. Any sugestions on how to modify it is welcome. Next step is adding a new form of expression to introduce local variable bindings. After that I want to start working with ASTs represented by mutually recursive data types. Then I will need multirec... Romildo
{-# LANGUAGE FlexibleContexts #-} module Main where import Control.Applicative (Applicative(pure,(<*>)),(<$>),(<$),(<*)) import Data.Traversable (Traversable(traverse)) import Data.Foldable (Foldable(foldr)) import Text.Parsec hiding (chainl1) import Data.Char (isDigit,isAlpha,isAlphaNum) import Data.Tree (Tree(Node),drawTree) import Annotations.F.Fixpoints import Annotations.F.Annotated import Annotations.Except -- the goal is to implement an interpreter for a simple language of -- typed expressions using annotations on recursive data types -- type of expressions data ExprType = NUMERIC | LOGIC deriving (Show) -- value of an expression data ExprValue = Numeric Double | Logic Bool deriving (Show) -- identifiers are strings type Id = String -- binary operators used in expressions data Op = Add | Sub | Mul | Div -- arithmetic | Eq | Ne | Gt | Ge | Lt | Le -- relational | And | Or -- logical deriving (Show) -- pattern of an expression data ExprF r = Num Double -- numeric literal | Log Bool -- logical literal | Var Id -- variable | Bin Op r r -- binary operation deriving (Show) -- mapping a function over an expression pattern instance Functor ExprF where fmap _ (Num n) = Num n fmap _ (Log b) = Log b fmap _ (Var v) = Var v fmap f (Bin op x y) = Bin op (f x) (f y) -- traversing an expression pattern instance Traversable ExprF where traverse _ (Num n) = pure (Num n) traverse _ (Log b) = pure (Log b) traverse _ (Var v) = pure (Var v) traverse f (Bin op x y) = Bin op <$> f x <*> f y -- folding an expression pattern instance Foldable ExprF where foldr _ z (Num _) = z foldr _ z (Log _) = z foldr _ z (Var _) = z foldr f z (Bin op l r) = f l (f r z) -- bare expressions newtype Expr = Expr { runExpr :: Fix ExprF } deriving (Show) -- range of source positions: used to delimit where something appeared -- in a source input data Range = Range SourcePos SourcePos instance Show Range where show (Range p1 p2) | n1 == n2 = if null n1 then showLC lc1 ++ "-" ++ showLC lc2 else showN n1 ++ " " ++ showLC lc1 ++ "-" ++ showLC lc2 | otherwise = showN n1 ++ " " ++ showLC lc1 ++ "-" ++ showN n2 ++ " " ++ showLC lc2 where n1 = sourceName p1 lc1 = (sourceLine p1,sourceColumn p1) n2 = sourceName p2 lc2 = (sourceLine p2,sourceColumn p2) showN n = "\"" ++ n ++ "\"" showLC (l,c) = show l ++ ":" ++ show c -- expressions annoted with positions newtype PosExpr = PosExpr { runPosExpr :: Fix (Ann Range ExprF) } deriving (Show) -- convert an expression to a rose tree of strings -- this helps to visualize the expression structure exprTree :: Algebra ExprF (Tree String) exprTree (Num n) = Node ("Num: " ++ show n) [] exprTree (Log b) = Node ("Log: " ++ show b) [] exprTree (Var v) = Node ("Var: " ++ v) [] exprTree (Bin op l r) = Node ("Bin: " ++ show op) [l,r] -- convert an annotated expression to a tree of strings -- this helps to visualize the annotated expression structure annExprTree :: Show a => Algebra (Ann a ExprF) (Tree String) annExprTree (Ann z expr) = case expr of Num n -> Node (annot ("Num: " ++ show n)) [] Log b -> Node (annot ("Log: " ++ show b)) [] Var v -> Node (annot ("Var: " ++ v)) [] Bin op l r -> Node (annot ("Bin: " ++ show op)) [l,r] where annot x = x ++ " " ++ show z -- a memory is just an association list between -- variable names and values type Memory = [(Id,ExprValue)] -- an algebra to find the value of an expression annotated with -- positions, given a memory exprEval :: Memory -> ErrorAlgebra ExprF String ExprValue exprEval _ (Num n) = Right (Numeric n) exprEval _ (Log b) = Right (Logic b) exprEval m (Var v) = case lookup v m of Just x -> Right x; Nothing -> Left ("undefined variable: " ++ v) exprEval _ (Bin op (Numeric x) (Numeric y)) = case op of Add -> Right (Numeric (x + y)) Sub -> Right (Numeric (x - y)) Mul -> Right (Numeric (x * y)) Div | y == 0 -> Left "division by zero" | otherwise -> Right (Numeric (x / y)) Eq -> Right (Logic (x == y)) Ne -> Right (Logic (x /= y)) Gt -> Right (Logic (x > y)) Ge -> Right (Logic (x >= y)) Lt -> Right (Logic (x < y)) Le -> Right (Logic (x <= y)) _ -> Left "type mismatch: unexpected numeric operand" exprEval _ (Bin op (Logic x) (Logic y)) = case op of Eq -> Right (Logic (x == y)) Ne -> Right (Logic (x /= y)) And -> Right (Logic (x && y)) Or -> Right (Logic (x || y)) _ -> Left "type mismatch: unexpected logical operand" exprEval _ (Bin _ _ _) = Left "type mismatch" -- an algebra to find the type of an expression (annotated with -- positions), given an environment (association list) with the type of -- each defined variable; an expression annotated with its type is -- constructed exprTypeCheck :: [(Id, ExprType)] -> ErrorAlgebra ExprF String (Fix (Ann ExprType ExprF)) exprTypeCheck _ e@(Num _) = Right $ mkAnnFix NUMERIC e exprTypeCheck _ e@(Log _) = Right $ mkAnnFix LOGIC e exprTypeCheck m e@(Var v) = case lookup v m of Just t -> Right $ mkAnnFix t e Nothing -> Left ("undefined variable: " ++ v) exprTypeCheck _ e@(Bin Add (In (Ann NUMERIC _)) (In (Ann NUMERIC _))) = Right $ mkAnnFix NUMERIC e exprTypeCheck _ e@(Bin Sub (In (Ann NUMERIC _)) (In (Ann NUMERIC _))) = Right $ mkAnnFix NUMERIC e exprTypeCheck _ e@(Bin Mul (In (Ann NUMERIC _)) (In (Ann NUMERIC _))) = Right $ mkAnnFix NUMERIC e exprTypeCheck _ e@(Bin Div (In (Ann NUMERIC _)) (In (Ann NUMERIC _))) = Right $ mkAnnFix NUMERIC e exprTypeCheck _ e@(Bin Eq (In (Ann NUMERIC _)) (In (Ann NUMERIC _))) = Right $ mkAnnFix LOGIC e exprTypeCheck _ e@(Bin Ne (In (Ann NUMERIC _)) (In (Ann NUMERIC _))) = Right $ mkAnnFix LOGIC e exprTypeCheck _ e@(Bin Gt (In (Ann NUMERIC _)) (In (Ann NUMERIC _))) = Right $ mkAnnFix LOGIC e exprTypeCheck _ e@(Bin Ge (In (Ann NUMERIC _)) (In (Ann NUMERIC _))) = Right $ mkAnnFix LOGIC e exprTypeCheck _ e@(Bin Lt (In (Ann NUMERIC _)) (In (Ann NUMERIC _))) = Right $ mkAnnFix LOGIC e exprTypeCheck _ e@(Bin Le (In (Ann NUMERIC _)) (In (Ann NUMERIC _))) = Right $ mkAnnFix LOGIC e exprTypeCheck _ e@(Bin And (In (Ann LOGIC _)) (In (Ann LOGIC _))) = Right $ mkAnnFix LOGIC e exprTypeCheck _ e@(Bin Or (In (Ann LOGIC _)) (In (Ann LOGIC _))) = Right $ mkAnnFix LOGIC e exprTypeCheck _ (Bin _ _ _ ) = Left "type mismatch" -- parser for annotated expressions with positions unit p = do left <- getPosition x <- p right <- getPosition return (mkAnnFix (Range left right) x) chainl1 px pf = do left <- getPosition x <- px rest left x where rest left x = option x $ do f <- pf y <- px right <- getPosition rest left (mkAnnFix (Range left right) (f x y)) pToken s = lexeme (string s) pNum,pLog,pVar,pFactor,pTerm,pArit,pRel,pConj,pDisj,pExpr :: Stream s m Char => ParsecT s u m (Fix (Ann Range ExprF)) pExpr = pDisj pDisj = chainl1 pConj (Bin Or <$ pToken "|") pConj = chainl1 pRel (Bin And <$ pToken "&") pRel = do left <- getPosition x <- pArit option x $ do o <- Bin Eq <$ pToken "=" <|> Bin Ne <$ pToken "!=" <|> Bin Ge <$ try (pToken ">=") <|> Bin Gt <$ pToken ">" <|> Bin Le <$ try (pToken "<=") <|> Bin Lt <$ pToken "<" y <- pArit right <- getPosition return (mkAnnFix (Range left right) (o x y)) pArit = chainl1 pTerm (Bin Add <$ pToken "+" <|> Bin Sub <$ pToken "-") pTerm = chainl1 pFactor (Bin Mul <$ pToken "*" <|> Bin Div <$ pToken "/") pFactor = pNum <|> pLog <|> pVar <|> between (pToken "(") (pToken ")") pExpr pNum = lexeme $ unit $ Num . read <$> many1 (satisfy isDigit) pLog = lexeme $ unit $ Log <$> ((pToken "t" >> return True) <|> (pToken "f" >> return False)) pVar = lexeme $ unit $ Var <$> do x <- satisfy isAlpha xs <- many (satisfy isAlphaNum) return (x:xs) lexeme p = p <* spaces -- the main I/O action: -- -- read a line and parses it as an expression -- -- shows the abstract syntatic tree of the expression -- -- type checks the expression and shows the resulting expression -- annotated with types -- -- evaluates the expression and shows its value main = do s <- getLine case parse (pExpr <* eof) "" s of Left err -> putStrLn (show err) Right x -> do putStrLn (drawTree (cata annExprTree x)) case errorCata (exprTypeCheck []) x of Failed err -> putStrLn (show err) OK y -> do putStrLn (drawTree (cata annExprTree y)) putStrLn (show (errorCata (exprEval []) x))
_______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe