Commit 67e98e69 authored by Andrew Gibiansky's avatar Andrew Gibiansky

Adding parser... Modified code from @avogt

parent c0d97abb
......@@ -82,6 +82,7 @@ executable IHaskell
IHaskell.Eval.Completion
IHaskell.Eval.Info
IHaskell.Eval.Evaluate
IHaskell.Eval.Parser
IHaskell.IPython
IHaskell.Message.Parser
IHaskell.Message.UUID
......@@ -118,7 +119,8 @@ executable IHaskell
directory,
here,
system-filepath,
text ==0.11.*
text ==0.11.*,
mtl == 2.1.*
Test-Suite doctests
Type: exitcode-stdio-1.0
......
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
module IHaskell.Display (
IHaskellDisplay(..),
MimeType(..),
DisplayData(..),
) where
import IHaskell.Types
-- | A class for displayable Haskell types.
class IHaskellDisplay a where
display :: a -> [DisplayData]
......@@ -11,11 +11,12 @@ module IHaskell.Eval.Evaluate (
) where
import ClassyPrelude hiding (liftIO, hGetContents)
import Prelude(putChar, tail, init)
import Prelude(putChar, tail, init, (!!))
import Data.List.Utils
import Data.List(findIndex)
import Data.String.Utils
import Text.Printf
import Data.Char as Char
import Language.Haskell.Exts.Parser hiding (parseType)
import Language.Haskell.Exts.Pretty
......@@ -27,8 +28,10 @@ import GhcMonad (liftIO)
import GHC hiding (Stmt, TypeSig)
import GHC.Paths
import Exception hiding (evaluate)
import Outputable
import Packages
import Module
import qualified System.IO.Strict as StrictIO
import IHaskell.Types
......@@ -36,7 +39,7 @@ import IHaskell.Types
data ErrorOccurred = Success | Failure
debug :: Bool
debug = False
debug = True
ignoreTypePrefixes :: [String]
ignoreTypePrefixes = ["GHC.Types", "GHC.Base", "GHC.Show", "System.IO",
......@@ -110,14 +113,35 @@ interpret action = runGhc (Just libdir) $ do
dflags <- getSessionDynFlags
void $ setSessionDynFlags $ dflags { hscTarget = HscInterpreted, ghcLink = LinkInMemory }
-- Load packages that start with ihaskell-* and aren't just IHaskell.
displayPackages <- liftIO $ do
(dflags, _) <- initPackages dflags
let Just db = pkgDatabase dflags
packageNames = map (packageIdString . packageConfigId) db
initStr = "ihaskell-"
ihaskellPkgs = filter (startswith initStr) packageNames
displayPkgs = filter (isAlpha . (!! (length initStr + 1))) ihaskellPkgs
return displayPkgs
-- Generate import statements all Display modules.
let capitalize :: String -> String
capitalize (first:rest) = Char.toUpper first : rest
importFmt = "import IHaskell.Display.%s"
toImportStmt :: String -> String
toImportStmt = printf importFmt . capitalize . (!! 1) . split "-"
displayImports = map toImportStmt displayPackages
-- Import modules.
imports <- mapM parseImportDecl globalImports
imports <- mapM parseImportDecl $ globalImports ++ displayImports
setContext $ map IIDecl imports
-- Give a value for `it`. This is required due to the way we handle `it`
-- in the wrapper statements - if it doesn't exist, the first statement
-- will fail.
runStmt "()" RunToCompletion
runStmt "putStrLn \"\"" RunToCompletion
-- Run the rest of the interpreter
action
......@@ -148,7 +172,7 @@ joinDisplays displays =
plains = filter isPlain displays
other = filter (not . isPlain) displays
getText (Display PlainText text) = text
joinedPlains = Display PlainText $ concat $ map getText plains in
joinedPlains = Display PlainText $ concatMap getText plains in
case length plains of
0 -> other
_ -> joinedPlains : other
......@@ -192,7 +216,7 @@ parseCommands code = joinMultilineDeclarations $ concatMap makeCommands pieces
makeCommands str
| isDirective str = [createDirective str]
| isImport str = [Import $ strip str]
| length rest > 0 && isTypeDeclaration first =
| not (null rest) && isTypeDeclaration first =
let (firstStmt:restStmts) = makeCommands $ unlines rest in
case firstStmt of
Declaration decl -> Declaration (first ++ decl) : restStmts
......
{-# LANGUAGE NoImplicitPrelude #-}
module IHaskell.Eval.Parser (
chunkCode,
CodeChunk(..),
ChunkType(..),
) where
import ClassyPrelude hiding (liftIO, hGetContents)
import FastString
import StringBuffer
import ErrUtils
import SrcLoc
import GHC
import Bag
import Lexer
import Data.String.Utils (strip, startswith)
import Data.List.Utils (grab)
import Control.Monad.State
-- | A chunk of code with with a source span and an associated chunk type.
data CodeChunk = Chunk RealSrcSpan ChunkType
-- | Possible types of code chunks.
data ChunkType = Directive
| Expr
| Stmt
| Decl
| Import
deriving (Eq, Show, Ord)
-- | Simple tree data structure.
data Tree a = Branch [Tree a] | Leaf a deriving Show
-- | Delimiter categorization as an opening, closing, or neither opening
-- nor closing delimiter. Used to generate trees of tokens.
data DelimType = Opening | Closing | Neither
-- | Put the given statements into a `do` block.
wrapInDoBlock :: String -> String
wrapInDoBlock codeStr =
if null stripped
then []
else unlines $ "do" : map indent (lines stripped)
where
stripped = strip codeStr
indent = (" " ++)
-- | Convert a string of code into raw, uncleaned code chunks.
classifyCode :: DynFlags -> String -> Either String [CodeChunk]
classifyCode flags codeStr = groupLikeChunks . treeToChunks . tokenTree <$> runLexer flags (wrapInDoBlock codeStr)
-- | Group code chunks that are alike into one code chunk.
groupLikeChunks :: [CodeChunk] -> [CodeChunk]
groupLikeChunks chunks = map joinChunks $ groupBy sameChunkType chunks
where
sameChunkType (Chunk _ firstType) (Chunk _ secondType) = firstType == secondType
joinChunks :: [CodeChunk] -> CodeChunk
joinChunks [chunk] = chunk
joinChunks (Chunk firstLoc chunkType:rest) = Chunk newSpan chunkType
where newSpan = mkRealSrcSpan (realSrcSpanStart firstLoc) (realSrcSpanEnd restChunkLoc)
Chunk restChunkLoc _ = joinChunks rest
-- | Convert a list of tokens into a tree of tokens.
tokenTree :: [Located Lexer.Token] -> Tree (Located Lexer.Token)
tokenTree = toTree $ \x -> case x of
-- Opening delimiters are opening curly braces and opening parentheses.
L _ ITvocurly -> Opening
L _ IToparen -> Opening
-- Closing delimiters are closing curly braces and closing parentheses.
L _ ITvccurly -> Closing
L _ ITcparen -> Closing
-- Everthing else isn't a delimiter.
_ -> Neither
-- | Convert a list into a tree given a function that can classify each
-- element of the list as a delimiter (opening or closing) or not
-- a delimiter.
toTree :: (a -> DelimType) -- ^ Function which classifies the delimiter type of a list element.
-> [a] -- ^ List of tokens.
-> Tree a -- ^ Tree generated from tokens where each set of delimiters encodes a new level.
toTree delimType tokens =
case toTree' delimType 0 [[]] tokens of
x -> Branch $ reverse x
where
-- Helper function for tree conversion.
toTree' :: (a -> DelimType) -- Convert list element to a tree.
-> Int -- The level of the tree which is being parsed.
-> [[Tree a]] -- The currently parsed branches at every level.
-- The first element is a list of branches
-- at the level currently being parsed, the
-- second element is the branches at the
-- level above, and so on.
-> [a] -- Remaining tokens.
-> [Tree a] -- Branches of the output tree.
toTree' delimType n accum (token:rest) =
case delimType token of
-- If we see an opening delimiter, go down one level.
-- Reset the parsed things at the current level to nothing, since
-- we haven't parsed any tokens.
Opening -> toTree' delimType (n+1) ([] : accum) rest
-- If we see a closing parenthesis, go back up one level.
-- The level below just becomes a single parsed token at this level.
Closing -> case accum of
sublevel : currentLevel : uplevels -> toTree' delimType (n-1) levels rest
where first = Branch $ reverse sublevel
currentLevelNodes = first : currentLevel
levels = currentLevelNodes : uplevels
-- If we see something that isn't a delimiter, simply add it to the
-- current level of parsed nodes.
Neither -> case accum of
currentLevel : uplevels -> toTree' delimType n ((Leaf token : currentLevel) : uplevels) rest
-- Once done parsing, return the branches. We're done paring because
-- the remaining tokens are empty and because the level of the tree is
-- just zero (the top level).
toTree' _ 0 (a:_) [] = a
-- | Divide the code string into chunks. Each code chunk can be evaluated
-- separately.
chunkCode :: GhcMonad m => String -- ^ String containing code to parse and split.
-> m (Either String [(String, ChunkType)]) -- ^ Either an error string or a list of code chunks.
chunkCode codeString = do
flags <- getSessionDynFlags
let chunks = classifyCode flags codeString
return $ case chunks of
Right chunks -> Right $ evalState (extractDirectives chunks) $ lines codeString
Left str -> Left str
where
-- Get number of lines in a source span.
nlines :: RealSrcSpan -> Int
nlines span = 1 + srcLocLine (realSrcSpanEnd span) - srcLocLine (realSrcSpanStart span)
-- Extract all directives in this chunk. Convert a chunk into a list of
-- strings and their chunk types.
extractDirectives :: [CodeChunk] -> State [String] [(String, ChunkType)]
extractDirectives (Chunk span chunkType:rest) = do
spanLines <- grab $ nlines span
next <- extractDirectives rest
return $ catchDirectives spanLines chunkType ++ next
where
catchDirectives :: [String] -> ChunkType -> [(String, ChunkType)]
catchDirectives codeLines chunkType = case break isDirective codeLines of
-- If there are no directives...
(allLines, []) -> [(unlines allLines, chunkType)]
(preLines, directiveLine:postLines) -> [(unlines preLines, chunkType), (directiveLine, Directive)] ++ catchDirectives postLines chunkType
isDirective line = startswith ":" $ strip line
extractDirectives [] = return []
-- | only go down one level
treeToChunks :: Tree (Located Lexer.Token) -> [CodeChunk]
treeToChunks = convertToChunks (convertToChunks leafToChunk)
where
convertToChunks recursion (Branch subnodes) = concatMap recursion subnodes
convertToChunks _ (Leaf value) = makeChunk value
leafToChunk (Leaf value) = makeChunk value
leafToChunk _ = []
makeChunk (L (RealSrcSpan location) token) = [Chunk location $ classifyToken token]
makeChunk _ = []
-- | Classifies a token based on what type of Haskell form it is likely to
-- be part of. Certain tokens can mean you are in an import or in
-- a declaration. However, you can have declarations inside expressions or
-- statements when between curly brackets. After lexing the input, lines
-- are classified based on tokens using `classifyToken` and then the
-- original input is split based on these classifications.
classifyToken :: Lexer.Token -> ChunkType
classifyToken tok = case tok of
ITclass -> Decl
ITdata -> Decl
ITdefault -> Decl
ITderiving -> Decl
IThiding -> Decl
ITimport -> Import
ITinfix -> Decl
ITinfixl -> Decl
ITinfixr -> Decl
ITinstance -> Decl
ITmodule -> Decl
ITnewtype -> Decl
ITqualified -> Import
ITtype -> Decl
ITwhere -> Decl
ITscc -> Decl
ITforeign -> Decl
ITexport -> Decl
ITlabel -> Decl -- ?
ITdynamic -> Decl
ITsafe -> Decl
ITinterruptible -> Decl
ITunsafe -> Decl
ITstdcallconv -> Decl
ITccallconv -> Decl
ITcapiconv -> Decl
ITprimcallconv -> Decl
ITfamily -> Decl
ITinline_prag {} -> Decl
ITspec_prag {} -> Decl
ITspec_inline_prag {} -> Decl
ITsource_prag {} -> Decl
ITrules_prag {} -> Decl
ITwarning_prag {} -> Decl
ITdeprecated_prag {} -> Decl
ITline_prag {} -> Decl
ITscc_prag -> Decl
ITgenerated_prag -> Decl
ITcore_prag -> Decl
ITunpack_prag -> Decl
ITnounpack_prag -> Decl
ITann_prag -> Decl
ITclose_prag -> Decl
IToptions_prag {} -> Decl
ITinclude_prag {} -> Decl
ITlanguage_prag -> Decl
ITvect_prag -> Decl -- ?
ITvect_scalar_prag -> Decl
ITnovect_prag -> Decl
ITctype -> Decl
ITdcolon -> Decl
ITequal -> Decl
ITvbar -> Decl -- |
ITdotdot -> Expr -- [1 .. ]
ITcolon -> Expr
ITcase -> Expr
ITdo -> Expr
ITelse -> Expr
ITif -> Expr
ITin -> Expr
ITlet -> Expr
ITof -> Expr
ITthen -> Expr
ITforall -> Expr
ITmdo -> Expr
ITgroup -> Expr -- SQL comprehensions.
ITby -> Expr
ITusing -> Expr
ITlam -> Expr
ITlcase -> Expr
ITlarrow -> Expr
ITrarrow -> Expr
ITat -> Expr
ITtilde -> Expr
ITtildehsh -> Expr
ITdarrow -> Expr
ITminus -> Expr
ITbang -> Expr
ITstar -> Expr
ITdot -> Expr
ITbiglam -> Expr
ITocurly -> Expr
ITccurly -> Expr
ITvocurly -> Expr
ITvccurly -> Expr
ITobrack -> Expr
ITopabrack -> Expr
ITcpabrack -> Expr
ITcbrack -> Expr
IToparen -> Expr
ITcparen -> Expr
IToubxparen -> Expr
ITcubxparen -> Expr
ITsemi -> Expr
ITcomma -> Expr
ITunderscore -> Expr
ITbackquote -> Expr
ITsimpleQuote -> Expr
ITvarid {} -> Expr
ITconid {} -> Expr
ITvarsym {} -> Expr
ITconsym {} -> Expr
ITqvarid {} -> Expr
ITqconid {} -> Expr
ITqvarsym {} -> Expr
ITqconsym {} -> Expr
ITprefixqvarsym {} -> Expr
ITprefixqconsym {} -> Expr
ITdupipvarid {} -> Expr
ITchar {} -> Expr
ITstring {} -> Expr
ITinteger{} -> Expr
ITrational{} -> Expr
ITprimchar {} -> Expr
ITprimstring{} -> Expr
ITprimint {} -> Expr
ITprimword {} -> Expr
ITprimfloat {} -> Expr
ITprimdouble {} -> Expr
ITopenExpQuote -> Expr
ITopenPatQuote -> Expr
ITopenDecQuote -> Expr
ITopenTypQuote -> Expr
ITcloseQuote -> Expr
ITidEscape {} -> Expr
ITparenEscape -> Expr
ITtyQuote -> Expr
ITquasiQuote {} -> Expr
ITqQuasiQuote {} -> Expr
ITproc -> Expr
ITrec -> Expr
IToparenbar -> Expr
ITcparenbar -> Expr
ITlarrowtail -> Expr
ITrarrowtail -> Expr
ITLarrowtail -> Expr
ITRarrowtail -> Expr
ITunknown {} -> Expr
ITeof -> Expr
ITdocCommentNext {} -> Expr
ITdocCommentPrev {} -> Expr
ITdocCommentNamed {} -> Expr
ITdocSection {} -> Expr
ITdocOptions {} -> Expr
ITdocOptionsOld {} -> Expr
ITlineComment {} -> Expr
ITblockComment {} -> Expr
-- All constructors are listed above.
-- A new keyword addition to GHC will trigger a warning here.
-- | Runs the GHC lexer on the code string. Returns an error string or
-- a list of tokens and locations for each token.
runLexer :: DynFlags -> String -> Either String [Located Token]
runLexer flags codeString = toEither (lexTokenStream buffer location flags)
where
-- Location displayed as the parsing location.
filename = "<interactive>"
initLine = 1
initCol = 1
location = mkRealSrcLoc (mkFastString filename) initLine initCol
buffer = stringToStringBuffer codeString
-- Convert a parse success or failure into an Either type.
toEither (PFailed span err) = Left $ printErrorBag $ unitBag $ mkPlainErrMsg flags span err
toEither (POk _ tokens) = Right tokens
printErrorBag bag = unlines $ map show $ bagToList bag
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment