Commit b82c1577 authored by Andrew Gibiansky's avatar Andrew Gibiansky

Separated Haskell parser into package, closes #63

parent 9158525f
...@@ -202,20 +202,10 @@ ipythonTests = do ...@@ -202,20 +202,10 @@ ipythonTests = do
parseVersion "12.5.10" `shouldBe` [12, 5, 10] parseVersion "12.5.10" `shouldBe` [12, 5, 10]
parserTests = do parserTests = do
splitAtLocTests
layoutChunkerTests layoutChunkerTests
moduleNameTests moduleNameTests
parseStringTests parseStringTests
splitAtLocTests = describe "String Splitting Util" $ do
it "splits properly (example 1)" $
splitAtLoc 2 3 "abc\ndefghi\nxyz\n123" `shouldBe` ("abc\nde","fghi\nxyz\n123")
it "splits properly (example 2)" $
splitAtLoc 2 1 "abc" `shouldBe` ("abc","")
it "splits properly (example 3)" $
splitAtLoc 2 1 "abc\nhello" `shouldBe` ("abc\n","hello")
layoutChunkerTests = describe "Layout Chunk" $ do layoutChunkerTests = describe "Layout Chunk" $ do
it "chunks 'a string'" $ it "chunks 'a string'" $
layoutChunks "a string" `shouldBe` ["a string"] layoutChunks "a string" `shouldBe` ["a string"]
......
...@@ -37,7 +37,7 @@ maintainer: andrew.gibiansky@gmail.com ...@@ -37,7 +37,7 @@ maintainer: andrew.gibiansky@gmail.com
category: Development category: Development
build-type: Custom build-type: Simple
-- Constraint on the version of Cabal needed to build this package. -- Constraint on the version of Cabal needed to build this package.
cabal-version: >=1.8 cabal-version: >=1.8
...@@ -56,6 +56,7 @@ extra-source-files: ...@@ -56,6 +56,7 @@ extra-source-files:
library library
build-depends: base ==4.6.*, build-depends: base ==4.6.*,
ghc-parser,
unix >= 2.6, unix >= 2.6,
hspec, hspec,
zeromq3-haskell ==0.5.*, zeromq3-haskell ==0.5.*,
...@@ -105,15 +106,12 @@ executable IHaskell ...@@ -105,15 +106,12 @@ executable IHaskell
IHaskell.ZeroMQ IHaskell.ZeroMQ
IHaskell.Display IHaskell.Display
IHaskell.Config IHaskell.Config
IHaskell.GHC.HaskellParser
extensions: DoAndIfThenElse extensions: DoAndIfThenElse
-- Other library packages from which modules are imported. -- Other library packages from which modules are imported.
build-depends: base ==4.6.*, build-depends: base ==4.6.*,
ghc-parser,
unix >= 2.6, unix >= 2.6,
hspec, hspec,
zeromq3-haskell ==0.5.*, zeromq3-haskell ==0.5.*,
...@@ -145,6 +143,7 @@ Test-Suite hspec ...@@ -145,6 +143,7 @@ Test-Suite hspec
Ghc-Options: -threaded Ghc-Options: -threaded
Main-Is: Hspec.hs Main-Is: Hspec.hs
build-depends: base ==4.6.*, build-depends: base ==4.6.*,
ghc-parser,
unix >= 2.6, unix >= 2.6,
hspec, hspec,
zeromq3-haskell ==0.5.*, zeromq3-haskell ==0.5.*,
......
...@@ -7,7 +7,6 @@ module IHaskell.Eval.Parser ( ...@@ -7,7 +7,6 @@ module IHaskell.Eval.Parser (
LineNumber, LineNumber,
ColumnNumber, ColumnNumber,
ErrMsg, ErrMsg,
splitAtLoc,
layoutChunks, layoutChunks,
parseDirective, parseDirective,
getModuleName getModuleName
...@@ -31,19 +30,7 @@ import Outputable hiding ((<>)) ...@@ -31,19 +30,7 @@ import Outputable hiding ((<>))
import SrcLoc import SrcLoc
import StringBuffer import StringBuffer
import IHaskell.GHC.HaskellParser import Language.Haskell.GHC.Parser
-- | A line number in an input string.
type LineNumber = Int
-- | A column number in an input string.
type ColumnNumber = Int
-- | An error message string.
type ErrMsg = String
-- | A location in an input string.
data StringLoc = Loc LineNumber ColumnNumber deriving (Show, Eq)
-- | A block of code to be evaluated. -- | A block of code to be evaluated.
-- Each block contains a single element - one declaration, statement, -- Each block contains a single element - one declaration, statement,
...@@ -70,22 +57,14 @@ data DirectiveType ...@@ -70,22 +57,14 @@ data DirectiveType
| GetHelp -- ^ General help via ':?' or ':help'. | GetHelp -- ^ General help via ':?' or ':help'.
deriving (Show, Eq) deriving (Show, Eq)
-- | Output from running a parser.
data ParseOutput a
= Failure ErrMsg StringLoc -- ^ Parser failed with given error message and location.
| Success a (String, String) -- ^ Parser succeeded with an output.
deriving (Eq, Show) -- Auxiliary strings say what part of the
-- input string was used and what
-- part is remaining.
-- | Parse a string into code blocks. -- | Parse a string into code blocks.
parseString :: GhcMonad m => String -> m [CodeBlock] parseString :: GhcMonad m => String -> m [CodeBlock]
parseString codeString = do parseString codeString = do
-- Try to parse this as a single module. -- Try to parse this as a single module.
flags <- getSessionDynFlags flags <- getSessionDynFlags
let output = runParser flags fullModule codeString let output = runParser flags parserModule codeString
case output of case output of
Success {} -> return [Module codeString] Parsed {} -> return [Module codeString]
Failure {} -> Failure {} ->
-- Split input into chunks based on indentation. -- Split input into chunks based on indentation.
let chunks = layoutChunks $ dropComments codeString in let chunks = layoutChunks $ dropComments codeString in
...@@ -132,14 +111,11 @@ parseCodeChunk code startLine = do ...@@ -132,14 +111,11 @@ parseCodeChunk code startLine = do
[] -> return $ bestError $ failures results [] -> return $ bestError $ failures results
-- If one of the parsers succeeded -- If one of the parsers succeeded
(result, used, remaining):_ -> result:_ -> return result
return $ if not . null . strip $ remaining
then ParseError (Loc 1 1) $ "Could not parse " ++ code
else result
where where
successes :: [ParseOutput a] -> [(a, String, String)] successes :: [ParseOutput a] -> [a]
successes [] = [] successes [] = []
successes (Success a (used, rem):rest) = (a, used, rem) : successes rest successes (Parsed a:rest) = a : successes rest
successes (_:rest) = successes rest successes (_:rest) = successes rest
failures :: [ParseOutput a] -> [(ErrMsg, LineNumber, ColumnNumber)] failures :: [ParseOutput a] -> [(ErrMsg, LineNumber, ColumnNumber)]
...@@ -154,7 +130,7 @@ parseCodeChunk code startLine = do ...@@ -154,7 +130,7 @@ parseCodeChunk code startLine = do
compareLoc (_, line1, col1) (_, line2, col2) = compare line1 line2 <> compare col1 col2 compareLoc (_, line1, col1) (_, line2, col2) = compare line1 line2 <> compare col1 col2
statementToExpression :: DynFlags -> ParseOutput CodeBlock -> ParseOutput CodeBlock statementToExpression :: DynFlags -> ParseOutput CodeBlock -> ParseOutput CodeBlock
statementToExpression flags (Success (Statement stmt) strs) = Success result strs statementToExpression flags (Parsed (Statement stmt)) = Parsed result
where result = if isExpr flags stmt where result = if isExpr flags stmt
then Expression stmt then Expression stmt
else Statement stmt else Statement stmt
...@@ -162,27 +138,28 @@ parseCodeChunk code startLine = do ...@@ -162,27 +138,28 @@ parseCodeChunk code startLine = do
-- Check whether a string is a valid expression. -- Check whether a string is a valid expression.
isExpr :: DynFlags -> String -> Bool isExpr :: DynFlags -> String -> Bool
isExpr flags str = case runParser flags fullExpression str of isExpr flags str = case runParser flags parserExpression str of
Failure {} -> False Parsed {} -> True
Success {} -> True _ -> False
tryParser :: String -> (String -> CodeBlock, String -> ParseOutput String) -> ParseOutput CodeBlock tryParser :: String -> (String -> CodeBlock, String -> ParseOutput String) -> ParseOutput CodeBlock
tryParser string (blockType, parser) = case parser string of tryParser string (blockType, parser) = case parser string of
Success res (used, remaining) -> Success (blockType res) (used, remaining) Parsed res -> Parsed (blockType res)
Failure err loc -> Failure err loc Failure err loc -> Failure err loc
parsers :: DynFlags -> [(String -> CodeBlock, String -> ParseOutput String)] parsers :: DynFlags -> [(String -> CodeBlock, String -> ParseOutput String)]
parsers flags = parsers flags =
[ (Import, unparser partialImport) [ (Import, unparser parserImport)
, (TypeSignature, unparser partialTypeSignature) , (TypeSignature, unparser parserTypeSignature)
, (Declaration, unparser partialDeclaration) , (Declaration, unparser parserDeclaration)
, (Statement, unparser partialStatement) , (Statement, unparser parserStatement)
] ]
where where
unparser :: P a -> String -> ParseOutput String unparser :: Parser a -> String -> ParseOutput String
unparser parser code = unparser parser code =
case runParser flags parser code of case runParser flags parser code of
Success out strs -> Success code strs Parsed out -> Parsed code
Partial out strs -> Partial code strs
Failure err loc -> Failure err loc Failure err loc -> Failure err loc
-- | Find consecutive declarations of the same function and join them into -- | Find consecutive declarations of the same function and join them into
...@@ -248,53 +225,6 @@ parseDirective (':':directive) line = case find rightDirective directives of ...@@ -248,53 +225,6 @@ parseDirective (':':directive) line = case find rightDirective directives of
] ]
parseDirective _ _ = error "Directive must start with colon!" parseDirective _ _ = error "Directive must start with colon!"
-- | Run a GHC parser on a string. Return success or failure with
-- associated information for both.
runParser :: DynFlags -> P a -> String -> ParseOutput a
runParser flags parser str =
-- Create an initial parser state.
let filename = "<interactive>"
location = mkRealSrcLoc (mkFastString filename) 1 1
buffer = stringToStringBuffer str
parseState = mkPState flags buffer location in
-- Convert a GHC parser output into our own.
toParseOut $ unP parser parseState
where
toParseOut :: ParseResult a -> ParseOutput a
toParseOut (PFailed span@(RealSrcSpan realSpan) err) =
let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
line = srcLocLine $ realSrcSpanStart realSpan
col = srcLocCol $ realSrcSpanStart realSpan
in Failure errMsg $ Loc line col
toParseOut (PFailed span err) =
let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
in Failure errMsg $ Loc 0 0
toParseOut (POk parseState result) =
let parseEnd = realSrcSpanStart $ last_loc parseState
endLine = srcLocLine parseEnd
endCol = srcLocCol parseEnd
(before, after) = splitAtLoc endLine endCol str in
Success result (before, after)
-- Convert the bag of errors into an error string.
printErrorBag bag = joinLines . map show $ bagToList bag
-- | Split a string at a given line and column. The column is included in
-- the second part of the split.
splitAtLoc :: LineNumber -> ColumnNumber -> String -> (String, String)
splitAtLoc line col string =
if line > length (lines string)
then (string, "")
else (before, after)
where
(beforeLines, afterLines) = splitAt line $ lines string
theLine = last beforeLines
(beforeChars, afterChars) = splitAt (col - 1) theLine
before = joinLines (init beforeLines) ++ '\n' : beforeChars
after = joinLines $ afterChars : afterLines
-- | Split an input string into chunks based on indentation. -- | Split an input string into chunks based on indentation.
-- A chunk is a line and all lines immediately following that are indented -- A chunk is a line and all lines immediately following that are indented
...@@ -357,10 +287,10 @@ dropComments = removeOneLineComments . removeMultilineComments ...@@ -357,10 +287,10 @@ dropComments = removeOneLineComments . removeMultilineComments
getModuleName :: GhcMonad m => String -> m [String] getModuleName :: GhcMonad m => String -> m [String]
getModuleName moduleSrc = do getModuleName moduleSrc = do
flags <- getSessionDynFlags flags <- getSessionDynFlags
let output = runParser flags fullModule moduleSrc let output = runParser flags parserModule moduleSrc
case output of case output of
Failure {} -> error "Module parsing failed." Failure {} -> error "Module parsing failed."
Success mod _ -> Parsed mod ->
case unLoc <$> hsmodName (unLoc mod) of case unLoc <$> hsmodName (unLoc mod) of
Nothing -> error "Module must have a name." Nothing -> error "Module must have a name."
Just name -> return $ split "." $ moduleNameString name Just name -> return $ split "." $ moduleNameString name
import Distribution.Simple import Distribution.Simple
import System.Cmd main = defaultMain
import Control.Monad
main :: IO ()
main = defaultMainWithHooks simpleUserHooks{
preConf = \args confFlags -> do
buildParser
preConf simpleUserHooks args confFlags,
postInst = \args flags descr buildInfo -> do
postInst simpleUserHooks args flags descr buildInfo
system "IHaskell setup"
system "cp ./images/ihaskell-logo.png `ipython locate profile haskell`/static/base/images/ipynblogo.png"
return ()
}
buildParser = system "./build-parser.sh"
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings -- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details -- for details
{-# OPTIONS_GHC -O0 -fno-ignore-interface-pragmas #-}
{- {-
Careful optimisation of the parser: we don't want to throw everything Careful optimisation of the parser: we don't want to throw everything
at it, because that takes too long and doesn't buy much, but we do want at it, because that takes too long and doesn't buy much, but we do want
...@@ -25,7 +24,20 @@ to inline certain key external functions, so we instruct GHC not to ...@@ -25,7 +24,20 @@ to inline certain key external functions, so we instruct GHC not to
throw away inlinings as it would normally do in -O0 mode. throw away inlinings as it would normally do in -O0 mode.
-} -}
module IHaskell.GHC.HaskellParser (fullExpression, fullModule, partialTypeSignature, partialStatement, partialExpression, partialImport, partialDeclaration) where module Language.Haskell.GHC.HappyParser (
fullModule,
fullTypeSignature,
fullStatement,
fullExpression,
fullImport,
fullDeclaration,
partialModule,
partialTypeSignature,
partialStatement,
partialExpression,
partialImport,
partialDeclaration
) where
import HsSyn import HsSyn
import RdrHsSyn import RdrHsSyn
...@@ -355,21 +367,22 @@ TH_QQUASIQUOTE { L _ (ITqQuasiQuote _) } ...@@ -355,21 +367,22 @@ TH_QQUASIQUOTE { L _ (ITqQuasiQuote _) }
%monad { P } { >>= } { return } %monad { P } { >>= } { return }
%lexer { lexer } { L _ ITeof } %lexer { lexer } { L _ ITeof }
%name parseModule module
%name parseStmt maybe_stmt
%name parseIdentifier identifier
%name parseType ctype
%partial parseHeader header
%tokentype { (Located Token) } %tokentype { (Located Token) }
--- Partial parsers for IHaskell --- Parsers for IHaskell
%partial partialStatement stmt %partial partialStatement stmt
%partial partialImport importdecl %partial partialImport importdecl
%partial partialDeclaration topdecl %partial partialDeclaration topdecl
%partial partialExpression exp
%partial partialTypeSignature signature %partial partialTypeSignature signature
%name fullModule namedModule %partial partialModule namedModule
%partial partialExpression exp
%name fullStatement stmt
%name fullImport importdecl
%name fullDeclaration topdecl
%name fullExpression exp %name fullExpression exp
%name fullTypeSignature signature
%name fullModule namedModule
%% %%
signature :: { LHsDecl RdrName } signature :: { LHsDecl RdrName }
......
The MIT License (MIT)
Copyright (c) 2013 Andrew Gibiansky
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
This diff is collapsed.
module Language.Haskell.GHC.Parser (
-- Parser handling
runParser,
LineNumber,
ColumnNumber,
ErrMsg,
StringLoc(..),
ParseOutput(..),
Parser,
-- Different parsers
parserStatement,
parserImport,
parserDeclaration,
parserTypeSignature,
parserModule,
parserExpression,
partialStatement,
partialImport,
partialDeclaration,
partialTypeSignature,
partialModule,
partialExpression,
) where
import Data.List (intercalate)
import Bag
import ErrUtils hiding (ErrMsg)
import FastString
import GHC
import Lexer
import OrdList
import Outputable hiding ((<>))
import SrcLoc
import StringBuffer
import qualified Language.Haskell.GHC.HappyParser as Parse
-- | A line number in an input string.
type LineNumber = Int
-- | A column number in an input string.
type ColumnNumber = Int
-- | An error message string.
type ErrMsg = String
-- | A location in an input string.
data StringLoc = Loc LineNumber ColumnNumber deriving (Show, Eq)
-- | Output from running a parser.
data ParseOutput a
= Failure ErrMsg StringLoc -- ^ Parser failed with given error message and location.
| Parsed a -- ^ Parser succeeded with an output.
| Partial a (String, String) -- ^ Partial parser succeeded with an output.
deriving (Eq, Show) -- Auxiliary strings say what part of the
-- input string was used and what
-- part is remaining.
data ParserType = FullParser | PartialParser
data Parser a = Parser ParserType (P a)
-- Our parsers.
parserStatement = Parser FullParser Parse.fullStatement
parserImport = Parser FullParser Parse.fullImport
parserDeclaration = Parser FullParser Parse.fullDeclaration
parserExpression = Parser FullParser Parse.fullExpression
parserTypeSignature = Parser FullParser Parse.fullTypeSignature
parserModule = Parser FullParser Parse.fullModule
partialStatement = Parser PartialParser Parse.partialStatement
partialImport = Parser PartialParser Parse.partialImport
partialDeclaration = Parser PartialParser Parse.partialDeclaration
partialExpression = Parser PartialParser Parse.partialExpression
partialTypeSignature = Parser PartialParser Parse.partialTypeSignature
partialModule = Parser PartialParser Parse.partialModule
-- | Run a GHC parser on a string. Return success or failure with
-- associated information for both.
runParser :: DynFlags -> Parser a -> String -> ParseOutput a
runParser flags (Parser parserType parser) str =
-- Create an initial parser state.
let filename = "<interactive>"
location = mkRealSrcLoc (mkFastString filename) 1 1
buffer = stringToStringBuffer str
parseState = mkPState flags buffer location in
-- Convert a GHC parser output into our own.
toParseOut $ unP parser parseState
where
toParseOut :: ParseResult a -> ParseOutput a
toParseOut (PFailed span@(RealSrcSpan realSpan) err) =
let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
line = srcLocLine $ realSrcSpanStart realSpan
col = srcLocCol $ realSrcSpanStart realSpan
in Failure errMsg $ Loc line col
toParseOut (PFailed span err) =
let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
in Failure errMsg $ Loc 0 0
toParseOut (POk parseState result) =
let parseEnd = realSrcSpanStart $ last_loc parseState
endLine = srcLocLine parseEnd
endCol = srcLocCol parseEnd
(before, after) = splitAtLoc endLine endCol str in
case parserType of
PartialParser -> Partial result (before, after)
FullParser -> Parsed result
-- Convert the bag of errors into an error string.
printErrorBag bag = joinLines . map show $ bagToList bag
-- | Split a string at a given line and column. The column is included in
-- the second part of the split.
splitAtLoc :: LineNumber -> ColumnNumber -> String -> (String, String)
splitAtLoc line col string =
if line > length (lines string)
then (string, "")
else (before, after)
where
(beforeLines, afterLines) = splitAt line $ lines string
theLine = last beforeLines
(beforeChars, afterChars) = splitAt (col - 1) theLine
before = joinLines (init beforeLines) ++ '\n' : beforeChars
after = joinLines $ afterChars : afterLines
-- Not the same as 'unlines', due to trailing \n
joinLines :: [String] -> String
joinLines = intercalate "\n"
import Distribution.Simple
import System.Cmd
main = defaultMainWithHooks simpleUserHooks{
preConf = \args confFlags -> do
system "./build-parser.sh"
preConf simpleUserHooks args confFlags
}
...@@ -9,5 +9,5 @@ happy Parser.y ...@@ -9,5 +9,5 @@ happy Parser.y
rm Parser.y rm Parser.y
# Move output Haskell to source directory. # Move output Haskell to source directory.
mkdir -p IHaskell/GHC mkdir -p Language/Haskell/GHC
mv Parser.hs IHaskell/GHC/HaskellParser.hs mv Parser.hs Language/Haskell/GHC/HappyParser.hs
-- Initial ghc-parser.cabal generated by cabal init. For further
-- documentation, see http://haskell.org/cabal/users-guide/
name: ghc-parser
version: 0.1.0.0
synopsis: Haskell source parser from GHC.
-- description:
homepage: https://github.com/gibiansky/IHaskell
license: MIT
license-file: LICENSE
author: Andrew Gibiansky
maintainer: andrew.gibiansky@gmail.com
-- copyright:
category: Language
build-type: Custom
-- extra-source-files:
cabal-version: >=1.10
library
exposed-modules: Language.Haskell.GHC.Parser,
Language.Haskell.GHC.HappyParser
-- other-modules:
-- other-extensions:
build-depends: base >=4.6 && <4.7, ghc == 7.6.3
-- hs-source-dirs:
default-language: Haskell2010
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment