Commit b82c1577 authored by Andrew Gibiansky's avatar Andrew Gibiansky

Separated Haskell parser into package, closes #63

parent 9158525f
......@@ -202,20 +202,10 @@ ipythonTests = do
parseVersion "12.5.10" `shouldBe` [12, 5, 10]
parserTests = do
splitAtLocTests
layoutChunkerTests
moduleNameTests
parseStringTests
splitAtLocTests = describe "String Splitting Util" $ do
it "splits properly (example 1)" $
splitAtLoc 2 3 "abc\ndefghi\nxyz\n123" `shouldBe` ("abc\nde","fghi\nxyz\n123")
it "splits properly (example 2)" $
splitAtLoc 2 1 "abc" `shouldBe` ("abc","")
it "splits properly (example 3)" $
splitAtLoc 2 1 "abc\nhello" `shouldBe` ("abc\n","hello")
layoutChunkerTests = describe "Layout Chunk" $ do
it "chunks 'a string'" $
layoutChunks "a string" `shouldBe` ["a string"]
......
......@@ -37,7 +37,7 @@ maintainer: andrew.gibiansky@gmail.com
category: Development
build-type: Custom
build-type: Simple
-- Constraint on the version of Cabal needed to build this package.
cabal-version: >=1.8
......@@ -56,6 +56,7 @@ extra-source-files:
library
build-depends: base ==4.6.*,
ghc-parser,
unix >= 2.6,
hspec,
zeromq3-haskell ==0.5.*,
......@@ -105,15 +106,12 @@ executable IHaskell
IHaskell.ZeroMQ
IHaskell.Display
IHaskell.Config
IHaskell.GHC.HaskellParser
extensions: DoAndIfThenElse
-- Other library packages from which modules are imported.
build-depends: base ==4.6.*,
ghc-parser,
unix >= 2.6,
hspec,
zeromq3-haskell ==0.5.*,
......@@ -145,6 +143,7 @@ Test-Suite hspec
Ghc-Options: -threaded
Main-Is: Hspec.hs
build-depends: base ==4.6.*,
ghc-parser,
unix >= 2.6,
hspec,
zeromq3-haskell ==0.5.*,
......
......@@ -7,7 +7,6 @@ module IHaskell.Eval.Parser (
LineNumber,
ColumnNumber,
ErrMsg,
splitAtLoc,
layoutChunks,
parseDirective,
getModuleName
......@@ -31,19 +30,7 @@ import Outputable hiding ((<>))
import SrcLoc
import StringBuffer
import IHaskell.GHC.HaskellParser
-- | A line number in an input string.
type LineNumber = Int
-- | A column number in an input string.
type ColumnNumber = Int
-- | An error message string.
type ErrMsg = String
-- | A location in an input string.
data StringLoc = Loc LineNumber ColumnNumber deriving (Show, Eq)
import Language.Haskell.GHC.Parser
-- | A block of code to be evaluated.
-- Each block contains a single element - one declaration, statement,
......@@ -70,22 +57,14 @@ data DirectiveType
| GetHelp -- ^ General help via ':?' or ':help'.
deriving (Show, Eq)
-- | Output from running a parser.
data ParseOutput a
= Failure ErrMsg StringLoc -- ^ Parser failed with given error message and location.
| Success a (String, String) -- ^ Parser succeeded with an output.
deriving (Eq, Show) -- Auxiliary strings say what part of the
-- input string was used and what
-- part is remaining.
-- | Parse a string into code blocks.
parseString :: GhcMonad m => String -> m [CodeBlock]
parseString codeString = do
-- Try to parse this as a single module.
flags <- getSessionDynFlags
let output = runParser flags fullModule codeString
let output = runParser flags parserModule codeString
case output of
Success {} -> return [Module codeString]
Parsed {} -> return [Module codeString]
Failure {} ->
-- Split input into chunks based on indentation.
let chunks = layoutChunks $ dropComments codeString in
......@@ -132,14 +111,11 @@ parseCodeChunk code startLine = do
[] -> return $ bestError $ failures results
-- If one of the parsers succeeded
(result, used, remaining):_ ->
return $ if not . null . strip $ remaining
then ParseError (Loc 1 1) $ "Could not parse " ++ code
else result
result:_ -> return result
where
successes :: [ParseOutput a] -> [(a, String, String)]
successes :: [ParseOutput a] -> [a]
successes [] = []
successes (Success a (used, rem):rest) = (a, used, rem) : successes rest
successes (Parsed a:rest) = a : successes rest
successes (_:rest) = successes rest
failures :: [ParseOutput a] -> [(ErrMsg, LineNumber, ColumnNumber)]
......@@ -154,7 +130,7 @@ parseCodeChunk code startLine = do
compareLoc (_, line1, col1) (_, line2, col2) = compare line1 line2 <> compare col1 col2
statementToExpression :: DynFlags -> ParseOutput CodeBlock -> ParseOutput CodeBlock
statementToExpression flags (Success (Statement stmt) strs) = Success result strs
statementToExpression flags (Parsed (Statement stmt)) = Parsed result
where result = if isExpr flags stmt
then Expression stmt
else Statement stmt
......@@ -162,27 +138,28 @@ parseCodeChunk code startLine = do
-- Check whether a string is a valid expression.
isExpr :: DynFlags -> String -> Bool
isExpr flags str = case runParser flags fullExpression str of
Failure {} -> False
Success {} -> True
isExpr flags str = case runParser flags parserExpression str of
Parsed {} -> True
_ -> False
tryParser :: String -> (String -> CodeBlock, String -> ParseOutput String) -> ParseOutput CodeBlock
tryParser string (blockType, parser) = case parser string of
Success res (used, remaining) -> Success (blockType res) (used, remaining)
Parsed res -> Parsed (blockType res)
Failure err loc -> Failure err loc
parsers :: DynFlags -> [(String -> CodeBlock, String -> ParseOutput String)]
parsers flags =
[ (Import, unparser partialImport)
, (TypeSignature, unparser partialTypeSignature)
, (Declaration, unparser partialDeclaration)
, (Statement, unparser partialStatement)
[ (Import, unparser parserImport)
, (TypeSignature, unparser parserTypeSignature)
, (Declaration, unparser parserDeclaration)
, (Statement, unparser parserStatement)
]
where
unparser :: P a -> String -> ParseOutput String
unparser :: Parser a -> String -> ParseOutput String
unparser parser code =
case runParser flags parser code of
Success out strs -> Success code strs
Parsed out -> Parsed code
Partial out strs -> Partial code strs
Failure err loc -> Failure err loc
-- | Find consecutive declarations of the same function and join them into
......@@ -248,53 +225,6 @@ parseDirective (':':directive) line = case find rightDirective directives of
]
parseDirective _ _ = error "Directive must start with colon!"
-- | Run a GHC parser on a string. Return success or failure with
-- associated information for both.
runParser :: DynFlags -> P a -> String -> ParseOutput a
runParser flags parser str =
-- Create an initial parser state.
let filename = "<interactive>"
location = mkRealSrcLoc (mkFastString filename) 1 1
buffer = stringToStringBuffer str
parseState = mkPState flags buffer location in
-- Convert a GHC parser output into our own.
toParseOut $ unP parser parseState
where
toParseOut :: ParseResult a -> ParseOutput a
toParseOut (PFailed span@(RealSrcSpan realSpan) err) =
let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
line = srcLocLine $ realSrcSpanStart realSpan
col = srcLocCol $ realSrcSpanStart realSpan
in Failure errMsg $ Loc line col
toParseOut (PFailed span err) =
let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
in Failure errMsg $ Loc 0 0
toParseOut (POk parseState result) =
let parseEnd = realSrcSpanStart $ last_loc parseState
endLine = srcLocLine parseEnd
endCol = srcLocCol parseEnd
(before, after) = splitAtLoc endLine endCol str in
Success result (before, after)
-- Convert the bag of errors into an error string.
printErrorBag bag = joinLines . map show $ bagToList bag
-- | Split a string at a given line and column. The column is included in
-- the second part of the split.
splitAtLoc :: LineNumber -> ColumnNumber -> String -> (String, String)
splitAtLoc line col string =
if line > length (lines string)
then (string, "")
else (before, after)
where
(beforeLines, afterLines) = splitAt line $ lines string
theLine = last beforeLines
(beforeChars, afterChars) = splitAt (col - 1) theLine
before = joinLines (init beforeLines) ++ '\n' : beforeChars
after = joinLines $ afterChars : afterLines
-- | Split an input string into chunks based on indentation.
-- A chunk is a line and all lines immediately following that are indented
......@@ -357,10 +287,10 @@ dropComments = removeOneLineComments . removeMultilineComments
getModuleName :: GhcMonad m => String -> m [String]
getModuleName moduleSrc = do
flags <- getSessionDynFlags
let output = runParser flags fullModule moduleSrc
let output = runParser flags parserModule moduleSrc
case output of
Failure {} -> error "Module parsing failed."
Success mod _ ->
Parsed mod ->
case unLoc <$> hsmodName (unLoc mod) of
Nothing -> error "Module must have a name."
Just name -> return $ split "." $ moduleNameString name
import Distribution.Simple
import System.Cmd
import Control.Monad
main :: IO ()
main = defaultMainWithHooks simpleUserHooks{
preConf = \args confFlags -> do
buildParser
preConf simpleUserHooks args confFlags,
postInst = \args flags descr buildInfo -> do
postInst simpleUserHooks args flags descr buildInfo
system "IHaskell setup"
system "cp ./images/ihaskell-logo.png `ipython locate profile haskell`/static/base/images/ipynblogo.png"
return ()
}
buildParser = system "./build-parser.sh"
main = defaultMain
......@@ -17,7 +17,6 @@
-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details
{-# OPTIONS_GHC -O0 -fno-ignore-interface-pragmas #-}
{-
Careful optimisation of the parser: we don't want to throw everything
at it, because that takes too long and doesn't buy much, but we do want
......@@ -25,7 +24,20 @@ to inline certain key external functions, so we instruct GHC not to
throw away inlinings as it would normally do in -O0 mode.
-}
module IHaskell.GHC.HaskellParser (fullExpression, fullModule, partialTypeSignature, partialStatement, partialExpression, partialImport, partialDeclaration) where
module Language.Haskell.GHC.HappyParser (
fullModule,
fullTypeSignature,
fullStatement,
fullExpression,
fullImport,
fullDeclaration,
partialModule,
partialTypeSignature,
partialStatement,
partialExpression,
partialImport,
partialDeclaration
) where
import HsSyn
import RdrHsSyn
......@@ -355,21 +367,22 @@ TH_QQUASIQUOTE { L _ (ITqQuasiQuote _) }
%monad { P } { >>= } { return }
%lexer { lexer } { L _ ITeof }
%name parseModule module
%name parseStmt maybe_stmt
%name parseIdentifier identifier
%name parseType ctype
%partial parseHeader header
%tokentype { (Located Token) }
--- Partial parsers for IHaskell
--- Parsers for IHaskell
%partial partialStatement stmt
%partial partialImport importdecl
%partial partialDeclaration topdecl
%partial partialExpression exp
%partial partialTypeSignature signature
%name fullModule namedModule
%partial partialModule namedModule
%partial partialExpression exp
%name fullStatement stmt
%name fullImport importdecl
%name fullDeclaration topdecl
%name fullExpression exp
%name fullTypeSignature signature
%name fullModule namedModule
%%
signature :: { LHsDecl RdrName }
......
The MIT License (MIT)
Copyright (c) 2013 Andrew Gibiansky
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
This diff is collapsed.
module Language.Haskell.GHC.Parser (
-- Parser handling
runParser,
LineNumber,
ColumnNumber,
ErrMsg,
StringLoc(..),
ParseOutput(..),
Parser,
-- Different parsers
parserStatement,
parserImport,
parserDeclaration,
parserTypeSignature,
parserModule,
parserExpression,
partialStatement,
partialImport,
partialDeclaration,
partialTypeSignature,
partialModule,
partialExpression,
) where
import Data.List (intercalate)
import Bag
import ErrUtils hiding (ErrMsg)
import FastString
import GHC
import Lexer
import OrdList
import Outputable hiding ((<>))
import SrcLoc
import StringBuffer
import qualified Language.Haskell.GHC.HappyParser as Parse
-- | A line number in an input string.
type LineNumber = Int
-- | A column number in an input string.
type ColumnNumber = Int
-- | An error message string.
type ErrMsg = String
-- | A location in an input string.
data StringLoc = Loc LineNumber ColumnNumber deriving (Show, Eq)
-- | Output from running a parser.
data ParseOutput a
= Failure ErrMsg StringLoc -- ^ Parser failed with given error message and location.
| Parsed a -- ^ Parser succeeded with an output.
| Partial a (String, String) -- ^ Partial parser succeeded with an output.
deriving (Eq, Show) -- Auxiliary strings say what part of the
-- input string was used and what
-- part is remaining.
data ParserType = FullParser | PartialParser
data Parser a = Parser ParserType (P a)
-- Our parsers.
parserStatement = Parser FullParser Parse.fullStatement
parserImport = Parser FullParser Parse.fullImport
parserDeclaration = Parser FullParser Parse.fullDeclaration
parserExpression = Parser FullParser Parse.fullExpression
parserTypeSignature = Parser FullParser Parse.fullTypeSignature
parserModule = Parser FullParser Parse.fullModule
partialStatement = Parser PartialParser Parse.partialStatement
partialImport = Parser PartialParser Parse.partialImport
partialDeclaration = Parser PartialParser Parse.partialDeclaration
partialExpression = Parser PartialParser Parse.partialExpression
partialTypeSignature = Parser PartialParser Parse.partialTypeSignature
partialModule = Parser PartialParser Parse.partialModule
-- | Run a GHC parser on a string. Return success or failure with
-- associated information for both.
runParser :: DynFlags -> Parser a -> String -> ParseOutput a
runParser flags (Parser parserType parser) str =
-- Create an initial parser state.
let filename = "<interactive>"
location = mkRealSrcLoc (mkFastString filename) 1 1
buffer = stringToStringBuffer str
parseState = mkPState flags buffer location in
-- Convert a GHC parser output into our own.
toParseOut $ unP parser parseState
where
toParseOut :: ParseResult a -> ParseOutput a
toParseOut (PFailed span@(RealSrcSpan realSpan) err) =
let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
line = srcLocLine $ realSrcSpanStart realSpan
col = srcLocCol $ realSrcSpanStart realSpan
in Failure errMsg $ Loc line col
toParseOut (PFailed span err) =
let errMsg = printErrorBag $ unitBag $ mkPlainErrMsg flags span err
in Failure errMsg $ Loc 0 0
toParseOut (POk parseState result) =
let parseEnd = realSrcSpanStart $ last_loc parseState
endLine = srcLocLine parseEnd
endCol = srcLocCol parseEnd
(before, after) = splitAtLoc endLine endCol str in
case parserType of
PartialParser -> Partial result (before, after)
FullParser -> Parsed result
-- Convert the bag of errors into an error string.
printErrorBag bag = joinLines . map show $ bagToList bag
-- | Split a string at a given line and column. The column is included in
-- the second part of the split.
splitAtLoc :: LineNumber -> ColumnNumber -> String -> (String, String)
splitAtLoc line col string =
if line > length (lines string)
then (string, "")
else (before, after)
where
(beforeLines, afterLines) = splitAt line $ lines string
theLine = last beforeLines
(beforeChars, afterChars) = splitAt (col - 1) theLine
before = joinLines (init beforeLines) ++ '\n' : beforeChars
after = joinLines $ afterChars : afterLines
-- Not the same as 'unlines', due to trailing \n
joinLines :: [String] -> String
joinLines = intercalate "\n"
import Distribution.Simple
import System.Cmd
main = defaultMainWithHooks simpleUserHooks{
preConf = \args confFlags -> do
system "./build-parser.sh"
preConf simpleUserHooks args confFlags
}
......@@ -9,5 +9,5 @@ happy Parser.y
rm Parser.y
# Move output Haskell to source directory.
mkdir -p IHaskell/GHC
mv Parser.hs IHaskell/GHC/HaskellParser.hs
mkdir -p Language/Haskell/GHC
mv Parser.hs Language/Haskell/GHC/HappyParser.hs
-- Initial ghc-parser.cabal generated by cabal init. For further
-- documentation, see http://haskell.org/cabal/users-guide/
name: ghc-parser
version: 0.1.0.0
synopsis: Haskell source parser from GHC.
-- description:
homepage: https://github.com/gibiansky/IHaskell
license: MIT
license-file: LICENSE
author: Andrew Gibiansky
maintainer: andrew.gibiansky@gmail.com
-- copyright:
category: Language
build-type: Custom
-- extra-source-files:
cabal-version: >=1.10
library
exposed-modules: Language.Haskell.GHC.Parser,
Language.Haskell.GHC.HappyParser
-- other-modules:
-- other-extensions:
build-depends: base >=4.6 && <4.7, ghc == 7.6.3
-- hs-source-dirs:
default-language: Haskell2010
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment