Commit bfb0af88 authored by Andrew Gibiansky's avatar Andrew Gibiansky

Moving parsing preprocessing into ghc-parser

parent 806aa5cf
{-# LANGUAGE DeriveFunctor #-}
module Language.Haskell.GHC.Parser ( module Language.Haskell.GHC.Parser (
-- Parser handling -- Parser handling
runParser, runParser,
...@@ -7,6 +8,7 @@ module Language.Haskell.GHC.Parser ( ...@@ -7,6 +8,7 @@ module Language.Haskell.GHC.Parser (
StringLoc(..), StringLoc(..),
ParseOutput(..), ParseOutput(..),
Parser, Parser,
Located(..),
-- Different parsers -- Different parsers
parserStatement, parserStatement,
...@@ -21,18 +23,22 @@ module Language.Haskell.GHC.Parser ( ...@@ -21,18 +23,22 @@ module Language.Haskell.GHC.Parser (
partialTypeSignature, partialTypeSignature,
partialModule, partialModule,
partialExpression, partialExpression,
-- Haskell string preprocessing.
removeComments,
layoutChunks,
) where ) where
import Data.List (intercalate) import Data.List (intercalate, findIndex)
import Bag import Bag
import ErrUtils hiding (ErrMsg) import ErrUtils hiding (ErrMsg)
import FastString import FastString
import GHC import GHC hiding (Located)
import Lexer import Lexer
import OrdList import OrdList
import Outputable hiding ((<>)) import Outputable hiding ((<>))
import SrcLoc import SrcLoc hiding (Located)
import StringBuffer import StringBuffer
import qualified Language.Haskell.GHC.HappyParser as Parse import qualified Language.Haskell.GHC.HappyParser as Parse
...@@ -57,6 +63,13 @@ data ParseOutput a ...@@ -57,6 +63,13 @@ data ParseOutput a
deriving (Eq, Show) -- Auxiliary strings say what part of the deriving (Eq, Show) -- Auxiliary strings say what part of the
-- input string was used and what -- input string was used and what
-- part is remaining. -- part is remaining.
--
-- | Store locations along with a value.
data Located a = Located {
line :: LineNumber, -- Where this element is located.
unloc :: a -- Located element.
} deriving (Eq, Show, Functor)
data ParserType = FullParser | PartialParser data ParserType = FullParser | PartialParser
data Parser a = Parser ParserType (P a) data Parser a = Parser ParserType (P a)
...@@ -128,3 +141,79 @@ splitAtLoc line col string = ...@@ -128,3 +141,79 @@ splitAtLoc line col string =
-- Not the same as 'unlines', due to trailing \n -- Not the same as 'unlines', due to trailing \n
joinLines :: [String] -> String joinLines :: [String] -> String
joinLines = intercalate "\n" joinLines = intercalate "\n"
-- | Split an input string into chunks based on indentation.
-- A chunk is a line and all lines immediately following that are indented
-- beyond the indentation of the first line. This parses Haskell layout
-- rules properly, and allows using multiline expressions via indentation.
layoutChunks :: String -> [Located String]
layoutChunks = go 1
where
go :: LineNumber -> String -> [Located String]
go line = filter (not . null . unloc) . map (fmap strip) . layoutLines line . lines
-- drop spaces on left and right
strip = dropRight . dropLeft
where
dropLeft = dropWhile (`elem` whitespace)
dropRight = reverse . dropWhile (`elem` whitespace) . reverse
whitespace = " \t\n"
layoutLines :: LineNumber -> [String] -> [Located String]
-- Empty string case. If there's no input, output is empty.
layoutLines _ [] = []
-- Use the indent of the first line to find the end of the first block.
layoutLines lineIdx all@(firstLine:rest) =
let firstIndent = indentLevel firstLine
blockEnded line = indentLevel line <= firstIndent in
case findIndex blockEnded rest of
-- If the first block doesn't end, return the whole string, since
-- that just means the block takes up the entire string.
Nothing -> [Located lineIdx $ intercalate "\n" all]
-- We found the end of the block. Split this bit out and recurse.
Just idx ->
let (before, after) = splitAt idx rest in
Located lineIdx (joinLines $ firstLine:before) : go (lineIdx + idx + 1) (joinLines after)
-- Compute indent level of a string as number of leading spaces.
indentLevel :: String -> Int
indentLevel (' ':str) = 1 + indentLevel str
-- Count a tab as two spaces.
indentLevel ('\t':str) = 2 + indentLevel str
-- Count empty lines as a large indent level, so they're always with the previous expression.
indentLevel "" = 100000
indentLevel _ = 0
-- | Drop comments from Haskell source.
-- Simply gets rid of them, does not replace them in any way.
removeComments :: String -> String
removeComments = removeOneLineComments . removeMultilineComments 0
where
removeOneLineComments str =
case str of
-- Don't remove comments after cmd directives
':':'!':remaining ->":!" ++ takeLine remaining ++ dropLine remaining
'-':'-':remaining -> dropLine remaining
x:xs -> x:removeOneLineComments xs
[] -> []
where
dropLine = removeOneLineComments . dropWhile (/= '\n')
takeLine = takeWhile (/= '\n')
removeMultilineComments nesting str =
case str of
'{':'-':remaining -> removeMultilineComments (nesting + 1) remaining
'-':'}':remaining ->
if nesting > 0
then removeMultilineComments (nesting - 1) remaining
else '-':'}':removeMultilineComments nesting remaining
x:xs ->
if nesting > 0
then removeMultilineComments nesting xs
else x:removeMultilineComments nesting xs
[] -> []
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
-- documentation, see http://haskell.org/cabal/users-guide/ -- documentation, see http://haskell.org/cabal/users-guide/
name: ghc-parser name: ghc-parser
version: 0.1.0.0 version: 0.1.1.0
synopsis: Haskell source parser from GHC. synopsis: Haskell source parser from GHC.
-- description: -- description:
homepage: https://github.com/gibiansky/IHaskell homepage: https://github.com/gibiansky/IHaskell
......
...@@ -64,7 +64,7 @@ library ...@@ -64,7 +64,7 @@ library
directory -any, directory -any,
filepath -any, filepath -any,
ghc ==7.6.*, ghc ==7.6.*,
ghc-parser -any, ghc-parser >=0.1.1,
ghc-paths ==0.1.*, ghc-paths ==0.1.*,
haskeline -any, haskeline -any,
here -any, here -any,
......
...@@ -17,7 +17,7 @@ import Data.Monoid ...@@ -17,7 +17,7 @@ import Data.Monoid
import IHaskell.Types import IHaskell.Types
import IHaskell.Display import IHaskell.Display
import IHaskell.IPython import IHaskell.IPython
import IHaskell.Eval.Parser import IHaskell.Eval.Parser hiding (line)
data LintSeverity = LintWarning | LintError deriving (Eq, Show) data LintSeverity = LintWarning | LintError deriving (Eq, Show)
......
...@@ -50,11 +50,6 @@ data CodeBlock ...@@ -50,11 +50,6 @@ data CodeBlock
| ParseError StringLoc ErrMsg -- ^ An error indicating that parsing the code block failed. | ParseError StringLoc ErrMsg -- ^ An error indicating that parsing the code block failed.
deriving (Show, Eq) deriving (Show, Eq)
-- | Store locations along with a value.
data Located a = Located LineNumber a deriving (Eq, Show)
instance Functor Located where
fmap f (Located line a) = Located line $ f a
-- | Directive types. Each directive is associated with a string in the -- | Directive types. Each directive is associated with a string in the
-- directive code block. -- directive code block.
data DirectiveType data DirectiveType
...@@ -70,14 +65,6 @@ data DirectiveType ...@@ -70,14 +65,6 @@ data DirectiveType
| GetDoc -- ^ Get documentation for an identifier via Hoogle. | GetDoc -- ^ Get documentation for an identifier via Hoogle.
deriving (Show, Eq) deriving (Show, Eq)
-- | Unlocate something - drop the position.
unloc :: Located a -> a
unloc (Located _ a) = a
-- | Get the line number of a located element.
line :: Located a -> LineNumber
line (Located l _) = l
-- | Parse a string into code blocks. -- | Parse a string into code blocks.
parseString :: GhcMonad m => String -> m [Located CodeBlock] parseString :: GhcMonad m => String -> m [Located CodeBlock]
parseString codeString = do parseString codeString = do
...@@ -88,7 +75,7 @@ parseString codeString = do ...@@ -88,7 +75,7 @@ parseString codeString = do
Parsed {} -> return [Located 1 $ Module codeString] Parsed {} -> return [Located 1 $ Module codeString]
Failure {} -> do Failure {} -> do
-- Split input into chunks based on indentation. -- Split input into chunks based on indentation.
let chunks = layoutChunks $ dropComments codeString let chunks = layoutChunks $ removeComments codeString
result <- joinFunctions <$> processChunks [] chunks result <- joinFunctions <$> processChunks [] chunks
-- Return to previous flags. When parsing, flags can be set to make -- Return to previous flags. When parsing, flags can be set to make
...@@ -268,69 +255,6 @@ parseDirective (':':directive) line = case find rightDirective directives of ...@@ -268,69 +255,6 @@ parseDirective (':':directive) line = case find rightDirective directives of
] ]
parseDirective _ _ = error "Directive must start with colon!" parseDirective _ _ = error "Directive must start with colon!"
-- | Split an input string into chunks based on indentation.
-- A chunk is a line and all lines immediately following that are indented
-- beyond the indentation of the first line. This parses Haskell layout
-- rules properly, and allows using multiline expressions via indentation.
layoutChunks :: String -> [Located String]
layoutChunks = go 1
where
go :: LineNumber -> String -> [Located String]
go line = filter (not . null . unloc) . map (fmap strip) . layoutLines line . lines
layoutLines :: LineNumber -> [String] -> [Located String]
-- Empty string case. If there's no input, output is empty.
layoutLines _ [] = []
-- Use the indent of the first line to find the end of the first block.
layoutLines lineIdx all@(firstLine:rest) =
let firstIndent = indentLevel firstLine
blockEnded line = indentLevel line <= firstIndent in
case findIndex blockEnded rest of
-- If the first block doesn't end, return the whole string, since
-- that just means the block takes up the entire string.
Nothing -> [Located lineIdx $ intercalate "\n" all]
-- We found the end of the block. Split this bit out and recurse.
Just idx ->
let (before, after) = splitAt idx rest in
Located lineIdx (joinLines $ firstLine:before) : go (lineIdx + idx + 1) (joinLines after)
-- Compute indent level of a string as number of leading spaces.
indentLevel :: String -> Int
indentLevel (' ':str) = 1 + indentLevel str
-- Count a tab as two spaces.
indentLevel ('\t':str) = 2 + indentLevel str
-- Count empty lines as a large indent level, so they're always with the previous expression.
indentLevel "" = 100000
indentLevel _ = 0
-- Not the same as 'unlines', due to trailing \n
joinLines :: [String] -> String
joinLines = intercalate "\n"
-- | Drop comments from Haskell source.
dropComments :: String -> String
dropComments = removeOneLineComments . removeMultilineComments
where
-- Don't remove comments after cmd directives
removeOneLineComments (':':'!':remaining) = ":!" ++ takeWhile (/= '\n') remaining ++
removeOneLineComments (dropWhile (/= '\n') remaining)
removeOneLineComments ('-':'-':remaining) = removeOneLineComments (dropWhile (/= '\n') remaining)
removeOneLineComments (x:xs) = x:removeOneLineComments xs
removeOneLineComments x = x
removeMultilineComments ('{':'-':remaining) =
case subIndex "-}" remaining of
Nothing -> ""
Just idx -> removeMultilineComments $ drop (2 + idx) remaining
removeMultilineComments (x:xs) = x:removeMultilineComments xs
removeMultilineComments x = x
-- | Parse a module and return the name declared in the 'module X where' -- | Parse a module and return the name declared in the 'module X where'
-- line. That line is required, and if it does not exist, this will error. -- line. That line is required, and if it does not exist, this will error.
-- Names with periods in them are returned piece y piece. -- Names with periods in them are returned piece y piece.
...@@ -344,3 +268,7 @@ getModuleName moduleSrc = do ...@@ -344,3 +268,7 @@ getModuleName moduleSrc = do
case unLoc <$> hsmodName (unLoc mod) of case unLoc <$> hsmodName (unLoc mod) of
Nothing -> error "Module must have a name." Nothing -> error "Module must have a name."
Just name -> return $ split "." $ moduleNameString name Just name -> return $ split "." $ moduleNameString name
-- Not the same as 'unlines', due to trailing \n
joinLines :: [String] -> String
joinLines = intercalate "\n"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment