Commit bfb0af88 authored by Andrew Gibiansky's avatar Andrew Gibiansky

Moving parsing preprocessing into ghc-parser

parent 806aa5cf
{-# LANGUAGE DeriveFunctor #-}
module Language.Haskell.GHC.Parser (
-- Parser handling
runParser,
......@@ -7,6 +8,7 @@ module Language.Haskell.GHC.Parser (
StringLoc(..),
ParseOutput(..),
Parser,
Located(..),
-- Different parsers
parserStatement,
......@@ -21,18 +23,22 @@ module Language.Haskell.GHC.Parser (
partialTypeSignature,
partialModule,
partialExpression,
-- Haskell string preprocessing.
removeComments,
layoutChunks,
) where
import Data.List (intercalate)
import Data.List (intercalate, findIndex)
import Bag
import ErrUtils hiding (ErrMsg)
import FastString
import GHC
import GHC hiding (Located)
import Lexer
import OrdList
import Outputable hiding ((<>))
import SrcLoc
import SrcLoc hiding (Located)
import StringBuffer
import qualified Language.Haskell.GHC.HappyParser as Parse
......@@ -57,6 +63,13 @@ data ParseOutput a
deriving (Eq, Show) -- Auxiliary strings say what part of the
-- input string was used and what
-- part is remaining.
--
-- | Store locations along with a value.
data Located a = Located {
line :: LineNumber, -- Where this element is located.
unloc :: a -- Located element.
} deriving (Eq, Show, Functor)
data ParserType = FullParser | PartialParser
data Parser a = Parser ParserType (P a)
......@@ -128,3 +141,79 @@ splitAtLoc line col string =
-- Not the same as 'unlines', due to trailing \n
joinLines :: [String] -> String
joinLines = intercalate "\n"
-- | Split an input string into chunks based on indentation.
-- A chunk is a line and all lines immediately following that are indented
-- beyond the indentation of the first line. This parses Haskell layout
-- rules properly, and allows using multiline expressions via indentation.
layoutChunks :: String -> [Located String]
layoutChunks = go 1
where
go :: LineNumber -> String -> [Located String]
go line = filter (not . null . unloc) . map (fmap strip) . layoutLines line . lines
-- drop spaces on left and right
strip = dropRight . dropLeft
where
dropLeft = dropWhile (`elem` whitespace)
dropRight = reverse . dropWhile (`elem` whitespace) . reverse
whitespace = " \t\n"
layoutLines :: LineNumber -> [String] -> [Located String]
-- Empty string case. If there's no input, output is empty.
layoutLines _ [] = []
-- Use the indent of the first line to find the end of the first block.
layoutLines lineIdx all@(firstLine:rest) =
let firstIndent = indentLevel firstLine
blockEnded line = indentLevel line <= firstIndent in
case findIndex blockEnded rest of
-- If the first block doesn't end, return the whole string, since
-- that just means the block takes up the entire string.
Nothing -> [Located lineIdx $ intercalate "\n" all]
-- We found the end of the block. Split this bit out and recurse.
Just idx ->
let (before, after) = splitAt idx rest in
Located lineIdx (joinLines $ firstLine:before) : go (lineIdx + idx + 1) (joinLines after)
-- Compute indent level of a string as number of leading spaces.
indentLevel :: String -> Int
indentLevel (' ':str) = 1 + indentLevel str
-- Count a tab as two spaces.
indentLevel ('\t':str) = 2 + indentLevel str
-- Count empty lines as a large indent level, so they're always with the previous expression.
indentLevel "" = 100000
indentLevel _ = 0
-- | Drop comments from Haskell source.
-- Simply gets rid of them, does not replace them in any way.
removeComments :: String -> String
removeComments = removeOneLineComments . removeMultilineComments 0
where
removeOneLineComments str =
case str of
-- Don't remove comments after cmd directives
':':'!':remaining ->":!" ++ takeLine remaining ++ dropLine remaining
'-':'-':remaining -> dropLine remaining
x:xs -> x:removeOneLineComments xs
[] -> []
where
dropLine = removeOneLineComments . dropWhile (/= '\n')
takeLine = takeWhile (/= '\n')
removeMultilineComments nesting str =
case str of
'{':'-':remaining -> removeMultilineComments (nesting + 1) remaining
'-':'}':remaining ->
if nesting > 0
then removeMultilineComments (nesting - 1) remaining
else '-':'}':removeMultilineComments nesting remaining
x:xs ->
if nesting > 0
then removeMultilineComments nesting xs
else x:removeMultilineComments nesting xs
[] -> []
......@@ -2,7 +2,7 @@
-- documentation, see http://haskell.org/cabal/users-guide/
name: ghc-parser
version: 0.1.0.0
version: 0.1.1.0
synopsis: Haskell source parser from GHC.
-- description:
homepage: https://github.com/gibiansky/IHaskell
......
......@@ -64,7 +64,7 @@ library
directory -any,
filepath -any,
ghc ==7.6.*,
ghc-parser -any,
ghc-parser >=0.1.1,
ghc-paths ==0.1.*,
haskeline -any,
here -any,
......
......@@ -17,7 +17,7 @@ import Data.Monoid
import IHaskell.Types
import IHaskell.Display
import IHaskell.IPython
import IHaskell.Eval.Parser
import IHaskell.Eval.Parser hiding (line)
data LintSeverity = LintWarning | LintError deriving (Eq, Show)
......
......@@ -50,11 +50,6 @@ data CodeBlock
| ParseError StringLoc ErrMsg -- ^ An error indicating that parsing the code block failed.
deriving (Show, Eq)
-- | Store locations along with a value.
data Located a = Located LineNumber a deriving (Eq, Show)
instance Functor Located where
fmap f (Located line a) = Located line $ f a
-- | Directive types. Each directive is associated with a string in the
-- directive code block.
data DirectiveType
......@@ -70,14 +65,6 @@ data DirectiveType
| GetDoc -- ^ Get documentation for an identifier via Hoogle.
deriving (Show, Eq)
-- | Unlocate something - drop the position.
unloc :: Located a -> a
unloc (Located _ a) = a
-- | Get the line number of a located element.
line :: Located a -> LineNumber
line (Located l _) = l
-- | Parse a string into code blocks.
parseString :: GhcMonad m => String -> m [Located CodeBlock]
parseString codeString = do
......@@ -88,7 +75,7 @@ parseString codeString = do
Parsed {} -> return [Located 1 $ Module codeString]
Failure {} -> do
-- Split input into chunks based on indentation.
let chunks = layoutChunks $ dropComments codeString
let chunks = layoutChunks $ removeComments codeString
result <- joinFunctions <$> processChunks [] chunks
-- Return to previous flags. When parsing, flags can be set to make
......@@ -268,69 +255,6 @@ parseDirective (':':directive) line = case find rightDirective directives of
]
parseDirective _ _ = error "Directive must start with colon!"
-- | Split an input string into chunks based on indentation.
-- A chunk is a line and all lines immediately following that are indented
-- beyond the indentation of the first line. This parses Haskell layout
-- rules properly, and allows using multiline expressions via indentation.
layoutChunks :: String -> [Located String]
layoutChunks = go 1
where
go :: LineNumber -> String -> [Located String]
go line = filter (not . null . unloc) . map (fmap strip) . layoutLines line . lines
layoutLines :: LineNumber -> [String] -> [Located String]
-- Empty string case. If there's no input, output is empty.
layoutLines _ [] = []
-- Use the indent of the first line to find the end of the first block.
layoutLines lineIdx all@(firstLine:rest) =
let firstIndent = indentLevel firstLine
blockEnded line = indentLevel line <= firstIndent in
case findIndex blockEnded rest of
-- If the first block doesn't end, return the whole string, since
-- that just means the block takes up the entire string.
Nothing -> [Located lineIdx $ intercalate "\n" all]
-- We found the end of the block. Split this bit out and recurse.
Just idx ->
let (before, after) = splitAt idx rest in
Located lineIdx (joinLines $ firstLine:before) : go (lineIdx + idx + 1) (joinLines after)
-- Compute indent level of a string as number of leading spaces.
indentLevel :: String -> Int
indentLevel (' ':str) = 1 + indentLevel str
-- Count a tab as two spaces.
indentLevel ('\t':str) = 2 + indentLevel str
-- Count empty lines as a large indent level, so they're always with the previous expression.
indentLevel "" = 100000
indentLevel _ = 0
-- Not the same as 'unlines', due to trailing \n
joinLines :: [String] -> String
joinLines = intercalate "\n"
-- | Drop comments from Haskell source.
dropComments :: String -> String
dropComments = removeOneLineComments . removeMultilineComments
where
-- Don't remove comments after cmd directives
removeOneLineComments (':':'!':remaining) = ":!" ++ takeWhile (/= '\n') remaining ++
removeOneLineComments (dropWhile (/= '\n') remaining)
removeOneLineComments ('-':'-':remaining) = removeOneLineComments (dropWhile (/= '\n') remaining)
removeOneLineComments (x:xs) = x:removeOneLineComments xs
removeOneLineComments x = x
removeMultilineComments ('{':'-':remaining) =
case subIndex "-}" remaining of
Nothing -> ""
Just idx -> removeMultilineComments $ drop (2 + idx) remaining
removeMultilineComments (x:xs) = x:removeMultilineComments xs
removeMultilineComments x = x
-- | Parse a module and return the name declared in the 'module X where'
-- line. That line is required, and if it does not exist, this will error.
-- Names with periods in them are returned piece y piece.
......@@ -344,3 +268,7 @@ getModuleName moduleSrc = do
case unLoc <$> hsmodName (unLoc mod) of
Nothing -> error "Module must have a name."
Just name -> return $ split "." $ moduleNameString name
-- Not the same as 'unlines', due to trailing \n
joinLines :: [String] -> String
joinLines = intercalate "\n"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment