Commit 636f243e authored by Andrew Gibiansky's avatar Andrew Gibiansky

Adding conversion files from @aavogt

parent 4867841c
-- | Description : mostly reversible conversion between ipynb and lhs
module IHaskell.Convert (convert) where
import Control.Monad.Identity (Identity(Identity), unless, when)
import IHaskell.Convert.Args (ConvertSpec(ConvertSpec, convertInput, convertLhsStyle, convertOutput, convertOverwriteFiles, convertToIpynb), fromJustConvertSpec, toConvertSpec)
import IHaskell.Convert.IpynbToLhs (ipynbToLhs)
import IHaskell.Convert.LhsToIpynb (lhsToIpynb)
import IHaskell.Flags (Argument)
import System.Directory (doesFileExist)
import Text.Printf (printf)
-- | used by @IHaskell convert@
convert :: [Argument] -> IO ()
convert args = case fromJustConvertSpec (toConvertSpec args) of
ConvertSpec { convertToIpynb = Identity toIpynb,
convertInput = Identity inputFile,
convertOutput = Identity outputFile,
convertLhsStyle = Identity lhsStyle,
convertOverwriteFiles = force }
| toIpynb -> do
unless force (failIfExists outputFile)
lhsToIpynb lhsStyle inputFile outputFile
| otherwise -> do
unless force (failIfExists outputFile)
ipynbToLhs lhsStyle inputFile outputFile
-- | Call fail when the named file already exists.
failIfExists :: FilePath -> IO ()
failIfExists file = do
exists <- doesFileExist file
when exists $ fail $
printf "File %s already exists. To force supply --force." file
-- | Description: interpret flags parsed by "IHaskell.Flags"
module IHaskell.Convert.Args
(ConvertSpec(..),
fromJustConvertSpec,
toConvertSpec,
) where
import Control.Applicative ((<$>))
import Control.Monad.Identity (Identity(Identity))
import Data.Char (toLower)
import Data.List (partition)
import Data.Maybe (fromMaybe)
import qualified Data.Text.Lazy as T (pack, Text)
import IHaskell.Flags (Argument(ConvertFrom, ConvertFromFormat, ConvertLhsStyle, ConvertTo, ConvertToFormat, OverwriteFiles), LhsStyle, lhsStyleBird, NotebookFormat(..))
import System.FilePath ((<.>), dropExtension, takeExtension)
import Text.Printf (printf)
-- | ConvertSpec is the accumulator for command line arguments
data ConvertSpec f = ConvertSpec
{ convertToIpynb :: f Bool,
convertInput :: f FilePath,
convertOutput :: f FilePath,
convertLhsStyle :: f (LhsStyle T.Text),
convertOverwriteFiles :: Bool
}
-- | Convert a possibly-incomplete specification for what to convert
-- into one which can be executed. Calls error when data is missing.
fromJustConvertSpec :: ConvertSpec Maybe -> ConvertSpec Identity
fromJustConvertSpec convertSpec = convertSpec {
convertToIpynb = Identity toIpynb,
convertInput = Identity inputFile,
convertOutput = Identity outputFile,
convertLhsStyle = Identity $ fromMaybe
(T.pack <$> lhsStyleBird)
(convertLhsStyle convertSpec)
}
where
toIpynb = fromMaybe (error "Error: direction for conversion unknown")
(convertToIpynb convertSpec)
(inputFile, outputFile) = case (convertInput convertSpec, convertOutput convertSpec) of
(Nothing, Nothing) -> error "Error: no files specified for conversion"
(Just i, Nothing) | toIpynb -> (i, dropExtension i <.> "ipynb")
| otherwise -> (i, dropExtension i <.> "lhs")
(Nothing, Just o) | toIpynb -> (dropExtension o <.> "lhs", o)
| otherwise -> (dropExtension o <.> "ipynb", o)
(Just i, Just o) -> (i, o)
-- | Does this @Argument@ explicitly request a file format?
isFormatSpec :: Argument -> Bool
isFormatSpec (ConvertToFormat _) = True
isFormatSpec (ConvertFromFormat _) = True
isFormatSpec _ = False
toConvertSpec :: [Argument] -> ConvertSpec Maybe
toConvertSpec args = mergeArgs otherArgs
(mergeArgs formatSpecArgs initialConvertSpec)
where
(formatSpecArgs, otherArgs) = partition isFormatSpec args
initialConvertSpec = ConvertSpec Nothing Nothing Nothing Nothing False
mergeArgs :: [Argument] -> ConvertSpec Maybe -> ConvertSpec Maybe
mergeArgs args initialConvertSpec = foldr mergeArg initialConvertSpec args
mergeArg :: Argument -> ConvertSpec Maybe -> ConvertSpec Maybe
mergeArg OverwriteFiles convertSpec = convertSpec { convertOverwriteFiles = True }
mergeArg (ConvertLhsStyle lhsStyle) convertSpec
| Just previousLhsStyle <- convertLhsStyle convertSpec,
previousLhsStyle /= fmap T.pack lhsStyle = error $ printf
"Conflicting lhs styles requested: <%s> and <%s>"
(show lhsStyle) (show previousLhsStyle)
| otherwise = convertSpec { convertLhsStyle = Just (T.pack <$> lhsStyle) }
mergeArg (ConvertFrom inputFile) convertSpec
| Just previousInputFile <- convertInput convertSpec,
previousInputFile /= inputFile = error $ printf "Multiple input files specified: <%s> and <%s>"
inputFile previousInputFile
| otherwise = convertSpec {
convertInput = Just inputFile,
convertToIpynb = case (convertToIpynb convertSpec, fromExt inputFile) of
(prev, Nothing) -> prev
(prev @ (Just _), _) -> prev
(Nothing, format) -> fmap (== LhsMarkdown) format
}
mergeArg (ConvertTo outputFile) convertSpec
| Just previousOutputFile <- convertOutput convertSpec,
previousOutputFile /= outputFile = error $ printf "Multiple output files specified: <%s> and <%s>"
outputFile previousOutputFile
| otherwise = convertSpec {
convertOutput = Just outputFile,
convertToIpynb = case (convertToIpynb convertSpec, fromExt outputFile) of
(prev, Nothing) -> prev
(prev @ (Just _), _) -> prev
(Nothing, format) -> fmap (== IPYNB) format
}
mergeArg unexpectedArg _ = error $ "IHaskell.Convert.mergeArg: impossible argument: "
++ show unexpectedArg
-- | Guess the format based on the file extension.
fromExt :: FilePath -> Maybe NotebookFormat
fromExt s = case map toLower (takeExtension s) of
".lhs" -> Just LhsMarkdown
".ipynb" -> Just IPYNB
_ -> Nothing
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module IHaskell.Convert.IpynbToLhs (ipynbToLhs) where
import Control.Applicative ((<$>))
import Data.Aeson (decode, Object, Value(Array, Object, String))
import qualified Data.ByteString.Lazy as L (readFile)
import qualified Data.HashMap.Strict as M (lookup)
import Data.Maybe (fromMaybe)
import Data.Monoid ((<>), Monoid(mempty))
import qualified Data.Text.Lazy as T (concat, fromStrict, Text, unlines)
import qualified Data.Text.Lazy.IO as T (writeFile)
import Data.Vector (Vector)
import qualified Data.Vector as V (map, mapM, toList)
import IHaskell.Flags (LhsStyle(lhsBeginCode, lhsBeginOutput, lhsCodePrefix, lhsEndCode, lhsEndOutput, lhsOutputPrefix))
ipynbToLhs :: LhsStyle T.Text
-> FilePath -- ^ the filename of an ipython notebook
-> FilePath -- ^ the filename of the literate haskell to write
-> IO ()
ipynbToLhs sty from to = do
Just (js :: Object) <- decode <$> L.readFile from
case M.lookup "worksheets" js of
Just (Array worksheets)
| [ Object worksheet ] <- V.toList worksheets,
Just (Array cells) <- M.lookup "cells" worksheet ->
T.writeFile to $ T.unlines $ V.toList
$ V.map (\(Object y) -> convCell sty y) cells
_ -> error "IHaskell.Convert.ipynbTolhs: json does not follow expected schema"
concatWithPrefix :: T.Text -- ^ the prefix to add to every line
-> Vector Value -- ^ a json array of text lines
-> Maybe T.Text
concatWithPrefix p arr = T.concat . map (p <>) . V.toList <$> V.mapM toStr arr
toStr :: Value -> Maybe T.Text
toStr (String x) = Just (T.fromStrict x)
toStr _ = Nothing
-- | @convCell sty cell@ converts a single cell in JSON into text suitable
-- for the type of lhs file described by the @sty@
convCell :: LhsStyle T.Text -> Object -> T.Text
convCell _sty object
| Just (String "markdown") <- M.lookup "cell_type" object,
Just (Array xs) <- M.lookup "source" object,
~ (Just s) <- concatWithPrefix "" xs = s
convCell sty object
| Just (String "code") <- M.lookup "cell_type" object,
Just (Array i) <- M.lookup "input" object,
Just (Array o) <- M.lookup "outputs" object,
~ (Just i) <- concatWithPrefix (lhsCodePrefix sty) i,
o <- fromMaybe mempty (convOutputs sty o) = "\n" <>
lhsBeginCode sty <> i <> lhsEndCode sty <> "\n" <> o <> "\n"
convCell _ _ = "IHaskell.Convert.convCell: unknown cell"
convOutputs :: LhsStyle T.Text
-> Vector Value -- ^ JSON array of output lines containing text or markup
-> Maybe T.Text
convOutputs sty array = do
outputLines <- V.mapM (getTexts (lhsOutputPrefix sty)) array
return $ lhsBeginOutput sty <> T.concat (V.toList outputLines) <> lhsEndOutput sty
getTexts :: T.Text -> Value -> Maybe T.Text
getTexts p (Object object)
| Just (Array text) <- M.lookup "text" object = concatWithPrefix p text
getTexts _ _ = Nothing
{-# LANGUAGE OverloadedStrings #-}
module IHaskell.Convert.LhsToIpynb (lhsToIpynb) where
import Control.Applicative ((<$>))
import Data.Aeson ((.=), encode, object, Value(Array, Bool, Number, String))
import qualified Data.ByteString.Lazy as L (writeFile)
import Data.Char (isSpace)
import Data.Monoid (Monoid(mempty))
import qualified Data.Text as TS (Text)
import qualified Data.Text.Lazy as T (dropWhile, lines, stripPrefix, Text, toStrict)
import qualified Data.Text.Lazy.IO as T (readFile)
import qualified Data.Vector as V (fromList, singleton)
import IHaskell.Flags (LhsStyle(LhsStyle))
lhsToIpynb :: LhsStyle T.Text -> FilePath -> FilePath -> IO ()
lhsToIpynb sty from to = do
classed <- classifyLines sty . T.lines <$> T.readFile from
L.writeFile to . encode . encodeCells $ groupClassified classed
data CellLine a = CodeLine a | OutputLine a | MarkdownLine a
deriving Show
isCode :: CellLine t -> Bool
isCode (CodeLine _) = True
isCode _ = False
isOutput :: CellLine t -> Bool
isOutput (OutputLine _) = True
isOutput _ = False
isMD :: CellLine t -> Bool
isMD (MarkdownLine _) = True
isMD _ = False
isEmptyMD :: (Eq a, Monoid a) => CellLine a -> Bool
isEmptyMD (MarkdownLine a) = a == mempty
isEmptyMD _ = False
untag :: CellLine t -> t
untag (CodeLine a) = a
untag (OutputLine a) = a
untag (MarkdownLine a) = a
data Cell a = Code a a | Markdown a
deriving (Show)
encodeCells :: [Cell [T.Text]] -> Value
encodeCells xs = object $
[ "worksheets" .= Array (V.singleton (object
[ "cells" .= Array (V.fromList (map cellToVal xs)) ] ))
] ++ boilerplate
cellToVal :: Cell [T.Text] -> Value
cellToVal (Code i o) = object $
[ "cell_type" .= String "code",
"collapsed" .= Bool False,
"language" .= String "python", -- is what it IPython gives us
"metadata" .= object [],
"input" .= arrayFromTxt i,
"outputs" .= Array
(V.fromList (
[ object ["text" .= arrayFromTxt o,
"metadata" .= object [],
"output_type" .= String "display_data" ]
| _ <- take 1 o])) ]
cellToVal (Markdown txt) = object $
[ "cell_type" .= String "markdown",
"metadata" .= object [],
"source" .= arrayFromTxt txt ]
-- | arrayFromTxt makes a JSON array of string s
arrayFromTxt :: [T.Text] -> Value
arrayFromTxt i = Array (V.fromList (map (String . T.toStrict) i))
-- | ihaskell needs this boilerplate at the upper level to interpret the
-- json describing cells and output correctly.
boilerplate :: [(TS.Text, Value)]
boilerplate =
[ "metadata" .= object [ "language" .= String "haskell", "name" .= String ""],
"nbformat" .= Number 3,
"nbformat_minor" .= Number 0 ]
groupClassified :: [CellLine T.Text] -> [Cell [T.Text]]
groupClassified (CodeLine a : x)
| (c,x) <- span isCode x,
(_,x) <- span isEmptyMD x,
(o,x) <- span isOutput x = Code (a : map untag c) (map untag o) : groupClassified x
groupClassified (MarkdownLine a : x)
| (m,x) <- span isMD x = Markdown (a: map untag m) : groupClassified x
groupClassified (OutputLine a : x ) = Markdown [a] : groupClassified x
groupClassified [] = []
classifyLines :: LhsStyle T.Text -> [T.Text] -> [CellLine T.Text]
classifyLines sty@(LhsStyle c o _ _ _ _) (l:ls) = case (sp c, sp o) of
(Just a, Nothing) -> CodeLine a : classifyLines sty ls
(Nothing, Just a) -> OutputLine a : classifyLines sty ls
(Nothing,Nothing) -> MarkdownLine l : classifyLines sty ls
_ -> error "IHaskell.Convert.classifyLines"
where sp c = T.stripPrefix (T.dropWhile isSpace c) (T.dropWhile isSpace l)
classifyLines _ [] = []
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment