Commit bd47a5e3 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[CLI] spliting executables into server and cli. Fixing parser.

parent 99d4f1f3
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE RankNTypes #-}
import Control.Lens (Getting, foldMapOf)
data Fold i o = forall m . Monoid m => Fold (i -> m) (m -> o)
instance Functor (Fold i) where
fmap k (Fold tally summarize) = Fold tally (k . summarize)
instance Applicative (Fold i) where
pure o = Fold (\_ -> ()) (\_ -> o)
Fold tallyF summarizeF <*> Fold tallyX summarizeX = Fold tally summarize
where
tally i = (tallyF i, tallyX i)
summarize (nF, nX) = summarizeF nF (summarizeX nX)
focus :: (forall m . Monoid m => Getting m b a) -> Fold a o -> Fold b o
focus lens (Fold tally summarize) = Fold (foldMapOf lens tally) summarize
{-# LANGUAGE ExistentialQuantification #-}
-- | Thanks to Gabriel Gonzales and his beautiful folds
import Data.Monoid
import Prelude hiding (head, last, all, any, sum, product, length)
import qualified Data.Foldable
data Fold i o = forall m . Monoid m => Fold (i -> m) (m -> o)
fold :: Fold i o -> [i] -> o
fold (Fold tally summarize) is = summarize (reduce (map tally is))
where
reduce = Data.Foldable.foldl' (<>) mempty
--
head :: Fold a (Maybe a)
head = Fold (First . Just) getFirst
last :: Fold a (Maybe a)
last = Fold (Last . Just) getLast
--
all :: (a -> Bool) -> Fold a Bool
all predicate = Fold (All . predicate) getAll
any :: (a -> Bool) -> Fold a Bool
any predicate = Fold (Any . predicate) getAny
--
sum :: Num n => Fold n n
sum = Fold Sum getSum
product :: Num n => Fold n n
product = Fold Product getProduct
length :: Num n => Fold i n
length = Fold (\_ -> Sum 1) getSum
--
{-# LANGUAGE BangPatterns #-}
data Average a = Average { numerator :: !a, denominator :: !Int }
instance Num a => Monoid (Average a) where
mempty = Average 0 0
mappend (Average xL nL) (Average xR nR) = Average (xL + xR) (nL + nR)
average :: Fractional a => Fold a a
average = Fold tally summarize
where
tally x = Average x 1
summarize (Average numerator denominator) =
numerator / fromIntegral denominator
......@@ -124,9 +124,29 @@ library:
# - utc
executables:
gargantext:
gargantext-server:
main: Main.hs
source-dirs: app
source-dirs: bin/gargantext-server
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies:
- base
- containers
- gargantext
- vector
- cassava
- ini
- optparse-generic
- unordered-containers
- full-text-search
gargantext-cli:
main: Main.hs
source-dirs: bin/gargantext-cli
ghc-options:
- -threaded
- -rtsopts
......
......@@ -23,20 +23,31 @@ please follow the types.
module Gargantext.Text.Parsers -- (parse, FileFormat(..))
where
import Gargantext.Prelude
import System.FilePath (FilePath(), takeExtension)
import Codec.Archive.Zip (withArchive, getEntry, getEntries)
import System.FilePath (FilePath())
import Data.Either.Extra (partitionEithers)
import Data.List (concat)
import qualified Data.Map as DM
import qualified Data.ByteString as DB
import Data.Ord()
import Data.String()
import Data.Either(Either(..))
import Data.Attoparsec.ByteString (parseOnly, Parser)
import Data.Text (Text)
import qualified Data.Text as DT
-- | Activate Async for to parse in parallel
--import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Concurrent.Async as CCA (mapConcurrently)
import Data.Text.Encoding (decodeUtf8)
import Data.String (String())
------------------------------------------------------------------------
import Gargantext.Prelude
import Gargantext.Text.Parsers.WOS (wosParser)
------------------------------------------------------------------------
type ParseError = String
type Field = Text
......@@ -60,38 +71,37 @@ data FileFormat = WOS -- Implemented (ISI Format)
-- TODO: to debug maybe add the filepath in error message
--parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]])
--parse format path = do
-- files <- case takeExtension path of
-- ".zip" -> openZip path
-- _ -> pure <$> DB.readFile path
-- (as, bs) <- partitionEithers <$> mapConcurrently (runParser format) files
-- pure (as, map toText $ concat bs)
-- where
-- -- TODO : decode with bayesian inference on encodings
-- toText = map (\(a,b) -> (decodeUtf8 a, decodeUtf8 b))
--
--
---- | withParser:
---- According the format of the text, choosing the right parser.
---- TODO withParser :: FileFormat -> Parser [Document]
--withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
--withParser WOS = wosParser
----withParser DOC = docParser
----withParser ODT = odtParser
----withParser XML = xmlParser
----withParser _ = error "[ERROR] Parser not implemented yet"
--
--runParser :: FileFormat -> DB.ByteString
-- -> IO (Either String [[(DB.ByteString, DB.ByteString)]])
--runParser format text = pure $ parseOnly (withParser format) text
--
--openZip :: FilePath -> IO [DB.ByteString]
--openZip fp = do
-- path <- resolveFile' fp
-- entries <- withArchive path (DM.keys <$> getEntries)
-- bs <- mapConcurrently (\s -> withArchive path (getEntry s)) entries
-- pure bs
parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]])
parse format path = do
files <- case takeExtension path of
".zip" -> openZip path
_ -> pure <$> DB.readFile path
(as, bs) <- partitionEithers <$> mapConcurrently (runParser format) files
pure (as, map toText $ concat bs)
where
-- TODO : decode with bayesian inference on encodings
toText = map (\(a,b) -> (decodeUtf8 a, decodeUtf8 b))
-- | withParser:
-- According the format of the text, choosing the right parser.
-- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = wosParser
--withParser DOC = docParser
--withParser ODT = odtParser
--withParser XML = xmlParser
--withParser _ = error "[ERROR] Parser not implemented yet"
runParser :: FileFormat -> DB.ByteString
-> IO (Either String [[(DB.ByteString, DB.ByteString)]])
runParser format text = pure $ parseOnly (withParser format) text
openZip :: FilePath -> IO [DB.ByteString]
openZip fp = do
entries <- withArchive fp (DM.keys <$> getEntries)
bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries
pure bs
clean :: Text -> Text
clean txt = DT.map clean' txt
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment