Commit 03f5859a authored by Alexandre Delanoë's avatar Alexandre Delanoë

[MERGE] distances for Graph.

parents 7e4ad917 d7fd1875
...@@ -7,6 +7,7 @@ Maintainer : team@gargantext.org ...@@ -7,6 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
<<<<<<< HEAD
Script to start gargantext with different modes (Dev, Prod, Mock). Script to start gargantext with different modes (Dev, Prod, Mock).
-} -}
...@@ -18,6 +19,7 @@ Script to start gargantext with different modes (Dev, Prod, Mock). ...@@ -18,6 +19,7 @@ Script to start gargantext with different modes (Dev, Prod, Mock).
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module Main where module Main where
...@@ -27,6 +29,13 @@ import Data.Text (unpack) ...@@ -27,6 +29,13 @@ import Data.Text (unpack)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.API (startGargantext, startGargantextMock) import Gargantext.API (startGargantext, startGargantextMock)
--------------------------------------------------------
-- Graph Tests
--import qualified Gargantext.Graph.Utils as U
--import qualified Gargantext.Graph.Distances.Conditional as C
--import qualified Gargantext.Graph.Distances.Distributional as D
--import qualified Gargantext.Graph.Distances.Matrice as M
-------------------------------------------------------- --------------------------------------------------------
data Mode = Dev | Mock | Prod data Mode = Dev | Mock | Prod
...@@ -45,6 +54,8 @@ data MyOptions w = MyOptions { run :: w ::: Mode <?> "Possible modes: De ...@@ -45,6 +54,8 @@ data MyOptions w = MyOptions { run :: w ::: Mode <?> "Possible modes: De
instance ParseRecord (MyOptions Wrapped) instance ParseRecord (MyOptions Wrapped)
deriving instance Show (MyOptions Unwrapped) deriving instance Show (MyOptions Unwrapped)
main :: IO () main :: IO ()
main = do main = do
MyOptions myMode myPort myIniFile <- unwrapRecord MyOptions myMode myPort myIniFile <- unwrapRecord
...@@ -66,3 +77,8 @@ main = do ...@@ -66,3 +77,8 @@ main = do
putStrLn $ "Starting Gargantext with mode: " <> show myMode putStrLn $ "Starting Gargantext with mode: " <> show myMode
start start
-- main' :: IO ()
--main' = putStrLn $ show $ M.conditional $ M.myMat 10
...@@ -18,7 +18,8 @@ library: ...@@ -18,7 +18,8 @@ library:
ghc-options: ghc-options:
- -Wincomplete-uni-patterns - -Wincomplete-uni-patterns
- -Wincomplete-record-updates - -Wincomplete-record-updates
- -Werror - -Wmissing-signatures
# - -Werror
exposed-modules: exposed-modules:
- Gargantext - Gargantext
- Gargantext.Prelude - Gargantext.Prelude
...@@ -30,6 +31,7 @@ library: ...@@ -30,6 +31,7 @@ library:
- Gargantext.API - Gargantext.API
dependencies: dependencies:
- QuickCheck - QuickCheck
- accelerate
- aeson - aeson
- aeson-lens - aeson-lens
- aeson-pretty - aeson-pretty
...@@ -64,6 +66,7 @@ library: ...@@ -64,6 +66,7 @@ library:
- jose-jwt - jose-jwt
- lens - lens
- logging-effect - logging-effect
- matrix
- monad-logger - monad-logger
- mtl - mtl
- natural-transformation - natural-transformation
...@@ -117,7 +120,12 @@ library: ...@@ -117,7 +120,12 @@ library:
executable: executable:
main: Main.hs main: Main.hs
source-dirs: app source-dirs: app
ghc-options: -threaded -rtsopts -with-rtsopts=-N -O2 ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
dependencies: dependencies:
- base - base
- containers - containers
...@@ -155,6 +163,7 @@ tests: ...@@ -155,6 +163,7 @@ tests:
- -threaded - -threaded
- -rtsopts - -rtsopts
- -with-rtsopts=-N - -with-rtsopts=-N
- -Wmissing-signatures
dependencies: dependencies:
- doctest - doctest
- Glob - Glob
......
...@@ -34,7 +34,7 @@ import Gargantext.Prelude ...@@ -34,7 +34,7 @@ import Gargantext.Prelude
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- All the Database is structred like a hierarchical Tree -- All the Database is structred like a hierarchical Tree
data Tree a = NodeT a [Tree a] data Tree a = NodeT a [Tree a]
deriving (Show, Read, Eq) deriving (Show, Read, Eq)
-- data Tree a = NodeT a [Tree a] -- data Tree a = NodeT a [Tree a]
-- same as Data.Tree -- same as Data.Tree
......
...@@ -45,7 +45,7 @@ import Data.Profunctor.Product.TH (makeAdaptorAndInstance) ...@@ -45,7 +45,7 @@ import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Typeable (Typeable) import Data.Typeable (Typeable)
import qualified Data.ByteString.Internal as DBI import qualified Data.ByteString.Internal as DBI
import Database.PostgreSQL.Simple (Connection) import Database.PostgreSQL.Simple (Connection)
import Opaleye import Opaleye hiding (FromField)
-- | Types for Node Database Management -- | Types for Node Database Management
data PGTSVector data PGTSVector
......
{-|
Module : Gargantext.Graph.Distances.Conditional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Motivation and definition of the @Conditional@ distance.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE Strict #-}
module Gargantext.Graph.Distances.Conditional
where
import Data.Matrix hiding (identity)
import Data.String.Conversions (ConvertibleStrings(..))
import Data.List (concat, sortOn)
import qualified Data.List as L
import Data.Map (Map)
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import Data.Vector (Vector)
import qualified Data.Vector as V
import Gargantext.Prelude
import Gargantext.Graph.Utils
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Optimisation issue
toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
toBeOptimized m = proba Col m
------------------------------------------------------------------------
-- | Main Functions
-- Compute the probability from axis
-- x' = x / (sum Col x)
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
where
f' m' c = mapOnly a f c m'
mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
mapOnly Col = mapCol
mapOnly Row = mapRow
mapAll :: (a -> a) -> Matrix a -> Matrix a
mapAll f m = mapOn Col (\_ -> f) m
---------------------------------------------------------------
-- | Compute a distance from axis
-- xs = (sum Col x') - x'
distFromSum :: (Num a, Fractional a)
=> Axis -> Matrix a -> Matrix a
distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
---------------------------------------------------------------
---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores
opWith :: (Fractional a1, Num a1)
=> (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
where
n = fromIntegral $ nOf Col xs
---------------------------------------------------------------
-------------------------------------------------------
conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
conditional m = filter (threshold m') m'
where
------------------------------------------------------------------------
-- | Main Operations
-- x' = x / (sum Col x)
x' = proba Col m
------------------------------------------------------------------------
-- xs = (sum Col x') - x'
xs = distFromSum Col x'
-- ys = (sum Row x') - x'
ys = distFromSum Row x'
------------------------------------------------------------------------
-- | Top included or excluded
ie = opWith (+) xs ys
-- ie = ( xs + ys) / (2 * (x.shape[0] - 1))
-- | Top specific or generic
sg = opWith (-) xs ys
-- sg = ( xs - ys) / (2 * (x.shape[0] - 1))
nodes_kept :: [Int]
nodes_kept = take k' $ S.toList
$ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
$ map fst
$ nodes_included k <> nodes_specific k
nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
nodes_specific m = take m $ sortOn snd $ toListsWithIndex sg
insert as s = foldl' (\s' a -> S.insert a s') s as
k' = 2*k
k = 10
dico_nodes :: Map Int Int
dico_nodes = M.fromList $ zip [1..] nodes_kept
dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
m' = matrix (length nodes_kept)
(length nodes_kept)
(\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
threshold m = V.minimum $ V.map (\cId -> V.maximum $ getCol cId m) (V.enumFromTo 1 (nOf Col m))
filter t m = mapAll (\x -> filter' t x) m
where
filter' t x = case (x >= t) of
True -> x
False -> 0
------------------------------------------------------------------------
{-|
Module : Gargantext.Graph.Distances.Distributional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Motivation and definition of the @Distributional@ distance.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE Strict #-}
module Gargantext.Graph.Distances.Distributional
where
import Data.Matrix hiding (identity)
import Data.String.Conversions (ConvertibleStrings(..))
import Data.Map (Map)
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import Data.Vector (Vector)
import qualified Data.Vector as V
import Gargantext.Prelude
import Gargantext.Graph.Utils
distributional :: (Floating a, Ord a) => Matrix a -> [((Int, Int), a)]
distributional m = filter (\((x,y), d) -> foldl' (&&) True (conditions x y d) ) distriList
where
conditions x y d = [ (x /= y)
, (d > miniMax')
, ((M.lookup (x,y) distriMap) > (M.lookup (y,x) distriMap))
]
distriList = toListsWithIndex distriMatrix
distriMatrix = ri (mi m)
distriMap = M.fromList $ distriList
miniMax' = miniMax distriMatrix
ri :: (Ord a, Fractional a) => Matrix a -> Matrix a
ri m = matrix c r doRi
where
doRi (x,y) = doRi' x y m
doRi' x y mi'' = sumMin x y mi'' / (V.sum $ ax Col x y mi'')
sumMin x y mi' = V.sum $ V.map (\(a,b) -> min a b )
$ V.zip (ax Col x y mi') (ax Row x y mi')
(c,r) = (nOf Col m, nOf Row m)
mi :: (Ord a, Floating a) => Matrix a -> Matrix a
mi m = matrix c r createMat
where
(c,r) = (nOf Col m, nOf Row m)
createMat (x,y) = doMi x y m
doMi x y m = if x == y then 0 else (max (log (doMi' x y m)) 0 )
doMi' x y m = (getElem x y m) / ( cross x y m / total m )
cross x y m = (V.sum $ ax Col x y m) * (V.sum $ ax Row x y m)
ax :: Axis -> Int -> Int -> Matrix a -> Vector a
ax a i j m = dropAt j' $ axis a i' m
where
i' = div i c + 1
j' = mod r j + 1
(c,r) = (nOf Col m, nOf Row m)
miniMax :: (Ord a) => Matrix a -> a
miniMax m = V.minimum $ V.map (\c -> V.maximum $ getCol c m) (V.enumFromTo 1 (nOf Col m))
{-|
Module : Gargantext.Graph.Distances.Matrix
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Motivation and definition of the @Conditional@ distance.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Graph.Distances.Matrice
where
import Data.Array.Accelerate.Data.Bits
import Data.Array.Accelerate.Interpreter
import Data.Array.Accelerate
import Data.Array.Accelerate.Smart
import Data.Array.Accelerate.Type
import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
import Data.Maybe (Maybe(Just))
import qualified Gargantext.Prelude as P
import qualified Data.Array.Accelerate.Array.Representation as Repr
matrix :: Elt c => Int -> [c] -> Matrix c
matrix n l = fromList (Z :. n :. n) l
myMat :: Int -> Matrix Double
myMat n = matrix n [1..]
-- | Two ways to get the rank (as documentation)
rank :: (Matrix Double) -> Int
rank m = arrayRank $ arrayShape m
rank' :: (Matrix Double) -> Int
rank' m = n
where
Z :. _ :. n = arrayShape m
-----------------------------------------------------------------------
-- | Conditional Distance
type Rank = Int
proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
proba r mat = zipWith (/) mat (mkSum r mat)
mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
mkSum r mat = replicate (constant (Z :. (r :: Int) :. All))
$ fold (+) 0 mat
type Matrix' a = Acc (Matrix a)
conditional :: Matrix Double -> (Matrix Double, Matrix Double)
conditional m = (run $ ie (use m), run $ sg (use m))
where
r :: Rank
r = rank' m
xs :: Matrix' Double -> Matrix' Double
xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
ie :: Matrix' Double -> Matrix' Double
ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
sg :: Acc (Matrix Double) -> Acc (Matrix Double)
sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
n :: Exp Double
n = P.fromIntegral r
-- filter with threshold
-----------------------------------------------------------------------
-- | Distributional Distance
distributional :: Matrix Double -> Matrix Double
distributional m = run $ filter $ ri (use m)
where
n = rank m
filter m = zipWith (\a b -> max a b) m (transpose m)
--miniMax m = fold minimum $ fold maximum m
ri mat = zipWith (/) mat1 mat2
where
mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
mat2 = mkSum n mat
mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
$ zipWith (/) (crossProduct m') (total m')
total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
cross mat = zipWith (-) (mkSum n mat) (mat)
{-|
Module : Gargantext.Graph.Distances.Utils
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE Strict #-}
module Gargantext.Graph.Utils
where
import Data.Matrix hiding (identity)
import Data.Map (Map)
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import Data.Vector (Vector)
import qualified Data.Vector as V
import qualified Data.List as L
import Gargantext.Prelude
------------------------------------------------------------------------
-- | Some utils to build the matrix from cooccurrence results
type Distance = Double
type Cooc = Int
type NgramId = Int
type Index = Int
-- Type Families
--type Matrix' Index a
--type Matrix' NgramId a
data Matrice a = Matrice { matrice_fromIndex :: !(Map Index NgramId)
, matrice_toIndex :: !(Map NgramId Index)
, matrice :: !(Matrix a)
} deriving (Show)
--fromMatrice :: Matrice Double -> [(NgramId, NgramId, Double)]
--fromMatrice m = undefined
toMatrice :: [(NgramId, NgramId, Int)] -> Matrice Double
toMatrice ns = Matrice fromIndx toIndx m
where
s = cooc2set ns
(fromIndx, toIndx) = set2indexes s
n = (length (S.toList s))
idx = toIndex toIndx ns
m = matrix n n (\x -> maybe 0 identity (fromIntegral <$> M.lookup x idx))
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
toIndex :: Map NgramId Index -> [(NgramId, NgramId, a)] -> Map (Index,Index) a
toIndex ni ns = to ni ns
fromIndex :: Map Index NgramId -> [(Index, Index, a)] -> Map (NgramId,NgramId) a
fromIndex ni ns = to ni ns
-------------------------------------------------------------------------------
to :: (Ord b, Ord k) => Map k b -> [(k, k, a)] -> Map (b, b) a
to index ns = M.fromList $ map (\(a1,a2,c) -> ( ( (M.!) index a1
, (M.!) index a2
)
, c
)
) ns
-------------------------------------------------------------------------------
cooc2set :: [(NgramId, NgramId, a)] -> Set NgramId
cooc2set cs' = foldl' (\s (a1,a2,_) -> insert [a1,a2] s ) S.empty cs'
where
insert as s = foldl' (\s' a -> S.insert a s') s as
set2indexes :: Set NgramId -> (Map Index NgramId, Map NgramId Index)
set2indexes s = (M.fromList fromIndex', M.fromList toIndex')
where
s' = S.toList s
fromIndex' = zip [1..] s'
toIndex' = zip s' [1..]
------------------------------------------------------------------------
-- Data.Vector.Additions
dropAt :: Int -> Vector a -> Vector a
dropAt n v = debut <> (V.tail fin)
where
debut = V.take n v
fin = V.drop n v
------------------------------------------------------------------------
data Axis = Col | Row
---- | Matrix Algebra
--data Algebra a = Point a | Vector a | Matrix a
--
--multiply :: Algebra a -> Matrix a -> Matrix a
--multiply (Point a) = undefined
--multiply (Vector a) = undefined
--multiply (Matrix a) = undefined
--
--div :: Fractional a => Matrix a -> Matrix a
--div m = foldl' (\m c -> divCol c m) m [1.. (ncols m)]
-- where
-- divCol c m = mapCol (\_ x -> 1/x) c m
--
--divide :: Fractional a => Matrix a -> Matrix a -> Matrix a
--divide a b = a `multStd` (div b)
------------------------------------------------------------------------
-- | Matrix functions
type AxisId = Int
total :: Num a => Matrix a -> a
total m = V.sum $ V.map (\c -> V.sum (getCol c m)) (V.enumFromTo 1 (nOf Col m))
nOf :: Axis -> Matrix a -> Int
nOf Row = nrows
nOf Col = ncols
axis :: Axis -> AxisId -> Matrix a -> Vector a
axis Col = getCol
axis Row = getRow
toListsWithIndex :: Matrix a -> [((Int, Int), a)]
toListsWithIndex m = concat' $ zip [1..] $ map (\c -> zip [1..] c) $ toLists m
where
concat' :: [(Int, [(Int, a)])] -> [((Int, Int), a)]
concat' xs = L.concat $ map (\(x, ys) -> map (\(y, a) -> ((x,y), a)) ys ) xs
-- | For tests only, to be removed
m1 :: Matrix Double
m1 = fromList 300 300 [1..]
...@@ -40,12 +40,12 @@ import Protolude ( Bool(True, False), Int, Double, Integer ...@@ -40,12 +40,12 @@ import Protolude ( Bool(True, False), Int, Double, Integer
, sum, fromIntegral, length, fmap, foldl, foldl' , sum, fromIntegral, length, fmap, foldl, foldl'
, takeWhile, sqrt, undefined, identity , takeWhile, sqrt, undefined, identity
, abs, min, max, maximum, minimum, return, snd, truncate , abs, min, max, maximum, minimum, return, snd, truncate
, (+), (*), (/), (-), (.), ($), (**), (^), (<), (>), log , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
, Eq, (==), (>=), (<=), (<>), (/=) , Eq, (==), (>=), (<=), (<>), (/=)
, (&&), (||), not, any , (&&), (||), not, any
, fst, snd, toS , fst, snd, toS
, elem, die, mod, div, const, either , elem, die, mod, div, const, either
, curry, uncurry , curry, uncurry, repeat
, otherwise, when , otherwise, when
, undefined , undefined
, IO() , IO()
......
...@@ -34,7 +34,7 @@ import Data.Map.Strict (Map ...@@ -34,7 +34,7 @@ import Data.Map.Strict (Map
, insertWith, insertWithKey, unionWith , insertWith, insertWithKey, unionWith
, toList , toList
) )
import Data.Set (Set)
import qualified Data.Map.Strict as DMS import qualified Data.Map.Strict as DMS
import Control.Monad ((>>),(>>=)) import Control.Monad ((>>),(>>=))
import Data.String (String()) import Data.String (String())
......
...@@ -52,16 +52,16 @@ import Gargantext.Text.Parsers.WOS (wosParser) ...@@ -52,16 +52,16 @@ import Gargantext.Text.Parsers.WOS (wosParser)
---- import Gargantext.Parsers.DOC (docParser) ---- import Gargantext.Parsers.DOC (docParser)
---- import Gargantext.Parsers.ODT (odtParser) ---- import Gargantext.Parsers.ODT (odtParser)
--import Gargantext.Prelude (pm)
--import Gargantext.Types.Main (ErrorMessage(), Corpus) --import Gargantext.Types.Main (ErrorMessage(), Corpus)
-- FIXME
--type Field = Text
type ParseError = String type ParseError = String
-- type Field = Text
--data Corpus = Corpus { _corpusErrors :: [ParseError] type Document = DM.Map Field Text
-- , _corpusMap :: Map FilePath (Map Field Text)
-- } type FilesParsed = DM.Map FilePath FileParsed
data FileParsed = FileParsed { _fileParsed_errors :: Maybe ParseError
, _fileParsed_result :: [Document]
} deriving (Show)
-- | According to the format of Input file, -- | According to the format of Input file,
...@@ -76,38 +76,38 @@ data FileFormat = WOS -- Implemented (ISI Format) ...@@ -76,38 +76,38 @@ data FileFormat = WOS -- Implemented (ISI Format)
-- TODO: to debug maybe add the filepath in error message -- TODO: to debug maybe add the filepath in error message
parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]]) --parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]])
parse format path = do --parse format path = do
files <- case takeExtension path of -- files <- case takeExtension path of
".zip" -> openZip path -- ".zip" -> openZip path
_ -> pure <$> DB.readFile path -- _ -> pure <$> DB.readFile path
(as, bs) <- partitionEithers <$> mapConcurrently (runParser format) files -- (as, bs) <- partitionEithers <$> mapConcurrently (runParser format) files
pure (as, map toText $ concat bs) -- pure (as, map toText $ concat bs)
where -- where
-- TODO : decode with bayesian inference on encodings -- -- TODO : decode with bayesian inference on encodings
toText = map (\(a,b) -> (decodeUtf8 a, decodeUtf8 b)) -- toText = map (\(a,b) -> (decodeUtf8 a, decodeUtf8 b))
--
--
-- | withParser: ---- | withParser:
-- According the format of the text, choosing the right parser. ---- According the format of the text, choosing the right parser.
-- TODO withParser :: FileFormat -> Parser [Document] ---- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]] --withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = wosParser --withParser WOS = wosParser
--withParser DOC = docParser ----withParser DOC = docParser
--withParser ODT = odtParser ----withParser ODT = odtParser
--withParser XML = xmlParser ----withParser XML = xmlParser
--withParser _ = error "[ERROR] Parser not implemented yet" ----withParser _ = error "[ERROR] Parser not implemented yet"
--
runParser :: FileFormat -> DB.ByteString --runParser :: FileFormat -> DB.ByteString
-> IO (Either String [[(DB.ByteString, DB.ByteString)]]) -- -> IO (Either String [[(DB.ByteString, DB.ByteString)]])
runParser format text = pure $ parseOnly (withParser format) text --runParser format text = pure $ parseOnly (withParser format) text
--
openZip :: FilePath -> IO [DB.ByteString] --openZip :: FilePath -> IO [DB.ByteString]
openZip fp = do --openZip fp = do
path <- resolveFile' fp -- path <- resolveFile' fp
entries <- withArchive path (DM.keys <$> getEntries) -- entries <- withArchive path (DM.keys <$> getEntries)
bs <- mapConcurrently (\s -> withArchive path (getEntry s)) entries -- bs <- mapConcurrently (\s -> withArchive path (getEntry s)) entries
pure bs -- pure bs
clean :: Text -> Text clean :: Text -> Text
clean txt = DT.map clean' txt clean txt = DT.map clean' txt
......
...@@ -3,6 +3,7 @@ extra-package-dbs: [] ...@@ -3,6 +3,7 @@ extra-package-dbs: []
packages: packages:
- . - .
- servant-job - servant-job
#- '/home/alexandre/local/logiciels/haskell/accelerate/accelerate'
allow-newer: true allow-newer: true
extra-deps: extra-deps:
...@@ -12,6 +13,8 @@ extra-deps: ...@@ -12,6 +13,8 @@ extra-deps:
commit: 6f0595d2421005837d59151a8b26eee83ebb67b5 commit: 6f0595d2421005837d59151a8b26eee83ebb67b5
- git: https://github.com/delanoe/servant-static-th.git - git: https://github.com/delanoe/servant-static-th.git
commit: fff77e79fe94d563ab5cae2609b78c17b5c1f434 commit: fff77e79fe94d563ab5cae2609b78c17b5c1f434
- git: https://github.com/delanoe/accelerate.git
commit: 007fd483a4410441fb5dd1b689a5f7dab66d27ad
- aeson-1.2.4.0 - aeson-1.2.4.0
- aeson-lens-0.5.0.0 - aeson-lens-0.5.0.0
- duckling-0.1.3.0 - duckling-0.1.3.0
...@@ -33,4 +36,4 @@ extra-deps: ...@@ -33,4 +36,4 @@ extra-deps:
- text-1.2.3.0 - text-1.2.3.0
- text-show-3.6.2 - text-show-3.6.2
- servant-flatten-0.2 - servant-flatten-0.2
resolver: lts-10.6 resolver: lts-11.10
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment