Commit 99d4f1f3 authored by Alexandre Delanoë

[FEAT] Adding External module for IMT community manager

[CLEAN] adding signatures, removing unused imports, fixing shadowing...
parent e54de3ad
......@@ -6,3 +6,4 @@ doc
bin
deps
profiling
_darcs
......@@ -18,10 +18,10 @@ library:
ghc-options:
- -Wincomplete-uni-patterns
- -Wincomplete-record-updates
#- -Wmissing-signatures
#- -Wunused-binds
#- -Wunused-imports
# - -Werror
- -Wmissing-signatures
- -Wunused-binds
- -Wunused-imports
- -Werror
exposed-modules:
- Gargantext
- Gargantext.TextFlow
......
......@@ -56,7 +56,7 @@ import Network.Wai.Handler.Warp hiding (defaultSettings)
import Servant
import Servant.Mock (mock)
import Servant.Job.Server (WithCallbacks)
--import Servant.Job.Server (WithCallbacks)
import Servant.Swagger
import Servant.Swagger.UI
-- import Servant.API.Stream
......@@ -70,8 +70,8 @@ import Gargantext.API.Node ( Roots , roots
)
import Gargantext.API.Count ( CountAPI, count, Query)
import Gargantext.API.Search ( SearchAPI, search, SearchQuery)
import Gargantext.API.Orchestrator
import Gargantext.API.Orchestrator.Types
--import Gargantext.API.Orchestrator
--import Gargantext.API.Orchestrator.Types
---------------------------------------------------------------------
......
......@@ -27,17 +27,12 @@ module Gargantext.API.Search
import GHC.Generics (Generic)
import Control.Monad.IO.Class (liftIO)
import Prelude (Bounded, Enum, minBound, maxBound)
import Data.Aeson hiding (Error, fieldLabelModifier)
import Data.Aeson.TH (deriveJSON)
import Data.Eq (Eq())
import Data.Either
import Data.List (repeat, permutations)
import Data.Swagger
import Data.Swagger.SchemaOptions
import Data.Text (Text, pack)
import Data.Text (Text)
import Database.PostgreSQL.Simple (Connection)
import Servant
......@@ -102,7 +97,7 @@ type SearchAPI = Post '[JSON] SearchResults
search :: Connection -> SearchQuery -> Handler SearchResults
search c (SearchQuery q pId) =
liftIO $ SearchResults <$> map (\(i, y, t, s, a, _) -> SearchResult i (cs $ encode t) (cs $ encode a))
liftIO $ SearchResults <$> map (\(i, _, t, _, a, _) -> SearchResult i (cs $ encode t) (cs $ encode a))
<$> textSearch c (toTSQuery q) pId 5 0 Desc
......@@ -45,7 +45,7 @@ data Terms = Terms { _terms_label :: Label
} deriving (Ord)
instance Show Terms where
show (Terms l s) = show l
show (Terms l _) = show l
instance Eq Terms where
(==) (Terms _ s1) (Terms _ s2) = s1 == s2
......
......@@ -68,7 +68,6 @@ module Gargantext.Database ( module Gargantext.Database.Utils
where
import Gargantext.Core.Types
import Gargantext.Core.Types.Node
import Gargantext.Database.Utils (connectGargandb)
import Gargantext.Database.Node
import Gargantext.Prelude
......@@ -76,9 +75,9 @@ import Database.PostgreSQL.Simple (Connection)
import Data.Text (Text, pack)
import Opaleye hiding (FromField)
import Data.Aeson
import Data.ByteString (ByteString)
import Data.List (last, concat)
type UserId = Int
--type UserId = Int
--type NodeId = Int
-- List of NodeId
......@@ -102,8 +101,8 @@ ls = get
tree :: Connection -> PWD -> IO [Node Value]
tree c p = do
ns <- get c p
cs <- mapM (\p' -> get c [p']) $ map node_id ns
pure $ ns <> (concat cs)
children <- mapM (\p' -> get c [p']) $ map node_id ns
pure $ ns <> (concat children)
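As an aside, a hedged usage sketch of tree built on the home helper used later in this file (the binding name is hypothetical):
-- hypothetical helper: the nodes found at the user's home plus their children
treeFromHome :: Connection -> IO [Node Value]
treeFromHome c = do
  h <- home c
  tree c h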
-- | TODO
......@@ -112,22 +111,22 @@ post _ [] _ = pure 0
post _ _ [] = pure 0
post c pth ns = mkNode c (last pth) ns
postR :: Connection -> PWD -> [NodeWrite'] -> IO [Int]
postR _ [] _ = pure [0]
postR _ _ [] = pure [0]
postR c pth ns = mkNodeR c (last pth) ns
--postR :: Connection -> PWD -> [NodeWrite'] -> IO [Int]
--postR _ [] _ = pure [0]
--postR _ _ [] = pure [0]
--postR c pth ns = mkNodeR c (last pth) ns
--
rm :: Connection -> PWD -> [NodeId] -> IO Int
rm = del
--rm :: Connection -> PWD -> [NodeId] -> IO Int
--rm = del
del :: Connection -> PWD -> [NodeId] -> IO Int
del _ [] _ = pure 0
del _ _ [] = pure 0
del c pth ns = deleteNodes c ns
del :: Connection -> [NodeId] -> IO Int
del _ [] = pure 0
del c ns = deleteNodes c ns
put :: Connection -> PWD -> [a] -> IO Int64
put = undefined
-- | TODO
--put :: Connection -> PWD -> [a] -> IO Int64
--put = undefined
-- | TODO
-- cd (Home UserId) | (Node NodeId)
......@@ -196,11 +195,9 @@ postAnnuaire corpusName title ns = do
)
del' :: [NodeId] -> IO Int
del' ns = do
c <- connectGargandb "gargantext.ini"
h <- home c
del c h ns
del c ns
......@@ -217,7 +217,7 @@ leftJoin3''' = leftJoin3 queryNodeNodeTable queryNodeTable queryNodeTable cond12
-- | Building the facet
selectDocFacet' :: NodeType -> ParentId -> Maybe NodeType -> Query FacetDocRead
selectDocFacet' pt pId _ = proc () -> do
selectDocFacet' _ pId _ = proc () -> do
(n1,(nn,n2)) <- leftJoin3''' -< ()
restrict -< (.&&) (node_parentId n1 .== (toNullable $ pgInt4 pId))
(node_typename n1 .== (pgInt4 $ nodeTypeId Document))
......
......@@ -32,7 +32,6 @@ import Database.PostgreSQL.Simple.FromField ( Conversion
, returnError
)
import Prelude hiding (null, id, map, sum)
import Data.Time.Segment (jour, timesAfter, Granularity(D))
import Gargantext.Core.Types
import Gargantext.Core.Types.Node (NodeType)
......@@ -55,7 +54,7 @@ import Data.ByteString (ByteString)
import Database.PostgreSQL.Simple (Connection)
import Opaleye hiding (FromField)
import Opaleye.Internal.QueryArr (Query(..))
import Opaleye.Internal.QueryArr (Query)
import qualified Data.Profunctor.Product as PP
-- | Types for Node Database Management
data PGTSVector
......@@ -251,6 +250,14 @@ node userId parentId nodeType name nodeData = Node Nothing typeId userId parentI
typeId = nodeTypeId nodeType
byteData = DB.pack $ DBL.unpack $ encode nodeData
node2write :: (Functor f2, Functor f1) =>
Int
-> NodePoly (f1 Int) Int Int parentId Text (f2 UTCTime) ByteString
-> (f1 (Column PGInt4), Column PGInt4, Column PGInt4,
Column PGInt4, Column PGText, f2 (Column PGTimestamptz),
Column PGJsonb)
node2write pid (Node id tn ud _ nm dt hp) = ((pgInt4 <$> id)
,(pgInt4 tn)
,(pgInt4 ud)
......@@ -312,17 +319,24 @@ mkNode' conn ns = runInsertMany conn nodeTable' ns
mkNodeR' :: Connection -> [NodeWriteT] -> IO [Int]
mkNodeR' conn ns = runInsertManyReturning conn nodeTable' ns (\(i,_,_,_,_,_,_) -> i)
-- | postNode
postNode :: Connection -> UserId -> ParentId -> Node' -> IO [Int]
postNode c uid pid (Node' nt txt v []) = mkNodeR' c (node2table uid pid (Node' nt txt v []))
postNode c uid pid (Node' Corpus txt v ns) = do
[pid'] <- postNode c uid pid (Node' Corpus txt v [])
pids <- mkNodeR' c $ concat $ (map (\(Node' Document txt v _) -> node2table uid pid' $ Node' Document txt v []) ns)
pids <- mkNodeR' c $ concat $ map (\n -> childWith uid pid' n) ns
pure (pids)
postNode c uid pid (Node' Annuaire txt v ns) = do
[pid'] <- postNode c uid pid (Node' Annuaire txt v [])
pids <- mkNodeR' c $ concat $ (map (\(Node' UserPage txt v _) -> node2table uid pid' $ Node' UserPage txt v []) ns)
pids <- mkNodeR' c $ concat $ map (\n -> childWith uid pid' n) ns
pure (pids)
postNode c uid pid (Node' _ _ _ _) = panic $ pack "postNode for this type not implemented yet"
postNode _ _ _ (Node' _ _ _ _) = panic $ pack "postNode for this type not implemented yet"
childWith :: UserId -> ParentId -> Node' -> [NodeWriteT]
childWith uId pId (Node' Document txt v []) = node2table uId pId (Node' Document txt v [])
childWith uId pId (Node' UserPage txt v []) = node2table uId pId (Node' UserPage txt v [])
childWith _ _ (Node' _ _ _ _) = panic $ pack "This NodeType can not be a child"
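For illustration, a hedged sketch of calling the refactored postNode; the names are invented and the hyperdata payload is assumed to be the Aeson Value carried by Node', so it is taken as an argument rather than constructed here:
-- hypothetical usage: a corpus node with two document children, all sharing
-- the same payload v (assumed to be an Aeson Value, per Node')
postCorpusExample :: Connection -> UserId -> ParentId -> Value -> IO [Int]
postCorpusExample c uid pid v =
  postNode c uid pid $
    Node' Corpus (pack "IMT corpus") v
      [ Node' Document (pack "Doc 1") v []
      , Node' Document (pack "Doc 2") v []
      ]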
{-|
Module      : Gargantext.Ext.IMT
Description : External module for the IMT community manager
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ext.IMT where
import Gargantext.Prelude
import Data.Text (Text, pack, splitOn)
import Data.Map (Map)
import qualified Data.Set as S
import qualified Data.List as DL
import qualified Data.Vector as DV
import qualified Data.Map as M
import Gargantext.Text.Metrics.Freq as F
import Gargantext.Text.Parsers.CSV as CSV
data School = School { school_shortName :: Text
, school_longName :: Text
, school_id :: Text
} deriving (Show, Read, Eq)
schools :: [School]
schools = [ School
(pack "Mines Albi-Carmaux")
(pack "Mines Albi-Carmaux - École nationale supérieure des Mines d'Albi‐Carmaux")
(pack "469216")
, School
(pack "Mines Alès")
(pack "EMA - École des Mines d'Alès")
(pack "6279")
, School
(pack "Mines Douai")
(pack "Mines Douai EMD - École des Mines de Douai")
(pack "224096")
, School
(pack "Mines Nantes")
(pack "Mines Nantes - Mines Nantes")
(pack "84538")
-- , School
-- (pack "Mines ParisTech")
-- (pack "MINES ParisTech - École nationale supérieure des mines de Paris")
-- (pack "301492")
--
, School
(pack "Mines Saint-Étienne")
(pack "Mines Saint-Étienne MSE - École des Mines de Saint-Étienne")
(pack "29212")
, School
(pack "Télécom Bretagne")
(pack "Télécom Bretagne")
(pack "301262")
, School
(pack "Télécom École de Management")
(pack "TEM - Télécom Ecole de Management")
(pack "301442")
, School
(pack "Télécom ParisTech")
(pack "Télécom ParisTech")
(pack "300362")
, School
(pack "Télécom SudParis")
(pack "TSP - Télécom SudParis")
(pack "352124")
, School
(pack "IMT Atlantique")
(pack "IMT Atlantique - IMT Atlantique Bretagne-Pays de la Loire")
(pack "481355")
]
mapIdSchool :: Map Text Text
mapIdSchool = M.fromList $ Gargantext.Prelude.map (\(School n _ i) -> (i,n)) schools
hal_data :: IO (DV.Vector CsvHal)
hal_data = snd <$> CSV.readHal "doc/corpus_imt/Gargantext_Corpus.csv"
names :: S.Set Text
names = S.fromList $ Gargantext.Prelude.map (\s -> school_id s) schools
publisBySchool :: DV.Vector CsvHal -> [(Maybe Text, Int)]
publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSchool, n))
$ DL.filter (\i -> S.member (fst i) names)
$ DL.reverse
$ DL.sortOn snd
$ M.toList
$ F.freq
$ DL.concat
$ DV.toList
$ DV.map (\n -> splitOn (pack ", ") (csvHal_instStructId_i n) )
$ DV.filter (\n -> csvHal_publication_year n == 2017) hal_data'
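A minimal usage sketch for the new module (the binding name below is hypothetical): it plugs hal_data into publisBySchool to rank the schools by their 2017 publications.
-- hypothetical helper: rank IMT schools by number of 2017 HAL publications
publisBySchool2017 :: IO [(Maybe Text, Int)]
publisBySchool2017 = publisBySchool <$> hal_data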
......@@ -238,6 +238,7 @@ zipSnd f xs = zip xs (f xs)
unMaybe :: [Maybe a] -> [a]
unMaybe = map fromJust . L.filter isJust
-- maximumWith
-- | maximumWith
maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
maximumWith f = L.maximumBy (compare `on` f)
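A small hedged example of maximumWith (assuming the usual tuple helpers are re-exported by this prelude):
-- pick the pair whose second component is largest
maximumWithExample :: (Char, Int)
maximumWithExample = maximumWith snd [('a', 1), ('b', 3), ('c', 2)]  -- ('b',3)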
......@@ -17,27 +17,19 @@ Text gathers terms in unit of contexts.
module Gargantext.Text
where
import Data.Maybe
import qualified Data.Text as DT
import qualified Data.Set as S
import Data.Text (Text, split)
import qualified Data.Text as DT
import NLP.FullStop (segment)
-----------------------------------------------------------------
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Text.Metrics.Count (Occ, occurrences, cooc)
import Gargantext.Prelude hiding (filter)
-----------------------------------------------------------------
type Config = Lang -> Context
type Context = Text -> [Text]
data Viz = Graph | Phylo | Chart
-----------------------------------------------------------------
-------------------------------------------------------------------
-- Contexts of text
......
......@@ -17,7 +17,7 @@ Context of text management tool
module Gargantext.Text.Context
where
import Data.Text (Text, pack, unpack, length)
import Data.Text (Text, pack, unpack)
import Data.String (IsString)
import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
......@@ -27,7 +27,9 @@ import Gargantext.Prelude hiding (length)
data SplitContext = Chars Int | Sentences Int | Paragraphs Int
tag :: Text -> [Tag Text]
tag = parseTags
-- | splitBy contexts of Chars or Sentences or Paragraphs
-- >>> splitBy (Chars 0) "abcde"
-- ["a","b","c","d","e"]
......
......@@ -23,30 +23,28 @@ noApax m = M.filter (>1) m
module Gargantext.Text.Metrics
where
import Data.Text (Text, pack)
import Data.Ord (comparing, Down(..))
import Data.Map (Map)
import Data.Ord (Down(..))
import qualified Data.List as L
import Data.Map (Map)
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import Data.Tuple.Extra (both)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
import Data.Array.Accelerate (toList)
import Math.KMeans (kmeans, euclidSq, elements)
import Data.Array.Accelerate (toList, Matrix)
--import Math.KMeans (kmeans, euclidSq, elements)
import Gargantext.Prelude
import Gargantext.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core (Lang(EN))
import Gargantext.Core.Types (Terms(..))
import Gargantext.Core.Types (Terms(..), Label)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Metrics.Count (Grouped)
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
......@@ -56,8 +54,7 @@ import qualified Data.Array.Accelerate as DAA
import GHC.Real (round)
import Debug.Trace
import Prelude (seq)
--import Debug.Trace
data MapListSize = MapListSize Int
data InclusionSize = InclusionSize Int
......@@ -95,25 +92,25 @@ filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = -- trace ("coocScore
-- each parts is then ordered by Inclusion/Exclusion
-- take n scored terms in each parts where n * SampleBins = MapListSize.
takeSome :: Ord t => FilterConfig -> [Scored t] -> [Scored t]
takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters k) _) scores = L.take l
takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters _) _) scores = L.take l
$ takeSample n m
$ L.take l' $ sortWith (Down . _scored_incExc) scores
-- $ splitKmeans k scores
where
-- TODO: benchmark with accelerate-example kmeans version
splitKmeans x xs = L.concat $ map elements
$ V.take (k-1)
$ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)]))
euclidSq x xs
--splitKmeans x xs = L.concat $ map elements
-- $ V.take (k-1)
-- $ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)]))
-- euclidSq x xs
n = round ((fromIntegral l)/s)
m = round $ (fromIntegral $ length scores) / (s)
takeSample n m xs = -- trace ("splitKmeans " <> show (length xs)) $
L.concat $ map (L.take n)
takeSample n' m' xs = -- trace ("splitKmeans " <> show (length xs)) $
L.concat $ map (L.take n')
$ map (sortWith (Down . _scored_incExc))
-- TODO use kmeans s instead of splitEvery
-- in order to split in s heteregenous parts
-- without homogeneous order hypothesis
$ splitEvery m
$ splitEvery m'
$ sortWith (Down . _scored_speGen) xs
......@@ -125,7 +122,7 @@ data Scored t = Scored { _scored_terms :: !t
-- TODO in the textflow we end up needing these indices, it might be better
-- to compute them earlier and pass them around.
coocScored :: Ord t => Map (t,t) Int -> [Scored t]
coocScored m = zipWith (\(i,t) (inc,spe) -> Scored t inc spe) (M.toList fi) scores
coocScored m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (M.toList fi) scores
where
(ti,fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat ti m
......@@ -171,6 +168,7 @@ metrics_sentences = [ "There is a table with a glass of wine and a spoon."
, "I wish the glass did not contain wine."
]
metrics_sentences_Test :: Bool
metrics_sentences_Test = metrics_sentences == metrics_sentences'
-- | Terms reordered to visually check occurrences
......@@ -193,6 +191,7 @@ fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-}
metrics_occ :: IO (Map Grouped (Map Terms Int))
metrics_occ = occurrences <$> L.concat <$> metrics_terms
{-
......@@ -201,8 +200,10 @@ metrics_occ = occurrences <$> L.concat <$> metrics_terms
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-}
metrics_cooc :: IO (Map (Label, Label) Int)
metrics_cooc = cooc <$> metrics_terms
metrics_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
metrics_cooc_mat = do
m <- metrics_cooc
let (ti,_) = createIndices m
......@@ -213,5 +214,6 @@ metrics_cooc_mat = do
, incExcSpeGen mat_cooc
)
metrics_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
......@@ -29,21 +29,17 @@ module Gargantext.Text.Metrics.Count
where
import Control.Arrow ((***))
import Control.Arrow (Arrow(..), (***))
import qualified Data.List as List
import Data.Map.Strict (Map
, empty, singleton
, insertWith, insertWithKey, unionWith
, toList, lookup, mapKeys
import qualified Data.Map.Strict as DMS
import Data.Map.Strict ( Map, empty, singleton
, insertWith, unionWith
, mapKeys
)
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (pack)
import qualified Data.Map.Strict as DMS
import Control.Monad ((>>),(>>=))
import Data.String (String())
import Data.Attoparsec.Text
------------------------------------------------------------------------
import Gargantext.Prelude
......@@ -76,6 +72,7 @@ type Grouped = Stems
type Occs = Int
type Coocs = Int
type Threshold = Int
removeApax :: Threshold -> Map (Label, Label) Int -> Map (Label, Label) Int
removeApax t = DMS.filter (> t)
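A hedged example of the removeApax helper (the binding name is illustrative):
-- keep only co-occurrence pairs counted strictly more than twice
removeUpToTwice :: Map (Label, Label) Int -> Map (Label, Label) Int
removeUpToTwice = removeApax 2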
......@@ -88,9 +85,9 @@ cooc tss = coocOnWithLabel _terms_stem (useLabelPolicy label_policy) tss
coocOnWithLabel :: (Ord label, Ord b) => (a -> b) -> (b -> label)
-> [[a]] -> Map (label, label) Coocs
coocOnWithLabel on policy tss =
mapKeys (delta policy) $ coocOn on tss
coocOnWithLabel on' policy tss = mapKeys (delta policy) $ coocOn on' tss
where
delta :: Arrow a => a b' c' -> a (b', b') (c', c')
delta f = f *** f
......@@ -114,9 +111,9 @@ coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Coocs
coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
where
coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs
coocOn' f ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
coocOn' fun ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
where
ts' = List.nub $ map f ts
ts' = List.nub $ map fun ts
xs = [ ((x, y), 1)
| x <- ts'
, y <- ts'
......
{-|
Module : Gargantext.Text.Metrics.Freq
Description : Frequency counting helpers
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Basic frequency helpers: counting occurrences into a Map, extracting the most
frequent keys, and computing averages.
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Metrics.Freq where
import Gargantext.Prelude
import Data.Bool (otherwise)
import Data.Map (empty, Map, insertWith, toList)
import qualified Data.List as L
countElem :: (Ord k) => Data.Map.Map k Int -> k -> Data.Map.Map k Int
countElem m e = Data.Map.insertWith (+) e 1 m
freq :: (Ord k) => [k] -> Data.Map.Map k Int
freq = foldl countElem Data.Map.empty
getMaxFromMap :: Ord a => Map a1 a -> [a1]
getMaxFromMap m = go [] Nothing (toList m)
where
go ks _ [] = ks
go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
go ks (Just u) ((k,v):rest)
| v < u = go ks (Just u) rest
| v > u = go [k] (Just v) rest
| otherwise = go (k:ks) (Just v) rest
average :: [Double] -> Double
average x = L.sum x / L.genericLength x
average' :: [Int] -> Double
average' x = (L.sum y) / (L.genericLength y) where
y = L.map fromIntegral x
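Two hedged examples of these helpers (binding names are illustrative; the type annotations assume the Int/Double re-exports of Gargantext.Prelude):
-- frequency of each element: fromList [(1,1),(2,2),(3,3)]
freqExample :: Map Int Int
freqExample = freq [1, 2, 2, 3, 3, 3]

-- mean of the same list: 2.0
averageExample :: Double
averageExample = average' [1, 2, 2, 3, 3, 3]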
......@@ -32,7 +32,6 @@ import Data.Map.Strict (Map)
import qualified Data.Set as Set
import Data.Set (Set)
import qualified Data.Vector as V
import Data.Vector (Vector)
import Data.List (filter, concat)
import Data.Maybe (catMaybes)
......
......@@ -25,34 +25,18 @@ module Gargantext.Text.Parsers -- (parse, FileFormat(..))
import Gargantext.Prelude
import System.FilePath (takeExtension, FilePath())
import Data.Attoparsec.ByteString (parseOnly, Parser)
import qualified Data.ByteString as DB
import System.FilePath (FilePath())
import qualified Data.Map as DM
import Data.Either.Extra (partitionEithers)
import Data.Ord()
import Data.Foldable (concat)
import Data.String()
import Data.Either.Extra(Either())
import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8)
import qualified Data.Text as DT
----
--import Control.Monad (join)
import Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Path.IO (resolveFile')
------ import qualified Data.ByteString.Lazy as B
--import Control.Applicative ( (<$>) )
import Control.Concurrent.Async as CCA (mapConcurrently)
-- | Activate Async to parse in parallel
--import Control.Concurrent.Async as CCA (mapConcurrently)
import Data.String (String())
import Gargantext.Text.Parsers.WOS (wosParser)
---- import Gargantext.Parsers.XML (xmlParser)
---- import Gargantext.Parsers.DOC (docParser)
---- import Gargantext.Parsers.ODT (odtParser)
--import Gargantext.Types.Main (ErrorMessage(), Corpus)
type ParseError = String
type Field = Text
......
......@@ -147,7 +147,7 @@ instance FromNamedRecord CsvDoc where
<*> r .: "authors"
instance ToNamedRecord CsvDoc where
toNamedRecord (CsvDoc t s py pm pd abst aut) =
toNamedRecord (CsvDoc t s py pm pd abst aut) =
namedRecord [ "title" .= t
, "source" .= s
, "publication_year" .= py
......@@ -155,7 +155,7 @@ instance ToNamedRecord CsvDoc where
, "publication_day" .= pd
, "abstract" .= abst
, "authors" .= aut
]
]
csvDecodeOptions :: DecodeOptions
......@@ -184,7 +184,94 @@ readCsv fp = do
Right csvDocs -> pure csvDocs
readHal :: FilePath -> IO (Header, Vector CsvHal)
readHal fp = do
csvData <- BL.readFile fp
case decodeByNameWith csvDecodeOptions csvData of
Left e -> panic (pack e)
Right csvDocs -> pure csvDocs
------------------------------------------------------------------------
writeCsv :: FilePath -> (Header, Vector CsvDoc) -> IO ()
writeCsv fp (h, vs) = BL.writeFile fp $
encodeByNameWith csvEncodeOptions h (V.toList vs)
------------------------------------------------------------------------
-- Hal Format
data CsvHal = CsvHal
{ csvHal_title :: !Text
, csvHal_source :: !Text
, csvHal_publication_year :: !Int
, csvHal_publication_month :: !Int
, csvHal_publication_day :: !Int
, csvHal_abstract :: !Text
, csvHal_authors :: !Text
, csvHal_url :: !Text
, csvHal_isbn_s :: !Text
, csvHal_issue_s :: !Text
, csvHal_journalPublisher_s:: !Text
, csvHal_language_s :: !Text
, csvHal_doiId_s :: !Text
, csvHal_authId_i :: !Text
, csvHal_instStructId_i :: !Text
, csvHal_deptStructId_i :: !Text
, csvHal_labStructId_i :: !Text
, csvHal_rteamStructId_i :: !Text
, csvHal_docType_s :: !Text
}
deriving (Show)
instance FromNamedRecord CsvHal where
parseNamedRecord r = CsvHal <$> r .: "title"
<*> r .: "source"
<*> r .: "publication_year"
<*> r .: "publication_month"
<*> r .: "publication_day"
<*> r .: "abstract"
<*> r .: "authors"
<*> r .: "url"
<*> r .: "isbn_s"
<*> r .: "issue_s"
<*> r .: "journalPublisher_s"
<*> r .: "language_s"
<*> r .: "doiId_s"
<*> r .: "authId_i"
<*> r .: "instStructId_i"
<*> r .: "deptStructId_i"
<*> r .: "labStructId_i"
<*> r .: "rteamStructId_i"
<*> r .: "docType_s"
instance ToNamedRecord CsvHal where
toNamedRecord (CsvHal t s py pm pd abst aut url isbn iss jour lang doi auth inst dept lab team doct) =
namedRecord [ "title" .= t
, "source" .= s
, "publication_year" .= py
, "publication_month" .= pm
, "publication_day" .= pd
, "abstract" .= abst
, "authors" .= aut
, "url" .= url
, "isbn_s" .= isbn
, "issue_s" .= iss
, "journalPublisher_s" .= jour
, "language_s" .= lang
, "doiId_s" .= doi
, "authId_i" .= auth
, "instStructId_i" .= inst
, "deptStructId_i" .= dept
, "labStructId_i" .= lab
, "rteamStructId_i" .= team
, "docType_s" .= doct
]
......@@ -24,7 +24,7 @@ import Gargantext.Core.Types
import Gargantext.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import Data.Char (isAlphaNum, isSpace)
--import Data.Char (isAlphaNum, isSpace)
monoterms' :: Lang -> Text -> [Terms]
monoterms' l txt = map (text2terms l) $ monoterms txt
......@@ -40,9 +40,9 @@ text2terms lang txt = Terms label stems
label = splitOn (pack " ") txt
stems = S.fromList $ map (stem lang) label
--monograms :: Text -> [Text]
--monograms :: Text -> [Text]
--monograms xs = monograms $ toLower $ filter isGram xs
isGram :: Char -> Bool
isGram c = isAlphaNum c || isSpace c || c `elem` ['-','/','\'']
--isGram :: Char -> Bool
--isGram c = isAlphaNum c || isSpace c || c `elem` ['-','/','\'']
......@@ -23,20 +23,20 @@ module Gargantext.Text.Terms.Mono.Token (tokenize)
import Data.Text (Text)
import qualified Gargantext.Text.Terms.Mono.Token.En as En
import Gargantext.Core (Lang(..))
import Gargantext.Prelude
-- | Contexts depend on the lang
--import Gargantext.Core (Lang(..))
type Token = Text
-- >>> tokenize "A rose is a rose is a rose."
-- ["A","rose","is","a","rose","is","a","rose", "."]
--
data Context = Letter | Word | Sentence | Line | Paragraph
tokenize :: Text -> [Token]
tokenize = En.tokenize
tokenize' :: Lang -> Context -> [Token]
tokenize' = undefined
--data Context = Letter | Word | Sentence | Line | Paragraph
--
--tokenize' :: Lang -> Context -> [Token]
--tokenize' = undefined
--
......@@ -17,28 +17,26 @@ From text to viz, all the flow of texts in Gargantext.
module Gargantext.TextFlow
where
import GHC.IO (FilePath)
import qualified Data.Text as T
import Data.Text.IO (readFile)
import Control.Arrow ((***))
import Control.Monad.IO.Class (MonadIO)
import Data.Map.Strict (Map)
import qualified Data.Array.Accelerate as A
import qualified Data.Map.Strict as M
import qualified Data.List as L
import Data.Tuple.Extra (both)
----------------------------------------------
import Gargantext.Core (Lang(FR))
import Gargantext.Core.Types (Label)
import Gargantext.Prelude
import Prelude (print, seq)
import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, map2mat, mat2map)
import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional, distributional)
import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map)
import Gargantext.Viz.Graph.Distances.Matrice (conditional)
import Gargantext.Viz.Graph (Graph(..), Node(..), Edge(..), Attributes(..), TypeNode(..))
import Gargantext.Text.Metrics.Count (cooc)
import Gargantext.Text.Metrics
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Terms (TermType(Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Parsers.CSV
......@@ -56,13 +54,15 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain, LouvainNode(..))
-}
printDebug :: (Show a, MonadIO m) => [Char] -> a -> m ()
printDebug msg x = putStrLn $ msg <> " " <> show x
--printDebug _ _ = pure ()
data TextFlow = CSV | FullText
-- workflow :: Lang (EN|FR) -> FilePath -> Graph
textflow termsLang workType path = do
textflow :: Lang -> TextFlow -> FilePath -> IO Graph
textflow _ workType path = do
-- Text <- IO Text <- FilePath
contexts <- case workType of
FullText -> splitBy (Sentences 5) <$> readFile path
......@@ -97,7 +97,7 @@ textflow termsLang workType path = do
printDebug "myCooc3" $ M.size myCooc3
-- Cooc -> Matrix
let (ti, fi) = createIndices myCooc3
let (ti, _) = createIndices myCooc3
printDebug "ti" $ M.size ti
let myCooc4 = toIndex ti myCooc3
......@@ -145,6 +145,6 @@ data2graph labels coocs distance partitions = Graph nodes edges
, edge_target = cs (show t)
, edge_weight = w
, edge_id = cs (show i) }
| (i, ((s,t), w)) <- zip [0..] (M.toList distance) ]
| (i, ((s,t), w)) <- zip ([0..]::[Integer]) (M.toList distance) ]
-----------------------------------------------------------
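For reference, a minimal sketch of calling textflow with its new explicit signature; the corpus path is a placeholder:
-- hypothetical entry point: build a graph from a CSV corpus in French
testFlowCSV :: IO Graph
testFlowCSV = textflow FR CSV "data/corpus.csv"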
......@@ -27,13 +27,9 @@ import Data.Text (Text)
import qualified Text.Read as T
import qualified Data.Text as T
import Data.Map (Map)
import Gargantext.Prelude
import Gargantext.Core.Utils.Prefix (unPrefix)
import Data.Graph.Clustering.Louvain.CplusPlus (LouvainNode)
data TypeNode = Terms | Unknown
deriving (Show, Generic)
......
......@@ -18,18 +18,14 @@ module Gargantext.Viz.Graph.Distances.Conditional
where
import Data.Matrix hiding (identity)
import Data.String.Conversions (ConvertibleStrings(..))
import Data.List (concat, sortOn)
import qualified Data.List as L
import Data.List (sortOn)
import Data.Map (Map)
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import Data.Vector (Vector)
import qualified Data.Vector as V
import Gargantext.Prelude
......@@ -82,7 +78,7 @@ opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
-------------------------------------------------------
conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
conditional m = filter (threshold m') m'
conditional m = filterMat (threshold m') m'
where
------------------------------------------------------------------------
-- | Main Operations
......@@ -111,24 +107,24 @@ conditional m = filter (threshold m') m'
$ nodes_included k <> nodes_specific k
nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
nodes_specific m = take m $ sortOn snd $ toListsWithIndex sg
nodes_specific n = take n $ sortOn snd $ toListsWithIndex sg
insert as s = foldl' (\s' a -> S.insert a s') s as
k' = 2*k
k = 10
dico_nodes :: Map Int Int
dico_nodes = M.fromList $ zip [1..] nodes_kept
dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
dico_nodes = M.fromList $ zip ([1..] :: [Int]) nodes_kept
--dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
m' = matrix (length nodes_kept)
(length nodes_kept)
(\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
threshold m = V.minimum $ V.map (\cId -> V.maximum $ getCol cId m) (V.enumFromTo 1 (nOf Col m))
threshold m'' = V.minimum $ V.map (\cId -> V.maximum $ getCol cId m'') (V.enumFromTo 1 (nOf Col m''))
filter t m = mapAll (\x -> filter' t x) m
filterMat t m'' = mapAll (\x -> filter' t x) m''
where
filter' t x = case (x >= t) of
filter' t' x = case (x >= t') of
True -> x
False -> 0
......
......@@ -20,14 +20,9 @@ module Gargantext.Viz.Graph.Distances.Distributional
where
import Data.Matrix hiding (identity)
import Data.String.Conversions (ConvertibleStrings(..))
import Data.Map (Map)
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import Data.Vector (Vector)
import qualified Data.Vector as V
......@@ -64,11 +59,11 @@ mi m = matrix c r createMat
where
(c,r) = (nOf Col m, nOf Row m)
createMat (x,y) = doMi x y m
doMi x y m = if x == y then 0 else (max (log (doMi' x y m)) 0 )
doMi x y m' = if x == y then 0 else (max (log (doMi' x y m')) 0 )
doMi' x y m = (getElem x y m) / ( cross x y m / total m )
doMi' x y m' = (getElem x y m) / ( cross x y m / total m' )
cross x y m = (V.sum $ ax Col x y m) * (V.sum $ ax Row x y m)
cross x y m' = (V.sum $ ax Col x y m) * (V.sum $ ax Row x y m')
......
......@@ -34,25 +34,18 @@ Implementation use Accelerate library :
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Viz.Graph.Distances.Matrice
where
import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import Data.Array.Accelerate.Smart
import Data.Array.Accelerate.Type
import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
import Data.Maybe (Maybe(Just))
import qualified Gargantext.Prelude as P
import qualified Data.Array.Accelerate.Array.Representation as Repr
import Gargantext.Text.Metrics.Count
-----------------------------------------------------------------------
-- Test perf.
distriTest :: Matrix Double
distriTest = distributional $ myMat 100
-----------------------------------------------------------------------
......@@ -133,7 +126,7 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
where
n = dim m
filter m = zipWith (\a b -> max a b) m (transpose m)
-- filter m = zipWith (\a b -> max a b) m (transpose m)
ri mat = zipWith (/) mat1 mat2
where
......@@ -145,7 +138,7 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
crossProduct m''' = zipWith (*) (cross m''' ) (cross (transpose m'''))
cross mat = zipWith (-) (mkSum n mat) (mat)
-----------------------------------------------------------------------
......@@ -207,11 +200,11 @@ p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e
p_ij m = zipWith (/) m (n_jj m)
where
n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_jj m = backpermute (shape m)
n_jj myMat' = backpermute (shape m)
(lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
-> (Z :. j :. j)
)
) m
) myMat'
-- | P(j|i) = Nij /N(ii) Probability to get i given j
-- to test
......
......@@ -21,7 +21,7 @@ TODO:
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.Viz.Graph.Index
where
......@@ -30,7 +30,6 @@ import qualified Data.Array.Accelerate as A
import qualified Data.Array.Accelerate.Interpreter as A
import Data.Array.Accelerate (Matrix, Elt, Shape, (:.)(..), Z(..))
import qualified Data.Vector.Unboxed as DVU
import Data.Maybe (fromMaybe)
import Data.Set (Set)
......@@ -71,7 +70,7 @@ mat2map :: (Elt a, Shape (Z :. Index)) =>
A.Array (Z :. Index :. Index) a -> Map (Index, Index) a
mat2map m = M.fromList . map f . A.toList . A.run . A.indexed $ A.use m
where
Z :. _ :. n = A.arrayShape m
-- Z :. _ :. n = A.arrayShape m
f ((Z :. i :. j), x) = ((i, j), x)
-------------------------------------------------------------------------------
......@@ -88,8 +87,8 @@ indexConversion index ms = M.fromList $ map (\((k1,k2),c) -> ( ((M.!) index k1,
-------------------------------------------------------------------------------
-- TODO
fromIndex' :: Ord t => Vector t -> Map (Index, Index) a -> Map (t,t) a
fromIndex' vi ns = undefined
--fromIndex' :: Ord t => Vector t -> Map (Index, Index) a -> Map (t,t) a
--fromIndex' vi ns = undefined
-- TODO
createIndices' :: Ord t => Map (t, t) b -> (Map t Index, Vector t)
......
......@@ -21,12 +21,6 @@ module Gargantext.Viz.Graph.Utils
import Data.Matrix hiding (identity)
import Data.Map (Map)
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import Data.Vector (Vector)
import qualified Data.Vector as V
......