Commit 697d44ea authored by Alexandre Delanoë

[STRUCTURE] Simplify the module hierarchy and clean up old code.

parent f4e687d5
......@@ -22,41 +22,20 @@ library:
- -Werror
exposed-modules:
- Gargantext
- Gargantext.Database
- Gargantext.Database.Instances
- Gargantext.Database.Ngram
- Gargantext.Database.Node
- Gargantext.Database.Facet
- Gargantext.Database.NodeNgram
- Gargantext.Database.NodeNgramNgram
- Gargantext.Database.NodeNode
- Gargantext.Database.NodeNodeNgram
- Gargantext.Database.Utils
- Gargantext.Database.User
- Gargantext.Prelude
- Gargantext.Core
- Gargantext.Core.Utils
- Gargantext.Core.Types
- Gargantext.Core.Types.Node
- Gargantext.Text
- Gargantext.Text.Analysis
- Gargantext.Text.TFICF
- Gargantext.Text.Letters
- Gargantext.Text.CoreNLP
- Gargantext.Text.Parser
- Gargantext.Text.Token.Text
- Gargantext.Text.Lang.En
- Gargantext.Text.Stem.En
- Gargantext.Text.Lang.Fr
- Gargantext.Text.Metrics
- Gargantext.Text.TextMining
- Gargantext.Text.Occurrences
- Gargantext.Text.Parsers
- Gargantext.Text.Parsers.WOS
- Gargantext.Text.Metrics.Occurrences
- Gargantext.Text.Metrics.FrequentItemSet
- Gargantext.Text.Ngrams.PosTagging.CoreNLP
- Gargantext.Text.Ngrams.PosTagging.Parser
- Gargantext.Text.Parsers.Date
- Gargantext.Prelude
- Gargantext.Database
- Gargantext.API
- Gargantext.API.Auth
- Gargantext.Types
- Gargantext.Types.Main
- Gargantext.Types.Node
- Gargantext.Utils.DateUtils
- Gargantext.Utils.Prefix
dependencies:
- QuickCheck
- aeson
......
......@@ -11,7 +11,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
import Gargantext.Prelude
import Gargantext.Types.Main (Language(..))
import Gargantext.Core (Lang(..))
--import qualified Ngrams.Lang.Fr as Fr
import qualified Ngrams.Lang as Lang
import qualified Ngrams.Lang.Occurrences as Occ
......
......@@ -18,11 +18,11 @@ module Ngrams.Lang where
import Gargantext.Prelude (IO())
import Gargantext.Types.Main (Language(..))
import Gargantext.Core (Lang(..))
import qualified Ngrams.Lang.Fr as Fr
import qualified Ngrams.Lang.En as En
ngramsExtractionTest :: Language -> IO ()
ngramsExtractionTest :: Lang -> IO ()
ngramsExtractionTest FR = Fr.ngramsExtractionTest
ngramsExtractionTest EN = En.ngramsExtractionTest
......@@ -23,8 +23,8 @@ import Data.Text (Text)
import Test.Hspec
import Gargantext.Prelude
import Gargantext.Types.Main (Language(..))
import Gargantext.Text.Parser (extractNgrams, selectNgrams)
import Gargantext.Core (Lang(..))
import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
ngramsExtractionTest :: IO ()
......
......@@ -20,8 +20,8 @@ module Ngrams.Lang.Fr where
import Test.Hspec
import Gargantext.Prelude
import Gargantext.Types.Main (Language(..))
import Gargantext.Text.Parser (extractNgrams, selectNgrams)
import Gargantext.Core (Lang(..))
import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
ngramsExtractionTest :: IO ()
ngramsExtractionTest = hspec $ do
......
......@@ -22,7 +22,7 @@ import Test.Hspec
import Data.Either (Either(Right))
import Gargantext.Prelude
import Gargantext.Text.Occurrences (parseOccurrences)
import Gargantext.Text.Metrics.Occurrences (parseOccurrences)
parsersTest :: IO ()
parsersTest = hspec $ do
......
......@@ -15,11 +15,9 @@ commentary with @some markup@.
module Parsers.Date where
import Gargantext.Prelude
import Test.Hspec
import Test.QuickCheck
import Parsers.Types
import Control.Applicative ((<*>))
import Data.Either (Either(..))
......@@ -28,7 +26,12 @@ import Data.Text (pack, Text)
import Text.Parsec.Error (ParseError)
import Duckling.Time.Types (toRFC3339)
import Gargantext.Parsers.Date (fromRFC3339)
-----------------------------------------------------------
import Gargantext.Prelude
import Gargantext.Text.Parsers.Date (fromRFC3339)
import Parsers.Types
-----------------------------------------------------------
fromRFC3339Inv :: Either ParseError ZonedTime -> Text
fromRFC3339Inv (Right z) = toRFC3339 z
......
......@@ -42,7 +42,7 @@ import Test.QuickCheck (elements)
-- import Control.Applicative ((<*>))
import Gargantext.Prelude
import Gargantext.Utils.Prefix (unPrefix)
import Gargantext.Core.Utils.Prefix (unPrefix)
-----------------------------------------------------------------------
type CountAPI = Post '[JSON] Counts
......
......@@ -40,7 +40,7 @@ import Servant
-- import Servant.Multipart
import Gargantext.Prelude
import Gargantext.Types.Node
import Gargantext.Core.Types.Node
import Gargantext.Database.Node ( getNodesWithParentId
, getNode, getNodesWith
, deleteNode, deleteNodes)
......
{-|
Module : Gargantext.Core
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Gargantext.Core
where
------------------------------------------------------------------------
-- | Language of a Text
-- For simplicity, we assume a text has a homogeneous language
data Lang = EN | FR
-- | DE | IT | SP
-- > EN == english
-- > FR == french
-- > DE == german (not implemented yet)
-- > IT == italian (not implemented yet)
-- > SP == spanish (not implemented yet)
-- > ... add your language and help us to implement it (:
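A minimal usage sketch (not part of this commit): code elsewhere is expected to dispatch on Lang by pattern matching. The helper name below is hypothetical and assumes Data.Text's Text is in scope.
-- Hypothetical helper, for illustration only.
langName :: Lang -> Text
langName EN = "english"
langName FR = "french"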
......@@ -13,9 +13,9 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Types ( module Gargantext.Types.Main
, module Gargantext.Types.Node
module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, module Gargantext.Core.Types.Node
) where
import Gargantext.Types.Main
import Gargantext.Types.Node
import Gargantext.Core.Types.Main
import Gargantext.Core.Types.Node
{-|
Module : Gargantext.Types.Main
Module : Gargantext.Core.Types.Main
Description : Short description
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -19,7 +19,7 @@ commentary with @some markup@.
{-# LANGUAGE OverloadedStrings #-}
------------------------------------------------------------------------
module Gargantext.Types.Main where
module Gargantext.Core.Types.Main where
------------------------------------------------------------------------
import Data.Maybe (fromMaybe)
......@@ -28,22 +28,10 @@ import Data.Monoid ((<>))
import Data.Text (Text, pack)
import Data.List (lookup)
import Gargantext.Types.Node
import Gargantext.Core.Types.Node
import Gargantext.Prelude
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Language of a Text
-- For simplicity, we assume a text has a homogeneous language
data Language = EN | FR
-- | DE | IT | SP
-- > EN == english
-- > FR == french
-- > DE == german (not implemented yet)
-- > IT == italian (not implemented yet)
-- > SP == spanish (not implemented yet)
-- > ... add your language and help us to implement it (:
-- The whole database is structured as a hierarchical Tree
data Tree a = NodeT a [Tree a]
deriving (Show, Read, Eq)
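For illustration (not part of the diff), a tiny value of this Tree type, assuming Int is in scope via the project prelude:
-- Hypothetical example value.
exampleTree :: Tree Int
exampleTree = NodeT 1 [NodeT 2 [], NodeT 3 []]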
......
{-|
Module : Gargantext.Types.Nodes
Module : Gargantext.Core.Types.Node
Description : Main Types of Nodes
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -18,7 +18,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-}
-- {-# LANGUAGE DuplicateRecordFields #-}
module Gargantext.Types.Node where
module Gargantext.Core.Types.Node where
import Prelude (Enum, Bounded, minBound, maxBound)
......@@ -48,7 +48,7 @@ import Test.QuickCheck.Arbitrary
import Test.QuickCheck (elements)
import Gargantext.Prelude
import Gargantext.Utils.Prefix (unPrefix)
import Gargantext.Core.Utils.Prefix (unPrefix)
------------------------------------------------------------------------
......
......@@ -22,7 +22,7 @@ Phylomemy was first described in [REF].
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Types.Phylo where
module Gargantext.Core.Types.Phylo where
import Data.Aeson.TH (deriveJSON)
import Data.Maybe (Maybe)
......@@ -32,7 +32,7 @@ import Data.Time.Clock.POSIX (POSIXTime)
import GHC.Generics (Generic)
import Gargantext.Prelude
import Gargantext.Utils.Prefix (unPrefix)
import Gargantext.Core.Utils.Prefix (unPrefix)
------------------------------------------------------------------------
-- | Phylo datatype descriptor of a phylomemy
......
......@@ -13,9 +13,9 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Utils (
module Gargantext.Core.Utils (
-- module Gargantext.Utils.Chronos
module Gargantext.Utils.Prefix
module Gargantext.Core.Utils.Prefix
) where
-- import Gargantext.Utils.Chronos
import Gargantext.Utils.Prefix
import Gargantext.Core.Utils.Prefix
{-|
Module : Gargantext.Utils.Counts
Module : Gargantext.Core.Utils.Count
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -9,6 +9,9 @@ Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
Inspired by Gabriel Gonzalez's "beautiful folds" talk.
-}
{-# LANGUAGE NoImplicitPrelude #-}
......@@ -16,15 +19,17 @@ commentary with @some markup@.
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RankNTypes #-}
module Data.Gargantext.Utils.Count (head, last, all, any, sum, product, length)
module Gargantext.Core.Utils.Count (head, last, all, any, sum, product, length)
where
import Data.Monoid
import Protolude hiding ((<>), head, last, all, any, sum, product, length)
import Data.Functor
import Control.Applicative
import qualified Data.Foldable
import Data.Monoid
import Control.Lens (Getting, foldMapOf)
import Gargantext.Prelude hiding (head, sum, length)
data Fold i o = forall m . Monoid m => Fold (i -> m) (m -> o)
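As a point of reference (not in this commit), the classic sum fold in this encoding, using the Sum monoid from Data.Monoid, which is already imported above:
-- Hypothetical example, for illustration only.
sumF :: Num a => Fold a a
sumF = Fold Sum getSum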
instance Functor (Fold i) where
......
{-|
Module : Gargantext.Utils.DateUtils
Module : Gargantext.Core.Utils.DateUtils
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -10,7 +10,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Utils.DateUtils where
module Gargantext.Core.Utils.DateUtils where
import Gargantext.Prelude
import Data.Time (UTCTime, toGregorian, utctDay)
......
{-|
Module : Gargantext.Utils.Prefix
Module : Gargantext.Core.Utils.Prefix
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -13,7 +13,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Utils.Prefix where
module Gargantext.Core.Utils.Prefix where
import Prelude
......
......@@ -49,13 +49,13 @@ import qualified Opaleye.Internal.Unpackspec()
import Test.QuickCheck.Arbitrary
import Test.QuickCheck (elements)
import Gargantext.Types
import Gargantext.Types.Node (NodeType)
import Gargantext.Core.Types
import Gargantext.Core.Types.Node (NodeType)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.NodeNode
import Gargantext.Database.NodeNodeNgram
import Gargantext.Database.Node
import Gargantext.Database.Queries
import Gargantext.Utils.Prefix (unPrefix)
-- import Gargantext.Database.NodeNgram
------------------------------------------------------------------------
......
......@@ -29,8 +29,8 @@ import Database.PostgreSQL.Simple.FromField ( Conversion
)
import Prelude hiding (null, id, map, sum)
import Gargantext.Types
import Gargantext.Types.Node (NodeType)
import Gargantext.Core.Types
import Gargantext.Core.Types.Node (NodeType)
import Gargantext.Database.Queries
import Gargantext.Prelude hiding (sum)
......
......@@ -22,7 +22,7 @@ Portability : POSIX
module Gargantext.Database.Queries where
import Gargantext.Prelude
import Gargantext.Types (Limit, Offset, NodePoly)
import Gargantext.Core.Types (Limit, Offset, NodePoly)
import Data.Maybe (Maybe, maybe)
import Control.Arrow ((>>>))
import Control.Applicative ((<*>))
......
......@@ -24,7 +24,6 @@ commentary with @some markup@.
module Gargantext.Database.User where
import Gargantext.Prelude
import GHC.Show(Show(..))
import Data.Eq(Eq(..))
import Data.Time (UTCTime)
......@@ -40,6 +39,8 @@ import Opaleye
-- Functions only
import Data.List (find)
import Gargantext.Prelude
data UserLight = UserLight { userLight_id :: Int
, userLight_username :: Text
......
module Data.Gargantext.Error where
import Control.Exception (Exception)
import Data.Typeable (Typeable)
import GHC.Generics (Generic)
import Network.HTTP.Client (HttpException)
import System.Exit (ExitCode (..), exitWith)
import System.IO (stderr)
--import Text.Parsec.Error
--import Text.Parsec.Pos hiding (Line)
data GargError = GargIOError String IOError
| GargHttpError String HttpException
| GargParseError String
| GargNgramsError String
| GargDatabaseError String
deriving (Show, Typeable, Generic)
instance Exception GargError
-- | Handle GargError by exiting with an error message.
handleError :: Either GargError a -> IO a
handleError (Right r) = pure r
handleError (Left e) =
case e of
GargIOError _ err' -> ioError err'
GargHttpError u err' -> err 61 $
"Could not fetch " ++ u ++ "\n" ++ show err'
GargParseError s -> err 64 s
GargNgramsError s -> err 0 s
GargDatabaseError s -> err 0 s
err :: Int -> String -> IO a
err exitCode msg = do
UTF8.hPutStrLn stderr msg
exitWith $ ExitFailure exitCode
return undefined
......@@ -15,38 +15,18 @@ n non negative integer
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text ( module Gargantext.Text.Letters
--, module Gargantext.Text.Hetero
, module Gargantext.Text.CoreNLP
, module Gargantext.Text.Parser
, module Gargantext.Text.Occurrences
, module Gargantext.Text.TextMining
, module Gargantext.Text.Metrics
, Ngrams(..), ngrams, occ, sumOcc, text2fis, clean
, ListName(..), equivNgrams, isGram, sentences
, ngramsTest
) where
import Gargantext.Text.Letters
--import Gargantext.Text.Hetero
import Gargantext.Text.CoreNLP
import Gargantext.Text.Parser
import Gargantext.Text.Occurrences
import Gargantext.Text.TextMining
--import Gargantext.Text.Words
import Gargantext.Text.Metrics
import qualified Gargantext.Text.FrequentItemSet as FIS
-----------------------------------------------------------------
module Gargantext.Text
where
import Data.List (sort)
import Data.Char (Char, isAlphaNum, isSpace)
import Data.Text (Text, filter, toLower, split, lines, concat)
import Data.Text (Text, filter, toLower, split, splitOn)
import qualified Data.Text as DT
import Data.Text.IO (readFile)
--import Data.Text.IO (readFile)
import Data.Set (Set)
import qualified Data.Set as S
import Data.Map.Strict (Map
, empty
, insertWith, unionWith
......@@ -55,25 +35,38 @@ import Data.Map.Strict (Map
)
import qualified Data.Map.Strict as M (filter)
import Data.Foldable (foldl')
import Gargantext.Prelude hiding (filter)
-- Maybe useful later:
--import NLP.Stemmer (stem, Stemmer(..))
--import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
--import Language.Aspell.Options (ACOption(..))
-----------------------------------------------------------------
import Gargantext.Text.Ngrams.Stem.En
import qualified Gargantext.Text.Metrics.FrequentItemSet as FIS
import Gargantext.Prelude hiding (filter)
-----------------------------------------------------------------
data ListName = Stop | Candidate | Graph
deriving (Show, Eq)
data Ngrams = Ngrams { _ngramsNgrams :: [Text]
, _ngramsStem :: [Text]
, _ngramsListName :: Maybe ListName
data Ngroup = Ngroup { _ngroup_label :: Ngrams
, _ngroup_ngrams :: [Ngrams]
} deriving (Show)
data Ngrams = Ngrams { _ngrams_label :: [Text]
, _ngrams_stem :: Set Text
} deriving (Show)
text2ngrams :: Text -> Ngrams
text2ngrams txt = Ngrams txt' (S.fromList $ map stem txt')
where
txt' = splitOn " " txt
equivNgrams :: Ngrams -> Ngrams -> Bool
equivNgrams (Ngrams n1 s1 _) (Ngrams n2 s2 _)
= (sort n1) == (sort n2) || (sort s1) == (sort s2)
equivNgrams (Ngrams _ s1) (Ngrams _ s2) = s1 `S.isSubsetOf` s2
|| s2 `S.isSubsetOf` s1
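A quick usage sketch of the new stem-based equivalence (not part of the diff); it assumes the English stemmer maps "dogs" to "dog" and leaves "black" unchanged, so both phrases share the same stem set:
-- Hypothetical check, for illustration only (expected: True).
checkEquiv :: Bool
checkEquiv = equivNgrams (text2ngrams "black dogs") (text2ngrams "black dog")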
type Occ = Int
--type Index = Int
......@@ -149,20 +142,25 @@ sentences txt = split isStop txt
isStop :: Char -> Bool
isStop c = c `elem` ['.','?','!']
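For intuition (not part of the diff), a worked example of sentences, relying on Data.Text.split keeping the segments between delimiters:
-- Hypothetical example, for illustration only.
sentencesExample :: [Text]
sentencesExample = sentences "Hello. World!"   -- expected: ["Hello", " World", ""]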
-- | Tests
ngramsTest fp = ws
where
txt = concat <$> lines <$> clean <$> readFile fp
-- | Number of sentences
ls = sentences <$> txt
-- | Number of monograms used in the full text
ws = ngrams <$> txt
-- | stem ngrams
-- TODO
-- group ngrams
ocs = occ <$> ws
--
---- | https://en.wikipedia.org/wiki/Text_mining
--testText :: Text
--testText = DT.pack "Text mining, also referred to as text data mining, roughly equivalent to text analytics, is the process of deriving high-quality information from text. High-quality information is typically derived through the devising of patterns and trends through means such as statistical pattern learning. Text mining usually involves the process of structuring the input text (usually parsing, along with the addition of some derived linguistic features and the removal of others, and subsequent insertion into a database), deriving patterns within the structured data, and finally evaluation and interpretation of the output. 'High quality' in text mining usually refers to some combination of relevance, novelty, and interestingness. Typical text mining tasks include text categorization, text clustering, concept/entity extraction, production of granular taxonomies, sentiment analysis, document summarization, and entity relation modeling (i.e., learning relations between named entities). Text analysis involves information retrieval, lexical analysis to study word frequency distributions, pattern recognition, tagging/annotation, information extraction, data mining techniques including link and association analysis, visualization, and predictive analytics. The overarching goal is, essentially, to turn text into data for analysis, via application of natural language processing (NLP) and analytical methods. A typical application is to scan a set of documents written in a natural language and either model the document set for predictive classification purposes or populate a database or search index with the information extracted."
--
--
--
---- | Tests
----ngramsTest :: [Text]
--ngramsTest = ocs
-- where
-- --txt = concat <$> lines <$> clean <$> readFile filePath
-- txt = clean $ testText
-- -- | Number of sentences
-- ls = sentences $ txt
-- -- | Number of monograms used in the full text
-- ws = ngrams $ txt
-- -- | stem ngrams
-- -- TODO
-- -- group ngrams
-- ocs = occ $ ws
--
--
{-|
Module : Gargantext.Analysis
Description : Gargantext Analysis
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Analysis
where
import Gargantext.Prelude (undefined, IO(), Int())
-- import qualified Data.Text.Lazy as DTL
import Data.Text
import Opaleye (Column, PGInt4)
--import Data.Map as DM
--import Data.Vector as DV
-- | Simple function to count Occurrences in a context of text.
occOfDocument :: Column PGInt4 -> Text -> IO Int
occOfDocument = undefined
--occOfDocument c_id txt = do
-- docs <- pm (hyperdataDocument_Abstract . node_hyperdata) <$> getCorpusDocument c_id
-- let occs = pm (\x -> maybe "" identity x) docs
-- let result = case sequence $ pm (parseOccurrences txt) occs of
-- -- TODO find a way to get nice error messages (file, function, line)
-- Left str -> error $ "[ERROR] at file/function/line" ++ str
-- Right xs -> xs
-- pure (sum result)
data Occurrences a b = Map a b
{-|
Module : Gargantext.Text.Letters
Description : Ngrams.Letters module
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Sugar to work on letters with Text.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Letters where
import qualified Data.Text.Lazy as DTL
-- import qualified Data.Text.Lazy.IO as DTLIO
import Gargantext.Prelude
-- | /O(n)/ Breaks a 'Text' up into a list of single-character 'Text' values.
-- Implementations ordered from slower to faster:
letters :: DTL.Text -> [DTL.Text]
letters text = DTL.chunksOf 1 text
letters' :: DTL.Text -> [DTL.Text]
letters' text = DTL.splitOn "#" $ DTL.intersperse '#' text
letters'' :: DTL.Text -> [DTL.Text]
letters'' = DTL.foldr (\ch xs -> DTL.singleton ch : xs) []
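For reference (not part of this commit), all three variants should agree on a small input:
-- Hypothetical example, for illustration only.
lettersExample :: [DTL.Text]
lettersExample = letters (DTL.pack "abc")   -- expected: ["a","b","c"]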
{-|
Module : Gargantext.Text.Metrics
Description : Short description
Copyright : (c) Some Guy, 2013
Someone Else, 2014
License : GPL-3
Maintainer : sample@email.com
Module : Gargantext.Text.Metrics
Description : Metrics on texts (mainly re-exports of @Data.Text.Metrics@).
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
......@@ -13,6 +12,7 @@ Mainly reexport functions in @Data.Text.Metrics@
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Metrics (levenshtein
, levenshteinNorm
, damerauLevenshtein
......@@ -22,12 +22,12 @@ module Gargantext.Text.Metrics (levenshtein
, hamming
) where
import Gargantext.Prelude
import Data.Text (Text)
import GHC.Real (Ratio)
import qualified Data.Text.Metrics as DTM
import Gargantext.Prelude
{- * Example de titre
-}
......
{-|
Module : Gargantext.Text.FrequentItemSet
Module : Gargantext.Text.Metrics.FrequentItemSet
Description : Ngrams tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
......@@ -13,7 +13,7 @@ Domain Specific Language to manage Frequent Item Set (FIS)
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.FrequentItemSet
module Gargantext.Text.Metrics.FrequentItemSet
( Fis, Size
, occ, cooc
, all, between
......
{-|
Module : Gargantext.Text.Hetero
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Gargantext.Text.Hetero where
import GHC.Real as R
......
{-|
Module : Gargantext.Text.Occurrences
Module : Gargantext.Text.Metrics.Occurrences
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Occurrences where
module Gargantext.Text.Metrics.Occurrences where
import Gargantext.Prelude
......
{-|
Module : Gargantext.Text.TFICF
Module : Gargantext.Text.Metrics.TFICF
Description : TFICF Ngrams tools
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
......@@ -15,7 +15,7 @@ Definition of TFICF
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.TFICF where
module Gargantext.Text.Metrics.TFICF where
import GHC.Generics (Generic)
......
{-|
Module : Gargantext.Text.Ngrams
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Ngrams
where
{-|
Module : Gargantext.Text.List
Module : Gargantext.Text.Ngrams.Lists
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -13,19 +13,20 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.List where
import Data.Maybe
import Data.List (filter)
import Gargantext.Text
import Gargantext.Prelude
graph :: [Ngrams] -> [Ngrams]
graph ngs = filter (\ng -> _ngramsListName ng == Just Graph) ngs
candidates :: [Ngrams] -> [Ngrams]
candidates ngs = filter (\ng -> _ngramsListName ng == Just Candidate) ngs
stop :: [Ngrams] -> [Ngrams]
stop ngs = filter (\ng -> _ngramsListName ng == Just Stop) ngs
module Gargantext.Text.Ngrams.Lists
where
--import Data.Maybe
--import Data.List (filter)
--import Gargantext.Text
--import Gargantext.Prelude
--
--graph :: [Ngrams] -> [Ngrams]
--graph ngs = filter (\ng -> _ngramsListName ng == Just Graph) ngs
--
--candidates :: [Ngrams] -> [Ngrams]
--candidates ngs = filter (\ng -> _ngramsListName ng == Just Candidate) ngs
--
--stop :: [Ngrams] -> [Ngrams]
--stop ngs = filter (\ng -> _ngramsListName ng == Just Stop) ngs
{-|
Module : Gargantext.Text.Ngrams.PosTagging
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Ngrams.PosTagging
where
{-|
Module : Gargantext.Text.CoreNLP
Module : Gargantext.Text.Ngrams.PosTagging.CoreNLP
Description : CoreNLP module
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
......@@ -16,18 +16,20 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.Text.CoreNLP where
module Gargantext.Text.Ngrams.PosTagging.CoreNLP
where
import Data.Aeson.TH (deriveJSON)
import GHC.Generics
import Data.Monoid ((<>))
import GHC.Show (Show(..))
import Gargantext.Types.Main (Language(..))
import Gargantext.Prelude
import Gargantext.Utils.Prefix (unPrefix)
import Data.Aeson.TH (deriveJSON)
import Data.Monoid ((<>))
import Data.Text (Text)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Network.HTTP.Simple
......@@ -80,7 +82,7 @@ $(deriveJSON (unPrefix "_") ''Sentences)
--
corenlp :: Language -> Text -> IO Sentences
corenlp :: Lang -> Text -> IO Sentences
corenlp lang txt = do
let properties = case lang of
EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
......@@ -99,7 +101,7 @@ corenlp lang txt = do
-- Named Entity Recognition example
-- parseWith _tokenNer "Hello world of Peter."
-- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
tokenWith :: (Token -> t) -> Language -> Text -> IO [[(Text, t)]]
tokenWith :: (Token -> t) -> Lang -> Text -> IO [[(Text, t)]]
tokenWith f lang s = map (map (\t -> (_tokenWord t, f t))) <$> map _sentenceTokens <$> _sentences <$> corenlp lang s
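A hedged convenience wrapper (not part of this commit), reusing the _tokenNer accessor shown in the example above and assuming it returns the NER tag as Text:
-- Hypothetical helper, for illustration only.
namedEntities :: Lang -> Text -> IO [[(Text, Text)]]
namedEntities = tokenWith _tokenNer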
......
{-|
Module : Gargantext.Text.Lang.En
Module : Gargantext.Text.Ngrams.PosTagging.Lang.En
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -14,7 +14,8 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Lang.En (selectNgrams, groupNgrams, textTest) where
module Gargantext.Text.Ngrams.PosTagging.Lang.En (selectNgrams, groupNgrams, textTest)
where
import Gargantext.Prelude
import Data.Text (Text)
......
{-|
Module : Gargantext.Text.Lang.Fr
Module : Gargantext.Text.Ngrams.PosTagging.Lang.Fr
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Lang.Fr (selectNgrams, groupNgrams, textTest)
module Gargantext.Text.Ngrams.PosTagging.Lang.Fr (selectNgrams, groupNgrams, textTest)
where
import Gargantext.Prelude
......
{-|
Module : Gargantext.Text.Parser
Module : Gargantext.Text.Ngrams.PosTagging.Parser
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -15,15 +15,16 @@ commentary with @some markup@.
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Text.Parser where
module Gargantext.Text.Ngrams.PosTagging.Parser
where
import Gargantext.Prelude
import Gargantext.Text.CoreNLP
import Data.Text hiding (map)
import Gargantext.Types.Main (Language(..))
import qualified Gargantext.Text.Lang.En as En
import qualified Gargantext.Text.Lang.Fr as Fr
import Gargantext.Core (Lang(..))
import Gargantext.Text.Ngrams.PosTagging.CoreNLP
import qualified Gargantext.Text.Ngrams.PosTagging.Lang.En as En
import qualified Gargantext.Text.Ngrams.PosTagging.Lang.Fr as Fr
type SNgrams = (Text, Text, Text)
......@@ -45,11 +46,11 @@ type SNgrams = (Text, Text, Text)
-- TODO for scientific papers: add measures
-- TODO add the p score regex
extractNgrams :: Language -> Text -> IO [[SNgrams]]
extractNgrams :: Lang -> Text -> IO [[SNgrams]]
extractNgrams lang s = map (groupNgrams lang) <$> extractNgrams' lang s
extractNgrams' :: Language -> Text -> IO [[SNgrams]]
extractNgrams' :: Lang -> Text -> IO [[SNgrams]]
extractNgrams' lang t = map (map token2text)
<$> map _sentenceTokens
<$> _sentences
......@@ -58,13 +59,13 @@ extractNgrams' lang t = map (map token2text)
-- | This function selects ngrams according to grammars specific
-- to each language.
-- For instance, the English POS tag JJ denotes an adjective (ADJ in French).
selectNgrams :: Language -> [SNgrams] -> [SNgrams]
selectNgrams :: Lang -> [SNgrams] -> [SNgrams]
selectNgrams EN = En.selectNgrams
selectNgrams FR = Fr.selectNgrams
-- | This function analyses and groups (or not) ngrams according to
-- grammars specific to each language.
groupNgrams :: Language -> [SNgrams] -> [SNgrams]
groupNgrams :: Lang -> [SNgrams] -> [SNgrams]
groupNgrams EN = En.groupNgrams
groupNgrams FR = Fr.groupNgrams
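Putting the pieces together, a hedged end-to-end sketch (not part of this commit) of extraction (which already groups) followed by selection; it assumes a CoreNLP server is reachable, as corenlp requires:
-- Hypothetical pipeline, for illustration only.
extractAndSelect :: Lang -> Text -> IO [[SNgrams]]
extractAndSelect lang txt = map (selectNgrams lang) <$> extractNgrams lang txt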
{-|
Module : Gargantext.Text.Ngrams.Stem
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Gargantext.Text.Ngrams.Stem
where
import Data.Text (Text)
import qualified Data.Text as DT
import qualified NLP.Stemmer as N
import Gargantext.Core (Lang(..))
-- (stem, Stemmer(..))
--import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
--import Language.Aspell.Options (ACOption(..))
stem :: Lang -> Text -> Text
stem lang = DT.pack . N.stem lang' . DT.unpack
where
lang' = case lang of
EN -> N.English
FR -> N.French
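For orientation (not part of the diff), a small hedged example; the expected output assumes the Snowball English stemmer reduces a regular plural to its root:
-- Hypothetical example, for illustration only.
stemExample :: Text
stemExample = stem EN (DT.pack "dogs")   -- expected: "dog"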
{-|
Module : Gargantext.
Module : Gargantext.Text.Ngrams.Stem.En
Description : Porter Algorithm Implementation purely Haskell
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -17,7 +17,7 @@ Adapted from:
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Stem.En
module Gargantext.Text.Ngrams.Stem.En
where
import Control.Monad
......
{-|
Module : Gargantext.Text.Token.Text
Module : Gargantext.Text.Ngrams.Token.Text
Description :
Copyright : (c) CNRS, 2017-Present
Copyright : (c) Grzegorz Chrupała (original author), then CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
......@@ -13,7 +13,7 @@ Inspired from https://bitbucket.org/gchrupala/lingo/overview
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Token.Text
module Gargantext.Text.Ngrams.Token.Text
( EitherList(..)
, Tokenizer
, tokenize
......@@ -28,7 +28,7 @@ module Gargantext.Text.Token.Text
, contractions
, negatives
)
where
where
import qualified Data.Char as Char
import Data.Maybe
......
{-|
Module : Gargantext.Parsers
Module : Gargantext.Text.Parsers
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
......
{-|
Module : Gargantext.Text.TextMining
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.TextMining where
import Gargantext.Prelude
import Data.Ord(Ordering(LT,GT), compare)
import Data.Text (pack)
import Data.Bool (otherwise)
import Data.Map (empty, Map, insertWith, toList)
import Data.List (foldl, foldl')
import qualified Data.List as L
sortGT :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering
sortGT (a1, b1) (a2, b2)
| a1 < a2 = GT
| a1 > a2 = LT
| a1 == a2 = compare b1 b2
sortGT (_, _) (_, _) = panic (pack "What is this case ?")
--histogram :: Ord a => [a] -> [(a, Int)]
--histogram = map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
--histogram = sortGT Prelude.. $ map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
countElem :: (Ord k) => Data.Map.Map k Int -> k -> Data.Map.Map k Int
countElem m e = Data.Map.insertWith (\n o -> n + o) e 1 m
freqList :: (Ord k) => [k] -> Data.Map.Map k Int
freqList = foldl countElem Data.Map.empty
getMaxFromMap :: Ord a => Map a1 a -> [a1]
getMaxFromMap m = go [] Nothing (toList m)
where
go ks _ [] = ks
go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
go ks (Just u) ((k,v):rest)
| v < u = go ks (Just u) rest
| v > u = go [k] (Just v) rest
| otherwise = go (k:ks) (Just v) rest
merge :: [a] -> [a] -> [a]
merge [] ys = ys
merge (x:xs) ys = x:merge ys xs
average :: [Double] -> Double
average x = L.sum x / L.genericLength x
average' :: [Int] -> Double
average' x = (L.sum y) / (L.genericLength y) where
y = L.map fromIntegral x
countYear :: [Integer] -> Map Integer Integer
countYear [] = empty
countYear (x:xs) = insertWith (+) x 1 (countYear xs)
countYear' :: [Integer] -> Map Integer Integer
countYear' (xs) = foldl' (\x y -> insertWith (+) y 1 x) empty xs
-- Word2Vec
-- Word Vector in a Field
module Data.Gargantext.Utils.Chronos where
import Data.Maybe (fromJust, isJust, isNothing)
import qualified Data.Time as DT
import qualified Data.UTC as DU
import Data.Time
import Data.Time.Clock.POSIX
import Text.Regex
parseDate :: String -> Maybe [String]
parseDate d = matchRegex (mkRegex "(.*)/(.*)/(.*)") d
getDate' :: Maybe [String] -> (Integer, Int, Int)
getDate' d
| isJust d == True = toGregorian $ fromGregorian (read year) (read month) (read day)
| otherwise = toGregorian $ fromGregorian 2015 1 1
where
Just [day, month, year] = d
getDate :: String -> (Integer, Int, Int)
getDate = getDate' . parseDate
--getDateDay :: Maybe [String] -> Day
--getDateDay d = fromGregorian (read year) (read month) (read day)
-- where Just [day, month, year] = matchRegex (mkRegex "(.*)/(.*)/(.*)") d
getDateDay' :: Maybe [String] -> Day
getDateDay' d
| isJust d == True = fromGregorian (read year) (read month) (read day)
| otherwise = fromGregorian 2015 1 1
where Just [day, month, year] = d
getDateDay :: String -> Day
getDateDay = getDateDay' . parseDate
getDateUTC :: String -> String
getDateUTC d = show $ DT.UTCTime (getDateDay d) (DT.timeOfDayToTime $ DT.TimeOfDay 0 0 0)
getYear :: String -> String
getYear date = s where
(y, m, d) = getDate date
s = show y
getMonth :: String -> String
getMonth date = s where
(y, m, d) = getDate date
s = show m
getDay :: String -> String
getDay date = s where
(y, m, d) = getDate date
s = show d
--for Dates exported via xls2csv tool
type MT = Maybe (DU.Local DU.DateTime)
type MS = Maybe String
--getDate'' :: String -> String
--getDate'' gd = d where
-- start = "1900-01-01T00:00:00Z"
-- da = (DU.parseRfc3339 start :: MT) >>= DU.addDays ( (read gd :: Integer) -2) >>= DU.renderRfc3339 :: MS
-- d = fromJust da
--
--getDate''' :: String -> String
--getDate''' gd = d where
-- start = "1900-01-01T00:00:00Z"
-- da = (DU.parseRfc3339 start :: MT) >>= DU.addDays ( (read gd :: Integer) -2) >>= DU.renderIso8601CalendarDate :: MS
-- d = fromJust da
--
--date2greg :: String ->
date2greg date = (y, m, d) where
(y, m, d) = DT.toGregorian $ DT.addDays ((read date :: Integer) -2) $ DT.utctDay (read "1900-01-01 00:00:00" :: DT.UTCTime)
getYear' :: String -> String
getYear' date = s where
(y, m, d) = date2greg date
s = show y
getMonth' :: String -> String
getMonth' date = s where
(y, m, d) = date2greg date
s = show m
getDay' :: String -> String
getDay' date = s where
(y, m, d) = date2greg date
s = show d
-- DEFINITIONS as SPECS
-- (Engineering axioms for Gargantext)
------------------------------------------------------------------------
-- From file to corpus
------------------------------------------------------------------------
-- > A Corpus is a list of Documents
type Corpus = [Document]
-- > A Document should have a date, some text and a maybe a language.
-- > Remarks :
-- > If no date then force one ?
-- > Analyze either text or numbers
-- > only one language per document
data Document = Document { date :: UTCTime
, uce :: Map Text (Either (Maybe Text) (Maybe Double))
, lang :: Maybe Language
}
parseFiles :: Maybe ParserType -> [File] -> Corpus
parseFiles = undefined
-- This function exists already (in Python)
parseFile' :: ParserType -> File -> Maybe [Document]
parseFile' = undefined
-- This function does not exist yet
parseFile :: Maybe ParserType -> File -> Maybe [Document]
parseFile parserType file = documents
where
documents = case parserType of
Nothing -> case guessParserType file of
Nothing -> askUser "Answer to the question with link to $doc"
Just parserType' -> parseFile (Just parserType') file
Just parserType'' -> case parserType'' of
UnsupportedYet -> askUser "Not supported yet, which priority ?"
otherwise -> parseFile' parserType'' file
data ParserType = RIS | ISI | XML | CSV | Europresse | Book | UnsupportedYet
guessParserType :: File -> Maybe ParserType
guessParserType = undefined
------------------------------------------------------------------------
-- What kind of interactions with our users ?
------------------------------------------------------------------------
-- Question is Text only
type Question = Text
-- Possible Answers:
data Answer = ClosedAnswer | NumAnswer | OpenAnswer
-- Definitions of the Answers
type ClosedAnswer = Bool
type OpenAnswer = Text
type NumAnswer = Int
-- A form is a mapping from a question to, possibly, an answer.
-- An empty form has Nothing in its (Maybe Answer) field.
-- An answered question holds the value (Just Answer).
type Formular = Map Question (Maybe Answer)
askUser :: Question -> ClosedAnswer
askUser = undefined
data Advice = BugReport | WishList
askUser' :: Question -> Advice
askUser' question = case askUser question of
True -> BugReport
False -> WishList
------------------------------------------------------------------------
-- Specs for Lang Detection
------------------------------------------------------------------------
data Language = English | French
tagDoc :: Document -> Ngrams
tagDoc doc = ngrams
where
ngrams = case lang doc of
Nothing -> case guessLang doc of
Nothing -> tag
------------------------------------------------------------------------
-- Specs for ngrams Worflow
------------------------------------------------------------------------
module Data.Gargantext.Utils.SaveGetHash where
import System.FilePath (addExtension, joinPath)
import Data.ByteString.Lazy.Internal (ByteString)
import Data.List (elem, intersperse, insert)
import Data.List.Extra (chunksOf)
import Data.Digest.Pure.MD5 (md5)
import System.Directory (getDirectoryContents, createDirectory, findFile, createDirectoryIfMissing)
import Control.Monad (foldM)
import Data.List (splitAt)
import Data.ByteString.Lazy.Internal (packChars)
import qualified Data.ByteString.Lazy as BL
import Codec.Compression.Zlib (compress, decompress)
data Config = Config {
root :: String
, chunkSize :: Int
, compression :: Bool
} deriving Show
conf = Config {
root="/tmp/robot"
, chunkSize=2
, compression = True
}
chunkUrl :: Int -> ByteString -> [[Char]]
chunkUrl a url = chunksOf a $ show $ md5 url
-- replace it with createDirectoryIfMissing
existOrCreate :: [[Char]] -> FilePath -> IO [[Char]]
existOrCreate path_ dir = do
let path = joinPath path_
let returnPath = return $ path_ ++ [dir]
is <- elem dir <$> getDirectoryContents path -- ?
case is of
True -> do
returnPath
False -> do
createDirectory $ path ++ "/" ++ dir
returnPath
doPath :: [[Char]] -> [FilePath] -> IO [[Char]]
doPath root path = foldM (\x y -> existOrCreate x y) root path
splitAt' :: Int -> Int -> [Char] -> ([Char], [Char], [Char])
splitAt' i1 i2 x = (a, b, c) where
(a, a') = splitAt i1 x
(b, c) = splitAt i2 a'
-- do not overwrite the file if it already exists
-- split the url cleanly
saveFile :: ByteString -> String -> IO String
saveFile url'' file = do
let url' = chunkUrl (chunkSize conf) url''
let url = init url'
-- add extension according to the filetype
let filename = Prelude.foldl addExtension (last url') ["html", "zlib"]
doPath [(root conf)] url
let path = (root conf) ++ "/" ++ joinPath url ++ "/" ++ filename
--case (findFile ["/tmp/sdfs"] "file.html.zlib"
-- Nothing -> create
-- _ -> change name
case (compression conf) of
True -> BL.writeFile path (compress $ packChars file)
False -> writeFile path file
return path
getFile :: FilePath -> IO ByteString
getFile path = do
case (compression conf) of
True -> decompress <$> BL.readFile path
False -> packChars <$> Prelude.readFile path
-- resources
-- add Resource
-- levenshtein distance...