gargantext / haskell-gargantext · Commits
Commit 3d9b4e21, authored Dec 13, 2024 by Grégoire Locqueville

Removed dead code in `Core.Text...`

Parent: 91c73c45
Showing 15 changed files with 17 additions and 534 deletions (+17 −534).
Changed files:

  gargantext.cabal                                        +0   −4
  src/Gargantext/Core/Text.hs                             +0   −13
  src/Gargantext/Core/Text/Context.hs                     +0   −29
  src/Gargantext/Core/Text/Corpus.hs                      +2   −19
  src/Gargantext/Core/Text/Corpus/API/Hal.hs              +0   −5
  src/Gargantext/Core/Text/Corpus/API/Isidore.hs          +3   −14
  src/Gargantext/Core/Text/Corpus/API/Pubmed.hs           +0   −2
  src/Gargantext/Core/Text/Corpus/Parsers.hs              +0   −40
  src/Gargantext/Core/Text/Corpus/Parsers/Date.hs         +1   −12
  src/Gargantext/Core/Text/Corpus/Parsers/FrameWrite.hs   +0   −73
  src/Gargantext/Core/Text/Corpus/Parsers/Iramuteq.hs     +5   −6
  src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs      +0   −141
  src/Gargantext/Core/Text/Corpus/Parsers/TSV.hs          +1   −172
  src/Gargantext/Core/Text/Corpus/Query.hs                +0   −4
  weeder.toml                                             +5   −0
gargantext.cabal

@@ -362,7 +362,6 @@ library
       Gargantext.Core.Text.Corpus.Parsers.FrameWrite
       Gargantext.Core.Text.Corpus.Parsers.GrandDebat
       Gargantext.Core.Text.Corpus.Parsers.Iramuteq
-      Gargantext.Core.Text.Corpus.Parsers.Isidore
       Gargantext.Core.Text.Corpus.Parsers.JSON
       Gargantext.Core.Text.Corpus.Parsers.JSON.Istex
       Gargantext.Core.Text.Corpus.Parsers.RIS

@@ -530,7 +529,6 @@ library
     , haskell-throttle
     , hlcm ^>= 0.2.2
     , hsinfomap ^>= 0.1
-    , hsparql ^>= 0.3.8
     , hstatistics ^>= 0.3.1
     , http-api-data >= 0.5 && < 0.6
     , http-client ^>= 0.7.14

@@ -573,7 +571,6 @@ library
     , quickcheck-instances ^>= 0.3.25.2
     , rake ^>= 0.0.1
     , random ^>= 1.2.1
-    , rdf4h ^>= 3.1.1
     , regex
     , replace-attoparsec ^>= 1.5.0.0
     , resource-pool >= 0.4.0.0 && < 0.5

@@ -604,7 +601,6 @@ library
     , stm-containers >= 1.2.0.3 && < 1.3
     , stringsearch >= 0.3.6.6
     , swagger2 ^>= 2.8.7
-    , tagsoup ^>= 0.14.8
     , template-haskell ^>= 2.19.0.0
     , temporary ^>= 1.3
     , text ^>= 2.0.2
src/Gargantext/Core/Text.hs

@@ -15,7 +15,6 @@ Text gathers terms in unit of contexts.

module Gargantext.Core.Text
  where

import Data.Text ( split )
import Data.Text qualified as DT
import Gargantext.Prelude hiding ( filter )
import NLP.FullStop ( segment )

@@ -85,18 +84,6 @@ instance Collage MultiTerme Mot where

sentences :: Text -> [Text]
sentences txt = map DT.pack $ segment $ DT.unpack txt

sentences' :: Text -> [Text]
sentences' txt = split isCharStop txt

isCharStop :: Char -> Bool
isCharStop c = c `elem` ['.', '?', '!']

unsentences :: [Text] -> Text
unsentences txts = DT.unwords txts

-- | Ngrams size
size :: Text -> Int
size t = 1 + DT.count " " t
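
The trailing `size` helper (shown as context above) measures an ngram by counting spaces. A standalone sketch of that behaviour, outside the repo, for reference:

{-# LANGUAGE OverloadedStrings #-}
import Data.Text (Text)
import qualified Data.Text as DT

-- Ngram size: a term with n words contains n - 1 spaces.
size :: Text -> Int
size t = 1 + DT.count " " t

main :: IO ()
main = print (size "natural language processing") -- prints 3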
src/Gargantext/Core/Text/Context.hs

@@ -23,10 +23,7 @@ How to split contexts is describes in this module.

module Gargantext.Core.Text.Context
  where

import Data.Text ( pack, unpack )
import Gargantext.Core.Text
import Gargantext.Prelude hiding ( length )
import Text.HTML.TagSoup ( parseTags, isTagText, Tag(..) )

------------------------------------------------------------------------
type Term = Text

@@ -38,31 +35,5 @@ type TermList = [(Label, [MultiTerm])]

type Sentence a = [a] -- or a nominal group
type Corpus   a = [Sentence a] -- a list of sentences

-- type ConText a = [Sentence a]
-- type Corpus  a = [ConText a]

------------------------------------------------------------------------
-- | Contexts definition to build/unbuild contexts.
data SplitContext = Chars Int | Sentences Int | Paragraphs Int

-- | splitBy contexts of Chars or Sentences or Paragraphs
-- To see some examples at a higher level (sentences and paragraph), see
-- 'Gargantext.Core.Text.Examples.ex_terms'
--
-- >>> splitBy (Chars 0) (pack "abcde")
-- ["a","b","c","d","e"]
--
-- >>> splitBy (Chars 1) (pack "abcde")
-- ["ab","bc","cd","de"]
--
-- >>> splitBy (Chars 2) (pack "abcde")
-- ["abc","bcd","cde"]
splitBy :: SplitContext -> Text -> [Text]
splitBy (Chars     n)  = map pack . chunkAlong (n+1) 1 . unpack
splitBy (Sentences n)  = map unsentences . chunkAlong (n+1) 1 . sentences
splitBy (Paragraphs _) = map unTag . filter isTagText . parseTags
  where
    unTag :: IsString p => Tag p -> p
    unTag (TagText x) = x
    unTag _           = ""
src/Gargantext/Core/Text/Corpus.hs

-module Gargantext.Core.Text.Corpus
-  ( makeSubcorpusFromQuery
-  , subcorpusEasy )
-  where
+module Gargantext.Core.Text.Corpus
+  ( makeSubcorpusFromQuery )
+  where

import Control.Lens ( view )
import Data.Set.Internal qualified as Set ( singleton )
import Data.Text qualified as T
import Gargantext.API.Dev ( runCmdReplEasy )
import Gargantext.API.Errors.Types ( BackendInternalError(InternalNodeError) )
import Gargantext.Core ( Lang(EN) )
import Gargantext.Core.NodeStory.Types ( HasNodeStoryEnv )

@@ -18,7 +16,7 @@ import Gargantext.Database.Action.Metrics (updateContextScore, updateNgramsOccur

import Gargantext.Database.Action.Search ( searchInCorpus )
import Gargantext.Database.Action.User ( getUserId )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus, hc_lang )
-import Gargantext.Database.Admin.Types.Node ( CorpusId, NodeId(UnsafeMkNodeId), NodeType(..), nodeId2ContextId )
+import Gargantext.Database.Admin.Types.Node ( CorpusId, NodeType(..), nodeId2ContextId )
import Gargantext.Database.Prelude ( DBCmdWithEnv )
import Gargantext.Database.Query.Facet.Types ( facetDoc_id )
import Gargantext.Database.Query.Table.Node ( insertDefaultNode, copyNodeStories, defaultList, getNodeWithType )

@@ -28,21 +26,6 @@ import Gargantext.Database.Schema.Node (node_hyperdata)

import Gargantext.Prelude

--- | A version of the below function for use in the REPL (so you don't need to
--- manually import tons of constructors etc.)
-subcorpusEasy :: Text -- ^ Username
-              -> Int  -- ^ Original corpus ID
-              -> Text -- ^ Search string
-              -> Bool -- ^ Whether to reuse the parent term list (True) or recompute one from scratch (False)
-              -> IO ()
-subcorpusEasy username cId rawQuery reuseParentList = do
-  let eitherQuery = Q.parseQuery $ Q.RawQuery rawQuery
-  case eitherQuery of
-    Left msg    -> print $ "Error parsing query \"" <> rawQuery <> "\": " <> T.pack msg
-    Right query -> void $ runCmdReplEasy $ makeSubcorpusFromQuery
-      (UserName username)
-      (UnsafeMkNodeId cId)
-      query
-      reuseParentList

-- | Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
src/Gargantext/Core/Text/Corpus/API/Hal.hs

@@ -23,13 +23,8 @@ import Gargantext.Defaults qualified as Defaults

import Gargantext.Prelude hiding ( intercalate )
import HAL qualified
import HAL.Doc.Document qualified as HAL
import HAL.Types qualified as HAL
import Servant.Client ( ClientError )

get :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO [HyperdataDocument]
get la q ml = do
  eDocs <- HAL.getMetadataWith [q] (Just 0) (fromIntegral <$> ml) la
  either (panicTrace . pack . show) (mapM (toDoc' la) . HAL._docs) eDocs

getC :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int
     -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = do
src/Gargantext/Core/Text/Corpus/API/Isidore.hs

@@ -11,22 +11,18 @@ Portability : POSIX

{-# LANGUAGE ScopedTypeVariables #-}

-module Gargantext.Core.Text.Corpus.API.Isidore
-  ( get
-  -- * Internals (possibly unused?)
-  , isidore2tsvFile
-  ) where
+module Gargantext.Core.Text.Corpus.API.Isidore
+  ( get
+  ) where

import Data.Text qualified as Text
import Gargantext.Core ( Lang(..) )
import Gargantext.Core.Text.Corpus.Parsers ( cleanText )
-import Gargantext.Core.Text.Corpus.Parsers.TSV ( writeDocs2Tsv )
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding ( get )
-import Isidore qualified as Isidore
+import Isidore qualified
import Isidore.Client
import Servant.Client ( ClientError(DecodeFailure) )

@@ -50,13 +46,6 @@ get lang l q a = do

  hDocs <- mapM (isidoreToDoc lang) (toIsidoreDocs iDocs)
  pure hDocs

-isidore2tsvFile :: FilePath -> Lang -> Maybe Isidore.Limit
-                -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-                -> IO ()
-isidore2tsvFile fp lang li tq aq = do
-  hdocs <- get lang li tq aq
-  writeDocs2Tsv fp hdocs

isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc lang (IsidoreDoc t a d u s as) = do
  let
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs

@@ -10,12 +10,10 @@ Portability : POSIX
-}

{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}

module Gargantext.Core.Text.Corpus.API.Pubmed
  ( get
  -- * Internals for testing
  , ESearch(..)
  , convertQuery
  , getESearch
  )
src/Gargantext/Core/Text/Corpus/Parsers.hs

@@ -29,7 +29,6 @@ module Gargantext.Core.Text.Corpus.Parsers (
    , cleanText
    , parseFormatC
    , splitOn
-   , etale
    ) where

-- import Gargantext.Core.Text.Learn (detectLangDefault)

@@ -49,7 +48,6 @@ import Gargantext.API.Node.Corpus.New.Types (FileFormat(..))

import Gargantext.Core ( Lang(..) )
import Gargantext.Core.Text.Corpus.Parsers.TSV ( parseHal, parseTsv, parseTsvC )
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
-import Gargantext.Core.Text.Corpus.Parsers.FrameWrite ( text2titleParagraphs )
import Gargantext.Core.Text.Corpus.Parsers.Iramuteq qualified as Iramuteq
import Gargantext.Core.Text.Corpus.Parsers.JSON ( parseJSONC, parseIstex )
import Gargantext.Core.Text.Corpus.Parsers.RIS qualified as RIS

@@ -178,44 +176,6 @@ filterZIPFileNameP Istex f = (takeExtension (unEntrySelector f) == ".json") &&

filterZIPFileNameP _ _ = True

-etale :: [HyperdataDocument] -> [HyperdataDocument]
-etale = concatMap etale'
-  where
-    etale' :: HyperdataDocument -> [HyperdataDocument]
-    etale' h = map (\t -> h { _hd_abstract = Just t })
-             $ map snd
-             $ text2titleParagraphs 7 (maybe "" identity $ _hd_abstract h)
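
The removed `etale` fanned each document out into one document per seven-sentence paragraph of its abstract. A self-contained sketch of that idea, with a hypothetical `Doc` record standing in for `HyperdataDocument` and naive sentence chunking standing in for `text2titleParagraphs`:

{-# LANGUAGE OverloadedStrings #-}
import Data.Text (Text)
import qualified Data.Text as T

-- Hypothetical stand-in for HyperdataDocument.
newtype Doc = Doc { abstract :: Maybe Text } deriving Show

-- One output Doc per chunk of n "sentences" from the input's abstract.
etaleSketch :: Int -> [Doc] -> [Doc]
etaleSketch n = concatMap $ \d ->
  [ d { abstract = Just chunk }
  | chunk <- chunks n (maybe "" id (abstract d)) ]
  where
    -- Naive chunking on ". ", standing in for text2titleParagraphs.
    chunks k = map (T.intercalate ". ") . groupsOf k . T.splitOn ". "
    groupsOf _ [] = []
    groupsOf k xs = take k xs : groupsOf k (drop k xs)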
-- parseFormat :: FileType -> DB.ByteString -> IO (Either Prelude.String [HyperdataDocument])
-- parseFormat TsvGargV3 bs = pure $ parseTsv' $ DBL.fromStrict bs
-- parseFormat TsvHal    bs = pure $ parseHal' $ DBL.fromStrict bs
-- parseFormat RisPresse bs = do
--   docs <- mapM (toDoc RIS)
--           <$> snd
--           <$> enrichWith RisPresse
--           $ partitionEithers
--           $ [runParser' RisPresse bs]
--   pure $ Right docs
-- parseFormat WOS bs = do
--   docs <- mapM (toDoc WOS)
--           <$> snd
--           <$> enrichWith WOS
--           $ partitionEithers
--           $ [runParser' WOS bs]
--   pure $ Right docs
-- parseFormat ZIP bs = do
--   path <- emptySystemTempFile "parsed-zip"
--   DB.writeFile path bs
--   parsedZip <- withArchive path $ do
--     DM.keys <$> getEntries
--   pure $ Left $ "Not implemented for ZIP, parsedZip" <> show parsedZip
-- parseFormat _ _ = undefined

-- | Parse file into documents
-- TODO manage errors here
-- TODO: to debug maybe add the filepath in error message
parseFile :: FileType -> FileFormat -> FilePath
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs

@@ -21,16 +21,13 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"

module Gargantext.Core.Text.Corpus.Parsers.Date
  ( dateSplit
  , mDateSplit
-  , defaultDay
-  , defaultUTCTime
  , split'
  ) where

import Data.List qualified as List
import Data.Text ( unpack, splitOn, replace )
import Data.Time ( defaultTimeLocale, iso8601DateFormat, parseTimeM, toGregorian )
import Data.Time.Calendar qualified as DTC
-import Data.Time.Clock ( UTCTime(..), secondsToDiffTime )
+import Data.Time.Clock ( UTCTime(..) )
import Gargantext.Prelude hiding ( replace )

------------------------------------------------------------------------

@@ -113,11 +110,3 @@ readDate txt = do

  --let format = cs $ iso8601DateFormat (Just "%F %H:%M:%S")
  let format = cs $ iso8601DateFormat Nothing
  parseTimeM True defaultTimeLocale (unpack format) (cs txt)

-defaultDay :: DTC.Day
-defaultDay = DTC.fromGregorian 1 1 1
-
-defaultUTCTime :: UTCTime
-defaultUTCTime = UTCTime { utctDay     = defaultDay
-                         , utctDayTime = secondsToDiffTime 0 }
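
`readDate` above leans on time's classic format strings. A minimal standalone sketch of the same parse, assuming only `Data.Time` (note `iso8601DateFormat` is deprecated but still available):

import Data.Time (Day, defaultTimeLocale, iso8601DateFormat, parseTimeM)

-- Parse an ISO-8601 date, e.g. "2010-04-12", into a Day.
readDay :: String -> Maybe Day
readDay = parseTimeM True defaultTimeLocale (iso8601DateFormat Nothing)

main :: IO ()
main = print (readDay "2010-04-12") -- Just 2010-04-12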
src/Gargantext/Core/Text/Corpus/Parsers/FrameWrite.hs

@@ -35,39 +35,6 @@ import Text.Parsec.String

-- par défaut: un doc == 1 NodeWrite
-- ## mean each ## section will be a new document with title the subsubsection title. Either it features options for author, date etc. or it will inherit the document's option.

sample :: Text
sample = unlines [ "title1"
                 -- , "title2"
                 -- , "=="
                 -- , "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
                 , "date: 2021-09-10"
                 , "source: someSource"
                 , "document contents 1"
                 , "document contents 2"
                 ]

sampleUnordered :: Text
sampleUnordered = unlines [ "title1"
                          , "title2"
                          , "=="
                          , "document contents 1"
                          , "date: 2021-09-10"
                          , "authors: FirstName1, LastName1; FirstName2, LastName2"
                          , "source: someSource"
                          , "document contents 2"
                          ]

-- parseSample = parse documentP "sample" (unpack sample)
-- parseSampleUnordered = parse documentP "sampleUnordered" (unpack sampleUnordered)

parseLinesSample :: Either ParseError Parsed
parseLinesSample = parseLines sample

parseLinesSampleUnordered :: Either ParseError Parsed
parseLinesSampleUnordered = parseLines sampleUnordered

data Author = Author { firstName :: Text
                     , lastName  :: Text }

@@ -114,14 +81,6 @@ parseLines text = foldl f emptyParsed <$> lst

    f (Parsed { .. }) (LSource s) = Parsed { source = Just s, .. }
    f (Parsed { .. }) (LTitle  t) = Parsed { title = t, .. }

-- Source should be the name of the node
-- First line of each Context should be the title.
documentLinesP :: Parser [Line]
documentLinesP = do
  t  <- titleP
  ls <- lineP `sepBy` newline
  pure $ [LTitle $ pack t] ++ ls

documentLines :: Parser [Line]
documentLines = do
  ls <- lineP `sepBy` newline

@@ -157,27 +116,6 @@ contentsLineP = do

--------------------
-- documentP = do
--   t <- titleP
--   a <- optionMaybe authorsP
--   d <- optionMaybe dateP
--   s <- optionMaybe sourceP
--   c <- contentsP
--   pure $ Parsed { title = pack t
--                 , authors = fromMaybe [] a
--                 , date = pack <$> d
--                 , source = pack <$> s
--                 , contents = pack c }

titleDelimiterP :: Parser ()
titleDelimiterP = do
  _ <- newline
  -- _ <- try (string "==")
  pure ()

titleP :: Parser [Char]
titleP = manyTill anyChar (try titleDelimiterP)

authorsPrefixP :: Parser [Char]
authorsPrefixP = do
  _ <- string "authors:"

@@ -225,12 +163,6 @@ sourceP = try sourcePrefixP

  _ <- string "source:"
  many (char ' ')

-- contentsP :: Parser String
-- contentsP = many anyChar

tokenEnd :: Parser ()
tokenEnd = void (char '\n') <|> eof

--- MISC Tools
-- Using ChunkAlong here enable redundancies in short corpora of texts
-- maybe use splitEvery or chunkAlong depending on the size of the whole text

@@ -249,8 +181,3 @@ text2titleParagraphs n = catMaybes

    doTitle :: [Text] -> Maybe (Text, Text)
    doTitle (t:ts) = Just (t, DT.unwords ts)
    doTitle []     = Nothing

clean :: Text -> Text
clean = DT.unwords . List.filter (\w -> DT.length w < 25) . DT.words
src/Gargantext/Core/Text/Corpus/Parsers/Iramuteq.hs

@@ -12,19 +12,18 @@ commentary with @some markup@.
-}

-module Gargantext.Core.Text.Corpus.Parsers.Iramuteq ( parseIramuteqFile, parser, keys ) where
+module Gargantext.Core.Text.Corpus.Parsers.Iramuteq ( parser, keys ) where

import Control.Applicative
import Data.Attoparsec.ByteString ( Parser, takeTill, parseOnly )
import Data.Attoparsec.ByteString.Char8 ( isEndOfLine, takeWhile, endOfLine )
import Data.ByteString ( ByteString )
import Prelude hiding ( takeWhile, take, concat, readFile, lines, concat )
import qualified Data.ByteString as DB

-parseIramuteqFile :: FilePath -> IO (Either String [[(ByteString, ByteString)]])
-parseIramuteqFile fp = do
-  txts <- DB.readFile fp
-  pure $ parseOnly parser txts

-------------------------------------------------------------
parser :: Parser [[(ByteString, ByteString)]]
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs (deleted, 100644 → 0)

{-|
Module      : Gargantext.Core.Text.Corpus.Parsers.Isidore
Description : To query French Humanities publication database
Copyright   : (c) CNRS, 2019-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

TODO:
- put endpoint in configuration file
- more flexible fields of research
- type database name
- use more ontologies to help building corpora
-}

{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Gargantext.Core.Text.Corpus.Parsers.Isidore where

import Data.ByteString.Lazy ( ByteString )
import Data.RDF ( Node(LNode, UNode), LValue(PlainLL, TypedL, PlainL) )
import Data.Text qualified as T
import Database.HSparql.Connection ( BindingValue(..), EndPoint, structureContent )
import Database.HSparql.QueryGenerator
import Gargantext.Core ( Lang )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding ( ByteString )
import Network.Wreq ( getWith, Response, defaults, header, param, responseStatus, responseBody )
import Prelude qualified

route :: EndPoint
route = "https://isidore.science/sparql/"

selectQueryRaw' :: Prelude.String -> Prelude.String -> IO (Response ByteString)
selectQueryRaw' uri q = getWith opts uri
  where
    opts = defaults & header "Accept"     .~ ["application/sparql-results+xml"]
                    & header "User-Agent" .~ ["gargantext-hsparql-client"]
                    & param  "query"      .~ [T.pack q]

isidoreGet :: Lang -> Int -> Text -> IO (Maybe [HyperdataDocument])
isidoreGet la li q = do
  bindingValues <- isidoreGet' li q
  case bindingValues of
    Nothing -> pure Nothing
    Just dv -> pure $ Just $ map (bind2doc la) dv

isidoreGet' :: Int -> Text -> IO (Maybe [[BindingValue]])
isidoreGet' l q = do
  let s = createSelectQuery $ isidoreSelect l q
  putStrLn s
  r <- selectQueryRaw' route s
  putStrLn (show $ r ^. responseStatus :: Text)
  pure $ structureContent $ r ^. responseBody
  -- res <- selectQuery route $ simpleSelect q
  -- pure res

isidoreSelect :: Int -> Text -> Query SelectQuery
isidoreSelect lim q = do
  -- See Predefined Namespace Prefixes:
  -- https://isidore.science/sparql?nsdecl
  isidore <- prefix "isidore" (iriRef "http://isidore.science/class/")
  rdf     <- prefix "rdf"     (iriRef "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
  dcterms <- prefix "dcterms" (iriRef "http://purl.org/dc/terms/")
  dc      <- prefix "dc"      (iriRef "http://purl.org/dc/elements/1.1/")
  --iso <- prefix "fra" (iriRef "http://lexvo.org/id/iso639-3/")
  --ore <- prefix "ore" (iriRef "http://www.openarchives.org/ore/terms/")
  --bif <- prefix "bif" (iriRef "bif:")

  link'     <- var
  title     <- var
  date      <- var
  abstract  <- var
  authors   <- var
  source    <- var
  langDoc   <- var
  publisher <- var
  --agg     <- var

  triple_ link' (rdf     .:. "type")        (isidore .:. "Document")
  triple_ link' (dcterms .:. "title")       title
  triple_ link' (dcterms .:. "date")        date
  triple_ link' (dcterms .:. "creator")     authors
  --triple_ link (dcterms .:. "language") langDoc
  triple_ link' (dc      .:. "description") abstract
  --triple_ link (ore .:. "isAggregatedBy") agg
  --triple_ agg (dcterms .:. "title") title

  optional_ $ triple_ link' (dcterms .:. "source")    source
  optional_ $ triple_ link' (dcterms .:. "publisher") publisher

  -- TODO FIX BUG with (.||.) operator
  --filterExpr_ $ (.||.) (contains title q) (contains abstract q)
  --filterExpr_ (containsWith authors q) -- (contains abstract q)
  --filterExpr_ (containsWith title q) -- (contains abstract q)
  --filterExpr_ $ (.||.) (containsWith title q) (contains abstract q)
  filterExpr_ (containsWith title q)

  -- TODO FIX filter with lang
  --filterExpr_ $ langMatches title (str ("fra" :: Text))
  --filterExpr_ $ (.==.) langDoc (str ("http://lexvo.org/id/iso639-3/fra" :: Text))

  orderNextDesc date
  limit_ lim
  distinct_
  selectVars [link', date, langDoc, authors, source, publisher, title, abstract]

-- | TODO : check if all cases are taken into account
unbound :: Lang -> BindingValue -> Maybe Text
unbound _ Unbound                        = Nothing
unbound _ (Bound (UNode x))              = Just x
unbound _ (Bound (LNode (TypedL x _)))   = Just x
unbound _ (Bound (LNode (PlainL x)))     = Just x
unbound l (Bound (LNode (PlainLL x l'))) =
  if l' == T.toLower (show l) then Just x else Nothing
unbound _ _ = Nothing

bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
bind2doc l [link', date, langDoc, authors, _source, publisher, title, abstract] =
  HyperdataDocument { _hd_bdd                = Just "Isidore"
                    , _hd_doi                = Nothing
                    , _hd_url                = unbound l link'
                    , _hd_page               = Nothing
                    , _hd_title              = unbound l title
                    , _hd_authors            = unbound l authors
                    , _hd_institutes         = Nothing
                    , _hd_source             = unbound l publisher
                    , _hd_abstract           = unbound l abstract
                    , _hd_publication_date   = unbound l date
                    , _hd_publication_year   = Nothing
                    , _hd_publication_month  = Nothing
                    , _hd_publication_day    = Nothing
                    , _hd_publication_hour   = Nothing
                    , _hd_publication_minute = Nothing
                    , _hd_publication_second = Nothing
                    , _hd_language_iso2      = unbound l langDoc
                    , _hd_institutes_tree    = Nothing
                    }
bind2doc _ _ = undefined
src/Gargantext/Core/Text/Corpus/Parsers/TSV.hs

@@ -15,7 +15,6 @@ TSV parser for Gargantext corpus files.

module Gargantext.Core.Text.Corpus.Parsers.TSV
  where

import Conduit ( ConduitT, (.|), yieldMany, mapC )
import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as BL
import Data.Csv
import Data.Text ( pack )

@@ -26,23 +25,11 @@ import Data.Text.Read qualified as DTR

import Data.Time.Segment ( jour )
import Data.Vector ( Vector )
import Data.Vector qualified as V
import Gargantext.Core.Text ( sentences, unsentences )
import Gargantext.Core.Text.Context ( splitBy, SplitContext(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding ( length, show )
import Protolude

---------------------------------------------------------------
headerTsvGargV3 :: Header
headerTsvGargV3 = header [ "title"
                         , "source"
                         , "publication_year"
                         , "publication_month"
                         , "publication_day"
                         , "abstract"
                         , "authors"
                         ]

---------------------------------------------------------------
data TsvGargV3 = TsvGargV3
  { d_docId :: !Int

@@ -55,92 +42,10 @@ data TsvGargV3 = TsvGargV3

  , d_authors :: !Text
  }
  deriving (Show)

---------------------------------------------------------------
-- | Doc 2 HyperdataDocument
toDoc :: TsvGargV3 -> HyperdataDocument
toDoc (TsvGargV3 did dt _ dpy dpm dpd dab dau) =
  HyperdataDocument { _hd_bdd                = Just "TSV"
                    , _hd_doi                = Just . pack . show $ did
                    , _hd_url                = Nothing
                    , _hd_page               = Nothing
                    , _hd_title              = Just dt
                    , _hd_authors            = Nothing
                    , _hd_institutes         = Just dau
                    , _hd_source             = Just dab
                    , _hd_abstract           = Nothing
                    , _hd_publication_date   = Nothing
                    , _hd_publication_year   = Just dpy
                    , _hd_publication_month  = Just dpm
                    , _hd_publication_day    = Just dpd
                    , _hd_publication_hour   = Nothing
                    , _hd_publication_minute = Nothing
                    , _hd_publication_second = Nothing
                    , _hd_language_iso2      = Nothing
                    , _hd_institutes_tree    = Nothing
                    }

---------------------------------------------------------------
-- | Types Conversions
toDocs :: Vector TsvDoc -> [TsvGargV3]
toDocs v = V.toList
  $ V.zipWith (\nId (TsvDoc {..}) -- (TsvDoc t s mPy pm pd abst auth)
                -> TsvGargV3 { d_docId             = nId
                             , d_title             = tsv_title
                             , d_source            = tsv_source
                             , d_publication_year  = fromMIntOrDec defaultYear tsv_publication_year
                             , d_publication_month = fromMaybe defaultMonth tsv_publication_month
                             , d_publication_day   = fromMaybe defaultDay tsv_publication_day
                             , d_abstract          = tsv_abstract
                             , d_authors           = tsv_authors })
              (V.enumFromN 1 (V.length v'')) v''
  where
    v''  = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
    seps = V.fromList [Paragraphs 1, Sentences 3, Chars 3]

---------------------------------------------------------------
fromDocs :: Vector TsvGargV3 -> Vector TsvDoc
fromDocs = V.map fromDocs'
  where
    fromDocs' (TsvGargV3 {..}) = TsvDoc { tsv_title             = d_title
                                        , tsv_source            = d_source
                                        , tsv_publication_year  = Just $ IntOrDec d_publication_year
                                        , tsv_publication_month = Just d_publication_month
                                        , tsv_publication_day   = Just d_publication_day
                                        , tsv_abstract          = d_abstract
                                        , tsv_authors           = d_authors }

---------------------------------------------------------------
-- | Split a document in its context
-- TODO adapt the size of the paragraph according to the corpus average
splitDoc :: Mean -> SplitContext -> TsvDoc -> Vector TsvDoc
splitDoc m splt doc =
  let docSize = (T.length $ tsv_abstract doc) in
  if (docSize > 1000) && (mod (round m) docSize >= 10)
    then splitDoc' splt doc
    else V.fromList [doc]
  where
    splitDoc' :: SplitContext -> TsvDoc -> Vector TsvDoc
    splitDoc' contextSize (TsvDoc {..}) = V.fromList $ [firstDoc] <> nextDocs
      where
        firstDoc      = TsvDoc { tsv_abstract = firstAbstract, .. }
        firstAbstract = head' "splitDoc'1" abstracts
        nextDocs = map (\txt -> TsvDoc { tsv_title    = head' "splitDoc'2" $ sentences txt
                                       , tsv_abstract = unsentences $ tail' "splitDoc'1" $ sentences txt
                                       , .. })
                       (tail' "splitDoc'2" abstracts)
        abstracts = (splitBy $ contextSize) tsv_abstract

---------------------------------------------------------------
---------------------------------------------------------------
type Mean = Double

docsSize :: Vector TsvDoc -> Mean
docsSize tsvDoc = mean ls
  where
    ls = V.toList $ V.map (fromIntegral . T.length . tsv_abstract) tsvDoc

---------------------------------------------------------------
newtype IntOrDec = IntOrDec Int
  deriving (Show, Eq, Read)

@@ -195,28 +100,11 @@ instance ToNamedRecord TsvDoc where

    , "authors" .= tsv_authors
    ]

hyperdataDocument2tsvDoc :: HyperdataDocument -> TsvDoc
hyperdataDocument2tsvDoc h = TsvDoc { tsv_title             = m $ _hd_title h
                                    , tsv_source            = m $ _hd_source h
                                    , tsv_publication_year  = Just $ IntOrDec $ mI $ _hd_publication_year h
                                    , tsv_publication_month = Just $ mI $ _hd_publication_month h
                                    , tsv_publication_day   = Just $ mI $ _hd_publication_day h
                                    , tsv_abstract          = m $ _hd_abstract h
                                    , tsv_authors           = m $ _hd_authors h }
  where
    m  = maybe "" identity
    mI = maybe 0 identity

data Delimiter = Tab | Comma | Line
  deriving (Eq, Show)

tsvDecodeOptions :: Delimiter -> DecodeOptions
tsvDecodeOptions d = defaultDecodeOptions { decDelimiter = delimiter d }

tsvEncodeOptions :: Delimiter -> EncodeOptions
tsvEncodeOptions d = defaultEncodeOptions { encDelimiter = delimiter d }

delimiter :: Delimiter -> Word8
delimiter Tab   = fromIntegral $ ord '\t'
delimiter Comma = fromIntegral $ ord ','
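
The decode options above plug directly into cassava's named-record decoder. A minimal sketch, assuming only the cassava API already used in this file:

import Data.Char (ord)
import qualified Data.ByteString.Lazy as BL
import Data.Csv (DecodeOptions(..), FromNamedRecord, decodeByNameWith, defaultDecodeOptions)
import Data.Vector (Vector)

-- Decode headered, tab-separated input into any FromNamedRecord type,
-- dropping the Header just as readByteStringLazy's callers do with snd.
decodeTsv :: FromNamedRecord a => BL.ByteString -> Either String (Vector a)
decodeTsv = fmap snd . decodeByNameWith opts
  where
    opts = defaultDecodeOptions { decDelimiter = fromIntegral (ord '\t') }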
@@ -368,35 +256,6 @@ getHeaders bl del = do

------------------------------------------------------------------------
readFileLazy :: (FromNamedRecord a) => proxy a -> Delimiter -> FilePath -> IO (Either Text (Header, Vector a))
readFileLazy d f = fmap (readByteStringLazy d f) . BL.readFile

readFileStrict :: (FromNamedRecord a) => proxy a -> Delimiter -> FilePath -> IO (Either Text (Header, Vector a))
readFileStrict d f = fmap (readByteStringStrict d f) . BS.readFile

readByteStringLazy :: (FromNamedRecord a) => proxy a -> Delimiter -> BL.ByteString -> Either Text (Header, Vector a)
readByteStringLazy _f d bs = first pack $ decodeByNameWith (tsvDecodeOptions d) bs

readByteStringStrict :: (FromNamedRecord a) => proxy a -> Delimiter -> BS.ByteString -> Either Text (Header, Vector a)
readByteStringStrict d ff = readByteStringLazy d ff . BL.fromStrict

------------------------------------------------------------------------
-- | TODO use readFileLazy
readTSVFile :: FilePath -> IO (Either Text (Header, Vector TsvDoc))
readTSVFile fp = do

@@ -424,20 +283,6 @@ readTsvHal fp = do

readTsvHalLazyBS :: BL.ByteString -> Either Text (Header, Vector TsvHal)
readTsvHalLazyBS bs = first pack $ decodeByNameWith (tsvDecodeOptions Tab) bs

readTsvHalBSStrict :: BS.ByteString -> Either Text (Header, Vector TsvHal)
readTsvHalBSStrict bs = readTsvHalLazyBS $ BL.fromStrict bs

------------------------------------------------------------------------
writeFile :: FilePath -> (Header, Vector TsvDoc) -> IO ()
writeFile fp (h, vs) = BL.writeFile fp $ encodeByNameWith (tsvEncodeOptions Tab) h (V.toList vs)

writeDocs2Tsv :: FilePath -> [HyperdataDocument] -> IO ()
writeDocs2Tsv fp hs = BL.writeFile fp $ hyperdataDocument2tsv hs

hyperdataDocument2tsv :: [HyperdataDocument] -> BL.ByteString
hyperdataDocument2tsv hs = encodeByNameWith (tsvEncodeOptions Tab) headerTsvGargV3 (map hyperdataDocument2tsvDoc hs)

------------------------------------------------------------------------
-- Hal Format
data TsvHal = TsvHal

@@ -575,27 +420,11 @@ parseHal fp = do

  r <- readTsvHal fp
  pure $ V.toList . V.map tsvHal2doc . snd <$> r

parseHal' :: BL.ByteString -> Either Text [HyperdataDocument]
parseHal' bs = V.toList . V.map tsvHal2doc . snd <$> readTsvHalLazyBS bs

------------------------------------------------------------------------
parseTsv :: FilePath -> IO (Either Text [HyperdataDocument])
parseTsv fp = fmap (V.toList . V.map tsv2doc . snd) <$> readTSVFile fp

{-
parseTsv' :: BL.ByteString -> Either Text [HyperdataDocument]
parseTsv' bs = (V.toList . V.map tsv2doc . snd) <$> readTsvLazyBS Comma bs
-}

parseTsv' :: BL.ByteString -> Either Text [HyperdataDocument]
parseTsv' bs = do
  let result = case (testCorrectFile bs) of
        Left _err -> Left _err
        Right del -> readTsvLazyBS del bs
  V.toList . V.map tsv2doc . snd <$> result

parseTsvC :: BL.ByteString -> Either Text (Integer, ConduitT () HyperdataDocument Identity ())
parseTsvC bs =
src/Gargantext/Core/Text/Corpus/Query.hs

@@ -8,7 +8,6 @@ module Gargantext.Core.Text.Corpus.Query (

  , QueryTerm(..)
  , getQuery
  , parseQuery
  , mapQuery
-  , renderQuery
  , renderQueryTerm
  , interpretQuery

@@ -130,6 +129,3 @@ parseQuery (RawQuery txt) = bimap show (Query . BoolExpr.boolTreeToCNF) $

-renderQuery :: Query -> RawQuery
-renderQuery (Query cnf) = RawQuery . T.pack $ BoolExpr.boolExprPrinter (showsPrec 0) (BoolExpr.fromCNF cnf) ""

mapQuery :: (QueryTerm -> QueryTerm) -> Query -> Query
mapQuery f = Query . fmap (map f) . getQuery
weeder.toml

@@ -22,6 +22,11 @@ roots = [ '^Main\.main$'

          , '^Gargantext\.API\.Ngrams\.List\.importTsvFile$'
+         # Used by the tests
+         , '^Gargantext\.Core\.Text\.Corpus\.API\.Pubmed\.convertQuery$'
+         , '^Gargantext\.Core\.Text\.Corpus\.API\.Pubmed\.getESearch$'
          # Template Haskell
          # Weeder is not smart enough to know what functions will be used by