Commit 0c74d722 authored by Grégoire Locqueville's avatar Grégoire Locqueville

Remove dead modules from gargantext-cli

parent 602187c0
Pipeline #6521 failed with stages
in 21 minutes and 17 seconds
{-|
Module : CleanCsvCorpus.hs
Description : Gargantext starter
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Given a Gargantext CSV file and its query, this script cleans and
compresses the contexts around the main terms of the query.
-}
module CLI.CleanCsvCorpus where
import Data.SearchEngine qualified as S
import Data.Set qualified as S
import Data.Text (pack)
import Data.Vector (Vector)
import Data.Vector qualified as V
import Gargantext.Core.Text.Corpus.Parsers.TSV qualified as TSV
import Gargantext.Core.Text.Search
import Gargantext.Prelude
------------------------------------------------------------------------
-- | A query expressed as a list of @S.Term@ values.
--
-- The @S@ qualifier is shared by "Data.SearchEngine" and "Data.Set" in this
-- module's imports. NOTE(review): @Term@ presumably resolves to the
-- search-engine one - confirm. This alias is not referenced by the other
-- definitions visible in this module.
type Query = [S.Term]
-- | Keep only the documents whose 'TSV.d_docId' occurs in the given list of
-- document ids.
--
-- The ids are collected into a set once, outside the per-document predicate,
-- so the set is built a single time per call instead of potentially being
-- rebuilt for every element of the vector.
filterDocs :: [DocId] -> Vector TSV.TsvGargV3 -> Vector TSV.TsvGargV3
filterDocs docIds = V.filter isWanted
  where
    -- Built once and shared by every invocation of the predicate below.
    wantedIds = S.fromList docIds
    isWanted doc = TSV.d_docId doc `S.member` wantedIds
-- | Read the corpus at a fixed input path, keep only the documents returned
-- by a hard-coded search query, and write the result to a fixed output path.
--
-- The number of documents and the value of 'TSV.docsSize' are printed before
-- and after filtering. When 'TSV.readTSVFile' yields a 'Left', the error is
-- passed to 'panicTrace'.
main :: IO ()
main = do
  let inputPath = "/tmp/Gargantext_Corpus.csv"
      outputPath = "/tmp/Gargantext_Corpus_bis.csv"
      -- Alternative query kept for reference:
      -- ["water", "scarcity", "morocco", "shortage","flood"]
      queryTerms = map pack ["gratuit", "gratuité", "culture", "culturel"]
  parsed <- TSV.readTSVFile inputPath
  case parsed of
    Left err -> panicTrace $ "Error: " <> err
    Right (header, before) -> do
      putStrLn ("Number of documents before:" <> show (V.length before) :: Text)
      putStrLn ("Mean size of docs:" <> show (TSV.docsSize before) :: Text)
      let gargDocs = TSV.toDocs before
          -- Index every document, then ask the engine which ids match.
          matchingIds = S.query (S.insertDocs gargDocs initialDocSearchEngine) queryTerms
          after = TSV.fromDocs (filterDocs matchingIds (V.fromList gargDocs))
      putStrLn ("Number of documents after:" <> show (V.length after) :: Text)
      putStrLn ("Mean size of docs:" <> show (TSV.docsSize after) :: Text)
      TSV.writeFile outputPath (header, after)
module CLI.Utils (
mapMP
, mapConcurrentlyChunked
) where
import Control.Concurrent.Async as CCA (mapConcurrently)
import Data.List.Split (chunksOf)
import Gargantext.Prelude
import System.IO (hFlush)
------------------------------------------------------------------------
-- Tools

-- | Apply a monadic action to every element of a list, in order, while
-- showing a spinner on 'stderr'.
--
-- Before each action, a carriage return followed by the next character of the
-- repeating sequence @-\\|/@ is written to 'stderr' and the handle is flushed.
-- Once every action has run, @\\rDone\\n@ is written and the collected results
-- are returned.
mapMP :: MonadIO m => (a -> m b) -> [a] -> m [b]
mapMP f xs = do
  results <- traverse step (zip (cycle "-\\|/") xs)
  liftIO (hPutStr stderr ("\rDone\n" :: Text))
  pure results
  where
    -- Redraw the spinner frame, then run the caller's action on the element.
    step (frame, x) = do
      liftIO $ do
        hPutStr stderr ['\r', frame]
        hFlush stderr
      f x
-- | Map an 'IO' action over a list by splitting the list into chunks and
-- handing the chunks to 'mapConcurrently'; the elements inside one chunk are
-- processed sequentially.
--
-- The chunk size is @length ts \`div\` caps@, where @caps@ is the result of
-- 'getNumCapabilities', and never less than 1. Because the division rounds
-- down, there can be more chunks than capabilities.
--
-- Optimisation that needs further development (not used yet).
mapConcurrentlyChunked :: (a -> IO b) -> [a] -> IO [b]
mapConcurrentlyChunked f ts =
  getNumCapabilities >>= \workers ->
    let chunkSize = max 1 (length ts `div` workers)
        batches = chunksOf chunkSize ts
     in fmap concat (mapConcurrently (traverse f) batches)
......@@ -648,7 +648,6 @@ executable gargantext-cli
main-is: Main.hs
other-modules:
CLI.Admin
CLI.CleanCsvCorpus
CLI.FileDiff
CLI.FilterTermsAndCooc
CLI.Import
......@@ -662,7 +661,6 @@ executable gargantext-cli
CLI.Server.Routes
CLI.Types
CLI.Upgrade
CLI.Utils
Paths_gargantext
hs-source-dirs:
bin/gargantext-cli
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment