Verified Commit dca7d379 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 341-dev-websockets

parents 5286cb51 e147c115
Pipeline #6249 failed with stages
## Version 0.0.7.1.6.3
* [BACK][FIX][CSV; TSV in all codebase (#348)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/348)
* [FRONT][FIX][CSV TSV in all codebase (#676)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/676)
* [FRONT][FIX][TSV export for ngrams table]
## Version 0.0.7.1.6.2
* [FRONT][FIX][Pb with term selection in phylomemy (#673)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/673)
## Version 0.0.7.1.6.1 ## Version 0.0.7.1.6.1
* [FRONT][FIX][Display Phylomemy parameters (#580)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/580) * [FRONT][FIX][Display Phylomemy parameters (#580)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/580)
......
{ {
"corpusPath" : "Gargantext_DocsList-nodeId-185487.csv", "corpusPath" : "Gargantext_DocsList-nodeId-185487.tsv",
"listPath" : "Gargantext_NgramsList-185488.csv", "listPath" : "Gargantext_NgramsList-185488.tsv",
"outputPath" : "data", "outputPath" : "data",
"corpusParser" : { "corpusParser" : {
"tag" : "Csv", "tag" : "Tsv",
"_csv_limit" : 1500000 "_tsv_limit" : 1500000
}, },
"listParser" : "V3", "listParser" : "V3",
"phyloName" : "bpa", "phyloName" : "bpa",
......
...@@ -19,15 +19,15 @@ import Data.Set qualified as S ...@@ -19,15 +19,15 @@ import Data.Set qualified as S
import Data.Text (pack) import Data.Text (pack)
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Vector qualified as V import Data.Vector qualified as V
import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as CSV import Gargantext.Core.Text.Corpus.Parsers.TSV qualified as TSV
import Gargantext.Core.Text.Search import Gargantext.Core.Text.Search
import Gargantext.Prelude import Gargantext.Prelude
------------------------------------------------------------------------ ------------------------------------------------------------------------
type Query = [S.Term] type Query = [S.Term]
filterDocs :: [DocId] -> Vector CSV.CsvGargV3 -> Vector CSV.CsvGargV3 filterDocs :: [DocId] -> Vector TSV.TsvGargV3 -> Vector TSV.TsvGargV3
filterDocs docIds = V.filter (\doc -> S.member (CSV.d_docId doc) $ S.fromList docIds ) filterDocs docIds = V.filter (\doc -> S.member (TSV.d_docId doc) $ S.fromList docIds )
main :: IO () main :: IO ()
...@@ -37,19 +37,19 @@ main = do ...@@ -37,19 +37,19 @@ main = do
--let q = ["water", "scarcity", "morocco", "shortage","flood"] --let q = ["water", "scarcity", "morocco", "shortage","flood"]
let q = ["gratuit", "gratuité", "culture", "culturel"] let q = ["gratuit", "gratuité", "culture", "culturel"]
eDocs <- CSV.readCSVFile rPath eDocs <- TSV.readTSVFile rPath
case eDocs of case eDocs of
Right (h, csvDocs) -> do Right (h, tsvDocs) -> do
putStrLn ("Number of documents before:" <> show (V.length csvDocs) :: Text) putStrLn ("Number of documents before:" <> show (V.length tsvDocs) :: Text)
putStrLn ("Mean size of docs:" <> show ( CSV.docsSize csvDocs) :: Text) putStrLn ("Mean size of docs:" <> show ( TSV.docsSize tsvDocs) :: Text)
let docs = CSV.toDocs csvDocs let docs = TSV.toDocs tsvDocs
let engine = S.insertDocs docs initialDocSearchEngine let engine = S.insertDocs docs initialDocSearchEngine
let docIds = S.query engine (map pack q) let docIds = S.query engine (map pack q)
let docs' = CSV.fromDocs $ filterDocs docIds (V.fromList docs) let docs' = TSV.fromDocs $ filterDocs docIds (V.fromList docs)
putStrLn ("Number of documents after:" <> show (V.length docs') :: Text) putStrLn ("Number of documents after:" <> show (V.length docs') :: Text)
putStrLn ("Mean size of docs:" <> show (CSV.docsSize docs') :: Text) putStrLn ("Mean size of docs:" <> show (TSV.docsSize docs') :: Text)
CSV.writeFile wPath (h, docs') TSV.writeFile wPath (h, docs')
Left e -> panicTrace $ "Error: " <> e Left e -> panicTrace $ "Error: " <> e
...@@ -26,8 +26,8 @@ import Data.Text.Lazy.Encoding qualified as TLE ...@@ -26,8 +26,8 @@ import Data.Text.Lazy.Encoding qualified as TLE
import Data.Tuple.Extra (both) import Data.Tuple.Extra (both)
import Data.Vector qualified as DV import Data.Vector qualified as DV
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (readCSVFile, csv_title, csv_abstract, csv_publication_year, fromMIntOrDec, defaultYear) import Gargantext.Core.Text.Corpus.Parsers.TSV (readTSVFile, tsv_title, tsv_abstract, tsv_publication_year, fromMIntOrDec, defaultYear)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList) import Gargantext.Core.Text.List.Formats.TSV (tsvMapTermList)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs) import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
import Gargantext.Core.Text.Terms.WithList ( Patterns, buildPatterns, extractTermsWithList ) import Gargantext.Core.Text.Terms.WithList ( Patterns, buildPatterns, extractTermsWithList )
import Gargantext.Prelude import Gargantext.Prelude
...@@ -72,16 +72,16 @@ main = do ...@@ -72,16 +72,16 @@ main = do
[corpusFile, termListFile, outputFile] <- getArgs [corpusFile, termListFile, outputFile] <- getArgs
--corpus :: IO (DM.IntMap [[Text]]) --corpus :: IO (DM.IntMap [[Text]])
eCorpusFile <- readCSVFile corpusFile eCorpusFile <- readTSVFile corpusFile
case eCorpusFile of case eCorpusFile of
Right cf -> do Right cf -> do
let corpus = DM.fromListWith (<>) let corpus = DM.fromListWith (<>)
. DV.toList . DV.toList
. DV.map (\n -> (fromMIntOrDec defaultYear $ csv_publication_year n, [(csv_title n) <> " " <> (csv_abstract n)])) . DV.map (\n -> (fromMIntOrDec defaultYear $ tsv_publication_year n, [(tsv_title n) <> " " <> (tsv_abstract n)]))
. snd $ cf . snd $ cf
-- termListMap :: [Text] -- termListMap :: [Text]
termList <- csvMapTermList termListFile termList <- tsvMapTermList termListFile
putText $ show $ length termList putText $ show $ length termList
......
...@@ -41,7 +41,7 @@ main = do ...@@ -41,7 +41,7 @@ main = do
let let
--tt = (Unsupervised EN 6 0 Nothing) --tt = (Unsupervised EN 6 0 Nothing)
tt = (Multi EN) tt = (Multi EN)
format = CsvGargV3 -- CsvHal --WOS format = TsvGargV3 -- TsvHal --WOS
limit' = case (readMaybe limit :: Maybe Limit) of limit' = case (readMaybe limit :: Maybe Limit) of
Nothing -> panicTrace $ "Cannot read limit: " <> (Text.pack limit) Nothing -> panicTrace $ "Cannot read limit: " <> (Text.pack limit)
Just l -> l Just l -> l
...@@ -49,8 +49,8 @@ main = do ...@@ -49,8 +49,8 @@ main = do
mkCorpusUser = MkCorpusUserNormalCorpusName (UserName $ cs user) (cs name :: Text) mkCorpusUser = MkCorpusUserNormalCorpusName (UserName $ cs user) (cs name :: Text)
corpus = flowCorpusFile mkCorpusUser limit' tt format Plain corpusPath Nothing DevJobHandle corpus = flowCorpusFile mkCorpusUser limit' tt format Plain corpusPath Nothing DevJobHandle
corpusCsvHal :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId corpusTsvHal :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
corpusCsvHal = flowCorpusFile mkCorpusUser limit' tt CsvHal Plain corpusPath Nothing DevJobHandle corpusTsvHal = flowCorpusFile mkCorpusUser limit' tt TsvHal Plain corpusPath Nothing DevJobHandle
annuaire :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId annuaire :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
annuaire = flowAnnuaire (MkCorpusUserNormalCorpusName (UserName $ cs user) "Annuaire") (Multi EN) corpusPath DevJobHandle annuaire = flowAnnuaire (MkCorpusUserNormalCorpusName (UserName $ cs user) "Annuaire") (Multi EN) corpusPath DevJobHandle
...@@ -71,8 +71,8 @@ main = do ...@@ -71,8 +71,8 @@ main = do
then runCmdGargDev env corpus then runCmdGargDev env corpus
else pure 0 --(cs "false") else pure 0 --(cs "false")
_ <- if fun == "corpusCsvHal" _ <- if fun == "corpusTsvHal"
then runCmdGargDev env corpusCsvHal then runCmdGargDev env corpusTsvHal
else pure 0 --(cs "false") else pure 0 --(cs "false")
_ <- if fun == "annuaire" _ <- if fun == "annuaire"
......
...@@ -26,7 +26,7 @@ phyloConfig outdir = PhyloConfig { ...@@ -26,7 +26,7 @@ phyloConfig outdir = PhyloConfig {
corpusPath = "corpus.csv" corpusPath = "corpus.csv"
, listPath = "list.csv" , listPath = "list.csv"
, outputPath = outdir , outputPath = outdir
, corpusParser = Csv {_csv_limit = 150000} , corpusParser = Tsv {_tsv_limit = 150000}
, listParser = V4 , listParser = V4
, phyloName = "phylo_profile_test" , phyloName = "phylo_profile_test"
, phyloScale = 2 , phyloScale = 2
......
...@@ -31,9 +31,9 @@ import Gargantext.API.Ngrams.Prelude (toTermList) ...@@ -31,9 +31,9 @@ import Gargantext.API.Ngrams.Prelude (toTermList)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseFile) import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseFile)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight) import Gargantext.Core.Text.Corpus.Parsers.TSV (tsv_title, tsv_abstract, tsv_publication_year, tsv_publication_month, tsv_publication_day, tsv'_source, tsv'_title, tsv'_abstract, tsv'_publication_year, tsv'_publication_month, tsv'_publication_day, tsv'_weight)
import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv import Gargantext.Core.Text.Corpus.Parsers.TSV qualified as Tsv
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList) import Gargantext.Core.Text.List.Formats.TSV (tsvMapTermList)
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList) import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Types.Main (ListType(..))
......
...@@ -15,9 +15,9 @@ import Gargantext.API.Ngrams.Prelude (toTermList) ...@@ -15,9 +15,9 @@ import Gargantext.API.Ngrams.Prelude (toTermList)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseFile) import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseFile)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight) import Gargantext.Core.Text.Corpus.Parsers.TSV (tsv_title, tsv_abstract, tsv_publication_year, tsv_publication_month, tsv_publication_day, tsv'_source, tsv'_title, tsv'_abstract, tsv'_publication_year, tsv'_publication_month, tsv'_publication_day, tsv'_weight)
import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv import Gargantext.Core.Text.Corpus.Parsers.TSV qualified as Tsv
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList) import Gargantext.Core.Text.List.Formats.TSV (tsvMapTermList)
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList) import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Types.Main (ListType(..))
...@@ -76,29 +76,29 @@ wosToDocs limit patterns time path = do ...@@ -76,29 +76,29 @@ wosToDocs limit patterns time path = do
<$> fromRight [] <$> parseFile WOS Plain (path <> file) ) files <$> fromRight [] <$> parseFile WOS Plain (path <> file) ) files
-- To transform a Csv file into a list of Document -- To transform a Tsv file into a list of Document
csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document] tsvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
csvToDocs parser patterns time path = tsvToDocs parser patterns time path =
case parser of case parser of
Wos _ -> Prelude.error "csvToDocs: unimplemented" Wos _ -> Prelude.error "tsvToDocs: unimplemented"
Csv limit -> Vector.toList Tsv limit -> Vector.toList
<$> Vector.take limit <$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time) <$> Vector.map (\row -> Document (toPhyloDate (Tsv.fromMIntOrDec Tsv.defaultYear $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time) (toPhyloDate' (Tsv.fromMIntOrDec Tsv.defaultYear $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row)) (termsInText patterns $ (tsv_title row) <> " " <> (tsv_abstract row))
Nothing Nothing
[] []
time time
) <$> snd <$> either (\err -> panicTrace $ "CSV error" <> (show err)) identity <$> Csv.readCSVFile path ) <$> snd <$> either (\err -> panicTrace $ "TSV error" <> (show err)) identity <$> Tsv.readTSVFile path
Csv' limit -> Vector.toList Tsv' limit -> Vector.toList
<$> Vector.take limit <$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time) <$> Vector.map (\row -> Document (toPhyloDate (tsv'_publication_year row) (tsv'_publication_month row) (tsv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time) (toPhyloDate' (tsv'_publication_year row) (tsv'_publication_month row) (tsv'_publication_day row) time)
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row)) (termsInText patterns $ (tsv'_title row) <> " " <> (tsv'_abstract row))
(Just $ csv'_weight row) (Just $ tsv'_weight row)
(map (T.strip . pack) $ splitOn ";" (unpack $ (csv'_source row))) (map (T.strip . pack) $ splitOn ";" (unpack $ (tsv'_source row)))
time time
) <$> snd <$> Csv.readWeightedCsv path ) <$> snd <$> Tsv.readWeightedTsv path
-- To parse a file into a list of Document -- To parse a file into a list of Document
...@@ -107,8 +107,8 @@ fileToDocsAdvanced parser path time lst = do ...@@ -107,8 +107,8 @@ fileToDocsAdvanced parser path time lst = do
let patterns = buildPatterns lst let patterns = buildPatterns lst
case parser of case parser of
Wos limit -> wosToDocs limit patterns time path Wos limit -> wosToDocs limit patterns time path
Csv _ -> csvToDocs parser patterns time path Tsv _ -> tsvToDocs parser patterns time path
Csv' _ -> csvToDocs parser patterns time path Tsv' _ -> tsvToDocs parser patterns time path
fileToDocsDefault :: CorpusParser -> FilePath -> [TimeUnit] -> TermList -> IO [Document] fileToDocsDefault :: CorpusParser -> FilePath -> [TimeUnit] -> TermList -> IO [Document]
fileToDocsDefault parser path timeUnits lst = fileToDocsDefault parser path timeUnits lst =
...@@ -140,7 +140,7 @@ readListV4 path = do ...@@ -140,7 +140,7 @@ readListV4 path = do
fileToList :: ListParser -> FilePath -> IO TermList fileToList :: ListParser -> FilePath -> IO TermList
fileToList parser path = fileToList parser path =
case parser of case parser of
V3 -> csvMapTermList path V3 -> tsvMapTermList path
V4 -> fromJust V4 -> fromJust
<$> toTermList MapTerm NgramsTerms <$> toTermList MapTerm NgramsTerms
<$> readListV4 path <$> readListV4 path
......
...@@ -5,7 +5,7 @@ cabal-version: 3.4 ...@@ -5,7 +5,7 @@ cabal-version: 3.4
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.7.1.6.1 version: 0.0.7.1.6.3
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -38,13 +38,13 @@ data-files: ...@@ -38,13 +38,13 @@ data-files:
test-data/phylo/bpa_phylo_test.json test-data/phylo/bpa_phylo_test.json
test-data/phylo/cleopatre.golden.json test-data/phylo/cleopatre.golden.json
test-data/phylo/nadal.golden.json test-data/phylo/nadal.golden.json
test-data/phylo/nadal_docslist.golden.csv test-data/phylo/nadal_docslist.golden.tsv
test-data/phylo/nadal_ngramslist.golden.csv test-data/phylo/nadal_ngramslist.golden.tsv
test-data/phylo/issue-290-small.golden.json test-data/phylo/issue-290-small.golden.json
test-data/phylo/open_science.json test-data/phylo/open_science.json
test-data/phylo/small-phylo.golden.json test-data/phylo/small-phylo.golden.json
test-data/phylo/small_phylo_docslist.csv test-data/phylo/small_phylo_docslist.tsv
test-data/phylo/small_phylo_ngramslist.csv test-data/phylo/small_phylo_ngramslist.tsv
test-data/phylo/187481.json test-data/phylo/187481.json
test-data/phylo/phylo2dot2json.golden.json test-data/phylo/phylo2dot2json.golden.json
test-data/stemming/lancaster.txt test-data/stemming/lancaster.txt
...@@ -190,13 +190,13 @@ library ...@@ -190,13 +190,13 @@ library
Gargantext.Core.Text.Corpus.API.OpenAlex Gargantext.Core.Text.Corpus.API.OpenAlex
Gargantext.Core.Text.Corpus.API.Pubmed Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.Parsers Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV Gargantext.Core.Text.Corpus.Parsers.TSV
Gargantext.Core.Text.Corpus.Parsers.Date Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
Gargantext.Core.Text.Corpus.Query Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.List Gargantext.Core.Text.List
Gargantext.Core.Text.List.Group.WithStem Gargantext.Core.Text.List.Group.WithStem
Gargantext.Core.Text.List.Formats.CSV Gargantext.Core.Text.List.Formats.TSV
Gargantext.Core.Text.Metrics Gargantext.Core.Text.Metrics
Gargantext.Core.Text.Metrics.CharByChar Gargantext.Core.Text.Metrics.CharByChar
Gargantext.Core.Text.Metrics.Count Gargantext.Core.Text.Metrics.Count
......
...@@ -96,7 +96,7 @@ data GargJob ...@@ -96,7 +96,7 @@ data GargJob
= TableNgramsJob = TableNgramsJob
| ForgotPasswordJob | ForgotPasswordJob
| UpdateNgramsListJobJSON | UpdateNgramsListJobJSON
| UpdateNgramsListJobCSV | UpdateNgramsListJobTSV
| AddContactJob | AddContactJob
| AddFileJob | AddFileJob
| DocumentFromWriteNodeJob | DocumentFromWriteNodeJob
......
...@@ -18,7 +18,7 @@ module Gargantext.API.Ngrams.List ...@@ -18,7 +18,7 @@ module Gargantext.API.Ngrams.List
where where
import Data.ByteString.Lazy qualified as BSL import Data.ByteString.Lazy qualified as BSL
import Data.Csv qualified as Csv import Data.Csv qualified as Tsv
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict (toList) import Data.Map.Strict (toList)
...@@ -57,7 +57,7 @@ getAPI :: Named.GETAPI (AsServerT (GargM Env BackendInternalError)) ...@@ -57,7 +57,7 @@ getAPI :: Named.GETAPI (AsServerT (GargM Env BackendInternalError))
getAPI = Named.GETAPI $ \listId -> Named.ListEndpoints getAPI = Named.GETAPI $ \listId -> Named.ListEndpoints
{ listJSONEp = getJson listId { listJSONEp = getJson listId
, listJSONZipEp = getJsonZip listId , listJSONZipEp = getJsonZip listId
, listCSVEp = getCsv listId , listTSVEp = getTsv listId
} }
-- --
...@@ -91,17 +91,17 @@ getJsonZip lId = do ...@@ -91,17 +91,17 @@ getJsonZip lId = do
] ]
) nlz ) nlz
getCsv :: HasNodeStory env err m getTsv :: HasNodeStory env err m
=> ListId => ListId
-> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap) -> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
getCsv lId = do getTsv lId = do
lst <- getNgramsList lId lst <- getNgramsList lId
pure $ case Map.lookup NgramsTerms lst of pure $ case Map.lookup NgramsTerms lst of
Nothing -> noHeader Map.empty Nothing -> noHeader Map.empty
Just (Versioned { _v_data }) -> Just (Versioned { _v_data }) ->
addHeader (concat [ "attachment; filename=GarganText_NgramsList-" addHeader (concat [ "attachment; filename=GarganText_NgramsList-"
, pack $ show (_NodeId lId) , pack $ show (_NodeId lId)
, ".csv" , ".tsv"
] ]
) _v_data ) _v_data
...@@ -140,43 +140,43 @@ postAsyncJSON l ngramsList jobHandle = do ...@@ -140,43 +140,43 @@ postAsyncJSON l ngramsList jobHandle = do
-- --
-- CSV API -- TSV API
-- --
csvAPI :: Named.CSVAPI (AsServerT (GargM Env BackendInternalError)) tsvAPI :: Named.TSVAPI (AsServerT (GargM Env BackendInternalError))
csvAPI = csvPostAsync tsvAPI = tsvPostAsync
------------------------------------------------------------------------ ------------------------------------------------------------------------
csvPostAsync :: Named.CSVAPI (AsServerT (GargM Env BackendInternalError)) tsvPostAsync :: Named.TSVAPI (AsServerT (GargM Env BackendInternalError))
csvPostAsync = Named.CSVAPI $ \lId -> AsyncJobs $ tsvPostAsync = Named.TSVAPI $ \lId -> AsyncJobs $
serveJobsAPI UpdateNgramsListJobCSV $ \jHandle f -> do serveJobsAPI UpdateNgramsListJobTSV $ \jHandle f -> do
case ngramsListFromCSVData (_wtf_data f) of case ngramsListFromTSVData (_wtf_data f) of
Left err -> serverError $ err500 { errReasonPhrase = err } Left err -> serverError $ err500 { errReasonPhrase = err }
Right ngramsList -> postAsyncJSON lId ngramsList jHandle Right ngramsList -> postAsyncJSON lId ngramsList jHandle
-- | Tries converting a text file into an 'NgramList', so that we can reuse the -- | Tries converting a text file into an 'NgramList', so that we can reuse the
-- existing JSON endpoint for the CSV upload. -- existing JSON endpoint for the TSV upload.
ngramsListFromCSVData :: Text -> Either Prelude.String NgramsList ngramsListFromTSVData :: Text -> Either Prelude.String NgramsList
ngramsListFromCSVData csvData = case decodeCsv of ngramsListFromTSVData tsvData = case decodeTsv of
-- /NOTE/ The legacy CSV data only supports terms in imports and exports, so this is -- /NOTE/ The legacy TSV data only supports terms in imports and exports, so this is
-- all we care about. -- all we care about.
Left err -> Left $ "Invalid CSV found in ngramsListFromCSVData: " <> err Left err -> Left $ "Invalid TSV found in ngramsListFromTSVData: " <> err
Right terms -> pure $ Map.fromList [ (NgramsTerms, Versioned 0 $ mconcat . Vec.toList $ terms) ] Right terms -> pure $ Map.fromList [ (NgramsTerms, Versioned 0 $ mconcat . Vec.toList $ terms) ]
where where
binaryData = BSL.fromStrict $ P.encodeUtf8 csvData binaryData = BSL.fromStrict $ P.encodeUtf8 tsvData
decodeCsv :: Either Prelude.String (Vector NgramsTableMap) decodeTsv :: Either Prelude.String (Vector NgramsTableMap)
decodeCsv = Csv.decodeWithP csvToNgramsTableMap decodeTsv = Tsv.decodeWithP tsvToNgramsTableMap
(Csv.defaultDecodeOptions { Csv.decDelimiter = fromIntegral (P.ord '\t') }) (Tsv.defaultDecodeOptions { Tsv.decDelimiter = fromIntegral (P.ord '\t') })
Csv.HasHeader Tsv.HasHeader
binaryData binaryData
-- | Converts a plain CSV 'Record' into an NgramsTableMap -- | Converts a plain TSV 'Record' into an NgramsTableMap
csvToNgramsTableMap :: Csv.Record -> Csv.Parser NgramsTableMap tsvToNgramsTableMap :: Tsv.Record -> Tsv.Parser NgramsTableMap
csvToNgramsTableMap record = case Vec.toList record of tsvToNgramsTableMap record = case Vec.toList record of
(map P.decodeUtf8 -> [status, label, forms]) (map P.decodeUtf8 -> [status, label, forms])
-> pure $ conv status label forms -> pure $ conv status label forms
_ -> Prelude.fail "csvToNgramsTableMap failed" _ -> Prelude.fail "tsvToNgramsTableMap failed"
where where
conv :: Text -> Text -> Text -> NgramsTableMap conv :: Text -> Text -> Text -> NgramsTableMap
...@@ -199,12 +199,12 @@ csvToNgramsTableMap record = case Vec.toList record of ...@@ -199,12 +199,12 @@ csvToNgramsTableMap record = case Vec.toList record of
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | This is for debugging the CSV parser in the REPL -- | This is for debugging the TSV parser in the REPL
importCsvFile :: forall env err m. (HasNodeStory env err m, HasServerError err, MonadJobStatus m) importTsvFile :: forall env err m. (HasNodeStory env err m, HasServerError err, MonadJobStatus m)
=> ListId -> P.FilePath -> m () => ListId -> P.FilePath -> m ()
importCsvFile lId fp = do importTsvFile lId fp = do
contents <- liftBase $ P.readFile fp contents <- liftBase $ P.readFile fp
case ngramsListFromCSVData contents of case ngramsListFromTSVData contents of
Left err -> serverError $ err500 { errReasonPhrase = err } Left err -> serverError $ err500 { errReasonPhrase = err }
Right ngramsList -> postAsyncJSON lId ngramsList (noJobHandle @m Proxy) Right ngramsList -> postAsyncJSON lId ngramsList (noJobHandle @m Proxy)
......
...@@ -24,7 +24,7 @@ import Control.Category ((>>>)) ...@@ -24,7 +24,7 @@ import Control.Category ((>>>))
import Control.Lens (makePrisms, Iso', iso, from, (.=), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^?), (%~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~), over) import Control.Lens (makePrisms, Iso', iso, from, (.=), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^?), (%~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~), over)
import Data.Aeson hiding ((.=)) import Data.Aeson hiding ((.=))
import Data.Csv (defaultEncodeOptions, encodeByNameWith, header, namedRecord, EncodeOptions(..), NamedRecord, Quoting(QuoteNone)) import Data.Csv (defaultEncodeOptions, encodeByNameWith, header, namedRecord, EncodeOptions(..), NamedRecord, Quoting(QuoteNone))
import Data.Csv qualified as Csv import Data.Csv qualified as Tsv
import Data.HashMap.Strict.InsOrd qualified as InsOrdHashMap import Data.HashMap.Strict.InsOrd qualified as InsOrdHashMap
import Data.List qualified as List import Data.List qualified as List
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
...@@ -47,7 +47,7 @@ import Gargantext.Database.Admin.Types.Node (ContextId) ...@@ -47,7 +47,7 @@ import Gargantext.Database.Admin.Types.Node (ContextId)
import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM') import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
import Gargantext.Prelude hiding (IsString, hash, from, replace, to) import Gargantext.Prelude hiding (IsString, hash, from, replace, to)
import Gargantext.Prelude.Crypto.Hash (IsHashable(..)) import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
import Gargantext.Utils.Servant (CSV, ZIP) import Gargantext.Utils.Servant (TSV, ZIP)
import Gargantext.Utils.Zip (zipContentsPure) import Gargantext.Utils.Zip (zipContentsPure)
import Servant ( FromHttpApiData(parseUrlPiece), ToHttpApiData(toUrlPiece), Required, Strict, QueryParam', MimeRender(.. ), MimeUnrender(..)) import Servant ( FromHttpApiData(parseUrlPiece), ToHttpApiData(toUrlPiece), Required, Strict, QueryParam', MimeRender(.. ), MimeUnrender(..))
import Servant.Job.Utils (jsonOptions) import Servant.Job.Utils (jsonOptions)
...@@ -299,10 +299,10 @@ data NgramsSearchQuery = NgramsSearchQuery ...@@ -299,10 +299,10 @@ data NgramsSearchQuery = NgramsSearchQuery
type NgramsTableMap = Map NgramsTerm NgramsRepoElement type NgramsTableMap = Map NgramsTerm NgramsRepoElement
-- CSV: -- TSV:
-- header: status\tlabel\tforms -- header: status\tlabel\tforms
-- item: map\taccountability\taccounting|&|accoutns|&|account -- item: map\taccountability\taccounting|&|accoutns|&|account
instance MimeRender CSV NgramsTableMap where instance MimeRender TSV NgramsTableMap where
-- mimeRender _ _val = encode ([] :: [(Text, Text)]) -- mimeRender _ _val = encode ([] :: [(Text, Text)])
mimeRender _ val = encodeByNameWith encOptions (header ["status", "label", "forms"]) $ fn <$> Map.toList val mimeRender _ val = encodeByNameWith encOptions (header ["status", "label", "forms"]) $ fn <$> Map.toList val
where where
...@@ -310,9 +310,9 @@ instance MimeRender CSV NgramsTableMap where ...@@ -310,9 +310,9 @@ instance MimeRender CSV NgramsTableMap where
, encQuoting = QuoteNone } , encQuoting = QuoteNone }
fn :: (NgramsTerm, NgramsRepoElement) -> NamedRecord fn :: (NgramsTerm, NgramsRepoElement) -> NamedRecord
fn (NgramsTerm term, NgramsRepoElement { _nre_list, _nre_children }) = fn (NgramsTerm term, NgramsRepoElement { _nre_list, _nre_children }) =
namedRecord [ "status" Csv..= toText _nre_list namedRecord [ "status" Tsv..= toText _nre_list
, "label" Csv..= term , "label" Tsv..= term
, "forms" Csv..= T.intercalate "|&|" (unNgramsTerm <$> mSetToList _nre_children)] , "forms" Tsv..= T.intercalate "|&|" (unNgramsTerm <$> mSetToList _nre_children)]
toText :: ListType -> Text toText :: ListType -> Text
toText CandidateTerm = "candidate" toText CandidateTerm = "candidate"
toText MapTerm = "map" toText MapTerm = "map"
......
...@@ -240,8 +240,8 @@ addToCorpusWithForm user cid nwf jobHandle = do ...@@ -240,8 +240,8 @@ addToCorpusWithForm user cid nwf jobHandle = do
let limit = fromIntegral limit' :: Integer let limit = fromIntegral limit' :: Integer
let let
parseC = case (nwf ^. wf_filetype) of parseC = case (nwf ^. wf_filetype) of
CSV -> Parser.parseFormatC Parser.CsvGargV3 TSV -> Parser.parseFormatC Parser.TsvGargV3
CSV_HAL -> Parser.parseFormatC Parser.CsvHal TSV_HAL -> Parser.parseFormatC Parser.TsvHal
Iramuteq -> Parser.parseFormatC Parser.Iramuteq Iramuteq -> Parser.parseFormatC Parser.Iramuteq
Istex -> Parser.parseFormatC Parser.Istex Istex -> Parser.parseFormatC Parser.Istex
JSON -> Parser.parseFormatC Parser.JSON JSON -> Parser.parseFormatC Parser.JSON
......
...@@ -8,8 +8,8 @@ import Servant ...@@ -8,8 +8,8 @@ import Servant
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
data FileType = CSV data FileType = TSV
| CSV_HAL | TSV_HAL
| Istex | Istex
| PresseRIS | PresseRIS
| WOS | WOS
...@@ -17,14 +17,14 @@ data FileType = CSV ...@@ -17,14 +17,14 @@ data FileType = CSV
| JSON | JSON
deriving (Eq, Show, Generic) deriving (Eq, Show, Generic)
instance ToSchema FileType instance ToSchema FileType
instance Arbitrary FileType where arbitrary = elements [CSV, PresseRIS] instance Arbitrary FileType where arbitrary = elements [TSV, PresseRIS]
instance ToParamSchema FileType instance ToParamSchema FileType
instance FromJSON FileType instance FromJSON FileType
instance ToJSON FileType instance ToJSON FileType
instance FromHttpApiData FileType where instance FromHttpApiData FileType where
parseUrlPiece "CSV" = pure CSV parseUrlPiece "TSV" = pure TSV
parseUrlPiece "CSV_HAL" = pure CSV_HAL parseUrlPiece "TSV_HAL" = pure TSV_HAL
parseUrlPiece "Istex" = pure Istex parseUrlPiece "Istex" = pure Istex
parseUrlPiece "PresseRis" = pure PresseRIS parseUrlPiece "PresseRis" = pure PresseRIS
parseUrlPiece "WOS" = pure WOS parseUrlPiece "WOS" = pure WOS
......
...@@ -41,7 +41,7 @@ documentExportAPI :: IsGargServer env err m ...@@ -41,7 +41,7 @@ documentExportAPI :: IsGargServer env err m
documentExportAPI userNodeId dId = Named.DocumentExportAPI $ Named.DocumentExportEndpoints documentExportAPI userNodeId dId = Named.DocumentExportAPI $ Named.DocumentExportEndpoints
{ exportJSONEp = getDocumentsJSON userNodeId dId { exportJSONEp = getDocumentsJSON userNodeId dId
, exportJSONZipEp = getDocumentsJSONZip userNodeId dId , exportJSONZipEp = getDocumentsJSONZip userNodeId dId
, exportCSVEp = getDocumentsCSV userNodeId dId , exportTSVEp = getDocumentsTSV userNodeId dId
} }
-------------------------------------------------- --------------------------------------------------
...@@ -98,16 +98,16 @@ getDocumentsJSONZip userNodeId pId = do ...@@ -98,16 +98,16 @@ getDocumentsJSONZip userNodeId pId = do
, dezFileName dexpz , dezFileName dexpz
, ".zip" ]) dexpz , ".zip" ]) dexpz
getDocumentsCSV :: NodeId getDocumentsTSV :: NodeId
-- ^ The Node ID of the target user -- ^ The Node ID of the target user
-> DocId -> DocId
-> GargNoServer (Headers '[Header "Content-Disposition" T.Text] T.Text) -- [Document] -> GargNoServer (Headers '[Header "Content-Disposition" T.Text] T.Text) -- [Document]
getDocumentsCSV userNodeId pId = do getDocumentsTSV userNodeId pId = do
dJSON <- getDocumentsJSON userNodeId pId dJSON <- getDocumentsJSON userNodeId pId
let DocumentExport { _de_documents } = getResponse dJSON let DocumentExport { _de_documents } = getResponse dJSON
let ret = TE.decodeUtf8 $ BSC.toStrict $ encodeDefaultOrderedByNameWith (defaultEncodeOptions {encDelimiter = fromIntegral $ ord '\t', encQuoting = QuoteAll }) _de_documents let ret = TE.decodeUtf8 $ BSC.toStrict $ encodeDefaultOrderedByNameWith (defaultEncodeOptions {encDelimiter = fromIntegral $ ord '\t', encQuoting = QuoteAll }) _de_documents
pure $ addHeader (T.concat [ "attachment; filename=GarganText_DocsList-" pure $ addHeader (T.concat [ "attachment; filename=GarganText_DocsList-"
, T.pack $ show pId , T.pack $ show pId
, ".csv"]) , ".tsv"])
ret ret
...@@ -89,7 +89,7 @@ frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selecti ...@@ -89,7 +89,7 @@ frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selecti
-- FIXME(adn) Audit this conversion. -- FIXME(adn) Audit this conversion.
addToCorpusWithForm (RootId userNodeId) addToCorpusWithForm (RootId userNodeId)
cId cId
(NewWithForm { _wf_filetype = CSV (NewWithForm { _wf_filetype = TSV
, _wf_fileformat = Plain , _wf_fileformat = Plain
, _wf_data = body , _wf_data = body
, _wf_lang , _wf_lang
......
...@@ -31,7 +31,7 @@ data DocumentExportEndpoints mode = DocumentExportEndpoints ...@@ -31,7 +31,7 @@ data DocumentExportEndpoints mode = DocumentExportEndpoints
:> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExport) :> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExport)
, exportJSONZipEp :: mode :- "json.zip" , exportJSONZipEp :: mode :- "json.zip"
:> Get '[ZIP] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExportZIP) :> Get '[ZIP] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExportZIP)
, exportCSVEp :: mode :- "csv" , exportTSVEp :: mode :- "tsv"
:> Get '[PlainText] (Headers '[Servant.Header "Content-Disposition" Text] Text) :> Get '[PlainText] (Headers '[Servant.Header "Content-Disposition" Text] Text)
} deriving Generic } deriving Generic
......
...@@ -5,7 +5,7 @@ module Gargantext.API.Routes.Named.List ( ...@@ -5,7 +5,7 @@ module Gargantext.API.Routes.Named.List (
GETAPI(..) GETAPI(..)
, ListEndpoints(..) , ListEndpoints(..)
, JSONAPI(..) , JSONAPI(..)
, CSVAPI(..) , TSVAPI(..)
) where ) where
import Data.Text (Text) import Data.Text (Text)
...@@ -30,7 +30,7 @@ data ListEndpoints mode = ListEndpoints ...@@ -30,7 +30,7 @@ data ListEndpoints mode = ListEndpoints
:> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList) :> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
, listJSONZipEp :: mode :- "json.zip" , listJSONZipEp :: mode :- "json.zip"
:> Get '[GUS.ZIP] (Headers '[Header "Content-Disposition" Text] NgramsListZIP) :> Get '[GUS.ZIP] (Headers '[Header "Content-Disposition" Text] NgramsListZIP)
, listCSVEp :: mode :- "csv" :> Get '[GUS.CSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap) , listTSVEp :: mode :- "tsv" :> Get '[GUS.TSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
} deriving Generic } deriving Generic
newtype JSONAPI mode = JSONAPI newtype JSONAPI mode = JSONAPI
...@@ -44,11 +44,11 @@ newtype JSONAPI mode = JSONAPI ...@@ -44,11 +44,11 @@ newtype JSONAPI mode = JSONAPI
} deriving Generic } deriving Generic
newtype CSVAPI mode = CSVAPI newtype TSVAPI mode = TSVAPI
{ updateListCSVEp :: mode :- Summary "Update List (legacy v3 CSV)" { updateListTSVEp :: mode :- Summary "Update List (legacy v3 TSV)"
:> "lists" :> "lists"
:> Capture "listId" ListId :> Capture "listId" ListId
:> "csv" :> "tsv"
:> "add" :> "add"
:> "form" :> "form"
:> "async" :> "async"
......
...@@ -91,7 +91,7 @@ data GargPrivateAPI' mode = GargPrivateAPI' ...@@ -91,7 +91,7 @@ data GargPrivateAPI' mode = GargPrivateAPI'
, addWithQueryEp :: mode :- NamedRoutes AddWithQuery , addWithQueryEp :: mode :- NamedRoutes AddWithQuery
, listGetAPI :: mode :- NamedRoutes List.GETAPI , listGetAPI :: mode :- NamedRoutes List.GETAPI
, listJsonAPI :: mode :- NamedRoutes List.JSONAPI , listJsonAPI :: mode :- NamedRoutes List.JSONAPI
, listCsvAPI :: mode :- NamedRoutes List.CSVAPI , listTsvAPI :: mode :- NamedRoutes List.TSVAPI
, shareUrlEp :: mode :- "shareurl" :> NamedRoutes ShareURL , shareUrlEp :: mode :- "shareurl" :> NamedRoutes ShareURL
} deriving Generic } deriving Generic
......
...@@ -63,6 +63,6 @@ serverPrivateGargAPI' authenticatedUser@(AuthenticatedUser userNodeId userId) ...@@ -63,6 +63,6 @@ serverPrivateGargAPI' authenticatedUser@(AuthenticatedUser userNodeId userId)
, addWithQueryEp = addCorpusWithQuery (RootId userNodeId) , addWithQueryEp = addCorpusWithQuery (RootId userNodeId)
, listGetAPI = List.getAPI , listGetAPI = List.getAPI
, listJsonAPI = List.jsonAPI , listJsonAPI = List.jsonAPI
, listCsvAPI = List.csvAPI , listTsvAPI = List.tsvAPI
, shareUrlEp = shareURL , shareUrlEp = shareURL
} }
...@@ -19,7 +19,7 @@ import Data.Morpheus.Types (GQLType) ...@@ -19,7 +19,7 @@ import Data.Morpheus.Types (GQLType)
import Data.Set qualified as S import Data.Set qualified as S
import Data.Text (splitOn) import Data.Text (splitOn)
import Data.Vector qualified as DV import Data.Vector qualified as DV
import Gargantext.Core.Text.Corpus.Parsers.CSV as CSV import Gargantext.Core.Text.Corpus.Parsers.TSV as TSV
import Gargantext.Core.Text.Metrics.Utils as Utils import Gargantext.Core.Text.Metrics.Utils as Utils
import Gargantext.Prelude import Gargantext.Prelude
...@@ -104,9 +104,9 @@ mapIdSchool :: Map Text Text ...@@ -104,9 +104,9 @@ mapIdSchool :: Map Text Text
mapIdSchool = M.fromList $ Gargantext.Prelude.map mapIdSchool = M.fromList $ Gargantext.Prelude.map
(\(School { school_shortName, school_id }) -> (school_id, school_shortName)) schools (\(School { school_shortName, school_id }) -> (school_id, school_shortName)) schools
hal_data :: IO (Either Text (DV.Vector CsvHal)) hal_data :: IO (Either Text (DV.Vector TsvHal))
hal_data = do hal_data = do
r <- CSV.readCsvHal "doc/corpus_imt/Gargantext_Corpus.csv" r <- TSV.readTsvHal "doc/corpus_imt/Gargantext_Corpus.csv"
pure $ snd <$> r pure $ snd <$> r
names :: S.Set Text names :: S.Set Text
...@@ -117,7 +117,7 @@ toSchoolName t = case M.lookup t mapIdSchool of ...@@ -117,7 +117,7 @@ toSchoolName t = case M.lookup t mapIdSchool of
Nothing -> t Nothing -> t
Just t' -> t' Just t' -> t'
publisBySchool :: DV.Vector CsvHal -> [(Maybe Text, Int)] publisBySchool :: DV.Vector TsvHal -> [(Maybe Text, Int)]
publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSchool, n)) publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSchool, n))
$ DL.filter (\i -> S.member (fst i) names) $ DL.filter (\i -> S.member (fst i) names)
$ DL.reverse $ DL.reverse
...@@ -126,5 +126,5 @@ publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSc ...@@ -126,5 +126,5 @@ publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSc
$ Utils.freq $ Utils.freq
$ DL.concat $ DL.concat
$ DV.toList $ DV.toList
$ DV.map (\n -> splitOn ( ", ") (csvHal_instStructId_i n) ) $ DV.map (\n -> splitOn ( ", ") (tsvHal_instStructId_i n) )
$ DV.filter (\n -> csvHal_publication_year n == 2017) hal_data' $ DV.filter (\n -> tsvHal_publication_year n == 2017) hal_data'
...@@ -22,7 +22,7 @@ import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header ) ...@@ -22,7 +22,7 @@ import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header )
import Data.Text qualified as T import Data.Text qualified as T
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Vector qualified as Vector import Data.Vector qualified as Vector
import Gargantext.Core.Text.Corpus.Parsers.CSV ( csvDecodeOptions, Delimiter(Tab) ) import Gargantext.Core.Text.Corpus.Parsers.TSV ( tsvDecodeOptions, Delimiter(Tab) )
import Gargantext.Database.Admin.Types.Hyperdata.Contact import Gargantext.Database.Admin.Types.Hyperdata.Contact
import Gargantext.Prelude import Gargantext.Prelude
import System.FilePath.Posix (takeExtension) import System.FilePath.Posix (takeExtension)
...@@ -30,7 +30,7 @@ import System.FilePath.Posix (takeExtension) ...@@ -30,7 +30,7 @@ import System.FilePath.Posix (takeExtension)
------------------------------------------------------------------------ ------------------------------------------------------------------------
readFile_Annuaire :: FilePath -> IO [HyperdataContact] readFile_Annuaire :: FilePath -> IO [HyperdataContact]
readFile_Annuaire fp = case takeExtension fp of readFile_Annuaire fp = case takeExtension fp of
".csv" -> readCSVFile_Annuaire fp ".csv" -> readTSVFile_Annuaire fp
".data" -> deserialiseImtUsersFromFile fp ".data" -> deserialiseImtUsersFromFile fp
unknownExt -> panicTrace $ "[G.C.E.I.readFile_Annuaire] extension unknown: " <> T.pack unknownExt unknownExt -> panicTrace $ "[G.C.E.I.readFile_Annuaire] extension unknown: " <> T.pack unknownExt
...@@ -69,7 +69,7 @@ data IMTUser = IMTUser ...@@ -69,7 +69,7 @@ data IMTUser = IMTUser
, date_modification :: Maybe Text , date_modification :: Maybe Text
} deriving (Eq, Show, Generic) } deriving (Eq, Show, Generic)
-- | CSV instance -- | TSV instance
instance FromNamedRecord IMTUser where instance FromNamedRecord IMTUser where
parseNamedRecord r = do parseNamedRecord r = do
id <- r .: "id" id <- r .: "id"
...@@ -105,21 +105,21 @@ instance FromNamedRecord IMTUser where ...@@ -105,21 +105,21 @@ instance FromNamedRecord IMTUser where
date_modification <- r .: "date_modification" date_modification <- r .: "date_modification"
pure $ IMTUser {..} pure $ IMTUser {..}
headerCSVannuaire :: Header headerTSVannuaire :: Header
headerCSVannuaire = headerTSVannuaire =
header ["id","entite","mail","nom","prenom","fonction","fonction2","tel","fax","service","groupe","entite2","service2","groupe2","bureau","url","pservice","pfonction","afonction","afonction2","grprech","appellation","lieu","aprecision","atel","sexe","statut","idutilentite","actif","idutilsiecoles","date_modification"] header ["id","entite","mail","nom","prenom","fonction","fonction2","tel","fax","service","groupe","entite2","service2","groupe2","bureau","url","pservice","pfonction","afonction","afonction2","grprech","appellation","lieu","aprecision","atel","sexe","statut","idutilentite","actif","idutilsiecoles","date_modification"]
readCSVFile_Annuaire :: FilePath -> IO [HyperdataContact] readTSVFile_Annuaire :: FilePath -> IO [HyperdataContact]
readCSVFile_Annuaire fp = do readTSVFile_Annuaire fp = do
users <- snd <$> readCSVFile_Annuaire' fp users <- snd <$> readTSVFile_Annuaire' fp
pure $ map imtUser2gargContact $ Vector.toList users pure $ map imtUser2gargContact $ Vector.toList users
readCSVFile_Annuaire' :: FilePath -> IO (Header, Vector IMTUser) readTSVFile_Annuaire' :: FilePath -> IO (Header, Vector IMTUser)
readCSVFile_Annuaire' = fmap readCsvHalLazyBS' . BL.readFile readTSVFile_Annuaire' = fmap readTsvHalLazyBS' . BL.readFile
where where
readCsvHalLazyBS' :: BL.ByteString -> (Header, Vector IMTUser) readTsvHalLazyBS' :: BL.ByteString -> (Header, Vector IMTUser)
readCsvHalLazyBS' bs = case decodeByNameWith (csvDecodeOptions Tab) bs of readTsvHalLazyBS' bs = case decodeByNameWith (tsvDecodeOptions Tab) bs of
Left e -> panicTrace (cs e) Left e -> panicTrace (cs e)
Right rows -> rows Right rows -> rows
......
...@@ -13,19 +13,19 @@ Format Converter. ...@@ -13,19 +13,19 @@ Format Converter.
{-# LANGUAGE PackageImports #-} {-# LANGUAGE PackageImports #-}
module Gargantext.Core.Text.Convert (risPress2csvWrite) module Gargantext.Core.Text.Convert (risPress2tsvWrite)
where where
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat(..), FileType(..)) import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat(..), FileType(..))
import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv) import Gargantext.Core.Text.Corpus.Parsers.TSV (writeDocs2Tsv)
import Gargantext.Prelude import Gargantext.Prelude
risPress2csvWrite :: FilePath -> IO () risPress2tsvWrite :: FilePath -> IO ()
risPress2csvWrite f = do risPress2tsvWrite f = do
eContents <- parseFile RisPresse Plain (f <> ".ris") eContents <- parseFile RisPresse Plain (f <> ".ris")
case eContents of case eContents of
Right contents -> writeDocs2Csv (f <> ".csv") contents Right contents -> writeDocs2Tsv (f <> ".csv") contents
Left e -> panicTrace $ "Error: " <> e Left e -> panicTrace $ "Error: " <> e
......
...@@ -15,13 +15,13 @@ module Gargantext.Core.Text.Corpus.API.Isidore ( ...@@ -15,13 +15,13 @@ module Gargantext.Core.Text.Corpus.API.Isidore (
get get
-- * Internals (possibly unused?) -- * Internals (possibly unused?)
, isidore2csvFile , isidore2tsvFile
) where ) where
import Data.Text qualified as Text import Data.Text qualified as Text
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers (cleanText) import Gargantext.Core.Text.Corpus.Parsers (cleanText)
import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv) import Gargantext.Core.Text.Corpus.Parsers.TSV (writeDocs2Tsv)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Defaults qualified as Defaults import Gargantext.Defaults qualified as Defaults
...@@ -50,12 +50,12 @@ get lang l q a = do ...@@ -50,12 +50,12 @@ get lang l q a = do
hDocs <- mapM (isidoreToDoc lang) (toIsidoreDocs iDocs) hDocs <- mapM (isidoreToDoc lang) (toIsidoreDocs iDocs)
pure hDocs pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit isidore2tsvFile :: FilePath -> Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO () -> IO ()
isidore2csvFile fp lang li tq aq = do isidore2tsvFile fp lang li tq aq = do
hdocs <- get lang li tq aq hdocs <- get lang li tq aq
writeDocs2Csv fp hdocs writeDocs2Tsv fp hdocs
isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc lang (IsidoreDoc t a d u s as) = do isidoreToDoc lang (IsidoreDoc t a d u s as) = do
......
...@@ -47,7 +47,7 @@ import Data.Text qualified as DT ...@@ -47,7 +47,7 @@ import Data.Text qualified as DT
import Data.Tuple.Extra (both) -- , first, second) import Data.Tuple.Extra (both) -- , first, second)
import Gargantext.API.Node.Corpus.New.Types (FileFormat(..)) import Gargantext.API.Node.Corpus.New.Types (FileFormat(..))
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.CSV (parseHal, parseCsv, parseCsvC) import Gargantext.Core.Text.Corpus.Parsers.TSV (parseHal, parseTsv, parseTsvC)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Core.Text.Corpus.Parsers.FrameWrite (text2titleParagraphs) import Gargantext.Core.Text.Corpus.Parsers.FrameWrite (text2titleParagraphs)
import Gargantext.Core.Text.Corpus.Parsers.Iramuteq qualified as Iramuteq import Gargantext.Core.Text.Corpus.Parsers.Iramuteq qualified as Iramuteq
...@@ -78,8 +78,8 @@ type ParseError = Text ...@@ -78,8 +78,8 @@ type ParseError = Text
data FileType = WOS data FileType = WOS
| RIS | RIS
| RisPresse | RisPresse
| CsvGargV3 | TsvGargV3
| CsvHal | TsvHal
| Iramuteq | Iramuteq
| JSON | JSON
| Istex | Istex
...@@ -109,11 +109,11 @@ parseFormatC ft ff bs0 = first ParseFormatError <$> do_parse ft ff bs0 ...@@ -109,11 +109,11 @@ parseFormatC ft ff bs0 = first ParseFormatError <$> do_parse ft ff bs0
-> FileFormat -> FileFormat
-> DB.ByteString -> DB.ByteString
-> m (Either DT.Text (Integer, ConduitT () HyperdataDocument IO ())) -> m (Either DT.Text (Integer, ConduitT () HyperdataDocument IO ()))
do_parse CsvGargV3 Plain bs = do do_parse TsvGargV3 Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs let eParsedC = parseTsvC $ DBL.fromStrict bs
pure (second (transPipe (pure . runIdentity)) <$> eParsedC) pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
do_parse CsvHal Plain bs = do do_parse TsvHal Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs let eParsedC = parseTsvC $ DBL.fromStrict bs
pure (second (transPipe (pure . runIdentity)) <$> eParsedC) pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
do_parse Istex Plain bs = do do_parse Istex Plain bs = do
ep <- liftBase $ parseIstex EN $ DBL.fromStrict bs ep <- liftBase $ parseIstex EN $ DBL.fromStrict bs
...@@ -188,8 +188,8 @@ etale = concatMap etale' ...@@ -188,8 +188,8 @@ etale = concatMap etale'
-- parseFormat :: FileType -> DB.ByteString -> IO (Either Prelude.String [HyperdataDocument]) -- parseFormat :: FileType -> DB.ByteString -> IO (Either Prelude.String [HyperdataDocument])
-- parseFormat CsvGargV3 bs = pure $ parseCsv' $ DBL.fromStrict bs -- parseFormat TsvGargV3 bs = pure $ parseTsv' $ DBL.fromStrict bs
-- parseFormat CsvHal bs = pure $ parseHal' $ DBL.fromStrict bs -- parseFormat TsvHal bs = pure $ parseHal' $ DBL.fromStrict bs
-- parseFormat RisPresse bs = do -- parseFormat RisPresse bs = do
-- docs <- mapM (toDoc RIS) -- docs <- mapM (toDoc RIS)
-- <$> snd -- <$> snd
...@@ -220,8 +220,8 @@ parseFile :: FileType ...@@ -220,8 +220,8 @@ parseFile :: FileType
-> FileFormat -> FileFormat
-> FilePath -> FilePath
-> IO (Either Text [HyperdataDocument]) -> IO (Either Text [HyperdataDocument])
parseFile CsvGargV3 Plain p = parseCsv p parseFile TsvGargV3 Plain p = parseTsv p
parseFile CsvHal Plain p = parseHal p parseFile TsvHal Plain p = parseHal p
parseFile RisPresse Plain p = do parseFile RisPresse Plain p = do
docs <- join $ mapM (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p docs <- join $ mapM (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p
pure $ Right docs pure $ Right docs
......
...@@ -18,7 +18,7 @@ import Data.ByteString.Lazy qualified as DBL ...@@ -18,7 +18,7 @@ import Data.ByteString.Lazy qualified as DBL
import Data.List qualified as List import Data.List qualified as List
import Data.Text qualified as DT import Data.Text qualified as DT
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.CSV (hyperdataDocument2csv) import Gargantext.Core.Text.Corpus.Parsers.TSV (hyperdataDocument2tsv)
import Gargantext.Core.Text.Corpus.Parsers.FrameWrite (text2titleParagraphs) import Gargantext.Core.Text.Corpus.Parsers.FrameWrite (text2titleParagraphs)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude import Gargantext.Prelude
...@@ -29,13 +29,13 @@ import System.Directory -- (getDirectoryContents) ...@@ -29,13 +29,13 @@ import System.Directory -- (getDirectoryContents)
type FileOut = FilePath type FileOut = FilePath
book2csv :: Int -> FileDir -> FileOut -> IO () book2tsv :: Int -> FileDir -> FileOut -> IO ()
book2csv n f_in f_out = do book2tsv n f_in f_out = do
files <- filesOf f_in files <- filesOf f_in
texts <- readPublis f_in files texts <- readPublis f_in files
let publis = concatMap (file2publi n) texts let publis = concatMap (file2publi n) texts
let docs = zipWith publiToHyperdata [1..] publis let docs = zipWith publiToHyperdata [1..] publis
DBL.writeFile f_out (hyperdataDocument2csv docs) DBL.writeFile f_out (hyperdataDocument2tsv docs)
filesOf :: FileDir -> IO [FilePath] filesOf :: FileDir -> IO [FilePath]
filesOf fd = List.sort -- sort by filename filesOf fd = List.sort -- sort by filename
......
...@@ -14,14 +14,14 @@ Json parser to export towoard CSV GargV3 format. ...@@ -14,14 +14,14 @@ Json parser to export towoard CSV GargV3 format.
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2csv, readPatents) module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2tsv, readPatents)
where where
import Data.Aeson ( decode ) import Data.Aeson ( decode )
import Data.ByteString.Lazy (readFile) import Data.ByteString.Lazy (readFile)
import Data.Text (unpack) import Data.Text (unpack)
import Data.Vector (fromList) import Data.Vector (fromList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3) import Gargantext.Core.Text.Corpus.Parsers.TSV (TsvDoc(..), writeFile, headerTsvGargV3)
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Defaults qualified as Defaults import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (readFile, writeFile) import Gargantext.Prelude hiding (readFile, writeFile)
...@@ -41,20 +41,20 @@ readPatents fp = decode <$> readFile fp ...@@ -41,20 +41,20 @@ readPatents fp = decode <$> readFile fp
type FilePathIn = FilePath type FilePathIn = FilePath
type FilePathOut = FilePath type FilePathOut = FilePath
json2csv :: FilePathIn -> FilePathOut -> IO () json2tsv :: FilePathIn -> FilePathOut -> IO ()
json2csv fin fout = do json2tsv fin fout = do
patents <- maybe (panicTrace "json2csv error") identity <$> readPatents fin patents <- maybe (panicTrace "json2tsv error") identity <$> readPatents fin
writeFile fout (headerCsvGargV3, fromList $ map patent2csvDoc patents) writeFile fout (headerTsvGargV3, fromList $ map patent2tsvDoc patents)
patent2csvDoc :: Patent -> CsvDoc patent2tsvDoc :: Patent -> TsvDoc
patent2csvDoc (Patent { .. }) = patent2tsvDoc (Patent { .. }) =
CsvDoc { csv_title = _patent_title TsvDoc { tsv_title = _patent_title
, csv_source = "Source" , tsv_source = "Source"
, csv_publication_year = Just $ read (unpack _patent_year) , tsv_publication_year = Just $ read (unpack _patent_year)
, csv_publication_month = Just $ Defaults.month , tsv_publication_month = Just $ Defaults.month
, csv_publication_day = Just $ Defaults.day , tsv_publication_day = Just $ Defaults.day
, csv_abstract = _patent_abstract , tsv_abstract = _patent_abstract
, csv_authors = "Authors" } , tsv_authors = "Authors" }
......
{-| {-|
Module : Gargantext.Core.Text.List.Formats.CSV Module : Gargantext.Core.Text.List.Formats.TSV
Description : Description :
Copyright : (c) CNRS, 2018-Present Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -7,12 +7,12 @@ Maintainer : team@gargantext.org ...@@ -7,12 +7,12 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
CSV parser for Gargantext corpus files. TSV parser for Gargantext corpus files.
-} -}
module Gargantext.Core.Text.List.Formats.CSV where module Gargantext.Core.Text.List.Formats.TSV where
import Control.Applicative import Control.Applicative
import Data.ByteString.Lazy qualified as BL import Data.ByteString.Lazy qualified as BL
...@@ -25,74 +25,74 @@ import Gargantext.Core.Text.Context ...@@ -25,74 +25,74 @@ import Gargantext.Core.Text.Context
import Gargantext.Prelude hiding (length) import Gargantext.Prelude hiding (length)
------------------------------------------------------------------------ ------------------------------------------------------------------------
csvMapTermList :: FilePath -> IO TermList tsvMapTermList :: FilePath -> IO TermList
csvMapTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp tsvMapTermList fp = tsv2list TsvMap <$> snd <$> fromTsvListFile fp
csv2list :: CsvListType -> Vector CsvList -> TermList tsv2list :: TsvListType -> Vector TsvList -> TermList
csv2list lt vs = V.toList $ V.map (\(CsvList _ label forms) tsv2list lt vs = V.toList $ V.map (\(TsvList _ label forms)
-> (DT.words label, [DT.words label] <> (filter (not . null) . map DT.words $ DT.splitOn csvListFormsDelimiter forms))) -> (DT.words label, [DT.words label] <> (filter (not . null) . map DT.words $ DT.splitOn tsvListFormsDelimiter forms)))
$ V.filter (\l -> csvList_status l == lt ) vs $ V.filter (\l -> tsvList_status l == lt ) vs
------------------------------------------------------------------------ ------------------------------------------------------------------------
data CsvListType = CsvMap | CsvStop | CsvCandidate data TsvListType = TsvMap | TsvStop | TsvCandidate
deriving (Read, Show, Eq) deriving (Read, Show, Eq)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- CSV List Main Configuration -- TSV List Main Configuration
csvListFieldDelimiter :: Char tsvListFieldDelimiter :: Char
csvListFieldDelimiter = '\t' tsvListFieldDelimiter = '\t'
csvListFormsDelimiter :: Text tsvListFormsDelimiter :: Text
csvListFormsDelimiter = "|&|" tsvListFormsDelimiter = "|&|"
------------------------------------------------------------------------ ------------------------------------------------------------------------
data CsvList = CsvList data TsvList = TsvList
{ csvList_status :: !CsvListType { tsvList_status :: !TsvListType
, csvList_label :: !Text , tsvList_label :: !Text
, csvList_forms :: !Text , tsvList_forms :: !Text
} }
deriving (Show) deriving (Show)
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance FromNamedRecord CsvList where instance FromNamedRecord TsvList where
parseNamedRecord r = CsvList <$> r .: "status" parseNamedRecord r = TsvList <$> r .: "status"
<*> r .: "label" <*> r .: "label"
<*> r .: "forms" <*> r .: "forms"
instance ToNamedRecord CsvList where instance ToNamedRecord TsvList where
toNamedRecord (CsvList s l f) = toNamedRecord (TsvList s l f) =
namedRecord [ "status" .= s namedRecord [ "status" .= s
, "label" .= l , "label" .= l
, "forms" .= f , "forms" .= f
] ]
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance FromField CsvListType where instance FromField TsvListType where
parseField "map" = pure CsvMap parseField "map" = pure TsvMap
parseField "main" = pure CsvCandidate parseField "main" = pure TsvCandidate
parseField "candidate" = pure CsvCandidate -- backward compat parseField "candidate" = pure TsvCandidate -- backward compat
parseField "stop" = pure CsvStop parseField "stop" = pure TsvStop
parseField _ = mzero parseField _ = mzero
instance ToField CsvListType where instance ToField TsvListType where
toField CsvMap = "map" toField TsvMap = "map"
toField CsvCandidate = "main" toField TsvCandidate = "main"
toField CsvStop = "stop" toField TsvStop = "stop"
------------------------------------------------------------------------ ------------------------------------------------------------------------
csvDecodeOptions :: DecodeOptions tsvDecodeOptions :: DecodeOptions
csvDecodeOptions = (defaultDecodeOptions tsvDecodeOptions = (defaultDecodeOptions
{decDelimiter = fromIntegral $ ord csvListFieldDelimiter} {decDelimiter = fromIntegral $ ord tsvListFieldDelimiter}
) )
csvEncodeOptions :: EncodeOptions tsvEncodeOptions :: EncodeOptions
csvEncodeOptions = ( defaultEncodeOptions tsvEncodeOptions = ( defaultEncodeOptions
{encDelimiter = fromIntegral $ ord csvListFieldDelimiter} {encDelimiter = fromIntegral $ ord tsvListFieldDelimiter}
) )
------------------------------------------------------------------------ ------------------------------------------------------------------------
fromCsvListFile :: FilePath -> IO (Header, Vector CsvList) fromTsvListFile :: FilePath -> IO (Header, Vector TsvList)
fromCsvListFile fp = do fromTsvListFile fp = do
csvData <- BL.readFile fp tsvData <- BL.readFile fp
case decodeByNameWith csvDecodeOptions csvData of case decodeByNameWith tsvDecodeOptions tsvData of
Left e -> panicTrace (pack e) Left e -> panicTrace (pack e)
Right csvList -> pure csvList Right tsvList -> pure tsvList
------------------------------------------------------------------------ ------------------------------------------------------------------------
toCsvListFile :: FilePath -> (Header, Vector CsvList) -> IO () toTsvListFile :: FilePath -> (Header, Vector TsvList) -> IO ()
toCsvListFile fp (h, vs) = BL.writeFile fp $ toTsvListFile fp (h, vs) = BL.writeFile fp $
encodeByNameWith csvEncodeOptions h (V.toList vs) encodeByNameWith tsvEncodeOptions h (V.toList vs)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org ...@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
This search Engine is first made to clean CSV file according to a query. This search Engine is first made to clean TSV file according to a query.
Starting from this model, a specific Gargantext engine will be made Starting from this model, a specific Gargantext engine will be made
(using more metrics scores/features). (using more metrics scores/features).
...@@ -17,7 +17,7 @@ module Gargantext.Core.Text.Search where ...@@ -17,7 +17,7 @@ module Gargantext.Core.Text.Search where
import Data.Ix import Data.Ix
import Data.SearchEngine import Data.SearchEngine
import Gargantext.Core.Text.Corpus.Parsers.CSV import Gargantext.Core.Text.Corpus.Parsers.TSV
import Gargantext.Core.Text.Terms.Mono (monoTexts) import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Text.Terms.Mono.Stem as ST import Gargantext.Core.Text.Terms.Mono.Stem as ST
import Gargantext.Prelude import Gargantext.Prelude
...@@ -29,7 +29,7 @@ import Gargantext.Prelude ...@@ -29,7 +29,7 @@ import Gargantext.Prelude
type DocId = Int type DocId = Int
type DocSearchEngine = SearchEngine type DocSearchEngine = SearchEngine
CsvGargV3 TsvGargV3
DocId DocId
DocField DocField
NoFeatures NoFeatures
...@@ -42,7 +42,7 @@ initialDocSearchEngine :: DocSearchEngine ...@@ -42,7 +42,7 @@ initialDocSearchEngine :: DocSearchEngine
initialDocSearchEngine = initialDocSearchEngine =
initSearchEngine docSearchConfig defaultSearchRankParameters initSearchEngine docSearchConfig defaultSearchRankParameters
docSearchConfig :: SearchConfig CsvGargV3 DocId DocField NoFeatures docSearchConfig :: SearchConfig TsvGargV3 DocId DocField NoFeatures
docSearchConfig = docSearchConfig =
SearchConfig { SearchConfig {
documentKey = d_docId, documentKey = d_docId,
...@@ -51,7 +51,7 @@ docSearchConfig = ...@@ -51,7 +51,7 @@ docSearchConfig =
documentFeatureValue = const noFeatures documentFeatureValue = const noFeatures
} }
where where
extractTerms :: CsvGargV3 -> DocField -> [Text] extractTerms :: TsvGargV3 -> DocField -> [Text]
extractTerms doc TitleField = monoTexts (d_title doc) extractTerms doc TitleField = monoTexts (d_title doc)
extractTerms doc AbstractField = monoTexts (d_abstract doc) extractTerms doc AbstractField = monoTexts (d_abstract doc)
......
...@@ -13,7 +13,7 @@ where ...@@ -13,7 +13,7 @@ where
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.Encoding as TE import qualified Data.Text.Encoding as TE
import Gargantext.Utils.Servant (CSV, Markdown) import Gargantext.Utils.Servant (TSV, Markdown)
import Network.HTTP.Client (newManager, Request(..)) import Network.HTTP.Client (newManager, Request(..))
import Network.HTTP.Client.TLS (tlsManagerSettings) import Network.HTTP.Client.TLS (tlsManagerSettings)
import Protolude import Protolude
...@@ -26,10 +26,10 @@ newtype DocId = DocId { fromDocId :: Text } ...@@ -26,10 +26,10 @@ newtype DocId = DocId { fromDocId :: Text }
newtype Data = Data { fromData :: Text } newtype Data = Data { fromData :: Text }
data ContentType a = data ContentType a =
CTPlain a CTPlain a
| CTCSV a | CTTSV a
-- TODO SocialCalc, Excel XML ? -- TODO SocialCalc, Excel XML ?
instance MimeRender CSV Data where instance MimeRender TSV Data where
mimeRender p (Data d) = mimeRender p d mimeRender p (Data d) = mimeRender p d
instance MimeRender PlainText Data where instance MimeRender PlainText Data where
mimeRender p (Data d) = mimeRender p d mimeRender p (Data d) = mimeRender p d
...@@ -49,14 +49,14 @@ type EthercalcAPI = ...@@ -49,14 +49,14 @@ type EthercalcAPI =
:> ReqBody '[PlainText] Data :> ReqBody '[PlainText] Data
:> Put '[PlainText] Text :> Put '[PlainText] Text
-- csv -- tsv
:<|> :<|>
ReqBody '[CSV] Data ReqBody '[TSV] Data
:> Post '[PlainText, CSV] Text :> Post '[PlainText, TSV] Text
:<|> :<|>
Capture "docId" DocId Capture "docId" DocId
:> ReqBody '[CSV] Data :> ReqBody '[TSV] Data
:> Put '[PlainText, CSV] Text :> Put '[PlainText, TSV] Text
) )
ethercalcAPI :: Proxy EthercalcAPI ethercalcAPI :: Proxy EthercalcAPI
...@@ -64,16 +64,16 @@ ethercalcAPI = Proxy ...@@ -64,16 +64,16 @@ ethercalcAPI = Proxy
ethercalcNewPlain :: Data -> ClientM Text ethercalcNewPlain :: Data -> ClientM Text
ethercalcUpdatePlain :: DocId -> Data -> ClientM Text ethercalcUpdatePlain :: DocId -> Data -> ClientM Text
ethercalcNewCSV :: Data -> ClientM Text ethercalcNewTSV :: Data -> ClientM Text
ethercalcUpdateCSV :: DocId -> Data -> ClientM Text ethercalcUpdateTSV :: DocId -> Data -> ClientM Text
ethercalcNewPlain :<|> ethercalcUpdatePlain ethercalcNewPlain :<|> ethercalcUpdatePlain
:<|> ethercalcNewCSV :<|> ethercalcUpdateCSV = client ethercalcAPI :<|> ethercalcNewTSV :<|> ethercalcUpdateTSV = client ethercalcAPI
------------------------------ ------------------------------
-- | Create new or update existing Ethercalc document (depending on -- | Create new or update existing Ethercalc document (depending on
-- `Maybe DocId` constructor). `Data` can be in various formats (CSV, -- `Maybe DocId` constructor). `Data` can be in various formats (TSV,
-- etc). -- etc).
ethercalc :: Host -> Maybe DocId -> ContentType Data -> IO (Either ClientError Text) ethercalc :: Host -> Maybe DocId -> ContentType Data -> IO (Either ClientError Text)
ethercalc (Host host) mDocId ctD = do ethercalc (Host host) mDocId ctD = do
...@@ -81,9 +81,9 @@ ethercalc (Host host) mDocId ctD = do ...@@ -81,9 +81,9 @@ ethercalc (Host host) mDocId ctD = do
let env = mkClientEnv manager' (BaseUrl Https (T.unpack host) 443 "") let env = mkClientEnv manager' (BaseUrl Https (T.unpack host) 443 "")
case (mDocId, ctD) of case (mDocId, ctD) of
(Nothing, CTPlain d) -> runClientM (ethercalcNewPlain d) env (Nothing, CTPlain d) -> runClientM (ethercalcNewPlain d) env
(Nothing, CTCSV d) -> runClientM (ethercalcNewCSV d) env (Nothing, CTTSV d) -> runClientM (ethercalcNewTSV d) env
(Just docId, CTPlain d) -> runClientM (ethercalcUpdatePlain docId d) env (Just docId, CTPlain d) -> runClientM (ethercalcUpdatePlain docId d) env
(Just docId, CTCSV d) -> runClientM (ethercalcUpdateCSV docId d) env (Just docId, CTTSV d) -> runClientM (ethercalcUpdateTSV docId d) env
----------------------------------- -----------------------------------
......
...@@ -43,8 +43,8 @@ import Test.QuickCheck.Instances.Vector() ...@@ -43,8 +43,8 @@ import Test.QuickCheck.Instances.Vector()
data CorpusParser = data CorpusParser =
Wos {_wos_limit :: Int} Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int} | Tsv {_tsv_limit :: Int}
| Csv' {_csv'_limit :: Int} | Tsv' {_tsv'_limit :: Int}
deriving (Show,Generic,Eq, ToExpr) deriving (Show,Generic,Eq, ToExpr)
instance ToSchema CorpusParser where instance ToSchema CorpusParser where
...@@ -223,7 +223,7 @@ defaultConfig = ...@@ -223,7 +223,7 @@ defaultConfig =
PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
, listPath = "list.csv" -- useful for commandline only , listPath = "list.csv" -- useful for commandline only
, outputPath = "data/" , outputPath = "data/"
, corpusParser = Csv 150000 , corpusParser = Tsv 150000
, listParser = V4 , listParser = V4
, phyloName = pack "Phylo Name" , phyloName = pack "Phylo Name"
, phyloScale = 2 , phyloScale = 2
...@@ -725,8 +725,8 @@ instance Arbitrary PhyloConfig where ...@@ -725,8 +725,8 @@ instance Arbitrary PhyloConfig where
instance Arbitrary CorpusParser where instance Arbitrary CorpusParser where
arbitrary = oneof [ Wos <$> arbitrary arbitrary = oneof [ Wos <$> arbitrary
, Csv <$> arbitrary , Tsv <$> arbitrary
, Csv' <$> arbitrary , Tsv' <$> arbitrary
] ]
instance Arbitrary ListParser where instance Arbitrary ListParser where
......
...@@ -344,7 +344,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -344,7 +344,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
Fis _ _ -> Fis _ _ ->
let fis = parMap rpar (\(prd,docs) -> let fis = parMap rpar (\(prd,docs) ->
case (corpusParser $ getConfig phylo) of case (corpusParser $ getConfig phylo) of
Csv' _ -> let lst = toList Tsv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs) $ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst) in (prd, map (\f -> Clustering (Set.toList $ fst f) ((fst . snd) f) prd ((fst . snd . snd) f) (((snd . snd . snd) f))) lst)
_ -> let lst = toList _ -> let lst = toList
......
...@@ -70,8 +70,8 @@ getHyperdataFrameContents (HyperdataFrame { _hf_base, _hf_frame_id }) = do ...@@ -70,8 +70,8 @@ getHyperdataFrameContents (HyperdataFrame { _hf_base, _hf_frame_id }) = do
r <- Wreq.get $ T.unpack path r <- Wreq.get $ T.unpack path
pure $ decodeUtf8 $ toStrict $ r ^. Wreq.responseBody pure $ decodeUtf8 $ toStrict $ r ^. Wreq.responseBody
getHyperdataFrameCSV :: HyperdataFrame -> IO Text getHyperdataFrameTSV :: HyperdataFrame -> IO Text
getHyperdataFrameCSV (HyperdataFrame { _hf_base, _hf_frame_id }) = do getHyperdataFrameTSV (HyperdataFrame { _hf_base, _hf_frame_id }) = do
let path = T.concat [_hf_base, "/", _hf_frame_id, ".csv"] let path = T.concat [_hf_base, "/", _hf_frame_id, ".csv"]
r <- Wreq.get $ T.unpack path r <- Wreq.get $ T.unpack path
pure $ decodeUtf8 $ toStrict $ r ^. Wreq.responseBody pure $ decodeUtf8 $ toStrict $ r ^. Wreq.responseBody
...@@ -69,7 +69,7 @@ parseGargJob s = case s of ...@@ -69,7 +69,7 @@ parseGargJob s = case s of
"tablengrams" -> Just TableNgramsJob "tablengrams" -> Just TableNgramsJob
"forgotpassword" -> Just ForgotPasswordJob "forgotpassword" -> Just ForgotPasswordJob
"updatengramslistjson" -> Just UpdateNgramsListJobJSON "updatengramslistjson" -> Just UpdateNgramsListJobJSON
"updatengramslistcsv" -> Just UpdateNgramsListJobCSV "updatengramslisttsv" -> Just UpdateNgramsListJobTSV
"addcontact" -> Just AddContactJob "addcontact" -> Just AddContactJob
"addfile" -> Just AddFileJob "addfile" -> Just AddFileJob
"documentfromwritenode" -> Just DocumentFromWriteNodeJob "documentfromwritenode" -> Just DocumentFromWriteNodeJob
......
...@@ -21,18 +21,18 @@ import Protolude.Partial (read) ...@@ -21,18 +21,18 @@ import Protolude.Partial (read)
import Servant ( Accept(contentType), MimeRender(..), MimeUnrender(mimeUnrender) ) import Servant ( Accept(contentType), MimeRender(..), MimeUnrender(mimeUnrender) )
data CSV = CSV data TSV = TSV
instance Accept CSV where instance Accept TSV where
contentType _ = "text" // "csv" /: ("charset", "utf-8") contentType _ = "text" // "csv" /: ("charset", "utf-8")
instance (DefaultOrdered a, ToNamedRecord a) => MimeRender CSV [a] where instance (DefaultOrdered a, ToNamedRecord a) => MimeRender TSV [a] where
mimeRender _ = encodeDefaultOrderedByName mimeRender _ = encodeDefaultOrderedByName
instance MimeRender CSV T.Text where instance MimeRender TSV T.Text where
mimeRender _ = BSC.fromStrict . TE.encodeUtf8 mimeRender _ = BSC.fromStrict . TE.encodeUtf8
instance Read a => MimeUnrender CSV a where instance Read a => MimeUnrender TSV a where
mimeUnrender _ bs = case BSC.take len bs of mimeUnrender _ bs = case BSC.take len bs of
"text/csv" -> pure . read . BSC.unpack $ BSC.drop len bs "text/csv" -> pure . read . BSC.unpack $ BSC.drop len bs
_ -> Left "didn't start with the magic incantation" _ -> Left "didn't start with the magic incantation"
...@@ -40,7 +40,7 @@ instance Read a => MimeUnrender CSV a where ...@@ -40,7 +40,7 @@ instance Read a => MimeUnrender CSV a where
len :: Int64 len :: Int64
len = fromIntegral $ length ("text/csv" :: Prelude.String) len = fromIntegral $ length ("text/csv" :: Prelude.String)
--instance ToNamedRecord a => MimeRender CSV [a] where --instance ToNamedRecord a => MimeRender TSV [a] where
-- mimeRender _ val = encode val -- mimeRender _ val = encode val
---------------------------- ----------------------------
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -1323,8 +1323,8 @@ ...@@ -1323,8 +1323,8 @@
"tag": "MaxClique" "tag": "MaxClique"
}, },
"corpusParser": { "corpusParser": {
"_csv_limit": 150000, "_tsv_limit": 150000,
"tag": "Csv" "tag": "Tsv"
}, },
"corpusPath": "corpus.csv", "corpusPath": "corpus.csv",
"defaultMode": false, "defaultMode": false,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -25,7 +25,7 @@ import Data.Text.IO qualified as TIO ...@@ -25,7 +25,7 @@ import Data.Text.IO qualified as TIO
import Fmt import Fmt
import Gargantext.API.Admin.Auth.Types (Token) import Gargantext.API.Admin.Auth.Types (Token)
import Gargantext.API.Ngrams qualified as APINgrams import Gargantext.API.Ngrams qualified as APINgrams
import Gargantext.API.Ngrams.List ( ngramsListFromCSVData ) import Gargantext.API.Ngrams.List ( ngramsListFromTSVData )
import Gargantext.API.Ngrams.Types ( MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTablePatch(..), NgramsTerm(..), Versioned(..), mSetToList, toNgramsPatch, ne_children, ne_ngrams, vc_data, _NgramsTable ) import Gargantext.API.Ngrams.Types ( MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTablePatch(..), NgramsTerm(..), Versioned(..), mSetToList, toNgramsPatch, ne_children, ne_ngrams, vc_data, _NgramsTable )
import Gargantext.Core.Text.Ngrams import Gargantext.Core.Text.Ngrams
import Gargantext.Core.Types ( CorpusId, ListId, ListType(..), NodeId, _NodeId ) import Gargantext.Core.Types ( CorpusId, ListId, ListType(..), NodeId, _NodeId )
...@@ -190,7 +190,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do ...@@ -190,7 +190,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
it "parses CSV via ngramsListFromCSVData" $ \((_testEnv, _port), _app) -> do it "parses CSV via ngramsListFromCSVData" $ \((_testEnv, _port), _app) -> do
simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.csv") simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.csv")
ngramsListFromCSVData simpleNgrams `shouldBe` ngramsListFromTSVData simpleNgrams `shouldBe`
Right (Map.fromList [ (NgramsTerms, Versioned 0 $ Map.fromList [ Right (Map.fromList [ (NgramsTerms, Versioned 0 $ Map.fromList [
(NgramsTerm "abelian group", NgramsRepoElement 1 MapTerm Nothing Nothing (MSet mempty)) (NgramsTerm "abelian group", NgramsRepoElement 1 MapTerm Nothing Nothing (MSet mempty))
, (NgramsTerm "brazorf", NgramsRepoElement 1 StopTerm Nothing Nothing (MSet mempty)) , (NgramsTerm "brazorf", NgramsRepoElement 1 StopTerm Nothing Nothing (MSet mempty))
......
...@@ -12,7 +12,7 @@ import Data.GraphViz.Attributes.Complete qualified as Graphviz ...@@ -12,7 +12,7 @@ import Data.GraphViz.Attributes.Complete qualified as Graphviz
import Data.Text.Lazy as TL import Data.Text.Lazy as TL
import Data.TreeDiff import Data.TreeDiff
import Data.Vector qualified as V import Data.Vector qualified as V
import Gargantext.Core.Text.List.Formats.CSV import Gargantext.Core.Text.List.Formats.TSV
import Gargantext.Core.Types.Phylo import Gargantext.Core.Types.Phylo
import Gargantext.Core.Viz.Phylo hiding (EdgeType(..)) import Gargantext.Core.Viz.Phylo hiding (EdgeType(..))
import Gargantext.Core.Viz.Phylo.API.Tools (readPhylo, phylo2dot2json) import Gargantext.Core.Viz.Phylo.API.Tools (readPhylo, phylo2dot2json)
...@@ -30,7 +30,7 @@ phyloTestConfig = PhyloConfig { ...@@ -30,7 +30,7 @@ phyloTestConfig = PhyloConfig {
corpusPath = "corpus.csv" corpusPath = "corpus.csv"
, listPath = "list.csv" , listPath = "list.csv"
, outputPath = "data/" , outputPath = "data/"
, corpusParser = Csv {_csv_limit = 150000} , corpusParser = Tsv {_tsv_limit = 150000}
, listParser = V4 , listParser = V4
, phyloName = "Phylo Name" , phyloName = "Phylo Name"
, phyloScale = 2 , phyloScale = 2
...@@ -79,8 +79,8 @@ testCleopatreWithoutLinkExpectedOutput = do ...@@ -79,8 +79,8 @@ testCleopatreWithoutLinkExpectedOutput = do
testNadalWithoutLinkExpectedOutput :: Assertion testNadalWithoutLinkExpectedOutput :: Assertion
testNadalWithoutLinkExpectedOutput = do testNadalWithoutLinkExpectedOutput = do
corpusPath' <- getDataFileName "test-data/phylo/nadal_docslist.golden.csv" corpusPath' <- getDataFileName "test-data/phylo/nadal_docslist.golden.tsv"
listPath' <- getDataFileName "test-data/phylo/nadal_ngramslist.golden.csv" listPath' <- getDataFileName "test-data/phylo/nadal_ngramslist.golden.tsv"
let config = phyloTestConfig { corpusPath = corpusPath' let config = phyloTestConfig { corpusPath = corpusPath'
, listPath = listPath' , listPath = listPath'
, listParser = V3 , listParser = V3
...@@ -97,11 +97,11 @@ testNadalWithoutLinkExpectedOutput = do ...@@ -97,11 +97,11 @@ testNadalWithoutLinkExpectedOutput = do
testSmallPhyloWithoutLinkExpectedOutput :: Assertion testSmallPhyloWithoutLinkExpectedOutput :: Assertion
testSmallPhyloWithoutLinkExpectedOutput = do testSmallPhyloWithoutLinkExpectedOutput = do
bpaConfig <- getDataFileName "bench-data/phylo/bpa-config.json" bpaConfig <- getDataFileName "bench-data/phylo/bpa-config.json"
corpusPath' <- getDataFileName "test-data/phylo/small_phylo_docslist.csv" corpusPath' <- getDataFileName "test-data/phylo/small_phylo_docslist.tsv"
listPath' <- getDataFileName "test-data/phylo/small_phylo_ngramslist.csv" listPath' <- getDataFileName "test-data/phylo/small_phylo_ngramslist.tsv"
(Right config) <- fmap (\pcfg -> pcfg { corpusPath = corpusPath' (Right config) <- fmap (\pcfg -> pcfg { corpusPath = corpusPath'
, listPath = listPath' , listPath = listPath'
}) <$> (eitherDecodeFileStrict' bpaConfig) }) <$> (JSON.eitherDecodeFileStrict' bpaConfig)
mapList <- fileToList (listParser config) (listPath config) mapList <- fileToList (listParser config) (listPath config)
corpus <- fileToDocsDefault (corpusParser config) corpus <- fileToDocsDefault (corpusParser config)
(corpusPath config) (corpusPath config)
...@@ -234,13 +234,13 @@ testRelComp_Connected = do ...@@ -234,13 +234,13 @@ testRelComp_Connected = do
testToPhyloDeterminism :: Assertion testToPhyloDeterminism :: Assertion
testToPhyloDeterminism = do testToPhyloDeterminism = do
corpusPath' <- getDataFileName "test-data/phylo/GarganText_DocsList-nodeId-187481.csv" corpusPath' <- getDataFileName "test-data/phylo/GarganText_DocsList-nodeId-187481.tsv"
listPath' <- getDataFileName "test-data/phylo/GarganText_NgramsList-187482.csv" listPath' <- getDataFileName "test-data/phylo/GarganText_NgramsList-187482.tsv"
let config = phyloTestConfig { corpusPath = corpusPath' let config = phyloTestConfig { corpusPath = corpusPath'
, listPath = listPath' , listPath = listPath'
, listParser = V3 , listParser = V3
} }
mapList <- csvMapTermList (listPath config) mapList <- tsvMapTermList (listPath config)
corpus <- fileToDocsDefault (corpusParser config) corpus <- fileToDocsDefault (corpusParser config)
(corpusPath config) (corpusPath config)
[Year 3 1 5,Month 3 1 5,Week 4 2 5] [Year 3 1 5,Month 3 1 5,Week 4 2 5]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment