Commit c0afe078 authored by Grégoire Locqueville

Reorganize TSV parsing code

parent 6c591f21
......@@ -20,7 +20,7 @@ import Data.Tuple.Extra (both)
import Data.Vector qualified as DV
import GHC.Generics
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.TSV (readTSVFile, tsv_title, tsv_abstract, tsv_publication_year, fromMIntOrDec, defaultYear)
import Gargantext.Core.Text.Corpus.Parsers.TSV (readTSVFile, unIntOrDec, tsv_title, tsv_abstract, tsv_publication_year, defaultYear)
import Gargantext.Core.Text.List.Formats.TSV (tsvMapTermList)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
import Gargantext.Core.Text.Terms.WithList ( Patterns, buildPatterns, extractTermsWithList )
......@@ -52,7 +52,7 @@ filterTermsAndCoocCLI (CorpusFile corpusFile) (TermListFile termListFile) (Outpu
Right cf -> do
let corpus = DM.fromListWith (<>)
. DV.toList
. (\n -> (fromMIntOrDec defaultYear $ tsv_publication_year n, [(tsv_title n) <> " " <> (tsv_abstract n)]))
. (\n -> (maybe defaultYear unIntOrDec $ tsv_publication_year n, [(tsv_title n) <> " " <> (tsv_abstract n)]))
. snd $ cf
-- termListMap :: [Text]
......@@ -82,8 +82,8 @@ tsvToDocs parser patterns time path =
Wos _ -> Prelude.error "tsvToDocs: unimplemented"
Tsv limit -> Vector.toList
<$> Vector.take limit
<$> (\row -> Document (toPhyloDate (Tsv.fromMIntOrDec Tsv.defaultYear $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
(toPhyloDate' (Tsv.fromMIntOrDec Tsv.defaultYear $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
<$> (\row -> Document (toPhyloDate (maybe Tsv.defaultYear Tsv.unIntOrDec $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
(toPhyloDate' (maybe Tsv.defaultYear Tsv.unIntOrDec $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
(termsInText patterns $ (tsv_title row) <> " " <> (tsv_abstract row))
......@@ -22,7 +22,7 @@ import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header )
import Data.Text qualified as T
import Data.Vector (Vector)
import Data.Vector qualified as Vector
import Gargantext.Core.Text.Corpus.Parsers.TSV ( tsvDecodeOptions, ColumnDelimiter(Tab) )
import Gargantext.Core.Text.Corpus.Parsers.TSV (defaultDecodingOptionsWithDelimiter, ColumnDelimiter(Tab) )
import Gargantext.Database.Admin.Types.Hyperdata.Contact
import Gargantext.Prelude
import System.FilePath.Posix (takeExtension)
......@@ -119,7 +119,7 @@ readTSVFile_Annuaire' :: FilePath -> IO (Header, Vector IMTUser)
-- Read the file at the given path with BL.readFile and pass its contents
-- through readTsvHalLazyBS', yielding the decoded (Header, Vector IMTUser) pair in IO.
-- NOTE(review): BL is presumably Data.ByteString.Lazy (import not visible in this view) — confirm.
readTSVFile_Annuaire' = fmap readTsvHalLazyBS' . BL.readFile
-- Decode a ByteString as header-named records, using decoding options built
-- for the Tab column delimiter.
-- On success the (Header, Vector IMTUser) result of decodeByNameWith is returned unchanged;
-- on a decoding error the message is converted with `cs` and handed to panicTrace.
readTsvHalLazyBS' :: BL.ByteString -> (Header, Vector IMTUser)
-- NOTE(review): the next two lines look like the pre- and post-commit versions of the
-- same `case` head (this text is a diff view without +/- markers); presumably only the
-- second, using defaultDecodingOptionsWithDelimiter, remains after the commit — confirm
-- against the real file.
readTsvHalLazyBS' bs = case decodeByNameWith (tsvDecodeOptions Tab) bs of
readTsvHalLazyBS' bs = case decodeByNameWith (defaultDecodingOptionsWithDelimiter Tab) bs of
Left e -> panicTrace (cs e)
Right rows -> rows
