Commit 51c8a407 authored by Grégoire Locqueville

Reorganize TSV parsing code

parent e4dfb4bd
Pipeline #6835 canceled with stages
...@@ -20,7 +20,7 @@ import Data.Tuple.Extra (both) ...@@ -20,7 +20,7 @@ import Data.Tuple.Extra (both)
import Data.Vector qualified as DV import Data.Vector qualified as DV
import GHC.Generics import GHC.Generics
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.TSV (readTSVFile, tsv_title, tsv_abstract, tsv_publication_year, fromMIntOrDec, defaultYear) import Gargantext.Core.Text.Corpus.Parsers.TSV (readTSVFile, unIntOrDec, tsv_title, tsv_abstract, tsv_publication_year, defaultYear)
import Gargantext.Core.Text.List.Formats.TSV (tsvMapTermList) import Gargantext.Core.Text.List.Formats.TSV (tsvMapTermList)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs) import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
import Gargantext.Core.Text.Terms.WithList ( Patterns, buildPatterns, extractTermsWithList ) import Gargantext.Core.Text.Terms.WithList ( Patterns, buildPatterns, extractTermsWithList )
...@@ -52,7 +52,7 @@ filterTermsAndCoocCLI (CorpusFile corpusFile) (TermListFile termListFile) (Outpu ...@@ -52,7 +52,7 @@ filterTermsAndCoocCLI (CorpusFile corpusFile) (TermListFile termListFile) (Outpu
Right cf -> do Right cf -> do
let corpus = DM.fromListWith (<>) let corpus = DM.fromListWith (<>)
. DV.toList . DV.toList
. DV.map (\n -> (fromMIntOrDec defaultYear $ tsv_publication_year n, [(tsv_title n) <> " " <> (tsv_abstract n)])) . DV.map (\n -> (maybe defaultYear unIntOrDec $ tsv_publication_year n, [(tsv_title n) <> " " <> (tsv_abstract n)]))
. snd $ cf . snd $ cf
-- termListMap :: [Text] -- termListMap :: [Text]
......
...@@ -82,8 +82,8 @@ tsvToDocs parser patterns time path = ...@@ -82,8 +82,8 @@ tsvToDocs parser patterns time path =
Wos _ -> Prelude.error "tsvToDocs: unimplemented" Wos _ -> Prelude.error "tsvToDocs: unimplemented"
Tsv limit -> Vector.toList Tsv limit -> Vector.toList
<$> Vector.take limit <$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (Tsv.fromMIntOrDec Tsv.defaultYear $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time) <$> Vector.map (\row -> Document (toPhyloDate (maybe Tsv.defaultYear Tsv.unIntOrDec $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
(toPhyloDate' (Tsv.fromMIntOrDec Tsv.defaultYear $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time) (toPhyloDate' (maybe Tsv.defaultYear Tsv.unIntOrDec $ tsv_publication_year row) (fromMaybe Tsv.defaultMonth $ tsv_publication_month row) (fromMaybe Tsv.defaultDay $ tsv_publication_day row) time)
(termsInText patterns $ (tsv_title row) <> " " <> (tsv_abstract row)) (termsInText patterns $ (tsv_title row) <> " " <> (tsv_abstract row))
Nothing Nothing
[] []
......
...@@ -22,7 +22,7 @@ import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header ) ...@@ -22,7 +22,7 @@ import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header )
import Data.Text qualified as T import Data.Text qualified as T
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Vector qualified as Vector import Data.Vector qualified as Vector
import Gargantext.Core.Text.Corpus.Parsers.TSV ( tsvDecodeOptions, ColumnDelimiter(Tab) ) import Gargantext.Core.Text.Corpus.Parsers.TSV (defaultDecodingOptionsWithDelimiter, ColumnDelimiter(Tab) )
import Gargantext.Database.Admin.Types.Hyperdata.Contact import Gargantext.Database.Admin.Types.Hyperdata.Contact
import Gargantext.Prelude import Gargantext.Prelude
import System.FilePath.Posix (takeExtension) import System.FilePath.Posix (takeExtension)
...@@ -119,7 +119,7 @@ readTSVFile_Annuaire' :: FilePath -> IO (Header, Vector IMTUser) ...@@ -119,7 +119,7 @@ readTSVFile_Annuaire' :: FilePath -> IO (Header, Vector IMTUser)
readTSVFile_Annuaire' = fmap readTsvHalLazyBS' . BL.readFile readTSVFile_Annuaire' = fmap readTsvHalLazyBS' . BL.readFile
where where
readTsvHalLazyBS' :: BL.ByteString -> (Header, Vector IMTUser) readTsvHalLazyBS' :: BL.ByteString -> (Header, Vector IMTUser)
readTsvHalLazyBS' bs = case decodeByNameWith (tsvDecodeOptions Tab) bs of readTsvHalLazyBS' bs = case decodeByNameWith (defaultDecodingOptionsWithDelimiter Tab) bs of
Left e -> panicTrace (cs e) Left e -> panicTrace (cs e)
Right rows -> rows Right rows -> rows
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment