Commit eca1c790 authored by Alexandre Delanoë

[Bin.Phylo] Fix parsing: extracted terms are now returned as a list of Text.

parent e4d30996
Pipeline #356 failed with stage
...@@ -23,7 +23,7 @@ Phylo binaries ...@@ -23,7 +23,7 @@ Phylo binaries
module Main where module Main where
import Data.Aeson import Data.Aeson
import Data.Text (Text) import Data.Text (Text, unwords)
import GHC.Generics import GHC.Generics
import GHC.IO (FilePath) import GHC.IO (FilePath)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -60,7 +60,7 @@ filterTerms :: Patterns -> (a, Text) -> (a, [Text]) ...@@ -60,7 +60,7 @@ filterTerms :: Patterns -> (a, Text) -> (a, [Text])
filterTerms patterns (year', doc) = (year',termsInText patterns doc) filterTerms patterns (year', doc) = (year',termsInText patterns doc)
where where
termsInText :: Patterns -> Text -> [Text] termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = extractTermsWithList' pats txt termsInText pats txt = DL.nub $ DL.concat $ map (map unwords) $ extractTermsWithList pats txt
-- csvToCorpus :: Int -> FilePath -> IO (DM.Map Int [Text]) -- csvToCorpus :: Int -> FilePath -> IO (DM.Map Int [Text])
...@@ -71,6 +71,17 @@ csvToCorpus limit csv = DV.toList ...@@ -71,6 +71,17 @@ csvToCorpus limit csv = DV.toList
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n))) . DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> readCsv csv . snd <$> readCsv csv
-- | Path of the term-list file that 'parse' hands to @csvGraphTermList@.
type ListPath = FilePath
-- | Path of the corpus CSV file that 'parse' hands to @csvToCorpus@.
type CorpusPath = FilePath
-- | Integer limit that 'parse' forwards as the first argument of
-- @csvToCorpus@ (presumably a cap on the number of documents read —
-- TODO confirm against @csvToCorpus@).
type Limit = Int
-- | Load a corpus and a term list from disk and turn every corpus entry
-- into a 'Document'.
--
-- The corpus is read with @csvToCorpus@ (which receives the limit and the
-- corpus path) and the term list with @csvGraphTermList@.  Patterns built
-- from the term list by @buildPatterns@ are then applied to each corpus
-- entry through 'filterTerms'; the resulting pair becomes the two fields
-- of a 'Document'.
parse :: Limit -> CorpusPath -> ListPath -> IO [Document]
parse limit corpus liste = do
  entries  <- csvToCorpus limit corpus
  termList <- csvGraphTermList liste
  let patterns = buildPatterns termList
      -- Match the pair strictly, exactly like a lambda pattern would.
      toDocument entry =
        case filterTerms patterns entry of
          (year', terms) -> Document year' terms
  pure (map toDocument entries)
main :: IO () main :: IO ()
main = do main = do
...@@ -108,4 +119,3 @@ main = do ...@@ -108,4 +119,3 @@ main = do
L.writeFile outputPath $ encode corpusParsed L.writeFile outputPath $ encode corpusParsed
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment