Commit d2029ea7 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[API] CSV

parent 9ba39267
...@@ -59,7 +59,7 @@ import Gargantext.Ext.IMT (toSchoolName) ...@@ -59,7 +59,7 @@ import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile) import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.List (buildNgramsLists,StopSize(..)) import Gargantext.Text.List (buildNgramsLists,StopSize(..))
import Gargantext.Text.Parsers (parseDocs, FileFormat) import Gargantext.Text.Parsers (parseFile, FileFormat)
import Gargantext.Text.Terms (TermType(..), tt_lang) import Gargantext.Text.Terms (TermType(..), tt_lang)
import Gargantext.Text.Terms (extractTerms) import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
...@@ -114,7 +114,7 @@ flowCorpusFile :: FlowCmdM env ServantErr m ...@@ -114,7 +114,7 @@ flowCorpusFile :: FlowCmdM env ServantErr m
flowCorpusFile u n l la ff fp = do flowCorpusFile u n l la ff fp = do
docs <- liftIO ( splitEvery 500 docs <- liftIO ( splitEvery 500
<$> take l <$> take l
<$> parseDocs ff fp <$> parseFile ff fp
) )
flowCorpus u n la (map (map toHyperdataDocument) docs) flowCorpus u n la (map (map toHyperdataDocument) docs)
......
...@@ -15,16 +15,18 @@ Format Converter. ...@@ -15,16 +15,18 @@ Format Converter.
{-# LANGUAGE PackageImports #-} {-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Convert (risPress2csv) module Gargantext.Text.Convert (risPress2csvWrite)
where where
import System.FilePath (FilePath()) -- , takeExtension) import System.FilePath (FilePath()) -- , takeExtension)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Parsers.CSV (writeDocs2Csv) import Gargantext.Text.Parsers.CSV (writeDocs2Csv)
import Gargantext.Text.Parsers (parseDocs, FileFormat(..)) import Gargantext.Text.Parsers (parseFile, FileFormat(..))
risPress2csv :: FilePath -> IO () risPress2csvWrite :: FilePath -> IO ()
risPress2csv f = parseDocs RisPresse (f <> ".ris") risPress2csvWrite f = parseFile RisPresse (f <> ".ris")
>>= \hs -> writeDocs2Csv (f <> ".csv") hs >>= \hs -> writeDocs2Csv (f <> ".csv") hs
...@@ -22,7 +22,7 @@ please follow the types. ...@@ -22,7 +22,7 @@ please follow the types.
{-# LANGUAGE PackageImports #-} {-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers (parse, FileFormat(..), clean, parseDocs) module Gargantext.Text.Parsers (parse, FileFormat(..), clean, parseFile)
where where
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries) import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
...@@ -84,11 +84,11 @@ data FileFormat = WOS | RIS | RisPresse ...@@ -84,11 +84,11 @@ data FileFormat = WOS | RIS | RisPresse
-- | Parse file into documents -- | Parse file into documents
-- TODO manage errors here -- TODO manage errors here
parseDocs :: FileFormat -> FilePath -> IO [HyperdataDocument] parseFile :: FileFormat -> FilePath -> IO [HyperdataDocument]
parseDocs CsvHalFormat p = parseHal p parseFile CsvHalFormat p = parseHal p
parseDocs RisPresse p = join $ mapM (toDoc RIS) <$> snd <$> enrichWith presseEnrich <$> parse' RIS p parseFile RisPresse p = join $ mapM (toDoc RIS) <$> snd <$> enrichWith presseEnrich <$> parse' RIS p
parseDocs WOS p = join $ mapM (toDoc WOS) <$> snd <$> enrichWith (map (first WOS.keys)) <$> parse' WOS p parseFile WOS p = join $ mapM (toDoc WOS) <$> snd <$> enrichWith (map (first WOS.keys)) <$> parse' WOS p
parseDocs ff p = join $ mapM (toDoc ff) <$> snd <$> parse ff p parseFile ff p = join $ mapM (toDoc ff) <$> snd <$> parse ff p
type Year = Int type Year = Int
type Month = Int type Month = Int
...@@ -182,4 +182,3 @@ clean txt = DBC.map clean' txt ...@@ -182,4 +182,3 @@ clean txt = DBC.map clean' txt
clean' '\r' = ' ' clean' '\r' = ' '
clean' c = c clean' c = c
...@@ -229,8 +229,7 @@ writeCsv fp (h, vs) = BL.writeFile fp $ ...@@ -229,8 +229,7 @@ writeCsv fp (h, vs) = BL.writeFile fp $
encodeByNameWith csvEncodeOptions h (V.toList vs) encodeByNameWith csvEncodeOptions h (V.toList vs)
writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO () writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO ()
writeDocs2Csv fp hs = BL.writeFile fp $ writeDocs2Csv fp hs = BL.writeFile fp $ hyperdataDocument2csv hs
encodeByNameWith csvEncodeOptions headerCsvGargV3 (map hyperdataDocument2csvDoc hs)
hyperdataDocument2csv :: [HyperdataDocument] -> BL.ByteString hyperdataDocument2csv :: [HyperdataDocument] -> BL.ByteString
hyperdataDocument2csv hs = encodeByNameWith csvEncodeOptions headerCsvGargV3 (map hyperdataDocument2csvDoc hs) hyperdataDocument2csv hs = encodeByNameWith csvEncodeOptions headerCsvGargV3 (map hyperdataDocument2csvDoc hs)
...@@ -346,4 +345,3 @@ parseHal :: FilePath -> IO [HyperdataDocument] ...@@ -346,4 +345,3 @@ parseHal :: FilePath -> IO [HyperdataDocument]
parseHal fp = map csvHal2doc <$> V.toList <$> snd <$> readHal fp parseHal fp = map csvHal2doc <$> V.toList <$> snd <$> readHal fp
------------------------------------------------------------------------ ------------------------------------------------------------------------
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment