Commit d2029ea7 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[API] CSV

parent 9ba39267
......@@ -59,7 +59,7 @@ import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Prelude
import Gargantext.Text.List (buildNgramsLists,StopSize(..))
import Gargantext.Text.Parsers (parseDocs, FileFormat)
import Gargantext.Text.Parsers (parseFile, FileFormat)
import Gargantext.Text.Terms (TermType(..), tt_lang)
import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
......@@ -114,7 +114,7 @@ flowCorpusFile :: FlowCmdM env ServantErr m
flowCorpusFile u n l la ff fp = do
docs <- liftIO ( splitEvery 500
<$> take l
<$> parseDocs ff fp
<$> parseFile ff fp
)
flowCorpus u n la (map (map toHyperdataDocument) docs)
......
......@@ -15,16 +15,18 @@ Format Converter.
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Convert (risPress2csv)
module Gargantext.Text.Convert (risPress2csvWrite)
where
import System.FilePath (FilePath()) -- , takeExtension)
import Gargantext.Prelude
import Gargantext.Text.Parsers.CSV (writeDocs2Csv)
import Gargantext.Text.Parsers (parseDocs, FileFormat(..))
import Gargantext.Text.Parsers (parseFile, FileFormat(..))
risPress2csv :: FilePath -> IO ()
risPress2csv f = parseDocs RisPresse (f <> ".ris")
risPress2csvWrite :: FilePath -> IO ()
risPress2csvWrite f = parseFile RisPresse (f <> ".ris")
>>= \hs -> writeDocs2Csv (f <> ".csv") hs
......@@ -22,7 +22,7 @@ please follow the types.
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers (parse, FileFormat(..), clean, parseDocs)
module Gargantext.Text.Parsers (parse, FileFormat(..), clean, parseFile)
where
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
......@@ -84,11 +84,11 @@ data FileFormat = WOS | RIS | RisPresse
-- | Parse file into documents
-- TODO manage errors here
parseDocs :: FileFormat -> FilePath -> IO [HyperdataDocument]
parseDocs CsvHalFormat p = parseHal p
parseDocs RisPresse p = join $ mapM (toDoc RIS) <$> snd <$> enrichWith presseEnrich <$> parse' RIS p
parseDocs WOS p = join $ mapM (toDoc WOS) <$> snd <$> enrichWith (map (first WOS.keys)) <$> parse' WOS p
parseDocs ff p = join $ mapM (toDoc ff) <$> snd <$> parse ff p
parseFile :: FileFormat -> FilePath -> IO [HyperdataDocument]
parseFile CsvHalFormat p = parseHal p
parseFile RisPresse p = join $ mapM (toDoc RIS) <$> snd <$> enrichWith presseEnrich <$> parse' RIS p
parseFile WOS p = join $ mapM (toDoc WOS) <$> snd <$> enrichWith (map (first WOS.keys)) <$> parse' WOS p
parseFile ff p = join $ mapM (toDoc ff) <$> snd <$> parse ff p
type Year = Int
type Month = Int
......@@ -182,4 +182,3 @@ clean txt = DBC.map clean' txt
clean' '\r' = ' '
clean' c = c
......@@ -229,8 +229,7 @@ writeCsv fp (h, vs) = BL.writeFile fp $
encodeByNameWith csvEncodeOptions h (V.toList vs)
writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO ()
writeDocs2Csv fp hs = BL.writeFile fp $
encodeByNameWith csvEncodeOptions headerCsvGargV3 (map hyperdataDocument2csvDoc hs)
writeDocs2Csv fp hs = BL.writeFile fp $ hyperdataDocument2csv hs
hyperdataDocument2csv :: [HyperdataDocument] -> BL.ByteString
hyperdataDocument2csv hs = encodeByNameWith csvEncodeOptions headerCsvGargV3 (map hyperdataDocument2csvDoc hs)
......@@ -346,4 +345,3 @@ parseHal :: FilePath -> IO [HyperdataDocument]
parseHal fp = map csvHal2doc <$> V.toList <$> snd <$> readHal fp
------------------------------------------------------------------------
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment