Commit e4cbfa19 authored by Alexandre Delanoë

[FIX] Upload CSV + new Lang all

parent f84213ca
Pipeline #712 failed with stage
......@@ -43,7 +43,7 @@ import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Database.Types.Node (ToHyperdataDocument(..))
import Gargantext.Database.Types.Node (UserId)
import Gargantext.Prelude
import Gargantext.Text.Corpus.Parsers.CSV (parseCsv')--parseHal')--, parseCsv')
import Gargantext.Text.Corpus.Parsers.CSV (parseCsv', parseHal')
import Gargantext.Text.Terms (TermType(..))
import Servant
import Servant.API.Flatten (Flat)
......@@ -223,15 +223,17 @@ addToCorpusWithForm :: FlowCmdM env err m
-> m ScraperStatus
addToCorpusWithForm cid (WithForm ft d) logStatus = do
printDebug "ft" ft
parse = case ft of
Just CSV_HAL -> parseHal'
Just CSV -> parseCsv'
_ -> parseHal'
CSV_HAL -> parseHal'
CSV -> parseCsv'
_ -> parseHal'
docs = splitEvery 500
$ take 1000000
$ parseCsv' (cs d)
$ parse (cs d)
logStatus ScraperStatus { _scst_succeeded = Just 1
, _scst_failed = Just 0
......@@ -36,7 +36,6 @@ import qualified Gargantext.Text.Corpus.API.Istex as ISTEX
-- | Get External API metadata main function
get :: ExternalAPIs -> Query -> Maybe Limit -> IO [HyperdataDocument]
get All _ _ = undefined
get PubMed q l = PUBMED.get q l
......@@ -49,6 +48,7 @@ get IsTex_FR q l = ISTEX.get FR q l
get Isidore_EN q l = ISIDORE.get EN (fromIntegral <$> l) (Just q) Nothing
get Isidore_FR q l = ISIDORE.get FR (fromIntegral <$> l) (Just q) Nothing
get _ _ _ = undefined
-- | Some sugar for the documentation: alias for 'PUBMED.Query', used as the
-- query argument in the signature of 'get'.
type Query = PUBMED.Query
-- | Alias for 'PUBMED.Limit'; 'get' takes it wrapped in 'Maybe'.
type Limit = PUBMED.Limit
......@@ -84,7 +84,7 @@ parseDate' format def lang s = do
-- | Map a 'Lang' onto the 'DC.Lang' constructor of the same name.
--
-- Only 'FR' and 'EN' are mapped; every other 'Lang' value falls through to
-- the catch-all clause and aborts with 'panic'.
parserLang :: Lang -> DC.Lang
parserLang FR = DC.FR
parserLang EN = DC.EN
-- parserLang _ = panic "not implemented"
-- Catch-all clause: keeps the function's pattern match exhaustive for any
-- 'Lang' constructor other than 'FR' and 'EN', at the cost of a runtime panic.
parserLang _ = panic "not implemented"
-- | Final Date parser API
-- IO can be avoided here:
......@@ -117,6 +117,7 @@ detectLangDefault = detectCat 99 eventLang
-- | Select the sample text for a language.
--
-- 'EN' and 'FR' delegate to @EN.textSample@ and @FR.textSample@; any other
-- 'Lang' value reaches the catch-all clause and aborts with 'panic'.
textSample :: Lang -> String
textSample EN = EN.textSample
textSample FR = FR.textSample
textSample _ = panic "textSample: not impl yet"
-- Disabled clauses for further languages. Clauses are tried top to bottom, so
-- if any of these is re-enabled it must be moved above the catch-all clause,
-- otherwise it can never match.
--textSample DE = DE.textSample
--textSample SP = SP.textSample
--textSample CH = CH.textSample
......@@ -55,7 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
lang' = case lang of
EN -> N.English
FR -> N.French
--_ -> panic $ DT.pack "not implemented yet"
_ -> panic $ DT.pack "not implemented yet"
......@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
group :: Lang -> [TokenTag] -> [TokenTag]
group EN =
group FR =
-- group _ = panic $ pack "group :: Lang not implemeted yet"
group _ = panic $ pack "group :: Lang not implemeted yet"
......@@ -124,7 +124,7 @@ corenlp' lang txt = do
EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
-- _ -> panic $ pack "not implemented yet"
_ -> panic $ pack "not implemented yet"
url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
let request = setRequestBodyLBS (cs txt) url
httpJSON request
