Commit e4cbfa19 authored by Alexandre Delanoë

[FIX] Upload CSV + new Lang all

parent f84213ca
@@ -43,7 +43,7 @@ import Gargantext.Database.Types.Node (CorpusId)
 import Gargantext.Database.Types.Node (ToHyperdataDocument(..))
 import Gargantext.Database.Types.Node (UserId)
 import Gargantext.Prelude
-import Gargantext.Text.Corpus.Parsers.CSV (parseCsv')--parseHal')--, parseCsv')
+import Gargantext.Text.Corpus.Parsers.CSV (parseCsv', parseHal')
 import Gargantext.Text.Terms (TermType(..))
 import Servant
 import Servant.API.Flatten (Flat)
@@ -223,15 +223,17 @@ addToCorpusWithForm :: FlowCmdM env err m
                     -> m ScraperStatus
 addToCorpusWithForm cid (WithForm ft d) logStatus = do
+  printDebug "ft" ft
   let
     parse = case ft of
-      Just CSV_HAL -> parseHal'
-      Just CSV     -> parseCsv'
-      _            -> parseHal'
+      CSV_HAL -> parseHal'
+      CSV     -> parseCsv'
+      _       -> parseHal'
     docs = splitEvery 500
          $ take 1000000
-         $ parseCsv' (cs d)
+         $ parse (cs d)
   logStatus ScraperStatus { _scst_succeeded = Just 1
                           , _scst_failed    = Just 0
...
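The fix in the second hunk is easy to miss: parse was already selected from the form's file type, but the pipeline still called parseCsv' unconditionally, so CSV_HAL uploads went through the CSV parser. A minimal, self-contained sketch of the corrected dispatch follows; FileType, Doc, the two parser stand-ins and chunksOf (playing the role of splitEvery) are illustrative, not the project's actual API.

module UploadDispatch where

import Data.List.Split (chunksOf)  -- stands in for splitEvery in the patch
import Data.Text (Text)
import qualified Data.Text as T

-- Illustrative stand-in for the project's file-type tag.
data FileType = CSV | CSV_HAL
  deriving (Show, Eq)

type Doc = Text

-- Stand-ins for parseCsv' and parseHal'; here they simply split on newlines.
parseCsvLike, parseHalLike :: Text -> [Doc]
parseCsvLike = T.lines
parseHalLike = T.lines

-- Mirrors the fixed bindings: choose the parser from the submitted file type
-- (instead of hard-coding the CSV parser), cap the document count, and split
-- the result into batches of 500.
docsFromForm :: FileType -> Text -> [[Doc]]
docsFromForm ft d = chunksOf 500 . take 1000000 $ parse d
  where
    parse = case ft of
      CSV_HAL -> parseHalLike
      CSV     -> parseCsvLike

With this shape the parser is chosen exactly once from ft, and the 500-document batching applies to whatever that parser returns.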
@@ -36,7 +36,6 @@ import qualified Gargantext.Text.Corpus.API.Istex as ISTEX
 -- | Get External API metadata main function
 get :: ExternalAPIs -> Query -> Maybe Limit -> IO [HyperdataDocument]
-get All _ _ = undefined
 get PubMed q l = PUBMED.get q l
@@ -49,6 +48,7 @@ get IsTex_FR q l = ISTEX.get FR q l
 get Isidore_EN q l = ISIDORE.get EN (fromIntegral <$> l) (Just q) Nothing
 get Isidore_FR q l = ISIDORE.get FR (fromIntegral <$> l) (Just q) Nothing
+get _ _ _ = undefined
 -- | Some Sugar for the documentation
 type Query = PUBMED.Query
 type Limit = PUBMED.Limit
...
@@ -84,7 +84,7 @@ parseDate' format def lang s = do
 parserLang :: Lang -> DC.Lang
 parserLang FR = DC.FR
 parserLang EN = DC.EN
--- parserLang _ = panic "not implemented"
+parserLang _ = panic "not implemented"
 -- | Final Date parser API
 -- IO can be avoided here:
...
@@ -117,6 +117,7 @@ detectLangDefault = detectCat 99 eventLang
 textSample :: Lang -> String
 textSample EN = EN.textSample
 textSample FR = FR.textSample
+textSample _ = panic "textSample: not impl yet"
 --textSample DE = DE.textSample
 --textSample SP = SP.textSample
 --textSample CH = CH.textSample
...
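This hunk, the parserLang one above it, and the three below all apply the same pattern: a Lang-indexed function gains an explicit wildcard clause that calls panic, so the module keeps compiling when new Lang values are added (cf. the commit message's "new Lang all") and fails with a readable message instead of an inexhaustive-pattern error. A minimal sketch of the pattern, assuming a Protolude-style panic :: Text -> a and an illustrative constructor set:

module LangTotal where

import Data.Text (Text)
import qualified Data.Text as T

-- Illustrative constructor set; the project's Lang may list other languages.
data Lang = EN | FR | All
  deriving (Show, Eq)

-- Stand-in for the panic used in the project: fail loudly with a message
-- instead of an inexhaustive-pattern error.
panic :: Text -> a
panic = error . T.unpack

textSample :: Lang -> String
textSample EN = "Some English sample text."
textSample FR = "Un exemple de texte en français."
textSample _  = panic (T.pack "textSample: not impl yet")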
@@ -55,7 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
     lang' = case lang of
       EN -> N.English
       FR -> N.French
-      --_ -> panic $ DT.pack "not implemented yet"
+      _ -> panic $ DT.pack "not implemented yet"
@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
 group :: Lang -> [TokenTag] -> [TokenTag]
 group EN = En.group
 group FR = Fr.group
--- group _ = panic $ pack "group :: Lang not implemeted yet"
+group _ = panic $ pack "group :: Lang not implemeted yet"
@@ -124,7 +124,7 @@ corenlp' lang txt = do
         EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
         -- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
         FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
--- _ -> panic $ pack "not implemented yet"
+        _ -> panic $ pack "not implemented yet"
   url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
   let request = setRequestBodyLBS (cs txt) url
   httpJSON request
...
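For reference, the request-building tail of this hunk can be exercised on its own. The sketch below assumes http-conduit's Network.HTTP.Simple for parseRequest, setRequestBodyLBS and httpJSON, a CoreNLP server listening on localhost:9000, and a plain Aeson Value as the decoded result; only the call shape and the EN properties string come from the diff itself.

{-# LANGUAGE OverloadedStrings #-}
module CoreNLPClient where

import Data.Aeson (Value)
import qualified Data.ByteString.Lazy.Char8 as LBS
import Network.HTTP.Simple (getResponseBody, httpJSON, parseRequest, setRequestBodyLBS)

-- POST raw text to a local CoreNLP server, passing the annotator properties
-- in the query string, mirroring the call shape of corenlp' above. Decoding
-- into an Aeson Value is an assumption; the real code targets project types.
annotate :: String -> LBS.ByteString -> IO Value
annotate properties txt = do
  url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
  let request = setRequestBodyLBS txt url
  getResponseBody <$> httpJSON request

main :: IO ()
main = do
  -- The EN properties string from the hunk above.
  let propsEN = "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
  annotate propsEN "John loves Mary." >>= print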