Commit a43c33d0 authored by qlobbe's avatar qlobbe

Merge branch 'dev-phylo' of https://gitlab.iscpif.fr/gargantext/haskell-gargantext into dev-phylo

parents c77a8b8d 8873a848
Pipeline #2789 failed with stage
in 99 minutes and 1 second
## Version 0.0.5.6.7
* [BACK] fix limit with MAX_DOCS_SCRAPERS
* [FEAT] Users Password Sugar function : in repl, runCmdReplEasy $ updateUsersPassword ["user@mail.com"]
## Version 0.0.5.6.6
* [BACK] CSV List post and reindex after (for both CSV and JSON)
## Version 0.0.5.6.5
* [BACK] HAL parser with Conduit
## Version 0.0.5.6.4
* [FRONT] Forest Tooltip + Async progress bar fix
## Version 0.0.5.6.3 ## Version 0.0.5.6.3
* [BACK][EXPORT][GEXF] node size * [BACK][EXPORT][GEXF] node size
......
module Auth where module Auth where
import Prelude
import Data.Maybe
import Core import Core
import Options import Options
......
module Core (problem, whenVerbose) where module Core (problem, whenVerbose) where
import Prelude
import Options import Options
import Options.Generic import Options.Generic
......
...@@ -2,11 +2,11 @@ module Main where ...@@ -2,11 +2,11 @@ module Main where
import Control.Monad import Control.Monad
import Network.HTTP.Client import Network.HTTP.Client
import Options.Generic
import Servant.Client
import Options import Options
import Options.Generic
import Prelude
import Script (script) import Script (script)
import Servant.Client
main :: IO () main :: IO ()
main = do main = do
......
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
module Options where module Options where
import Prelude
import Options.Generic import Options.Generic
-- | Some general options to be specified on the command line. -- | Some general options to be specified on the command line.
......
module Script (script) where module Script (script) where
import Control.Monad.IO.Class
import Gargantext.API.Client
import Servant.Client
import Auth import Auth
import Control.Monad.IO.Class
import Core import Core
import Gargantext.API.Client
import Options import Options
import Prelude
import Servant.Client
import Tracking import Tracking
-- | An example script. Tweak, rebuild and re-run the executable to see the -- | An example script. Tweak, rebuild and re-run the executable to see the
......
...@@ -8,6 +8,7 @@ module Tracking ...@@ -8,6 +8,7 @@ module Tracking
import Core import Core
import Options import Options
import Prelude
import Control.Monad.IO.Class import Control.Monad.IO.Class
import Data.List (intersperse) import Data.List (intersperse)
......
...@@ -17,10 +17,11 @@ module Main where ...@@ -17,10 +17,11 @@ module Main where
import Control.Exception (finally) import Control.Exception (finally)
import Data.Either import Data.Either
import Data.Maybe (Maybe(..))
import Data.Text (Text) import Data.Text (Text)
import Prelude (read)
import System.Environment (getArgs) import System.Environment (getArgs)
import qualified Data.Text as Text import qualified Data.Text as Text
import Text.Read (readMaybe)
import Gargantext.API.Dev (withDevEnv, runCmdDev) import Gargantext.API.Dev (withDevEnv, runCmdDev)
import Gargantext.API.Admin.EnvTypes (DevEnv(..)) import Gargantext.API.Admin.EnvTypes (DevEnv(..))
...@@ -46,11 +47,14 @@ main = do ...@@ -46,11 +47,14 @@ main = do
--tt = (Unsupervised EN 6 0 Nothing) --tt = (Unsupervised EN 6 0 Nothing)
tt = (Multi EN) tt = (Multi EN)
format = CsvGargV3 -- CsvHal --WOS format = CsvGargV3 -- CsvHal --WOS
limit' = case (readMaybe limit :: Maybe Int) of
Nothing -> panic $ "Cannot read limit: " <> (Text.pack limit)
Just l -> l
corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath Nothing (\_ -> pure ()) corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt format corpusPath Nothing (\_ -> pure ())
corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath Nothing (\_ -> pure ()) corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt CsvHal corpusPath Nothing (\_ -> pure ())
annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath (\_ -> pure ()) annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath (\_ -> pure ())
......
...@@ -16,16 +16,17 @@ HOST=$(getter "DB_HOST") ...@@ -16,16 +16,17 @@ HOST=$(getter "DB_HOST")
PORT=$(getter "DB_PORT") PORT=$(getter "DB_PORT")
#psql -c "CREATE USER \"${USER}\"" psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'" psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
psql -c "DROP DATABASE IF EXISTS \"${NAME}\"" psql -c "DROP DATABASE IF EXISTS \"${NAME}\""
createdb "${NAME}" createdb "${NAME}"
psql "${NAME}" < extensions.sql psql "${NAME}" < extensions.sql
#psql "${NAME}" < schema.sql
# if new
#psql "${NAME}" < schema.sql
#../../bin/psql ../../gargantext.ini < gargandb.dump psql ${NAME} < $2
psql -c "ALTER DATABASE \"${NAME}\" OWNER to \"${USER}\"" psql -c "ALTER DATABASE \"${NAME}\" OWNER to \"${USER}\""
......
name: gargantext name: gargantext
version: '0.0.5.6.3' version: '0.0.5.6.7'
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -57,6 +57,7 @@ library: ...@@ -57,6 +57,7 @@ library:
- Gargantext.API.Ngrams.Prelude - Gargantext.API.Ngrams.Prelude
- Gargantext.API.Admin.Settings - Gargantext.API.Admin.Settings
- Gargantext.API.Admin.EnvTypes - Gargantext.API.Admin.EnvTypes
- Gargantext.API.Admin.Auth.Types
- Gargantext.API.Admin.Types - Gargantext.API.Admin.Types
- Gargantext.API.Prelude - Gargantext.API.Prelude
- Gargantext.API.Client - Gargantext.API.Client
...@@ -327,6 +328,42 @@ executables: ...@@ -327,6 +328,42 @@ executables:
- unordered-containers - unordered-containers
- full-text-search - full-text-search
gargantext-client:
main: Main.hs
source-dirs: bin/gargantext-client
ghc-options:
- -Wall
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
default-extensions:
- DataKinds
- DeriveGeneric
- FlexibleContexts
- FlexibleInstances
- GeneralizedNewtypeDeriving
- MultiParamTypeClasses
- NamedFieldPuns
- NoImplicitPrelude
- OverloadedStrings
- RankNTypes
- RecordWildCards
dependencies:
- base
- extra
- servant
- text
- optparse-generic
- exceptions
- servant-client
- servant-auth-client
- gargantext
- ekg-json
- http-client
gargantext-phylo: gargantext-phylo:
main: Main.hs main: Main.hs
source-dirs: bin/gargantext-phylo source-dirs: bin/gargantext-phylo
......
...@@ -20,16 +20,16 @@ import Data.Aeson ...@@ -20,16 +20,16 @@ import Data.Aeson
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Map (Map, toList) import Data.Map (Map, toList)
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes, fromMaybe)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text, concat, pack) import Data.Text (Text, concat, pack, splitOn)
import Data.Vector (Vector) import Data.Vector (Vector)
import Gargantext.API.Admin.Orchestrator.Types import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams (setListNgrams) import Gargantext.API.Ngrams (setListNgrams)
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.Tools (getTermsWith) import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Prelude (GargServer) import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Terms (ExtractedNgrams(..)) import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
...@@ -40,9 +40,11 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM) ...@@ -40,9 +40,11 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast') import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Types.Hyperdata.Document import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Context import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node (_node_parent_id)
import Gargantext.Database.Types (Indexed(..)) import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude import Gargantext.Prelude
import Network.HTTP.Media ((//), (/:)) import Network.HTTP.Media ((//), (/:))
...@@ -53,22 +55,12 @@ import qualified Data.Csv as Csv ...@@ -53,22 +55,12 @@ import qualified Data.Csv as Csv
import qualified Data.HashMap.Strict as HashMap import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vec import qualified Data.Vector as Vec
import qualified Prelude as Prelude import qualified Prelude as Prelude
import qualified Protolude as P import qualified Protolude as P
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO refactor
{-
type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|> PostAPI
:<|> CSVPostAPI
api :: ListId -> GargServer API
api l = get l :<|> postAsync l :<|> csvPostAsync l
-}
----------------------
type GETAPI = Summary "Get List" type GETAPI = Summary "Get List"
:> "lists" :> "lists"
:> Capture "listId" ListId :> Capture "listId" ListId
...@@ -122,11 +114,11 @@ get lId = do ...@@ -122,11 +114,11 @@ get lId = do
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO : purge list -- TODO : purge list
-- TODO talk -- TODO talk
post :: FlowCmdM env err m setList :: FlowCmdM env err m
=> ListId => ListId
-> NgramsList -> NgramsList
-> m Bool -> m Bool
post l m = do setList l m = do
-- TODO check with Version for optim -- TODO check with Version for optim
printDebug "New list as file" l printDebug "New list as file" l
_ <- mapM (\(nt, Versioned _v ns) -> setListNgrams l nt ns) $ toList m _ <- mapM (\(nt, Versioned _v ns) -> setListNgrams l nt ns) $ toList m
...@@ -150,12 +142,12 @@ reIndexWith cId lId nt lts = do ...@@ -150,12 +142,12 @@ reIndexWith cId lId nt lts = do
<$> HashMap.toList <$> HashMap.toList
<$> getTermsWith identity [lId] nt lts <$> getTermsWith identity [lId] nt lts
printDebug "ts" ts -- printDebug "ts" ts
-- Taking the ngrams with 0 occurrences only (orphans) -- Taking the ngrams with 0 occurrences only (orphans)
occs <- getOccByNgramsOnlyFast' cId lId nt ts occs <- getOccByNgramsOnlyFast' cId lId nt ts
printDebug "occs" occs -- printDebug "occs" occs
let orphans = List.concat let orphans = List.concat
$ map (\t -> case HashMap.lookup t occs of $ map (\t -> case HashMap.lookup t occs of
...@@ -163,11 +155,11 @@ reIndexWith cId lId nt lts = do ...@@ -163,11 +155,11 @@ reIndexWith cId lId nt lts = do
Just n -> if n <= 1 then [t] else [ ] Just n -> if n <= 1 then [t] else [ ]
) ts ) ts
printDebug "orphans" orphans -- printDebug "orphans" orphans
-- Get all documents of the corpus -- Get all documents of the corpus
docs <- selectDocNodes cId docs <- selectDocNodes cId
printDebug "docs length" (List.length docs) -- printDebug "docs length" (List.length docs)
-- Checking Text documents where orphans match -- Checking Text documents where orphans match
-- TODO Tests here -- TODO Tests here
...@@ -184,7 +176,7 @@ reIndexWith cId lId nt lts = do ...@@ -184,7 +176,7 @@ reIndexWith cId lId nt lts = do
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]]) (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
) docs ) docs
printDebug "ngramsByDoc" ngramsByDoc -- printDebug "ngramsByDoc" ngramsByDoc
-- Saving the indexation in database -- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc _ <- mapM (saveDocNgramsWith lId) ngramsByDoc
...@@ -210,7 +202,7 @@ postAsync lId = ...@@ -210,7 +202,7 @@ postAsync lId =
JobFunction (\f log' -> JobFunction (\f log' ->
let let
log'' x = do log'' x = do
printDebug "postAsync ListId" x -- printDebug "postAsync ListId" x
liftBase $ log' x liftBase $ log' x
in postAsync' lId f log'') in postAsync' lId f log'')
...@@ -223,20 +215,32 @@ postAsync' l (WithFile _ m _) logStatus = do ...@@ -223,20 +215,32 @@ postAsync' l (WithFile _ m _) logStatus = do
logStatus JobLog { _scst_succeeded = Just 0 logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 1 , _scst_remaining = Just 2
, _scst_events = Just [] , _scst_events = Just []
} }
printDebug "New list as file" l printDebug "New list as file" l
_ <- post l m _ <- setList l m
-- printDebug "Done" r -- printDebug "Done" r
pure JobLog { _scst_succeeded = Just 1 logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
corpus_node <- getNode l -- (Proxy :: Proxy HyperdataList)
let corpus_id = fromMaybe (panic "") (_node_parent_id corpus_node)
_ <- reIndexWith corpus_id l NgramsTerms (Set.fromList [MapTerm, CandidateTerm])
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 0 , _scst_remaining = Just 0
, _scst_events = Just [] , _scst_events = Just []
} }
------------------------------------------------------------------------
------------------------------------------------------------------------
type CSVPostAPI = Summary "Update List (legacy v3 CSV)" type CSVPostAPI = Summary "Update List (legacy v3 CSV)"
:> "csv" :> "csv"
:> "add" :> "add"
...@@ -257,12 +261,22 @@ readCsvText t = case eDec of ...@@ -257,12 +261,22 @@ readCsvText t = case eDec of
parseCsvData :: [(Text, Text, Text)] -> Map NgramsTerm NgramsRepoElement parseCsvData :: [(Text, Text, Text)] -> Map NgramsTerm NgramsRepoElement
parseCsvData lst = Map.fromList $ conv <$> lst parseCsvData lst = Map.fromList $ conv <$> lst
where where
conv (_status, label, _forms) = conv (status, label, forms) =
(NgramsTerm label, NgramsRepoElement { _nre_size = 1 (NgramsTerm label, NgramsRepoElement { _nre_size = 1
, _nre_list = CandidateTerm , _nre_list = case status == "map" of
True -> MapTerm
False -> case status == "main" of
True -> CandidateTerm
False -> StopTerm
, _nre_root = Nothing , _nre_root = Nothing
, _nre_parent = Nothing , _nre_parent = Nothing
, _nre_children = MSet Map.empty }) , _nre_children = MSet
$ Map.fromList
$ map (\form -> (NgramsTerm form, ()))
$ filter (/= "")
$ splitOn "|&|" forms
}
)
csvPost :: FlowCmdM env err m csvPost :: FlowCmdM env err m
=> ListId => ListId
...@@ -277,11 +291,14 @@ csvPost l m = do ...@@ -277,11 +291,14 @@ csvPost l m = do
--printDebug "[csvPost] lst" lst --printDebug "[csvPost] lst" lst
printDebug "[csvPost] p" p printDebug "[csvPost] p" p
_ <- setListNgrams l NgramsTerms p _ <- setListNgrams l NgramsTerms p
pure True printDebug "ReIndexing List" l
------------------------------------------------------------------------ corpus_node <- getNode l -- (Proxy :: Proxy HyperdataList)
let corpus_id = fromMaybe (panic "") (_node_parent_id corpus_node)
_ <- reIndexWith corpus_id l NgramsTerms (Set.fromList [MapTerm, CandidateTerm])
pure True
------------------------------------------------------------------------
csvPostAsync :: GargServer CSVAPI csvPostAsync :: GargServer CSVAPI
csvPostAsync lId = csvPostAsync lId =
serveJobsAPI $ serveJobsAPI $
......
...@@ -22,6 +22,7 @@ Portability : POSIX ...@@ -22,6 +22,7 @@ Portability : POSIX
module Gargantext.API.Node.Contact module Gargantext.API.Node.Contact
where where
import Conduit
import Data.Aeson import Data.Aeson
import Data.Either (Either(Right)) import Data.Either (Either(Right))
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..))
...@@ -93,7 +94,7 @@ addContact u nId (AddContactParams fn ln) logStatus = do ...@@ -93,7 +94,7 @@ addContact u nId (AddContactParams fn ln) logStatus = do
, _scst_remaining = Just 1 , _scst_remaining = Just 1
, _scst_events = Just [] , _scst_events = Just []
} }
_ <- flow (Nothing :: Maybe HyperdataAnnuaire) u (Right [nId]) (Multi EN) Nothing [[hyperdataContact fn ln]] logStatus _ <- flow (Nothing :: Maybe HyperdataAnnuaire) u (Right [nId]) (Multi EN) Nothing (Just 1, yield $ hyperdataContact fn ln) logStatus
pure JobLog { _scst_succeeded = Just 2 pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0 , _scst_failed = Just 0
......
...@@ -18,6 +18,8 @@ New corpus means either: ...@@ -18,6 +18,8 @@ New corpus means either:
module Gargantext.API.Node.Corpus.New module Gargantext.API.Node.Corpus.New
where where
import Conduit
import Control.Lens hiding (elements, Empty) import Control.Lens hiding (elements, Empty)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
...@@ -39,7 +41,7 @@ import Gargantext.Prelude ...@@ -39,7 +41,7 @@ import Gargantext.Prelude
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs, ScraperEvent(..), scst_events) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs, ScraperEvent(..), scst_events)
import Gargantext.API.Admin.Types (HasSettings) import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotal, jobLogFailTotalWithMessage) import Gargantext.API.Job (addEvent, jobLogSuccess, jobLogFailTotal, jobLogFailTotalWithMessage)
import Gargantext.API.Node.Corpus.New.File import Gargantext.API.Node.Corpus.New.File
import Gargantext.API.Node.Corpus.Searx import Gargantext.API.Node.Corpus.Searx
import Gargantext.API.Node.Corpus.Types import Gargantext.API.Node.Corpus.Types
...@@ -213,15 +215,20 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -213,15 +215,20 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- TODO if cid is folder -> create Corpus -- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus -- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private -- if cid is root -> create corpus in Private
txts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs] eTxts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs]
let lTxts = lefts eTxts
logStatus JobLog { _scst_succeeded = Just 2 case lTxts of
[] -> do
let txts = rights eTxts
-- TODO Sum lenghts of each txt elements
logStatus $ JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just $ 1 + length txts , _scst_remaining = Just $ 1 + length txts
, _scst_events = Just [] , _scst_events = Just []
} }
cids <- mapM (\txt -> flowDataText user txt (Multi l) cid Nothing logStatus) txts cids <- mapM (\txt -> do
flowDataText user txt (Multi l) cid Nothing logStatus) txts
printDebug "corpus id" cids printDebug "corpus id" cids
printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text) printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
sendMail user sendMail user
...@@ -232,6 +239,14 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -232,6 +239,14 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
, _scst_events = Just [] , _scst_events = Just []
} }
(err:_) -> do
pure $ addEvent "ERROR" (T.pack $ show err) $
JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 1
, _scst_remaining = Just 0
, _scst_events = Just []
}
type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint" type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
:> "corpus" :> "corpus"
...@@ -268,15 +283,16 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do ...@@ -268,15 +283,16 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
_ -> cs d _ -> cs d
eDocs <- liftBase $ parse data' eDocs <- liftBase $ parse data'
case eDocs of case eDocs of
Right docs' -> do Right docs -> do
-- TODO Add progress (jobStatus) update for docs - this is a -- TODO Add progress (jobStatus) update for docs - this is a
-- long action -- long action
limit' <- view $ hasConfig . gc_max_docs_parsers limit' <- view $ hasConfig . gc_max_docs_parsers
let limit = fromIntegral limit' let limit = fromIntegral limit'
if length docs' > limit then do if length docs > limit then do
printDebug "[addToCorpusWithForm] number of docs exceeds the limit" (show $ length docs') printDebug "[addToCorpusWithForm] number of docs exceeds the limit" (show $ length docs)
let panicMsg' = [ "[addToCorpusWithForm] number of docs (" let panicMsg' = [ "[addToCorpusWithForm] number of docs ("
, show $ length docs' , show $ length docs
, ") exceeds the MAX_DOCS_PARSERS limit (" , ") exceeds the MAX_DOCS_PARSERS limit ("
, show limit , show limit
, ")" ] , ")" ]
...@@ -285,7 +301,6 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do ...@@ -285,7 +301,6 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
panic panicMsg panic panicMsg
else else
pure () pure ()
let docs = splitEvery 500 $ take limit docs'
printDebug "Parsing corpus finished : " cid printDebug "Parsing corpus finished : " cid
logStatus jobLog2 logStatus jobLog2
...@@ -296,7 +311,8 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do ...@@ -296,7 +311,8 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
(Right [cid]) (Right [cid])
(Multi $ fromMaybe EN l) (Multi $ fromMaybe EN l)
Nothing Nothing
(map (map toHyperdataDocument) docs) (Just $ fromIntegral $ length docs, yieldMany docs .| mapC toHyperdataDocument)
--(map (map toHyperdataDocument) docs)
logStatus logStatus
printDebug "Extraction finished : " cid printDebug "Extraction finished : " cid
......
...@@ -69,21 +69,32 @@ api :: UserId -> NodeId -> GargServer API ...@@ -69,21 +69,32 @@ api :: UserId -> NodeId -> GargServer API
api uId nId = api uId nId =
serveJobsAPI $ serveJobsAPI $
JobFunction (\q log' -> do JobFunction (\q log' -> do
documentUpload uId nId q (liftBase . log') documentUploadAsync uId nId q (liftBase . log')
) )
documentUpload :: (FlowCmdM env err m) documentUploadAsync :: (FlowCmdM env err m)
=> UserId => UserId
-> NodeId -> NodeId
-> DocumentUpload -> DocumentUpload
-> (JobLog -> m ()) -> (JobLog -> m ())
-> m JobLog -> m JobLog
documentUpload _uId nId doc logStatus = do documentUploadAsync _uId nId doc logStatus = do
let jl = JobLog { _scst_succeeded = Just 0 let jl = JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 1 , _scst_remaining = Just 1
, _scst_events = Just [] } , _scst_events = Just [] }
logStatus jl logStatus jl
docIds <- documentUpload nId doc
printDebug "documentUploadAsync" docIds
pure $ jobLogSuccess jl
documentUpload :: (FlowCmdM env err m)
=> NodeId
-> DocumentUpload
-> m [DocId]
documentUpload nId doc = do
mcId <- getClosestParentIdByType' nId NodeCorpus mcId <- getClosestParentIdByType' nId NodeCorpus
let cId = case mcId of let cId = case mcId of
Just c -> c Just c -> c
...@@ -116,5 +127,6 @@ documentUpload _uId nId doc logStatus = do ...@@ -116,5 +127,6 @@ documentUpload _uId nId doc logStatus = do
docIds <- insertMasterDocs (Nothing :: Maybe HyperdataCorpus) (Multi EN) [hd] docIds <- insertMasterDocs (Nothing :: Maybe HyperdataCorpus) (Multi EN) [hd]
_ <- Doc.add cId docIds _ <- Doc.add cId docIds
pure docIds
pure $ jobLogSuccess jl
...@@ -16,6 +16,7 @@ Portability : POSIX ...@@ -16,6 +16,7 @@ Portability : POSIX
module Gargantext.API.Node.DocumentsFromWriteNodes module Gargantext.API.Node.DocumentsFromWriteNodes
where where
import Conduit
import Control.Lens ((^.)) import Control.Lens ((^.))
import Data.Aeson import Data.Aeson
import Data.Either (Either(..), rights) import Data.Either (Either(..), rights)
...@@ -100,7 +101,7 @@ documentsFromWriteNodes uId nId _p logStatus = do ...@@ -100,7 +101,7 @@ documentsFromWriteNodes uId nId _p logStatus = do
let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
let parsed = rights parsedE let parsed = rights parsedE
_ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing logStatus _ <- flowDataText (RootId (NodeId uId)) (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed)) (Multi EN) cId Nothing logStatus
pure $ jobLogSuccess jobLog pure $ jobLogSuccess jobLog
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
...@@ -36,7 +36,8 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM) ...@@ -36,7 +36,8 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore) import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Admin.Types.Hyperdata import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (defaultList, getNode, insertNodes, node) import Gargantext.Database.Query.Table.Node (defaultList, getNode)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms)) import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Database.Schema.Node (node_parent_id) import Gargantext.Database.Schema.Node (node_parent_id)
import Gargantext.Prelude (Bool(..), Ord, Eq, (<$>), ($), liftBase, (.), printDebug, pure, show, cs, (<>), panic, (<*>)) import Gargantext.Prelude (Bool(..), Ord, Eq, (<$>), ($), liftBase, (.), printDebug, pure, show, cs, (<>), panic, (<*>))
...@@ -191,7 +192,7 @@ updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do ...@@ -191,7 +192,7 @@ updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
, _scst_events = Just [] , _scst_events = Just []
} }
updateNode userId phyloId (UpdateNodePhylo config) logStatus = do updateNode _userId phyloId (UpdateNodePhylo config) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1 logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 2 , _scst_remaining = Just 2
...@@ -210,7 +211,7 @@ updateNode userId phyloId (UpdateNodePhylo config) logStatus = do ...@@ -210,7 +211,7 @@ updateNode userId phyloId (UpdateNodePhylo config) logStatus = do
, _scst_events = Just [] , _scst_events = Just []
} }
_phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId] _ <- updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy))
pure JobLog { _scst_succeeded = Just 3 pure JobLog { _scst_succeeded = Just 3
, _scst_failed = Just 0 , _scst_failed = Just 0
......
...@@ -18,6 +18,8 @@ module Gargantext.Core.Text.Corpus.API ...@@ -18,6 +18,8 @@ module Gargantext.Core.Text.Corpus.API
) )
where where
import Conduit
import Data.Either (Either(..))
import Data.Maybe import Data.Maybe
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs) import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
...@@ -27,21 +29,25 @@ import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL ...@@ -27,21 +29,25 @@ import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
import Servant.Client (ClientError)
-- | TODO put in gargantext.init
default_limit :: Maybe Integer
default_limit = Just 10000
-- | Get External API metadata main function -- | Get External API metadata main function
get :: ExternalAPIs get :: ExternalAPIs
-> Lang -> Lang
-> Query -> Query
-> Maybe Limit -> Maybe Limit
-> IO [HyperdataDocument] -- -> IO [HyperdataDocument]
get PubMed _la q _l = PUBMED.get q default_limit -- EN only by default -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get HAL la q _l = HAL.get la q default_limit get PubMed _la q limit = PUBMED.get q limit
get IsTex la q _l = ISTEX.get la q default_limit --docs <- PUBMED.get q default_limit -- EN only by default
get Isidore la q _l = ISIDORE.get la (fromIntegral <$> default_limit) (Just q) Nothing --pure (Just $ fromIntegral $ length docs, yieldMany docs)
get HAL la q limit = HAL.getC la q limit
get IsTex la q limit = do
docs <- ISTEX.get la q limit
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
get Isidore la q limit = do
docs <- ISIDORE.get la (fromIntegral <$> limit) (Just q) Nothing
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
get _ _ _ _ = undefined get _ _ _ _ = undefined
-- | Some Sugar for the documentation -- | Some Sugar for the documentation
......
...@@ -12,8 +12,11 @@ Portability : POSIX ...@@ -12,8 +12,11 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Hal module Gargantext.Core.Text.Corpus.API.Hal
where where
import Conduit
import Data.Either
import Data.Maybe import Data.Maybe
import Data.Text (Text, pack, intercalate) import Data.Text (Text, pack, intercalate)
import Servant.Client (ClientError)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
...@@ -25,8 +28,16 @@ import qualified HAL.Doc.Corpus as HAL ...@@ -25,8 +28,16 @@ import qualified HAL.Doc.Corpus as HAL
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument] get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do get la q ml = do
docs <- HAL.getMetadataWith q (Just 0) (fromIntegral <$> ml) eDocs <- HAL.getMetadataWith q (Just 0) ml
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) docs either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs
getC :: Lang -> Text -> Maybe Integer -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = do
eRes <- HAL.getMetadataWithC q (Just 0) ml
pure $ (\(len, docsC) -> (len, docsC .| mapMC (toDoc' la))) <$> eRes
-- case eRes of
-- Left err -> panic $ pack $ show err
-- Right (len, docsC) -> pure (len, docsC .| mapMC (toDoc' la))
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
......
...@@ -13,9 +13,12 @@ Portability : POSIX ...@@ -13,9 +13,12 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Pubmed module Gargantext.Core.Text.Corpus.API.Pubmed
where where
import Conduit
import Data.Either (Either)
import Data.Maybe import Data.Maybe
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as Text import qualified Data.Text as Text
import Servant.Client (ClientError)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
...@@ -31,17 +34,21 @@ type Limit = PubMed.Limit ...@@ -31,17 +34,21 @@ type Limit = PubMed.Limit
-- | TODO put default pubmed query in gargantext.ini -- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs -- by default: 10K docs
get :: Query -> Maybe Limit -> IO [HyperdataDocument] get :: Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN)) get q l = do
<$> PubMed.getMetadataWith q l eRes <- PubMed.getMetadataWithC q l
pure $ (\(len, docsC) -> (len, docsC .| mapC (toDoc EN))) <$> eRes
--either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
-- <$> PubMed.getMetadataWithC q l
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus) toDoc l (PubMedDoc.PubMed { pubmed_id
(PubMedDoc.PubMedDate a y m d) , pubmed_article = PubMedDoc.PubMedArticle t j as aus
, pubmed_date = PubMedDoc.PubMedDate a y m d }
) = HyperdataDocument { _hd_bdd = Just "PubMed" ) = HyperdataDocument { _hd_bdd = Just "PubMed"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing , _hd_uniqId = Just $ Text.pack $ show pubmed_id
, _hd_uniqIdBdd = Nothing , _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = t , _hd_title = t
......
...@@ -34,7 +34,7 @@ import Gargantext.Prelude ...@@ -34,7 +34,7 @@ import Gargantext.Prelude
import Servant.API (FromHttpApiData(..), ToHttpApiData(..)) import Servant.API (FromHttpApiData(..), ToHttpApiData(..))
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Text.Read (read) import Text.Read (readMaybe)
type CorpusName = Text type CorpusName = Text
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -74,7 +74,11 @@ instance Semigroup ListType ...@@ -74,7 +74,11 @@ instance Semigroup ListType
instance FromHttpApiData ListType where instance FromHttpApiData ListType where
parseUrlPiece = Right . read . unpack parseUrlPiece s = Right s'
where
s' = case (readMaybe $ unpack s) of
Nothing -> panic $ "Cannot read url piece: " <> s
Just s'' -> s''
instance ToHttpApiData ListType where instance ToHttpApiData ListType where
toUrlPiece = pack . show toUrlPiece = pack . show
......
...@@ -203,23 +203,23 @@ data PhyloConfig = ...@@ -203,23 +203,23 @@ data PhyloConfig =
------------------------------------------------------------------------ ------------------------------------------------------------------------
data PhyloSubConfig = data PhyloSubConfig =
PhyloSubConfig { _sc_phyloProximity :: Proximity PhyloSubConfig { _sc_phyloProximity :: Double
, _sc_phyloSynchrony :: Synchrony , _sc_phyloSynchrony :: Double
, _sc_phyloQuality :: Quality , _sc_phyloQuality :: Double
, _sc_timeUnit :: TimeUnit , _sc_timeUnit :: TimeUnit
, _sc_clique :: Clique , _sc_clique :: Clique
, _sc_exportFilter :: [Filter] , _sc_exportFilter :: Double
} }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
subConfig2config :: PhyloSubConfig -> PhyloConfig subConfig2config :: PhyloSubConfig -> PhyloConfig
subConfig2config subConfig = defaultConfig { phyloProximity = _sc_phyloProximity subConfig subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard $ _sc_phyloProximity subConfig
, phyloSynchrony = _sc_phyloSynchrony subConfig , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = _sc_phyloQuality subConfig , phyloQuality = Quality (_sc_phyloQuality subConfig) 1
, timeUnit = _sc_timeUnit subConfig , timeUnit = _sc_timeUnit subConfig
, clique = _sc_clique subConfig , clique = _sc_clique subConfig
, exportFilter = _sc_exportFilter subConfig , exportFilter = [ByBranchSize $ _sc_exportFilter subConfig]
} }
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
...@@ -30,7 +30,8 @@ import Gargantext.Core.Viz.Phylo.Example (phyloExample) ...@@ -30,7 +30,8 @@ import Gargantext.Core.Viz.Phylo.Example (phyloExample)
import Gargantext.Core.Viz.Phylo.Legacy.LegacyMain import Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
import Gargantext.Database.Admin.Types.Hyperdata import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..)) import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
import Gargantext.Database.Query.Table.Node (insertNodes, node) import Gargantext.Database.Query.Table.Node (getClosestParentIdByType)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Prelude import Gargantext.Prelude
import Network.HTTP.Media ((//), (/:)) import Network.HTTP.Media ((//), (/:))
import Servant import Servant
...@@ -90,7 +91,10 @@ type GetPhylo = QueryParam "listId" ListId ...@@ -90,7 +91,10 @@ type GetPhylo = QueryParam "listId" ListId
-- Fix Filter parameters -- Fix Filter parameters
-- TODO fix parameters to default config that should be in Node -- TODO fix parameters to default config that should be in Node
getPhylo :: PhyloId -> GargServer GetPhylo getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo phyloId _lId _level _minSizeBranch = getPhyloDataJson phyloId getPhylo phyloId _lId _level _minSizeBranch = do
theData <- getPhyloDataJson phyloId
-- printDebug "getPhylo" theData
pure theData
getPhyloDataJson :: PhyloId -> GargNoServer Value getPhyloDataJson :: PhyloId -> GargNoServer Value
getPhyloDataJson phyloId = do getPhyloDataJson phyloId = do
...@@ -118,17 +122,19 @@ type PostPhylo = QueryParam "listId" ListId ...@@ -118,17 +122,19 @@ type PostPhylo = QueryParam "listId" ListId
-- :> ReqBody '[JSON] PhyloQueryBuild -- :> ReqBody '[JSON] PhyloQueryBuild
:> (Post '[JSON] NodeId) :> (Post '[JSON] NodeId)
postPhylo :: CorpusId -> UserId -> GargServer PostPhylo postPhylo :: PhyloId -> UserId -> GargServer PostPhylo
postPhylo corpusId userId _lId = do postPhylo phyloId _userId _lId = do
-- TODO get Reader settings -- TODO get Reader settings
-- s <- ask -- s <- ask
-- let -- let
-- _vrs = Just ("1" :: Text) -- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4") -- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q) -- _prm = initPhyloParam vrs sft (Just q)
phy <- flowPhyloAPI defaultConfig corpusId -- params corpusId <- getClosestParentIdByType phyloId NodeCorpus
phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId] phy <- flowPhyloAPI defaultConfig (fromMaybe (panic "[G.C.V.P.API] no corpus ID found") corpusId) -- params
pure $ NodeId (fromIntegral phyloId) -- phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
_ <- updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy))
pure phyloId
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | DELETE Phylo == delete a node -- | DELETE Phylo == delete a node
......
...@@ -9,7 +9,6 @@ Portability : POSIX ...@@ -9,7 +9,6 @@ Portability : POSIX
-} -}
module Gargantext.Core.Viz.Phylo.API.Tools module Gargantext.Core.Viz.Phylo.API.Tools
where where
......
...@@ -209,6 +209,7 @@ exportToDot phylo export = ...@@ -209,6 +209,7 @@ exportToDot phylo export =
graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))] graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
<> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio , Ratio FillRatio
-- , Ratio AutoRatio
, Style [SItem Filled []],Color [toWColor White]] , Style [SItem Filled []],Color [toWColor White]]
{-- home made attributes -} {-- home made attributes -}
<> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)) <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
......
...@@ -46,8 +46,10 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) ...@@ -46,8 +46,10 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
) )
where where
import Conduit
import Control.Lens ((^.), view, _Just, makeLenses) import Control.Lens ((^.), view, _Just, makeLenses)
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Conduit.Internal (zipSources)
import Data.Either import Data.Either
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable) import Data.Hashable (Hashable)
...@@ -60,6 +62,7 @@ import qualified Data.Text as T ...@@ -60,6 +62,7 @@ import qualified Data.Text as T
import Data.Traversable (traverse) import Data.Traversable (traverse)
import Data.Tuple.Extra (first, second) import Data.Tuple.Extra (first, second)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Servant.Client (ClientError)
import System.FilePath (FilePath) import System.FilePath (FilePath)
import qualified Data.HashMap.Strict as HashMap import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
...@@ -103,6 +106,7 @@ import Gargantext.Prelude ...@@ -103,6 +106,7 @@ import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash) import Gargantext.Prelude.Crypto.Hash (Hash)
import qualified Gargantext.Core.Text.Corpus.API as API import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Database.Query.Table.Node.Document.Add as Doc (add) import qualified Gargantext.Database.Query.Table.Node.Document.Add as Doc (add)
import qualified Prelude as Prelude
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Imports for upgrade function -- Imports for upgrade function
...@@ -127,7 +131,8 @@ allDataOrigins = map InternalOrigin API.externalAPIs ...@@ -127,7 +131,8 @@ allDataOrigins = map InternalOrigin API.externalAPIs
--------------- ---------------
data DataText = DataOld ![NodeId] data DataText = DataOld ![NodeId]
| DataNew ![[HyperdataDocument]] | DataNew !(Maybe Integer, ConduitT () HyperdataDocument IO ())
-- | DataNew ![[HyperdataDocument]]
-- TODO use the split parameter in config file -- TODO use the split parameter in config file
getDataText :: FlowCmdM env err m getDataText :: FlowCmdM env err m
...@@ -135,10 +140,10 @@ getDataText :: FlowCmdM env err m ...@@ -135,10 +140,10 @@ getDataText :: FlowCmdM env err m
-> TermType Lang -> TermType Lang
-> API.Query -> API.Query
-> Maybe API.Limit -> Maybe API.Limit
-> m DataText -> m (Either ClientError DataText)
getDataText (ExternalOrigin api) la q li = liftBase $ DataNew getDataText (ExternalOrigin api) la q li = liftBase $ do
<$> splitEvery 500 eRes <- API.get api (_tt_lang la) q li
<$> API.get api (_tt_lang la) q li pure $ DataNew <$> eRes
getDataText (InternalOrigin _) _la q _li = do getDataText (InternalOrigin _) _la q _li = do
(_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus (_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus
...@@ -146,10 +151,11 @@ getDataText (InternalOrigin _) _la q _li = do ...@@ -146,10 +151,11 @@ getDataText (InternalOrigin _) _la q _li = do
(Left "") (Left "")
(Nothing :: Maybe HyperdataCorpus) (Nothing :: Maybe HyperdataCorpus)
ids <- map fst <$> searchDocInDatabase cId (stemIt q) ids <- map fst <$> searchDocInDatabase cId (stemIt q)
pure $ DataOld ids pure $ Right $ DataOld ids
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
flowDataText :: ( FlowCmdM env err m flowDataText :: forall env err m.
( FlowCmdM env err m
) )
=> User => User
-> DataText -> DataText
...@@ -161,7 +167,8 @@ flowDataText :: ( FlowCmdM env err m ...@@ -161,7 +167,8 @@ flowDataText :: ( FlowCmdM env err m
flowDataText u (DataOld ids) tt cid mfslw _ = flowCorpusUser (_tt_lang tt) u (Right [cid]) corpusType ids mfslw flowDataText u (DataOld ids) tt cid mfslw _ = flowCorpusUser (_tt_lang tt) u (Right [cid]) corpusType ids mfslw
where where
corpusType = (Nothing :: Maybe HyperdataCorpus) corpusType = (Nothing :: Maybe HyperdataCorpus)
flowDataText u (DataNew txt) tt cid mfslw logStatus = flowCorpus u (Right [cid]) tt mfslw txt logStatus flowDataText u (DataNew (mLen, txtC)) tt cid mfslw logStatus =
flowCorpus u (Right [cid]) tt mfslw (mLen, (transPipe liftBase txtC)) logStatus
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO use proxy -- TODO use proxy
...@@ -173,8 +180,9 @@ flowAnnuaire :: (FlowCmdM env err m) ...@@ -173,8 +180,9 @@ flowAnnuaire :: (FlowCmdM env err m)
-> (JobLog -> m ()) -> (JobLog -> m ())
-> m AnnuaireId -> m AnnuaireId
flowAnnuaire u n l filePath logStatus = do flowAnnuaire u n l filePath logStatus = do
docs <- liftBase $ (( splitEvery 500 <$> readFile_Annuaire filePath) :: IO [[HyperdataContact]]) -- TODO Conduit for file
flow (Nothing :: Maybe HyperdataAnnuaire) u n l Nothing docs logStatus docs <- liftBase $ ((readFile_Annuaire filePath) :: IO [HyperdataContact])
flow (Nothing :: Maybe HyperdataAnnuaire) u n l Nothing (Just $ fromIntegral $ length docs, yieldMany docs) logStatus
------------------------------------------------------------------------ ------------------------------------------------------------------------
flowCorpusFile :: (FlowCmdM env err m) flowCorpusFile :: (FlowCmdM env err m)
...@@ -185,12 +193,13 @@ flowCorpusFile :: (FlowCmdM env err m) ...@@ -185,12 +193,13 @@ flowCorpusFile :: (FlowCmdM env err m)
-> Maybe FlowSocialListWith -> Maybe FlowSocialListWith
-> (JobLog -> m ()) -> (JobLog -> m ())
-> m CorpusId -> m CorpusId
flowCorpusFile u n l la ff fp mfslw logStatus = do flowCorpusFile u n _l la ff fp mfslw logStatus = do
eParsed <- liftBase $ parseFile ff fp eParsed <- liftBase $ parseFile ff fp
case eParsed of case eParsed of
Right parsed -> do Right parsed -> do
let docs = splitEvery 500 $ take l parsed flowCorpus u n la mfslw (Just $ fromIntegral $ length parsed, yieldMany parsed .| mapC toHyperdataDocument) logStatus
flowCorpus u n la mfslw (map (map toHyperdataDocument) docs) logStatus --let docs = splitEvery 500 $ take l parsed
--flowCorpus u n la mfslw (yieldMany $ map (map toHyperdataDocument) docs) logStatus
Left e -> panic $ "Error: " <> (T.pack e) Left e -> panic $ "Error: " <> (T.pack e)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -201,13 +210,14 @@ flowCorpus :: (FlowCmdM env err m, FlowCorpus a) ...@@ -201,13 +210,14 @@ flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
-> Either CorpusName [CorpusId] -> Either CorpusName [CorpusId]
-> TermType Lang -> TermType Lang
-> Maybe FlowSocialListWith -> Maybe FlowSocialListWith
-> [[a]] -> (Maybe Integer, ConduitT () a m ())
-> (JobLog -> m ()) -> (JobLog -> m ())
-> m CorpusId -> m CorpusId
flowCorpus = flow (Nothing :: Maybe HyperdataCorpus) flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)
flow :: ( FlowCmdM env err m flow :: forall env err m a c.
( FlowCmdM env err m
, FlowCorpus a , FlowCorpus a
, MkCorpus c , MkCorpus c
) )
...@@ -216,22 +226,39 @@ flow :: ( FlowCmdM env err m ...@@ -216,22 +226,39 @@ flow :: ( FlowCmdM env err m
-> Either CorpusName [CorpusId] -> Either CorpusName [CorpusId]
-> TermType Lang -> TermType Lang
-> Maybe FlowSocialListWith -> Maybe FlowSocialListWith
-> [[a]] -> (Maybe Integer, ConduitT () a m ())
-> (JobLog -> m ()) -> (JobLog -> m ())
-> m CorpusId -> m CorpusId
flow c u cn la mfslw docs logStatus = do flow c u cn la mfslw (mLength, docsC) logStatus = do
-- TODO if public insertMasterDocs else insertUserDocs -- TODO if public insertMasterDocs else insertUserDocs
ids <- traverse (\(idx, doc) -> do ids <- runConduit $
id <- insertMasterDocs c la doc zipSources (yieldMany [1..]) docsC
logStatus JobLog { _scst_succeeded = Just $ 1 + idx .| mapMC insertDoc
.| sinkList
-- ids <- traverse (\(idx, doc) -> do
-- id <- insertMasterDocs c la doc
-- logStatus JobLog { _scst_succeeded = Just $ 1 + idx
-- , _scst_failed = Just 0
-- , _scst_remaining = Just $ length docs - idx
-- , _scst_events = Just []
-- }
-- pure id
-- ) (zip [1..] docs)
flowCorpusUser (la ^. tt_lang) u cn c ids mfslw
where
insertDoc :: (Integer, a) -> m NodeId
insertDoc (idx, doc) = do
id <- insertMasterDocs c la [doc]
case mLength of
Nothing -> pure ()
Just len -> do
logStatus JobLog { _scst_succeeded = Just $ fromIntegral $ 1 + idx
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just $ length docs - idx , _scst_remaining = Just $ fromIntegral $ len - idx
, _scst_events = Just [] , _scst_events = Just []
} }
pure id pure $ Prelude.head id
) (zip [1..] docs)
flowCorpusUser (la ^. tt_lang) u cn c (concat ids) mfslw
...@@ -250,7 +277,7 @@ flowCorpusUser l user corpusName ctype ids mfslw = do ...@@ -250,7 +277,7 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
-- User Flow -- User Flow
(userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus user corpusName ctype (userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus user corpusName ctype
-- NodeTexts is first -- NodeTexts is first
_tId <- insertDefaultNode NodeTexts userCorpusId userId _tId <- insertDefaultNodeIfNotExists NodeTexts userCorpusId userId
-- printDebug "NodeTexts: " tId -- printDebug "NodeTexts: " tId
-- NodeList is second -- NodeList is second
...@@ -276,8 +303,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do ...@@ -276,8 +303,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
-- _ <- insertOccsUpdates userCorpusId mastListId -- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId -- printDebug "userListId" userListId
-- User Graph Flow -- User Graph Flow
_ <- insertDefaultNode NodeDashboard userCorpusId userId _ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId
_ <- insertDefaultNode NodeGraph userCorpusId userId _ <- insertDefaultNodeIfNotExists NodeGraph userCorpusId userId
--_ <- mkPhylo userCorpusId userId --_ <- mkPhylo userCorpusId userId
-- Annuaire Flow -- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId -- _ <- mkAnnuaire rootUserId userId
...@@ -322,7 +349,7 @@ saveDocNgramsWith :: ( FlowCmdM env err m) ...@@ -322,7 +349,7 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
-> m () -> m ()
saveDocNgramsWith lId mapNgramsDocs' = do saveDocNgramsWith lId mapNgramsDocs' = do
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs' terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
printDebug "terms2id" terms2id --printDebug "terms2id" terms2id
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs' let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
...@@ -331,7 +358,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do ...@@ -331,7 +358,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$ map (first _ngramsTerms . second Map.keys) $ map (first _ngramsTerms . second Map.keys)
$ HashMap.toList mapNgramsDocs $ HashMap.toList mapNgramsDocs
printDebug "saveDocNgramsWith" mapCgramsId --printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams -- insertDocNgrams
_return <- insertContextNodeNgrams2 _return <- insertContextNodeNgrams2
$ catMaybes [ ContextNodeNgrams2 <$> Just nId $ catMaybes [ ContextNodeNgrams2 <$> Just nId
...@@ -504,5 +531,3 @@ extractInsert docs = do ...@@ -504,5 +531,3 @@ extractInsert docs = do
documentsWithId documentsWithId
_ <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs' _ <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
pure () pure ()
...@@ -27,7 +27,7 @@ import Gargantext.Database.Query.Table.User ...@@ -27,7 +27,7 @@ import Gargantext.Database.Query.Table.User
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Pass.User (gargPass) import Gargantext.Prelude.Crypto.Pass.User (gargPass)
import Gargantext.Prelude.Mail.Types (MailConfig) import Gargantext.Prelude.Mail.Types (MailConfig)
------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
newUsers :: (CmdM env err m, MonadRandom m, HasNodeError err, HasMail env) newUsers :: (CmdM env err m, MonadRandom m, HasNodeError err, HasMail env)
=> [EmailAddress] -> m Int64 => [EmailAddress] -> m Int64
...@@ -35,6 +35,18 @@ newUsers us = do ...@@ -35,6 +35,18 @@ newUsers us = do
us' <- mapM newUserQuick us us' <- mapM newUserQuick us
config <- view $ mailSettings config <- view $ mailSettings
newUsers' config us' newUsers' config us'
------------------------------------------------------------------------
updateUsersPassword :: (CmdM env err m, MonadRandom m, HasNodeError err, HasMail env)
=> [EmailAddress] -> m Int64
updateUsersPassword us = do
us' <- mapM newUserQuick us
config <- view $ mailSettings
_ <- mapM (\u -> updateUser (SendEmail True) config u) us'
pure 1
------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
newUserQuick :: (MonadRandom m) newUserQuick :: (MonadRandom m)
=> Text -> m (NewUser GargPassword) => Text -> m (NewUser GargPassword)
...@@ -44,6 +56,7 @@ newUserQuick n = do ...@@ -44,6 +56,7 @@ newUserQuick n = do
Just (u', _m) -> u' Just (u', _m) -> u'
Nothing -> panic "[G.D.A.U.N.newUserQuick]: Email invalid" Nothing -> panic "[G.D.A.U.N.newUserQuick]: Email invalid"
pure (NewUser u n (GargPassword pass)) pure (NewUser u n (GargPassword pass))
------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | guessUserName -- | guessUserName
...@@ -68,7 +81,6 @@ newUsers' cfg us = do ...@@ -68,7 +81,6 @@ newUsers' cfg us = do
printDebug "newUsers'" us printDebug "newUsers'" us
pure r pure r
------------------------------------------------------------------------ ------------------------------------------------------------------------
updateUser :: HasNodeError err updateUser :: HasNodeError err
=> SendEmail -> MailConfig -> NewUser GargPassword -> Cmd err Int64 => SendEmail -> MailConfig -> NewUser GargPassword -> Cmd err Int64
updateUser (SendEmail send) cfg u = do updateUser (SendEmail send) cfg u = do
......
...@@ -24,7 +24,7 @@ import Data.ByteString.Char8 (hPutStrLn) ...@@ -24,7 +24,7 @@ import Data.ByteString.Char8 (hPutStrLn)
import Data.Either.Extra (Either) import Data.Either.Extra (Either)
import Data.Pool (Pool, withResource) import Data.Pool (Pool, withResource)
import Data.Profunctor.Product.Default (Default) import Data.Profunctor.Product.Default (Default)
import Data.Text (unpack, Text) import Data.Text (pack, unpack, Text)
import Data.Word (Word16) import Data.Word (Word16)
import Database.PostgreSQL.Simple (Connection, connect) import Database.PostgreSQL.Simple (Connection, connect)
import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError) import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError)
...@@ -36,7 +36,7 @@ import Opaleye (Unpackspec, showSql, FromFields, Select, runSelect, SqlJsonb, De ...@@ -36,7 +36,7 @@ import Opaleye (Unpackspec, showSql, FromFields, Select, runSelect, SqlJsonb, De
import Opaleye.Aggregate (countRows) import Opaleye.Aggregate (countRows)
import System.IO (FilePath) import System.IO (FilePath)
import System.IO (stderr) import System.IO (stderr)
import Text.Read (read) import Text.Read (readMaybe)
import qualified Data.ByteString as DB import qualified Data.ByteString as DB
import qualified Data.List as DL import qualified Data.List as DL
import qualified Database.PostgreSQL.Simple as PGS import qualified Database.PostgreSQL.Simple as PGS
...@@ -176,9 +176,13 @@ databaseParameters :: FilePath -> IO PGS.ConnectInfo ...@@ -176,9 +176,13 @@ databaseParameters :: FilePath -> IO PGS.ConnectInfo
databaseParameters fp = do databaseParameters fp = do
ini <- readIniFile' fp ini <- readIniFile' fp
let val' key = unpack $ val ini "database" key let val' key = unpack $ val ini "database" key
let dbPortRaw = val' "DB_PORT"
let dbPort = case (readMaybe dbPortRaw :: Maybe Word16) of
Nothing -> panic $ "DB_PORT incorrect: " <> (pack dbPortRaw)
Just d -> d
pure $ PGS.ConnectInfo { PGS.connectHost = val' "DB_HOST" pure $ PGS.ConnectInfo { PGS.connectHost = val' "DB_HOST"
, PGS.connectPort = read (val' "DB_PORT") :: Word16 , PGS.connectPort = dbPort
, PGS.connectUser = val' "DB_USER" , PGS.connectUser = val' "DB_USER"
, PGS.connectPassword = val' "DB_PASS" , PGS.connectPassword = val' "DB_PASS"
, PGS.connectDatabase = val' "DB_NAME" , PGS.connectDatabase = val' "DB_NAME"
......
...@@ -255,6 +255,14 @@ insertDefaultNode :: HasDBid NodeType ...@@ -255,6 +255,14 @@ insertDefaultNode :: HasDBid NodeType
=> NodeType -> ParentId -> UserId -> Cmd err [NodeId] => NodeType -> ParentId -> UserId -> Cmd err [NodeId]
insertDefaultNode nt p u = insertNode nt Nothing Nothing p u insertDefaultNode nt p u = insertNode nt Nothing Nothing p u
insertDefaultNodeIfNotExists :: HasDBid NodeType
=> NodeType -> ParentId -> UserId -> Cmd err [NodeId]
insertDefaultNodeIfNotExists nt p u = do
children <- getChildrenByType p nt
case children of
[] -> insertDefaultNode nt p u
xs -> pure xs
insertNode :: HasDBid NodeType insertNode :: HasDBid NodeType
=> NodeType -> Maybe Name -> Maybe DefaultHyperdata -> ParentId -> UserId -> Cmd err [NodeId] => NodeType -> Maybe Name -> Maybe DefaultHyperdata -> ParentId -> UserId -> Cmd err [NodeId]
insertNode nt n h p u = insertNodesR [nodeW nt n h p u] insertNode nt n h p u = insertNodesR [nodeW nt n h p u]
......
...@@ -69,11 +69,11 @@ extra-deps: ...@@ -69,11 +69,11 @@ extra-deps:
# External Data API connectors # External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: 9cdba6423decad5acfacb0f274212fd8723ce734 commit: 12cb16c391577bff4295e3dd1b126281d78037b4
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f commit: daeae80365250c4bd539f0a65e271f9aa37f731f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: 020f5f9b308f5c23c925aedf5fb11f8b4728fb19 commit: 3bf77f28d3dc71d2e8349cbf422a34cf4c23cd11
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment