Commit a43c33d0 authored by qlobbe's avatar qlobbe

Merge branch 'dev-phylo' of https://gitlab.iscpif.fr/gargantext/haskell-gargantext into dev-phylo

parents c77a8b8d 8873a848
Pipeline #2789 failed with stage
in 99 minutes and 1 second
## Version 0.0.5.6.7
* [BACK] fix limit with MAX_DOCS_SCRAPERS
* [FEAT] Users Password Sugar function : in repl, runCmdReplEasy $ updateUsersPassword ["user@mail.com"]
## Version 0.0.5.6.6
* [BACK] CSV List post and reindex after (for both CSV and JSON)
## Version 0.0.5.6.5
* [BACK] HAL parser with Conduit
## Version 0.0.5.6.4
* [FRONT] Forest Tooltip + Async progress bar fix
## Version 0.0.5.6.3
* [BACK][EXPORT][GEXF] node size
......
module Auth where
import Prelude
import Data.Maybe
import Core
import Options
......
module Core (problem, whenVerbose) where
import Prelude
import Options
import Options.Generic
......
......@@ -2,11 +2,11 @@ module Main where
import Control.Monad
import Network.HTTP.Client
import Options.Generic
import Servant.Client
import Options
import Options.Generic
import Prelude
import Script (script)
import Servant.Client
main :: IO ()
main = do
......
{-# LANGUAGE TypeOperators #-}
module Options where
import Prelude
import Options.Generic
-- | Some general options to be specified on the command line.
......
module Script (script) where
import Control.Monad.IO.Class
import Gargantext.API.Client
import Servant.Client
import Auth
import Control.Monad.IO.Class
import Core
import Gargantext.API.Client
import Options
import Prelude
import Servant.Client
import Tracking
-- | An example script. Tweak, rebuild and re-run the executable to see the
......
......@@ -8,6 +8,7 @@ module Tracking
import Core
import Options
import Prelude
import Control.Monad.IO.Class
import Data.List (intersperse)
......
......@@ -17,10 +17,11 @@ module Main where
import Control.Exception (finally)
import Data.Either
import Data.Maybe (Maybe(..))
import Data.Text (Text)
import Prelude (read)
import System.Environment (getArgs)
import qualified Data.Text as Text
import Text.Read (readMaybe)
import Gargantext.API.Dev (withDevEnv, runCmdDev)
import Gargantext.API.Admin.EnvTypes (DevEnv(..))
......@@ -46,11 +47,14 @@ main = do
--tt = (Unsupervised EN 6 0 Nothing)
tt = (Multi EN)
format = CsvGargV3 -- CsvHal --WOS
limit' = case (readMaybe limit :: Maybe Int) of
Nothing -> panic $ "Cannot read limit: " <> (Text.pack limit)
Just l -> l
corpus :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt format corpusPath Nothing (\_ -> pure ())
corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt format corpusPath Nothing (\_ -> pure ())
corpusCsvHal :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) (read limit :: Int) tt CsvHal corpusPath Nothing (\_ -> pure ())
corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt CsvHal corpusPath Nothing (\_ -> pure ())
annuaire :: forall m. FlowCmdM DevEnv GargError m => m CorpusId
annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath (\_ -> pure ())
......
......@@ -16,16 +16,17 @@ HOST=$(getter "DB_HOST")
PORT=$(getter "DB_PORT")
#psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
psql -c "CREATE USER \"${USER}\""
psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
psql -c "DROP DATABASE IF EXISTS \"${NAME}\""
createdb "${NAME}"
psql "${NAME}" < extensions.sql
#psql "${NAME}" < schema.sql
# if new
#psql "${NAME}" < schema.sql
#../../bin/psql ../../gargantext.ini < gargandb.dump
psql ${NAME} < $2
psql -c "ALTER DATABASE \"${NAME}\" OWNER to \"${USER}\""
......
name: gargantext
version: '0.0.5.6.3'
version: '0.0.5.6.7'
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -57,6 +57,7 @@ library:
- Gargantext.API.Ngrams.Prelude
- Gargantext.API.Admin.Settings
- Gargantext.API.Admin.EnvTypes
- Gargantext.API.Admin.Auth.Types
- Gargantext.API.Admin.Types
- Gargantext.API.Prelude
- Gargantext.API.Client
......@@ -327,6 +328,42 @@ executables:
- unordered-containers
- full-text-search
gargantext-client:
main: Main.hs
source-dirs: bin/gargantext-client
ghc-options:
- -Wall
- -threaded
- -rtsopts
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
default-extensions:
- DataKinds
- DeriveGeneric
- FlexibleContexts
- FlexibleInstances
- GeneralizedNewtypeDeriving
- MultiParamTypeClasses
- NamedFieldPuns
- NoImplicitPrelude
- OverloadedStrings
- RankNTypes
- RecordWildCards
dependencies:
- base
- extra
- servant
- text
- optparse-generic
- exceptions
- servant-client
- servant-auth-client
- gargantext
- ekg-json
- http-client
gargantext-phylo:
main: Main.hs
source-dirs: bin/gargantext-phylo
......
......@@ -20,16 +20,16 @@ import Data.Aeson
import Data.Either (Either(..))
import Data.HashMap.Strict (HashMap)
import Data.Map (Map, toList)
import Data.Maybe (catMaybes)
import Data.Maybe (catMaybes, fromMaybe)
import Data.Set (Set)
import Data.Text (Text, concat, pack)
import Data.Text (Text, concat, pack, splitOn)
import Data.Vector (Vector)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams (setListNgrams)
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
......@@ -40,9 +40,11 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node (_node_parent_id)
import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude
import Network.HTTP.Media ((//), (/:))
......@@ -53,22 +55,12 @@ import qualified Data.Csv as Csv
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Data.Vector as Vec
import qualified Prelude as Prelude
import qualified Protolude as P
------------------------------------------------------------------------
-- | TODO refactor
{-
type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|> PostAPI
:<|> CSVPostAPI
api :: ListId -> GargServer API
api l = get l :<|> postAsync l :<|> csvPostAsync l
-}
----------------------
type GETAPI = Summary "Get List"
:> "lists"
:> Capture "listId" ListId
......@@ -122,11 +114,11 @@ get lId = do
------------------------------------------------------------------------
-- TODO : purge list
-- TODO talk
post :: FlowCmdM env err m
setList :: FlowCmdM env err m
=> ListId
-> NgramsList
-> m Bool
post l m = do
setList l m = do
-- TODO check with Version for optim
printDebug "New list as file" l
_ <- mapM (\(nt, Versioned _v ns) -> setListNgrams l nt ns) $ toList m
......@@ -149,13 +141,13 @@ reIndexWith cId lId nt lts = do
<$> map (\(k,vs) -> k:vs)
<$> HashMap.toList
<$> getTermsWith identity [lId] nt lts
printDebug "ts" ts
-- printDebug "ts" ts
-- Taking the ngrams with 0 occurrences only (orphans)
occs <- getOccByNgramsOnlyFast' cId lId nt ts
printDebug "occs" occs
-- printDebug "occs" occs
let orphans = List.concat
$ map (\t -> case HashMap.lookup t occs of
......@@ -163,11 +155,11 @@ reIndexWith cId lId nt lts = do
Just n -> if n <= 1 then [t] else [ ]
) ts
printDebug "orphans" orphans
-- printDebug "orphans" orphans
-- Get all documents of the corpus
docs <- selectDocNodes cId
printDebug "docs length" (List.length docs)
-- printDebug "docs length" (List.length docs)
-- Checking Text documents where orphans match
-- TODO Tests here
......@@ -184,7 +176,7 @@ reIndexWith cId lId nt lts = do
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
) docs
printDebug "ngramsByDoc" ngramsByDoc
-- printDebug "ngramsByDoc" ngramsByDoc
-- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc
......@@ -210,7 +202,7 @@ postAsync lId =
JobFunction (\f log' ->
let
log'' x = do
printDebug "postAsync ListId" x
-- printDebug "postAsync ListId" x
liftBase $ log' x
in postAsync' lId f log'')
......@@ -223,20 +215,32 @@ postAsync' l (WithFile _ m _) logStatus = do
logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_remaining = Just 2
, _scst_events = Just []
}
printDebug "New list as file" l
_ <- post l m
_ <- setList l m
-- printDebug "Done" r
pure JobLog { _scst_succeeded = Just 1
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
corpus_node <- getNode l -- (Proxy :: Proxy HyperdataList)
let corpus_id = fromMaybe (panic "") (_node_parent_id corpus_node)
_ <- reIndexWith corpus_id l NgramsTerms (Set.fromList [MapTerm, CandidateTerm])
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
------------------------------------------------------------------------
------------------------------------------------------------------------
type CSVPostAPI = Summary "Update List (legacy v3 CSV)"
:> "csv"
:> "add"
......@@ -257,12 +261,22 @@ readCsvText t = case eDec of
parseCsvData :: [(Text, Text, Text)] -> Map NgramsTerm NgramsRepoElement
parseCsvData lst = Map.fromList $ conv <$> lst
where
conv (_status, label, _forms) =
conv (status, label, forms) =
(NgramsTerm label, NgramsRepoElement { _nre_size = 1
, _nre_list = CandidateTerm
, _nre_list = case status == "map" of
True -> MapTerm
False -> case status == "main" of
True -> CandidateTerm
False -> StopTerm
, _nre_root = Nothing
, _nre_parent = Nothing
, _nre_children = MSet Map.empty })
, _nre_children = MSet
$ Map.fromList
$ map (\form -> (NgramsTerm form, ()))
$ filter (/= "")
$ splitOn "|&|" forms
}
)
csvPost :: FlowCmdM env err m
=> ListId
......@@ -277,11 +291,14 @@ csvPost l m = do
--printDebug "[csvPost] lst" lst
printDebug "[csvPost] p" p
_ <- setListNgrams l NgramsTerms p
pure True
------------------------------------------------------------------------
printDebug "ReIndexing List" l
corpus_node <- getNode l -- (Proxy :: Proxy HyperdataList)
let corpus_id = fromMaybe (panic "") (_node_parent_id corpus_node)
_ <- reIndexWith corpus_id l NgramsTerms (Set.fromList [MapTerm, CandidateTerm])
pure True
------------------------------------------------------------------------
csvPostAsync :: GargServer CSVAPI
csvPostAsync lId =
serveJobsAPI $
......
......@@ -22,6 +22,7 @@ Portability : POSIX
module Gargantext.API.Node.Contact
where
import Conduit
import Data.Aeson
import Data.Either (Either(Right))
import Data.Maybe (Maybe(..))
......@@ -93,7 +94,7 @@ addContact u nId (AddContactParams fn ln) logStatus = do
, _scst_remaining = Just 1
, _scst_events = Just []
}
_ <- flow (Nothing :: Maybe HyperdataAnnuaire) u (Right [nId]) (Multi EN) Nothing [[hyperdataContact fn ln]] logStatus
_ <- flow (Nothing :: Maybe HyperdataAnnuaire) u (Right [nId]) (Multi EN) Nothing (Just 1, yield $ hyperdataContact fn ln) logStatus
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
......
......@@ -18,6 +18,8 @@ New corpus means either:
module Gargantext.API.Node.Corpus.New
where
import Conduit
import Control.Lens hiding (elements, Empty)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
......@@ -39,7 +41,7 @@ import Gargantext.Prelude
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs, ScraperEvent(..), scst_events)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotal, jobLogFailTotalWithMessage)
import Gargantext.API.Job (addEvent, jobLogSuccess, jobLogFailTotal, jobLogFailTotalWithMessage)
import Gargantext.API.Node.Corpus.New.File
import Gargantext.API.Node.Corpus.Searx
import Gargantext.API.Node.Corpus.Types
......@@ -213,24 +215,37 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs]
logStatus JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just $ 1 + length txts
, _scst_events = Just []
}
cids <- mapM (\txt -> flowDataText user txt (Multi l) cid Nothing logStatus) txts
printDebug "corpus id" cids
printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
sendMail user
-- TODO ...
pure JobLog { _scst_succeeded = Just 3
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
eTxts <- mapM (\db -> getDataText db (Multi l) q maybeLimit) [database2origin dbs]
let lTxts = lefts eTxts
case lTxts of
[] -> do
let txts = rights eTxts
-- TODO Sum lenghts of each txt elements
logStatus $ JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just $ 1 + length txts
, _scst_events = Just []
}
cids <- mapM (\txt -> do
flowDataText user txt (Multi l) cid Nothing logStatus) txts
printDebug "corpus id" cids
printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
sendMail user
-- TODO ...
pure JobLog { _scst_succeeded = Just 3
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
(err:_) -> do
pure $ addEvent "ERROR" (T.pack $ show err) $
JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 1
, _scst_remaining = Just 0
, _scst_events = Just []
}
type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
......@@ -268,15 +283,16 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
_ -> cs d
eDocs <- liftBase $ parse data'
case eDocs of
Right docs' -> do
Right docs -> do
-- TODO Add progress (jobStatus) update for docs - this is a
-- long action
limit' <- view $ hasConfig . gc_max_docs_parsers
let limit = fromIntegral limit'
if length docs' > limit then do
printDebug "[addToCorpusWithForm] number of docs exceeds the limit" (show $ length docs')
if length docs > limit then do
printDebug "[addToCorpusWithForm] number of docs exceeds the limit" (show $ length docs)
let panicMsg' = [ "[addToCorpusWithForm] number of docs ("
, show $ length docs'
, show $ length docs
, ") exceeds the MAX_DOCS_PARSERS limit ("
, show limit
, ")" ]
......@@ -285,7 +301,6 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
panic panicMsg
else
pure ()
let docs = splitEvery 500 $ take limit docs'
printDebug "Parsing corpus finished : " cid
logStatus jobLog2
......@@ -296,7 +311,8 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
(Right [cid])
(Multi $ fromMaybe EN l)
Nothing
(map (map toHyperdataDocument) docs)
(Just $ fromIntegral $ length docs, yieldMany docs .| mapC toHyperdataDocument)
--(map (map toHyperdataDocument) docs)
logStatus
printDebug "Extraction finished : " cid
......
......@@ -69,21 +69,32 @@ api :: UserId -> NodeId -> GargServer API
api uId nId =
serveJobsAPI $
JobFunction (\q log' -> do
documentUpload uId nId q (liftBase . log')
documentUploadAsync uId nId q (liftBase . log')
)
documentUpload :: (FlowCmdM env err m)
documentUploadAsync :: (FlowCmdM env err m)
=> UserId
-> NodeId
-> DocumentUpload
-> (JobLog -> m ())
-> m JobLog
documentUpload _uId nId doc logStatus = do
documentUploadAsync _uId nId doc logStatus = do
let jl = JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just [] }
logStatus jl
docIds <- documentUpload nId doc
printDebug "documentUploadAsync" docIds
pure $ jobLogSuccess jl
documentUpload :: (FlowCmdM env err m)
=> NodeId
-> DocumentUpload
-> m [DocId]
documentUpload nId doc = do
mcId <- getClosestParentIdByType' nId NodeCorpus
let cId = case mcId of
Just c -> c
......@@ -116,5 +127,6 @@ documentUpload _uId nId doc logStatus = do
docIds <- insertMasterDocs (Nothing :: Maybe HyperdataCorpus) (Multi EN) [hd]
_ <- Doc.add cId docIds
pure docIds
pure $ jobLogSuccess jl
......@@ -16,6 +16,7 @@ Portability : POSIX
module Gargantext.API.Node.DocumentsFromWriteNodes
where
import Conduit
import Control.Lens ((^.))
import Data.Aeson
import Data.Either (Either(..), rights)
......@@ -100,7 +101,7 @@ documentsFromWriteNodes uId nId _p logStatus = do
let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
let parsed = rights parsedE
_ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing logStatus
_ <- flowDataText (RootId (NodeId uId)) (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed)) (Multi EN) cId Nothing logStatus
pure $ jobLogSuccess jobLog
------------------------------------------------------------------------
......
......@@ -36,7 +36,8 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (defaultList, getNode, insertNodes, node)
import Gargantext.Database.Query.Table.Node (defaultList, getNode)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Database.Schema.Node (node_parent_id)
import Gargantext.Prelude (Bool(..), Ord, Eq, (<$>), ($), liftBase, (.), printDebug, pure, show, cs, (<>), panic, (<*>))
......@@ -191,7 +192,7 @@ updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
, _scst_events = Just []
}
updateNode userId phyloId (UpdateNodePhylo config) logStatus = do
updateNode _userId phyloId (UpdateNodePhylo config) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 2
......@@ -210,7 +211,7 @@ updateNode userId phyloId (UpdateNodePhylo config) logStatus = do
, _scst_events = Just []
}
_phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
_ <- updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy))
pure JobLog { _scst_succeeded = Just 3
, _scst_failed = Just 0
......
......@@ -18,6 +18,8 @@ module Gargantext.Core.Text.Corpus.API
)
where
import Conduit
import Data.Either (Either(..))
import Data.Maybe
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..))
......@@ -27,21 +29,25 @@ import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
-- | TODO put in gargantext.init
default_limit :: Maybe Integer
default_limit = Just 10000
import Servant.Client (ClientError)
-- | Get External API metadata main function
get :: ExternalAPIs
-> Lang
-> Query
-> Maybe Limit
-> IO [HyperdataDocument]
get PubMed _la q _l = PUBMED.get q default_limit -- EN only by default
get HAL la q _l = HAL.get la q default_limit
get IsTex la q _l = ISTEX.get la q default_limit
get Isidore la q _l = ISIDORE.get la (fromIntegral <$> default_limit) (Just q) Nothing
-- -> IO [HyperdataDocument]
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get PubMed _la q limit = PUBMED.get q limit
--docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
get HAL la q limit = HAL.getC la q limit
get IsTex la q limit = do
docs <- ISTEX.get la q limit
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
get Isidore la q limit = do
docs <- ISIDORE.get la (fromIntegral <$> limit) (Just q) Nothing
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
get _ _ _ _ = undefined
-- | Some Sugar for the documentation
......
......@@ -12,8 +12,11 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Hal
where
import Conduit
import Data.Either
import Data.Maybe
import Data.Text (Text, pack, intercalate)
import Servant.Client (ClientError)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
......@@ -25,8 +28,16 @@ import qualified HAL.Doc.Corpus as HAL
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get la q ml = do
docs <- HAL.getMetadataWith q (Just 0) (fromIntegral <$> ml)
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) docs
eDocs <- HAL.getMetadataWith q (Just 0) ml
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs
getC :: Lang -> Text -> Maybe Integer -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = do
eRes <- HAL.getMetadataWithC q (Just 0) ml
pure $ (\(len, docsC) -> (len, docsC .| mapMC (toDoc' la))) <$> eRes
-- case eRes of
-- Left err -> panic $ pack $ show err
-- Right (len, docsC) -> pure (len, docsC .| mapMC (toDoc' la))
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
......
......@@ -13,9 +13,12 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Pubmed
where
import Conduit
import Data.Either (Either)
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
import Servant.Client (ClientError)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
......@@ -31,17 +34,21 @@ type Limit = PubMed.Limit
-- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
get :: Query -> Maybe Limit -> IO [HyperdataDocument]
get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
<$> PubMed.getMetadataWith q l
get :: Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get q l = do
eRes <- PubMed.getMetadataWithC q l
pure $ (\(len, docsC) -> (len, docsC .| mapC (toDoc EN))) <$> eRes
--either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
-- <$> PubMed.getMetadataWithC q l
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
(PubMedDoc.PubMedDate a y m d)
toDoc l (PubMedDoc.PubMed { pubmed_id
, pubmed_article = PubMedDoc.PubMedArticle t j as aus
, pubmed_date = PubMedDoc.PubMedDate a y m d }
) = HyperdataDocument { _hd_bdd = Just "PubMed"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqId = Just $ Text.pack $ show pubmed_id
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
......
......@@ -34,7 +34,7 @@ import Gargantext.Prelude
import Servant.API (FromHttpApiData(..), ToHttpApiData(..))
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Text.Read (read)
import Text.Read (readMaybe)
type CorpusName = Text
------------------------------------------------------------------------
......@@ -74,7 +74,11 @@ instance Semigroup ListType
instance FromHttpApiData ListType where
parseUrlPiece = Right . read . unpack
parseUrlPiece s = Right s'
where
s' = case (readMaybe $ unpack s) of
Nothing -> panic $ "Cannot read url piece: " <> s
Just s'' -> s''
instance ToHttpApiData ListType where
toUrlPiece = pack . show
......
......@@ -203,23 +203,23 @@ data PhyloConfig =
------------------------------------------------------------------------
data PhyloSubConfig =
PhyloSubConfig { _sc_phyloProximity :: Proximity
, _sc_phyloSynchrony :: Synchrony
, _sc_phyloQuality :: Quality
PhyloSubConfig { _sc_phyloProximity :: Double
, _sc_phyloSynchrony :: Double
, _sc_phyloQuality :: Double
, _sc_timeUnit :: TimeUnit
, _sc_clique :: Clique
, _sc_exportFilter :: [Filter]
, _sc_exportFilter :: Double
}
deriving (Show,Generic,Eq)
subConfig2config :: PhyloSubConfig -> PhyloConfig
subConfig2config subConfig = defaultConfig { phyloProximity = _sc_phyloProximity subConfig
, phyloSynchrony = _sc_phyloSynchrony subConfig
, phyloQuality = _sc_phyloQuality subConfig
subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard $ _sc_phyloProximity subConfig
, phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = Quality (_sc_phyloQuality subConfig) 1
, timeUnit = _sc_timeUnit subConfig
, clique = _sc_clique subConfig
, exportFilter = _sc_exportFilter subConfig
, exportFilter = [ByBranchSize $ _sc_exportFilter subConfig]
}
------------------------------------------------------------------------
......
......@@ -30,7 +30,8 @@ import Gargantext.Core.Viz.Phylo.Example (phyloExample)
import Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
import Gargantext.Database.Query.Table.Node (insertNodes, node)
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Prelude
import Network.HTTP.Media ((//), (/:))
import Servant
......@@ -90,7 +91,10 @@ type GetPhylo = QueryParam "listId" ListId
-- Fix Filter parameters
-- TODO fix parameters to default config that should be in Node
getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo phyloId _lId _level _minSizeBranch = getPhyloDataJson phyloId
getPhylo phyloId _lId _level _minSizeBranch = do
theData <- getPhyloDataJson phyloId
-- printDebug "getPhylo" theData
pure theData
getPhyloDataJson :: PhyloId -> GargNoServer Value
getPhyloDataJson phyloId = do
......@@ -118,17 +122,19 @@ type PostPhylo = QueryParam "listId" ListId
-- :> ReqBody '[JSON] PhyloQueryBuild
:> (Post '[JSON] NodeId)
postPhylo :: CorpusId -> UserId -> GargServer PostPhylo
postPhylo corpusId userId _lId = do
postPhylo :: PhyloId -> UserId -> GargServer PostPhylo
postPhylo phyloId _userId _lId = do
-- TODO get Reader settings
-- s <- ask
-- let
-- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
phy <- flowPhyloAPI defaultConfig corpusId -- params
phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
pure $ NodeId (fromIntegral phyloId)
corpusId <- getClosestParentIdByType phyloId NodeCorpus
phy <- flowPhyloAPI defaultConfig (fromMaybe (panic "[G.C.V.P.API] no corpus ID found") corpusId) -- params
-- phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
_ <- updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy))
pure phyloId
------------------------------------------------------------------------
-- | DELETE Phylo == delete a node
......
......@@ -9,7 +9,6 @@ Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.API.Tools
where
......
......@@ -209,6 +209,7 @@ exportToDot phylo export =
graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
<> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio
-- , Ratio AutoRatio
, Style [SItem Filled []],Color [toWColor White]]
{-- home made attributes -}
<> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
......
......@@ -46,8 +46,10 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
)
where
import Conduit
import Control.Lens ((^.), view, _Just, makeLenses)
import Data.Aeson.TH (deriveJSON)
import Data.Conduit.Internal (zipSources)
import Data.Either
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
......@@ -60,6 +62,7 @@ import qualified Data.Text as T
import Data.Traversable (traverse)
import Data.Tuple.Extra (first, second)
import GHC.Generics (Generic)
import Servant.Client (ClientError)
import System.FilePath (FilePath)
import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
......@@ -103,6 +106,7 @@ import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Database.Query.Table.Node.Document.Add as Doc (add)
import qualified Prelude as Prelude
------------------------------------------------------------------------
-- Imports for upgrade function
......@@ -127,7 +131,8 @@ allDataOrigins = map InternalOrigin API.externalAPIs
---------------
data DataText = DataOld ![NodeId]
| DataNew ![[HyperdataDocument]]
| DataNew !(Maybe Integer, ConduitT () HyperdataDocument IO ())
-- | DataNew ![[HyperdataDocument]]
-- TODO use the split parameter in config file
getDataText :: FlowCmdM env err m
......@@ -135,10 +140,10 @@ getDataText :: FlowCmdM env err m
-> TermType Lang
-> API.Query
-> Maybe API.Limit
-> m DataText
getDataText (ExternalOrigin api) la q li = liftBase $ DataNew
<$> splitEvery 500
<$> API.get api (_tt_lang la) q li
-> m (Either ClientError DataText)
getDataText (ExternalOrigin api) la q li = liftBase $ do
eRes <- API.get api (_tt_lang la) q li
pure $ DataNew <$> eRes
getDataText (InternalOrigin _) _la q _li = do
(_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus
......@@ -146,10 +151,11 @@ getDataText (InternalOrigin _) _la q _li = do
(Left "")
(Nothing :: Maybe HyperdataCorpus)
ids <- map fst <$> searchDocInDatabase cId (stemIt q)
pure $ DataOld ids
pure $ Right $ DataOld ids
-------------------------------------------------------------------------------
flowDataText :: ( FlowCmdM env err m
flowDataText :: forall env err m.
( FlowCmdM env err m
)
=> User
-> DataText
......@@ -161,7 +167,8 @@ flowDataText :: ( FlowCmdM env err m
flowDataText u (DataOld ids) tt cid mfslw _ = flowCorpusUser (_tt_lang tt) u (Right [cid]) corpusType ids mfslw
where
corpusType = (Nothing :: Maybe HyperdataCorpus)
flowDataText u (DataNew txt) tt cid mfslw logStatus = flowCorpus u (Right [cid]) tt mfslw txt logStatus
flowDataText u (DataNew (mLen, txtC)) tt cid mfslw logStatus =
flowCorpus u (Right [cid]) tt mfslw (mLen, (transPipe liftBase txtC)) logStatus
------------------------------------------------------------------------
-- TODO use proxy
......@@ -173,8 +180,9 @@ flowAnnuaire :: (FlowCmdM env err m)
-> (JobLog -> m ())
-> m AnnuaireId
flowAnnuaire u n l filePath logStatus = do
docs <- liftBase $ (( splitEvery 500 <$> readFile_Annuaire filePath) :: IO [[HyperdataContact]])
flow (Nothing :: Maybe HyperdataAnnuaire) u n l Nothing docs logStatus
-- TODO Conduit for file
docs <- liftBase $ ((readFile_Annuaire filePath) :: IO [HyperdataContact])
flow (Nothing :: Maybe HyperdataAnnuaire) u n l Nothing (Just $ fromIntegral $ length docs, yieldMany docs) logStatus
------------------------------------------------------------------------
flowCorpusFile :: (FlowCmdM env err m)
......@@ -185,12 +193,13 @@ flowCorpusFile :: (FlowCmdM env err m)
-> Maybe FlowSocialListWith
-> (JobLog -> m ())
-> m CorpusId
flowCorpusFile u n l la ff fp mfslw logStatus = do
flowCorpusFile u n _l la ff fp mfslw logStatus = do
eParsed <- liftBase $ parseFile ff fp
case eParsed of
Right parsed -> do
let docs = splitEvery 500 $ take l parsed
flowCorpus u n la mfslw (map (map toHyperdataDocument) docs) logStatus
flowCorpus u n la mfslw (Just $ fromIntegral $ length parsed, yieldMany parsed .| mapC toHyperdataDocument) logStatus
--let docs = splitEvery 500 $ take l parsed
--flowCorpus u n la mfslw (yieldMany $ map (map toHyperdataDocument) docs) logStatus
Left e -> panic $ "Error: " <> (T.pack e)
------------------------------------------------------------------------
......@@ -201,13 +210,14 @@ flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
-> Either CorpusName [CorpusId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> (Maybe Integer, ConduitT () a m ())
-> (JobLog -> m ())
-> m CorpusId
flowCorpus = flow (Nothing :: Maybe HyperdataCorpus)
flow :: ( FlowCmdM env err m
flow :: forall env err m a c.
( FlowCmdM env err m
, FlowCorpus a
, MkCorpus c
)
......@@ -216,23 +226,40 @@ flow :: ( FlowCmdM env err m
-> Either CorpusName [CorpusId]
-> TermType Lang
-> Maybe FlowSocialListWith
-> [[a]]
-> (Maybe Integer, ConduitT () a m ())
-> (JobLog -> m ())
-> m CorpusId
flow c u cn la mfslw docs logStatus = do
flow c u cn la mfslw (mLength, docsC) logStatus = do
-- TODO if public insertMasterDocs else insertUserDocs
ids <- traverse (\(idx, doc) -> do
id <- insertMasterDocs c la doc
logStatus JobLog { _scst_succeeded = Just $ 1 + idx
, _scst_failed = Just 0
, _scst_remaining = Just $ length docs - idx
, _scst_events = Just []
}
pure id
) (zip [1..] docs)
flowCorpusUser (la ^. tt_lang) u cn c (concat ids) mfslw
ids <- runConduit $
zipSources (yieldMany [1..]) docsC
.| mapMC insertDoc
.| sinkList
-- ids <- traverse (\(idx, doc) -> do
-- id <- insertMasterDocs c la doc
-- logStatus JobLog { _scst_succeeded = Just $ 1 + idx
-- , _scst_failed = Just 0
-- , _scst_remaining = Just $ length docs - idx
-- , _scst_events = Just []
-- }
-- pure id
-- ) (zip [1..] docs)
flowCorpusUser (la ^. tt_lang) u cn c ids mfslw
where
insertDoc :: (Integer, a) -> m NodeId
insertDoc (idx, doc) = do
id <- insertMasterDocs c la [doc]
case mLength of
Nothing -> pure ()
Just len -> do
logStatus JobLog { _scst_succeeded = Just $ fromIntegral $ 1 + idx
, _scst_failed = Just 0
, _scst_remaining = Just $ fromIntegral $ len - idx
, _scst_events = Just []
}
pure $ Prelude.head id
------------------------------------------------------------------------
......@@ -250,7 +277,7 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
-- User Flow
(userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus user corpusName ctype
-- NodeTexts is first
_tId <- insertDefaultNode NodeTexts userCorpusId userId
_tId <- insertDefaultNodeIfNotExists NodeTexts userCorpusId userId
-- printDebug "NodeTexts: " tId
-- NodeList is second
......@@ -276,8 +303,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
-- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId
-- User Graph Flow
_ <- insertDefaultNode NodeDashboard userCorpusId userId
_ <- insertDefaultNode NodeGraph userCorpusId userId
_ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId
_ <- insertDefaultNodeIfNotExists NodeGraph userCorpusId userId
--_ <- mkPhylo userCorpusId userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
......@@ -322,7 +349,7 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
-> m ()
saveDocNgramsWith lId mapNgramsDocs' = do
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
printDebug "terms2id" terms2id
--printDebug "terms2id" terms2id
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
......@@ -331,7 +358,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$ map (first _ngramsTerms . second Map.keys)
$ HashMap.toList mapNgramsDocs
printDebug "saveDocNgramsWith" mapCgramsId
--printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
_return <- insertContextNodeNgrams2
$ catMaybes [ ContextNodeNgrams2 <$> Just nId
......@@ -504,5 +531,3 @@ extractInsert docs = do
documentsWithId
_ <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
pure ()
......@@ -27,7 +27,7 @@ import Gargantext.Database.Query.Table.User
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Pass.User (gargPass)
import Gargantext.Prelude.Mail.Types (MailConfig)
------------------------------------------------------------------------
------------------------------------------------------------------------
newUsers :: (CmdM env err m, MonadRandom m, HasNodeError err, HasMail env)
=> [EmailAddress] -> m Int64
......@@ -35,6 +35,18 @@ newUsers us = do
us' <- mapM newUserQuick us
config <- view $ mailSettings
newUsers' config us'
------------------------------------------------------------------------
updateUsersPassword :: (CmdM env err m, MonadRandom m, HasNodeError err, HasMail env)
=> [EmailAddress] -> m Int64
updateUsersPassword us = do
us' <- mapM newUserQuick us
config <- view $ mailSettings
_ <- mapM (\u -> updateUser (SendEmail True) config u) us'
pure 1
------------------------------------------------------------------------
------------------------------------------------------------------------
newUserQuick :: (MonadRandom m)
=> Text -> m (NewUser GargPassword)
......@@ -44,6 +56,7 @@ newUserQuick n = do
Just (u', _m) -> u'
Nothing -> panic "[G.D.A.U.N.newUserQuick]: Email invalid"
pure (NewUser u n (GargPassword pass))
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | guessUserName
......@@ -68,7 +81,6 @@ newUsers' cfg us = do
printDebug "newUsers'" us
pure r
------------------------------------------------------------------------
updateUser :: HasNodeError err
=> SendEmail -> MailConfig -> NewUser GargPassword -> Cmd err Int64
updateUser (SendEmail send) cfg u = do
......
......@@ -24,7 +24,7 @@ import Data.ByteString.Char8 (hPutStrLn)
import Data.Either.Extra (Either)
import Data.Pool (Pool, withResource)
import Data.Profunctor.Product.Default (Default)
import Data.Text (unpack, Text)
import Data.Text (pack, unpack, Text)
import Data.Word (Word16)
import Database.PostgreSQL.Simple (Connection, connect)
import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError)
......@@ -36,7 +36,7 @@ import Opaleye (Unpackspec, showSql, FromFields, Select, runSelect, SqlJsonb, De
import Opaleye.Aggregate (countRows)
import System.IO (FilePath)
import System.IO (stderr)
import Text.Read (read)
import Text.Read (readMaybe)
import qualified Data.ByteString as DB
import qualified Data.List as DL
import qualified Database.PostgreSQL.Simple as PGS
......@@ -176,9 +176,13 @@ databaseParameters :: FilePath -> IO PGS.ConnectInfo
databaseParameters fp = do
ini <- readIniFile' fp
let val' key = unpack $ val ini "database" key
let dbPortRaw = val' "DB_PORT"
let dbPort = case (readMaybe dbPortRaw :: Maybe Word16) of
Nothing -> panic $ "DB_PORT incorrect: " <> (pack dbPortRaw)
Just d -> d
pure $ PGS.ConnectInfo { PGS.connectHost = val' "DB_HOST"
, PGS.connectPort = read (val' "DB_PORT") :: Word16
, PGS.connectPort = dbPort
, PGS.connectUser = val' "DB_USER"
, PGS.connectPassword = val' "DB_PASS"
, PGS.connectDatabase = val' "DB_NAME"
......
......@@ -255,6 +255,14 @@ insertDefaultNode :: HasDBid NodeType
=> NodeType -> ParentId -> UserId -> Cmd err [NodeId]
insertDefaultNode nt p u = insertNode nt Nothing Nothing p u
insertDefaultNodeIfNotExists :: HasDBid NodeType
=> NodeType -> ParentId -> UserId -> Cmd err [NodeId]
insertDefaultNodeIfNotExists nt p u = do
children <- getChildrenByType p nt
case children of
[] -> insertDefaultNode nt p u
xs -> pure xs
insertNode :: HasDBid NodeType
=> NodeType -> Maybe Name -> Maybe DefaultHyperdata -> ParentId -> UserId -> Cmd err [NodeId]
insertNode nt n h p u = insertNodesR [nodeW nt n h p u]
......
......@@ -69,11 +69,11 @@ extra-deps:
# External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: 9cdba6423decad5acfacb0f274212fd8723ce734
commit: 12cb16c391577bff4295e3dd1b126281d78037b4
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: 020f5f9b308f5c23c925aedf5fb11f8b4728fb19
commit: 3bf77f28d3dc71d2e8349cbf422a34cf4c23cd11
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment