Commit 3f99bf81 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] Export Corpus with Document with Ngrams.

parent 631a22ee
...@@ -93,6 +93,7 @@ import Gargantext.API.Ngrams (HasRepo(..), HasRepoSaver(..), saveRepo, TableNgra ...@@ -93,6 +93,7 @@ import Gargantext.API.Ngrams (HasRepo(..), HasRepoSaver(..), saveRepo, TableNgra
import Gargantext.API.Node import Gargantext.API.Node
import Gargantext.API.Search (SearchPairsAPI, searchPairs) import Gargantext.API.Search (SearchPairsAPI, searchPairs)
import Gargantext.API.Types import Gargantext.API.Types
import qualified Gargantext.API.Export as Export
import qualified Gargantext.API.Corpus.New as New import qualified Gargantext.API.Corpus.New as New
import Gargantext.Database.Types.Node import Gargantext.Database.Types.Node
import Gargantext.Database.Types.Node (NodeId, CorpusId, AnnuaireId) import Gargantext.Database.Types.Node (NodeId, CorpusId, AnnuaireId)
...@@ -261,6 +262,9 @@ type GargPrivateAPI' = ...@@ -261,6 +262,9 @@ type GargPrivateAPI' =
:> Capture "node2_id" NodeId :> Capture "node2_id" NodeId
:> NodeNodeAPI HyperdataAny :> NodeNodeAPI HyperdataAny
:<|> "corpus" :> Capture "node_id" CorpusId
:> Export.API
-- Annuaire endpoint -- Annuaire endpoint
:<|> "annuaire":> Summary "Annuaire endpoint" :<|> "annuaire":> Summary "Annuaire endpoint"
:> Capture "annuaire_id" AnnuaireId :> Capture "annuaire_id" AnnuaireId
...@@ -362,6 +366,7 @@ serverPrivateGargAPI' (AuthenticatedUser (NodeId uid)) ...@@ -362,6 +366,7 @@ serverPrivateGargAPI' (AuthenticatedUser (NodeId uid))
:<|> nodeAPI (Proxy :: Proxy HyperdataAny) uid :<|> nodeAPI (Proxy :: Proxy HyperdataAny) uid
:<|> nodeAPI (Proxy :: Proxy HyperdataCorpus) uid :<|> nodeAPI (Proxy :: Proxy HyperdataCorpus) uid
:<|> nodeNodeAPI (Proxy :: Proxy HyperdataAny) uid :<|> nodeNodeAPI (Proxy :: Proxy HyperdataAny) uid
:<|> Export.getCorpus -- uid
:<|> nodeAPI (Proxy :: Proxy HyperdataAnnuaire) uid :<|> nodeAPI (Proxy :: Proxy HyperdataAnnuaire) uid
:<|> nodeNodeAPI (Proxy :: Proxy HyperdataContact) uid :<|> nodeNodeAPI (Proxy :: Proxy HyperdataContact) uid
......
{-|
Module : Gargantext.API.Export
Description : Get Metrics from Storage (Database like)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main exports of Gargantext:
- corpus
- document and ngrams
- lists
-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Export
where
import Data.Aeson.TH (deriveJSON)
import Data.Map (Map)
import Data.Set (Set)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Types (GargNoServer)
import Gargantext.Core.Types --
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
import Gargantext.Database.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node (defaultList, HasNodeError)
import Gargantext.Database.Schema.NodeNode (selectDocNodes)
import Gargantext.Database.Types.Node (Node, HyperdataDocument(..), NodeId, ListId, CorpusId)
import Gargantext.Database.Utils (Cmd)
import Gargantext.Prelude
import Servant
import qualified Data.Map as Map
import qualified Data.Set as Set
-- Corpus Export
data Corpus =
Corpus { _c_corpus :: [Document]
-- , _c_listVersion :: Int
, _c_hash :: Text
} deriving (Generic)
-- | Document Export
data Document =
Document { _d_document :: Node HyperdataDocument
, _d_ngrams :: [Text]
-- , _d_hash :: Text
} deriving (Generic)
instance ToSchema Corpus where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
instance ToSchema Document where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
instance ToParamSchema Corpus where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Document where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
--------------------------------------------------
type API = Summary "Corpus Export"
:> "export"
:> QueryParam "listId" ListId
:> QueryParam "ngramsType" NgramsType
:> Get '[JSON] Corpus
--------------------------------------------------
getCorpus :: CorpusId
-> Maybe ListId
-> Maybe NgramsType
-> GargNoServer Corpus
getCorpus cId lId nt' = do
let
nt = case nt' of
Nothing -> NgramsTerms
Just t -> t
ns <- Map.fromList
<$> map (\n -> (_node_id n, n))
<$> selectDocNodes cId
repo <- getRepo
ngs <- getNodeNgrams cId lId nt repo
let r = Map.intersectionWith (\a b -> Document a (Set.toList b)) ns ngs
pure $ Corpus (Map.elems r) "HASH_TODO"
-- getCorpusNgrams :: CorpusId -> ListId ->
-- Exports List
-- Version number of the list
getNodeNgrams :: HasNodeError err
=> CorpusId
-> Maybe ListId
-> NgramsType
-> NgramsRepo
-> Cmd err (Map NodeId (Set Text))
getNodeNgrams cId lId' nt repo = do
lId <- case lId' of
Nothing -> defaultList cId
Just l -> pure l
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot GraphTerm $ mapTermListRoot [lId] nt repo
r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
pure r
$(deriveJSON (unPrefix "_c_") ''Corpus)
$(deriveJSON (unPrefix "_d_") ''Document)
...@@ -78,7 +78,7 @@ module Gargantext.API.Ngrams ...@@ -78,7 +78,7 @@ module Gargantext.API.Ngrams
, HasRepo(..) , HasRepo(..)
, RepoCmdM , RepoCmdM
, QueryParamR , QueryParamR
, TODO(..) , TODO
-- Internals -- Internals
, getNgramsTableMap , getNgramsTableMap
...@@ -143,7 +143,7 @@ import Database.PostgreSQL.Simple.FromField (FromField, fromField) ...@@ -143,7 +143,7 @@ import Database.PostgreSQL.Simple.FromField (FromField, fromField)
import qualified Gargantext.Database.Schema.Ngrams as Ngrams import qualified Gargantext.Database.Schema.Ngrams as Ngrams
-- import Gargantext.Database.Schema.NodeNgram hiding (Action) -- import Gargantext.Database.Schema.NodeNgram hiding (Action)
import Gargantext.Prelude import Gargantext.Prelude
-- import Gargantext.Core.Types (ListTypeId, listTypeId) import Gargantext.Core.Types (TODO)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid) import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
import Servant hiding (Patch) import Servant hiding (Patch)
import System.Clock (getTime, TimeSpec, Clock(..)) import System.Clock (getTime, TimeSpec, Clock(..))
...@@ -152,12 +152,6 @@ import System.IO (stderr) ...@@ -152,12 +152,6 @@ import System.IO (stderr)
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
data TODO = TODO
deriving (Generic)
instance ToSchema TODO where
instance ToParamSchema TODO where
------------------------------------------------------------------------ ------------------------------------------------------------------------
--data FacetFormat = Table | Chart --data FacetFormat = Table | Chart
data TabType = Docs | Trash | MoreFav | MoreTrash data TabType = Docs | Trash | MoreFav | MoreTrash
......
...@@ -78,7 +78,6 @@ import qualified Gargantext.Text.List.Learn as Learn ...@@ -78,7 +78,6 @@ import qualified Gargantext.Text.List.Learn as Learn
import qualified Data.Vector as Vec import qualified Data.Vector as Vec
--} --}
type NodesAPI = Delete '[JSON] Int type NodesAPI = Delete '[JSON] Int
-- | Delete Nodes -- | Delete Nodes
...@@ -371,3 +370,4 @@ putNode :: forall err a. (HasNodeError err, JSONB a, ToJSON a) ...@@ -371,3 +370,4 @@ putNode :: forall err a. (HasNodeError err, JSONB a, ToJSON a)
-> Cmd err Int -> Cmd err Int
putNode n h = fromIntegral <$> updateHyperdata n h putNode n h = fromIntegral <$> updateHyperdata n h
------------------------------------------------------------- -------------------------------------------------------------
...@@ -22,7 +22,7 @@ import Servant.Job.Types ...@@ -22,7 +22,7 @@ import Servant.Job.Types
import Servant.Job.Utils (jsonOptions) import Servant.Job.Utils (jsonOptions)
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary import Test.QuickCheck.Arbitrary
import Gargantext.API.Ngrams (TODO(..)) import Gargantext.Core.Types (TODO(..))
instance Arbitrary a => Arbitrary (JobStatus 'Safe a) where instance Arbitrary a => Arbitrary (JobStatus 'Safe a) where
arbitrary = panic "TODO" arbitrary = panic "TODO"
......
...@@ -76,17 +76,17 @@ instance {-# OVERLAPPABLE #-} (MonadError e m) => ThrowAll' e (m a) where ...@@ -76,17 +76,17 @@ instance {-# OVERLAPPABLE #-} (MonadError e m) => ThrowAll' e (m a) where
throwAll' = throwError throwAll' = throwError
type GargServerC env err m = type GargServerC env err m =
( CmdM env err m ( CmdM env err m
, HasNodeError err , HasNodeError err
, HasInvalidError err , HasInvalidError err
, HasTreeError err , HasTreeError err
, HasServerError err , HasServerError err
, HasJoseError err , HasJoseError err
, ToJSON err -- TODO this is arguable , ToJSON err -- TODO this is arguable
, Exception err , Exception err
, HasRepo env , HasRepo env
, HasSettings env , HasSettings env
, HasJobEnv env ScraperStatus ScraperStatus , HasJobEnv env ScraperStatus ScraperStatus
) )
type GargServerT env err m api = GargServerC env err m => ServerT api m type GargServerT env err m api = GargServerC env err m => ServerT api m
...@@ -94,12 +94,25 @@ type GargServerT env err m api = GargServerC env err m => ServerT api m ...@@ -94,12 +94,25 @@ type GargServerT env err m api = GargServerC env err m => ServerT api m
type GargServer api = type GargServer api =
forall env err m. GargServerT env err m api forall env err m. GargServerT env err m api
-------------------------------------------------------------------
-- | This Type is needed to prepare the function before the GargServer
type GargNoServer' env err m =
( CmdM env err m
, HasRepo env
, HasSettings env
, HasNodeError err
)
type GargNoServer t =
forall env err m. GargNoServer' env err m => m t
-------------------------------------------------------------------
data GargError data GargError
= GargNodeError NodeError = GargNodeError NodeError
| GargTreeError TreeError | GargTreeError TreeError
| GargInvalidError Validation | GargInvalidError Validation
| GargJoseError Jose.Error | GargJoseError Jose.Error
| GargServerError ServerError | GargServerError ServerError
deriving (Show, Typeable) deriving (Show, Typeable)
makePrisms ''GargError makePrisms ''GargError
......
...@@ -25,32 +25,29 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main ...@@ -25,32 +25,29 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, Name , Name
, TableResult(..) , TableResult(..)
, NodeTableResult , NodeTableResult
, TODO(..)
) where ) where
--import qualified Data.Set as S
import Control.Lens (Prism', (#)) import Control.Lens (Prism', (#))
import Control.Monad.Error.Class (MonadError, throwError) import Control.Monad.Error.Class (MonadError, throwError)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Monoid import Data.Monoid
import Data.Semigroup import Data.Semigroup
import Data.Set (Set, empty) import Data.Set (Set, empty)
import Data.Swagger (ToParamSchema)
import Data.Swagger (ToSchema(..), genericDeclareNamedSchema) import Data.Swagger (ToSchema(..), genericDeclareNamedSchema)
--import qualified Data.Set as S
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
import Data.Validity import Data.Validity
import GHC.Generics
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Types.Node import Gargantext.Database.Types.Node
import Gargantext.Prelude import Gargantext.Prelude
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import GHC.Generics
------------------------------------------------------------------------ ------------------------------------------------------------------------
type Name = Text type Name = Text
type Term = Text type Term = Text
type Stems = Set Text type Stems = Set Text
...@@ -158,3 +155,13 @@ instance Arbitrary a => Arbitrary (TableResult a) where ...@@ -158,3 +155,13 @@ instance Arbitrary a => Arbitrary (TableResult a) where
arbitrary = TableResult <$> arbitrary <*> arbitrary arbitrary = TableResult <$> arbitrary <*> arbitrary
type NodeTableResult a = TableResult (Node a) type NodeTableResult a = TableResult (Node a)
-- TO BE removed
data TODO = TODO
deriving (Generic)
instance ToSchema TODO where
instance ToParamSchema TODO where
...@@ -52,8 +52,6 @@ instance ToSchema NodeTree where ...@@ -52,8 +52,6 @@ instance ToSchema NodeTree where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nt_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nt_")
------------------------------------------------------------------------ ------------------------------------------------------------------------
--data Classification = Favorites | MyClassifcation --data Classification = Favorites | MyClassifcation
type HashId = Text type HashId = Text
......
...@@ -278,15 +278,32 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql| ...@@ -278,15 +278,32 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
GROUP BY nng.node2_id, ng.terms GROUP BY nng.node2_id, ng.terms
|] |]
------------------------------------------------------------------------
getNodesByNgramsOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text] getNodesByNgramsOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId)) -> Cmd err (Map Text (Set NodeId))
getNodesByNgramsOnlyUser cId ls nt ngs = getNodesByNgramsOnlyUser cId ls nt ngs =
Map.unionsWith (<>) Map.unionsWith (<>)
. map (fromListWith (<>) . map (second Set.singleton)) . map (fromListWith (<>)
. map (second Set.singleton))
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt) <$> mapM (selectNgramsOnlyByNodeUser cId ls nt)
(splitEvery 1000 ngs) (splitEvery 1000 ngs)
getNgramsByNodeOnlyUser :: NodeId
-> [ListId]
-> NgramsType
-> [Text]
-> Cmd err (Map NodeId (Set Text))
getNgramsByNodeOnlyUser cId ls nt ngs =
Map.unionsWith (<>)
. map (fromListWith (<>)
. map (second Set.singleton))
. map (map swap)
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt)
(splitEvery 1000 ngs)
------------------------------------------------------------------------
selectNgramsOnlyByNodeUser :: CorpusId -> [ListId] -> NgramsType -> [Text] selectNgramsOnlyByNodeUser :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, NodeId)] -> Cmd err [(Text, NodeId)]
selectNgramsOnlyByNodeUser cId ls nt tms = selectNgramsOnlyByNodeUser cId ls nt tms =
...@@ -319,7 +336,6 @@ queryNgramsOnlyByNodeUser = [sql| ...@@ -319,7 +336,6 @@ queryNgramsOnlyByNodeUser = [sql|
selectNgramsOnlyByNodeUser' :: CorpusId -> [ListId] -> NgramsType -> [Text] selectNgramsOnlyByNodeUser' :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, Int)] -> Cmd err [(Text, Int)]
selectNgramsOnlyByNodeUser' cId ls nt tms = selectNgramsOnlyByNodeUser' cId ls nt tms =
...@@ -349,7 +365,6 @@ queryNgramsOnlyByNodeUser' = [sql| ...@@ -349,7 +365,6 @@ queryNgramsOnlyByNodeUser' = [sql|
getNgramsByDocOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text] getNgramsByDocOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId)) -> Cmd err (Map Text (Set NodeId))
getNgramsByDocOnlyUser cId ls nt ngs = getNgramsByDocOnlyUser cId ls nt ngs =
......
...@@ -41,8 +41,12 @@ import Database.PostgreSQL.Simple.ToRow (toRow) ...@@ -41,8 +41,12 @@ import Database.PostgreSQL.Simple.ToRow (toRow)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, formatPGSQuery) import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, formatPGSQuery)
import Gargantext.Core.Types (TODO(..))
import Gargantext.Prelude import Gargantext.Prelude
import Opaleye hiding (FromField) import Opaleye hiding (FromField)
import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
import Text.Read (read)
import Data.Swagger (ToParamSchema, toParamSchema)
import Prelude (Enum, Bounded, minBound, maxBound, Functor) import Prelude (Enum, Bounded, minBound, maxBound, Functor)
import qualified Database.PostgreSQL.Simple as PGS import qualified Database.PostgreSQL.Simple as PGS
...@@ -94,7 +98,7 @@ dbGetNgramsDb = runOpaQuery queryNgramsTable ...@@ -94,7 +98,7 @@ dbGetNgramsDb = runOpaQuery queryNgramsTable
-- ngrams in authors field of document has Authors Type -- ngrams in authors field of document has Authors Type
-- ngrams in text (title or abstract) of documents has Terms Type -- ngrams in text (title or abstract) of documents has Terms Type
data NgramsType = Authors | Institutes | Sources | NgramsTerms data NgramsType = Authors | Institutes | Sources | NgramsTerms
deriving (Eq, Show, Ord, Enum, Bounded, Generic) deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
instance FromJSON NgramsType instance FromJSON NgramsType
instance FromJSONKey NgramsType where instance FromJSONKey NgramsType where
...@@ -115,6 +119,15 @@ instance FromField NgramsTypeId where ...@@ -115,6 +119,15 @@ instance FromField NgramsTypeId where
if (n :: Int) > 0 then return $ NgramsTypeId n if (n :: Int) > 0 then return $ NgramsTypeId n
else mzero else mzero
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
where where
queryRunnerColumnDefault = fieldQueryRunnerColumn queryRunnerColumnDefault = fieldQueryRunnerColumn
......
...@@ -78,6 +78,8 @@ nodeNodeNgramsTable = Table "node_node_ngrams" ...@@ -78,6 +78,8 @@ nodeNodeNgramsTable = Table "node_node_ngrams"
} }
) )
------------------------------------------------
queryNodeNodeNgramsTable :: Query NodeNodeNgramsRead queryNodeNodeNgramsTable :: Query NodeNodeNgramsRead
queryNodeNodeNgramsTable = queryTable nodeNodeNgramsTable queryNodeNodeNgramsTable = queryTable nodeNodeNgramsTable
......
...@@ -106,7 +106,6 @@ type Node json = NodePoly NodeId NodeTypeId UserId (Maybe ParentId) NodeName U ...@@ -106,7 +106,6 @@ type Node json = NodePoly NodeId NodeTypeId UserId (Maybe ParentId) NodeName U
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance FromHttpApiData NodeId where instance FromHttpApiData NodeId where
parseUrlPiece n = pure $ NodeId $ (read . cs) n parseUrlPiece n = pure $ NodeId $ (read . cs) n
......
...@@ -37,7 +37,7 @@ import Gargantext.Prelude ...@@ -37,7 +37,7 @@ import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Main import Gargantext.Viz.Phylo.Main
import Gargantext.Viz.Phylo.Example import Gargantext.Viz.Phylo.Example
import Gargantext.API.Ngrams (TODO(..)) import Gargantext.Core.Types (TODO(..))
import Servant import Servant
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment