Commit 3f99bf81 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] Export Corpus with Document with Ngrams.

parent 631a22ee
......@@ -93,6 +93,7 @@ import Gargantext.API.Ngrams (HasRepo(..), HasRepoSaver(..), saveRepo, TableNgra
import Gargantext.API.Node
import Gargantext.API.Search (SearchPairsAPI, searchPairs)
import Gargantext.API.Types
import qualified Gargantext.API.Export as Export
import qualified Gargantext.API.Corpus.New as New
import Gargantext.Database.Types.Node
import Gargantext.Database.Types.Node (NodeId, CorpusId, AnnuaireId)
......@@ -261,6 +262,9 @@ type GargPrivateAPI' =
:> Capture "node2_id" NodeId
:> NodeNodeAPI HyperdataAny
:<|> "corpus" :> Capture "node_id" CorpusId
:> Export.API
-- Annuaire endpoint
:<|> "annuaire":> Summary "Annuaire endpoint"
:> Capture "annuaire_id" AnnuaireId
......@@ -362,6 +366,7 @@ serverPrivateGargAPI' (AuthenticatedUser (NodeId uid))
:<|> nodeAPI (Proxy :: Proxy HyperdataAny) uid
:<|> nodeAPI (Proxy :: Proxy HyperdataCorpus) uid
:<|> nodeNodeAPI (Proxy :: Proxy HyperdataAny) uid
:<|> Export.getCorpus -- uid
:<|> nodeAPI (Proxy :: Proxy HyperdataAnnuaire) uid
:<|> nodeNodeAPI (Proxy :: Proxy HyperdataContact) uid
......
{-|
Module : Gargantext.API.Export
Description : Get Metrics from Storage (Database like)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main exports of Gargantext:
- corpus
- document and ngrams
- lists
-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Export
where
import Data.Aeson.TH (deriveJSON)
import Data.Map (Map)
import Data.Set (Set)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Types (GargNoServer)
import Gargantext.Core.Types --
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
import Gargantext.Database.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node (defaultList, HasNodeError)
import Gargantext.Database.Schema.NodeNode (selectDocNodes)
import Gargantext.Database.Types.Node (Node, HyperdataDocument(..), NodeId, ListId, CorpusId)
import Gargantext.Database.Utils (Cmd)
import Gargantext.Prelude
import Servant
import qualified Data.Map as Map
import qualified Data.Set as Set
-- Corpus Export
data Corpus =
Corpus { _c_corpus :: [Document]
-- , _c_listVersion :: Int
, _c_hash :: Text
} deriving (Generic)
-- | Document Export
data Document =
Document { _d_document :: Node HyperdataDocument
, _d_ngrams :: [Text]
-- , _d_hash :: Text
} deriving (Generic)
instance ToSchema Corpus where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
instance ToSchema Document where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
instance ToParamSchema Corpus where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Document where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
--------------------------------------------------
type API = Summary "Corpus Export"
:> "export"
:> QueryParam "listId" ListId
:> QueryParam "ngramsType" NgramsType
:> Get '[JSON] Corpus
--------------------------------------------------
getCorpus :: CorpusId
-> Maybe ListId
-> Maybe NgramsType
-> GargNoServer Corpus
getCorpus cId lId nt' = do
let
nt = case nt' of
Nothing -> NgramsTerms
Just t -> t
ns <- Map.fromList
<$> map (\n -> (_node_id n, n))
<$> selectDocNodes cId
repo <- getRepo
ngs <- getNodeNgrams cId lId nt repo
let r = Map.intersectionWith (\a b -> Document a (Set.toList b)) ns ngs
pure $ Corpus (Map.elems r) "HASH_TODO"
-- getCorpusNgrams :: CorpusId -> ListId ->
-- Exports List
-- Version number of the list
getNodeNgrams :: HasNodeError err
=> CorpusId
-> Maybe ListId
-> NgramsType
-> NgramsRepo
-> Cmd err (Map NodeId (Set Text))
getNodeNgrams cId lId' nt repo = do
lId <- case lId' of
Nothing -> defaultList cId
Just l -> pure l
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot GraphTerm $ mapTermListRoot [lId] nt repo
r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
pure r
$(deriveJSON (unPrefix "_c_") ''Corpus)
$(deriveJSON (unPrefix "_d_") ''Document)
......@@ -78,7 +78,7 @@ module Gargantext.API.Ngrams
, HasRepo(..)
, RepoCmdM
, QueryParamR
, TODO(..)
, TODO
-- Internals
, getNgramsTableMap
......@@ -143,7 +143,7 @@ import Database.PostgreSQL.Simple.FromField (FromField, fromField)
import qualified Gargantext.Database.Schema.Ngrams as Ngrams
-- import Gargantext.Database.Schema.NodeNgram hiding (Action)
import Gargantext.Prelude
-- import Gargantext.Core.Types (ListTypeId, listTypeId)
import Gargantext.Core.Types (TODO)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, HasInvalidError, assertValid)
import Servant hiding (Patch)
import System.Clock (getTime, TimeSpec, Clock(..))
......@@ -152,12 +152,6 @@ import System.IO (stderr)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
data TODO = TODO
deriving (Generic)
instance ToSchema TODO where
instance ToParamSchema TODO where
------------------------------------------------------------------------
--data FacetFormat = Table | Chart
data TabType = Docs | Trash | MoreFav | MoreTrash
......
......@@ -78,7 +78,6 @@ import qualified Gargantext.Text.List.Learn as Learn
import qualified Data.Vector as Vec
--}
type NodesAPI = Delete '[JSON] Int
-- | Delete Nodes
......@@ -371,3 +370,4 @@ putNode :: forall err a. (HasNodeError err, JSONB a, ToJSON a)
-> Cmd err Int
putNode n h = fromIntegral <$> updateHyperdata n h
-------------------------------------------------------------
......@@ -22,7 +22,7 @@ import Servant.Job.Types
import Servant.Job.Utils (jsonOptions)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.API.Ngrams (TODO(..))
import Gargantext.Core.Types (TODO(..))
instance Arbitrary a => Arbitrary (JobStatus 'Safe a) where
arbitrary = panic "TODO"
......
......@@ -76,17 +76,17 @@ instance {-# OVERLAPPABLE #-} (MonadError e m) => ThrowAll' e (m a) where
throwAll' = throwError
type GargServerC env err m =
( CmdM env err m
, HasNodeError err
, HasInvalidError err
, HasTreeError err
, HasServerError err
, HasJoseError err
, ToJSON err -- TODO this is arguable
, Exception err
, HasRepo env
, HasSettings env
, HasJobEnv env ScraperStatus ScraperStatus
( CmdM env err m
, HasNodeError err
, HasInvalidError err
, HasTreeError err
, HasServerError err
, HasJoseError err
, ToJSON err -- TODO this is arguable
, Exception err
, HasRepo env
, HasSettings env
, HasJobEnv env ScraperStatus ScraperStatus
)
type GargServerT env err m api = GargServerC env err m => ServerT api m
......@@ -94,12 +94,25 @@ type GargServerT env err m api = GargServerC env err m => ServerT api m
type GargServer api =
forall env err m. GargServerT env err m api
-------------------------------------------------------------------
-- | This Type is needed to prepare the function before the GargServer
type GargNoServer' env err m =
( CmdM env err m
, HasRepo env
, HasSettings env
, HasNodeError err
)
type GargNoServer t =
forall env err m. GargNoServer' env err m => m t
-------------------------------------------------------------------
data GargError
= GargNodeError NodeError
| GargTreeError TreeError
= GargNodeError NodeError
| GargTreeError TreeError
| GargInvalidError Validation
| GargJoseError Jose.Error
| GargServerError ServerError
| GargJoseError Jose.Error
| GargServerError ServerError
deriving (Show, Typeable)
makePrisms ''GargError
......
......@@ -25,32 +25,29 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, Name
, TableResult(..)
, NodeTableResult
, TODO(..)
) where
--import qualified Data.Set as S
import Control.Lens (Prism', (#))
import Control.Monad.Error.Class (MonadError, throwError)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Monoid
import Data.Semigroup
import Data.Set (Set, empty)
import Data.Swagger (ToParamSchema)
import Data.Swagger (ToSchema(..), genericDeclareNamedSchema)
--import qualified Data.Set as S
import Data.Text (Text, unpack)
import Data.Validity
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import GHC.Generics
import Gargantext.Core.Types.Main
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Types.Node
import Gargantext.Prelude
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import GHC.Generics
------------------------------------------------------------------------
type Name = Text
type Term = Text
type Stems = Set Text
......@@ -158,3 +155,13 @@ instance Arbitrary a => Arbitrary (TableResult a) where
arbitrary = TableResult <$> arbitrary <*> arbitrary
type NodeTableResult a = TableResult (Node a)
-- TO BE removed
data TODO = TODO
deriving (Generic)
instance ToSchema TODO where
instance ToParamSchema TODO where
......@@ -52,8 +52,6 @@ instance ToSchema NodeTree where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nt_")
------------------------------------------------------------------------
--data Classification = Favorites | MyClassifcation
type HashId = Text
......
......@@ -278,15 +278,32 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
GROUP BY nng.node2_id, ng.terms
|]
------------------------------------------------------------------------
getNodesByNgramsOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId))
getNodesByNgramsOnlyUser cId ls nt ngs =
Map.unionsWith (<>)
. map (fromListWith (<>) . map (second Set.singleton))
Map.unionsWith (<>)
. map (fromListWith (<>)
. map (second Set.singleton))
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt)
(splitEvery 1000 ngs)
getNgramsByNodeOnlyUser :: NodeId
-> [ListId]
-> NgramsType
-> [Text]
-> Cmd err (Map NodeId (Set Text))
getNgramsByNodeOnlyUser cId ls nt ngs =
Map.unionsWith (<>)
. map (fromListWith (<>)
. map (second Set.singleton))
. map (map swap)
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt)
(splitEvery 1000 ngs)
------------------------------------------------------------------------
selectNgramsOnlyByNodeUser :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, NodeId)]
selectNgramsOnlyByNodeUser cId ls nt tms =
......@@ -319,7 +336,6 @@ queryNgramsOnlyByNodeUser = [sql|
selectNgramsOnlyByNodeUser' :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, Int)]
selectNgramsOnlyByNodeUser' cId ls nt tms =
......@@ -349,7 +365,6 @@ queryNgramsOnlyByNodeUser' = [sql|
getNgramsByDocOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId))
getNgramsByDocOnlyUser cId ls nt ngs =
......
......@@ -41,8 +41,12 @@ import Database.PostgreSQL.Simple.ToRow (toRow)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import GHC.Generics (Generic)
import Gargantext.Database.Utils (Cmd, runPGSQuery, runOpaQuery, formatPGSQuery)
import Gargantext.Core.Types (TODO(..))
import Gargantext.Prelude
import Opaleye hiding (FromField)
import Servant (FromHttpApiData, parseUrlPiece, Proxy(..))
import Text.Read (read)
import Data.Swagger (ToParamSchema, toParamSchema)
import Prelude (Enum, Bounded, minBound, maxBound, Functor)
import qualified Database.PostgreSQL.Simple as PGS
......@@ -94,7 +98,7 @@ dbGetNgramsDb = runOpaQuery queryNgramsTable
-- ngrams in authors field of document has Authors Type
-- ngrams in text (title or abstract) of documents has Terms Type
data NgramsType = Authors | Institutes | Sources | NgramsTerms
deriving (Eq, Show, Ord, Enum, Bounded, Generic)
deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
instance FromJSON NgramsType
instance FromJSONKey NgramsType where
......@@ -115,6 +119,15 @@ instance FromField NgramsTypeId where
if (n :: Int) > 0 then return $ NgramsTypeId n
else mzero
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance QueryRunnerColumnDefault (Nullable PGInt4) NgramsTypeId
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
......
......@@ -78,6 +78,8 @@ nodeNodeNgramsTable = Table "node_node_ngrams"
}
)
------------------------------------------------
queryNodeNodeNgramsTable :: Query NodeNodeNgramsRead
queryNodeNodeNgramsTable = queryTable nodeNodeNgramsTable
......
......@@ -106,7 +106,6 @@ type Node json = NodePoly NodeId NodeTypeId UserId (Maybe ParentId) NodeName U
------------------------------------------------------------------------
instance FromHttpApiData NodeId where
parseUrlPiece n = pure $ NodeId $ (read . cs) n
......
......@@ -37,7 +37,7 @@ import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Main
import Gargantext.Viz.Phylo.Example
import Gargantext.API.Ngrams (TODO(..))
import Gargantext.Core.Types (TODO(..))
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment