1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
{-|
Module : Gargantext.API.Export
Description : Get Metrics from Storage (Database like)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main exports of Gargantext:
- corpus
- document and ngrams
- lists
-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Export
where
import Data.Aeson.TH (deriveJSON)
import Data.Map (Map)
import Data.Set (Set)
import Data.Maybe (fromMaybe)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Types (GargNoServer)
import Gargantext.Core.Types --
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
import Gargantext.Database.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node (defaultList, HasNodeError)
import Gargantext.Database.Schema.NodeNode (selectDocNodes)
import Gargantext.Database.Types.Node (Node, HyperdataDocument(..), NodeId, ListId, CorpusId)
import Gargantext.Database.Utils (Cmd)
import Gargantext.Prelude
import Gargantext.Prelude.Utils (sha)
import Servant
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.List as List
-- Corpus Export
data Corpus =
Corpus { _c_corpus :: [Document]
, _c_hash :: Hash
} deriving (Generic)
-- | Document Export
data Document =
Document { _d_document :: Node HyperdataDocument
, _d_ngrams :: Ngrams
, _d_hash :: Hash
} deriving (Generic)
data Ngrams =
Ngrams { _ng_ngrams :: [Text]
, _ng_hash :: Hash
} deriving (Generic)
type Hash = Text
-------
instance ToSchema Corpus where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
instance ToSchema Document where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
instance ToSchema Ngrams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
-------
instance ToParamSchema Corpus where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Document where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Ngrams where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
--------------------------------------------------
type API = Summary "Corpus Export"
:> "export"
:> QueryParam "listId" ListId
:> QueryParam "ngramsType" NgramsType
:> Get '[JSON] Corpus
--------------------------------------------------
getCorpus :: CorpusId
-> Maybe ListId
-> Maybe NgramsType
-> GargNoServer Corpus
getCorpus cId lId nt' = do
let
nt = case nt' of
Nothing -> NgramsTerms
Just t -> t
ns <- Map.fromList
<$> map (\n -> (_node_id n, n))
<$> selectDocNodes cId
repo <- getRepo
ngs <- getNodeNgrams cId lId nt repo
let -- uniqId is hash computed already for each document imported in database
r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
) ns ngs
where
ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b
d_hash a b = sha $ (fromMaybe "" (_hyperdataDocument_uniqId $ _node_hyperdata a))
<> (ng_hash b)
pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
$ List.map _d_hash $ Map.elems r
)
getNodeNgrams :: HasNodeError err
=> CorpusId
-> Maybe ListId
-> NgramsType
-> NgramsRepo
-> Cmd err (Map NodeId (Set Text))
getNodeNgrams cId lId' nt repo = do
lId <- case lId' of
Nothing -> defaultList cId
Just l -> pure l
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot GraphTerm $ mapTermListRoot [lId] nt repo
r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
pure r
$(deriveJSON (unPrefix "_c_") ''Corpus)
$(deriveJSON (unPrefix "_d_") ''Document)
$(deriveJSON (unPrefix "_ng_") ''Ngrams)
-- TODO
-- Exports List
-- Version number of the list