1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
{-|
Module : Gargantext.API.Node.Corpus.Export
Description : Get Metrics from Storage (Database like)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Main exports of Gargantext:
- corpus
- document and ngrams
- lists
-}
module Gargantext.API.Node.Corpus.Export
where
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Data.Set (Set)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.HashMap.Strict as HashMap
import Gargantext.API.Node.Corpus.Export.Types
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo')
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Prelude.Crypto.Hash (hash)
import Gargantext.Core.Types
import Gargantext.Core.NodeStory
import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
import Gargantext.Prelude
--------------------------------------------------
-- | Hashes are ordered by Set
getCorpus :: CorpusId
-> Maybe ListId
-> Maybe NgramsType
-> GargNoServer Corpus
getCorpus cId lId nt' = do
let
nt = case nt' of
Nothing -> NgramsTerms
Just t -> t
listId <- case lId of
Nothing -> defaultList cId
Just l -> pure l
ns <- Map.fromList
<$> map (\n -> (_node_id n, n))
<$> selectDocNodes cId
repo <- getRepo' [listId]
ngs <- getNodeNgrams cId listId nt repo
let -- uniqId is hash computed already for each document imported in database
r = Map.intersectionWith
(\a b -> Document { _d_document = a
, _d_ngrams = Ngrams (Set.toList b) (hash b)
, _d_hash = d_hash a b }
) ns (Map.map (Set.map unNgramsTerm) ngs)
where
d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
, hash b
]
pure $ Corpus { _c_corpus = Map.elems r
, _c_hash = hash $ List.map _d_hash $ Map.elems r }
getNodeNgrams :: HasNodeError err
=> CorpusId
-> ListId
-> NgramsType
-> NodeListStory
-> Cmd err (Map NodeId (Set NgramsTerm))
getNodeNgrams cId lId nt repo = do
-- lId <- case lId' of
-- Nothing -> defaultList cId
-- Just l -> pure l
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
-- TODO HashMap
r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
pure r
-- TODO
-- Exports List
-- Version number of the list