1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
{-|
Module : Gargantext.Database.Flow.List
Description : List Flow
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Database.Action.Flow.List
where
import Control.Concurrent
import Control.Lens (view, (^.), (+~), (%~), at)
import Control.Monad.Reader
import qualified Data.List as List
import qualified Data.Map as Map
import Data.Map (Map, toList)
import qualified Data.Map.Strict.Patch as PM
import Data.Maybe (catMaybes)
import Data.Text (Text)
import Gargantext.API.Ngrams.Types (HasRepoSaver(..), NgramsElement(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTablePatch(..), NgramsTerm(..), RepoCmdM, ne_ngrams, ngramsElementToRepo, r_history, r_state, r_version, repoVar)
import Gargantext.Core.Flow.Types
import Gargantext.Core.Types (HasInvalidError(..), assertValid)
import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Core.Utils (something)
import Gargantext.Database.Action.Flow.Types
import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb, getCgramsId)
-- import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Database.Schema.Ngrams (Ngrams(..), NgramsType(..))
import Gargantext.Prelude
-- FLOW LIST
-- 1. select specific terms of the corpus when compared with others langs
-- (for now, suppose english)
-- 2. select specific terms of the corpus when compared with others corpora (same database)
-- 3. select clusters of terms (generic and specific)
{-
data FlowList = FlowListLang
| FlowListTficf
| FlowListSpeGen
flowList_Tficf :: UserCorpusId
-> MasterCorpusId
-> NgramsType
-> (Text -> Text)
-> Cmd err (Map Text (Double, Set Text))
flowList_Tficf u m nt f = do
u' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsUser u nt
m' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsMaster u m
pure $ sortTficf Down
$ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
flowList_Tficf' :: UserCorpusId
-> MasterCorpusId
-> NgramsType
-> Cmd err (Map Text (Double, Set Text))
flowList_Tficf' u m nt f = do
u' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsUser u nt
m' <- Map.filter (\s -> Set.size s > 1) <$> getNodesByNgramsMaster u m
pure $ sortTficf Down
$ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
-}
-- | TODO check optimization
mapNodeIdNgrams :: [DocumentIdWithNgrams a]
-> Map Ngrams (Map NgramsType (Map NodeId Int))
mapNodeIdNgrams = Map.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f
where
f :: DocumentIdWithNgrams a
-> Map Ngrams (Map NgramsType (Map NodeId Int))
f d = fmap (fmap (Map.singleton nId)) $ documentNgrams d
where
nId = documentId $ documentWithId d
------------------------------------------------------------------------
flowList_DbRepo :: FlowCmdM env err m
=> ListId
-> Map NgramsType [NgramsElement]
-> m ListId
flowList_DbRepo lId ngs = do
-- printDebug "listId flowList" lId
mapCgramsId <- listInsertDb lId toNodeNgramsW (Map.toList ngs)
let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> (unNgramsTerm <$> parent))
<*> getCgramsId mapCgramsId ntype ngram
| (ntype, ngs') <- Map.toList ngs
, NgramsElement (NgramsTerm ngram) _ _ _ _ parent _ <- ngs'
]
-- Inserting groups of ngrams
_r <- insert_Node_NodeNgrams_NodeNgrams
$ map (\(a,b) -> Node_NodeNgrams_NodeNgrams lId a b Nothing) toInsert
listInsert lId ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
pure lId
------------------------------------------------------------------------
------------------------------------------------------------------------
toNodeNgramsW :: ListId
-> [(NgramsType, [NgramsElement])]
-> [NodeNgramsW]
toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
where
toNodeNgramsW'' :: ListId
-> (NgramsType, [NgramsElement])
-> [NodeNgramsW]
toNodeNgramsW'' l' (ngrams_type, elms) =
[ NodeNgrams Nothing l' list_type ngrams_terms' ngrams_type Nothing Nothing Nothing 0 |
(NgramsElement (NgramsTerm ngrams_terms') _size list_type _occ _root _parent _children) <- elms
]
toNodeNgramsW' :: ListId
-> [(Text, [NgramsType])]
-> [NodeNgramsW]
toNodeNgramsW' l'' ngs = [ NodeNgrams Nothing l'' CandidateTerm terms ngrams_type Nothing Nothing Nothing 0
| (terms, ngrams_types) <- ngs
, ngrams_type <- ngrams_types
]
listInsert :: FlowCmdM env err m
=> ListId
-> Map NgramsType [NgramsElement]
-> m ()
listInsert lId ngs = mapM_ (\(typeList, ngElmts)
-> putListNgrams lId typeList ngElmts) (toList ngs)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- NOTE
-- This is no longer part of the API.
-- This function is maintained for its usage in Database.Action.Flow.List.
-- If the given list of ngrams elements contains ngrams already in
-- the repo, they will be ignored.
putListNgrams :: (HasInvalidError err, RepoCmdM env err m)
=> NodeId
-> TableNgrams.NgramsType
-> [NgramsElement]
-> m ()
putListNgrams _ _ [] = pure ()
putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
where
m = Map.fromList $ map (\n -> (n ^. ne_ngrams, ngramsElementToRepo n)) nes
putListNgrams' :: (HasInvalidError err, RepoCmdM env err m)
=> NodeId
-> TableNgrams.NgramsType
-> Map NgramsTerm NgramsRepoElement
-> m ()
putListNgrams' nodeId ngramsType ns = do
-- printDebug "[putListNgrams'] nodeId" nodeId
-- printDebug "[putListNgrams'] ngramsType" ngramsType
-- printDebug "[putListNgrams'] ns" ns
let p1 = NgramsTablePatch . PM.fromMap $ NgramsReplace Nothing . Just <$> ns
(p0, p0_validity) = PM.singleton nodeId p1
(p, p_validity) = PM.singleton ngramsType p0
assertValid p0_validity
assertValid p_validity
{-
-- TODO
v <- currentVersion
q <- commitStatePatch (Versioned v p)
assert empty q
-- What if another commit comes in between?
-- Shall we have a blindCommitStatePatch? It would not ask for a version but just a patch.
-- The modifyMVar_ would test the patch with applicable first.
-- If valid the rest would be atomic and no merge is required.
-}
var <- view repoVar
liftBase $ modifyMVar_ var $ \r -> do
pure $ r & r_version +~ 1
& r_history %~ (p :)
& r_state . at ngramsType %~
(Just .
(at nodeId %~
( Just
. (<> ns)
. something
)
)
. something
)
saveRepo
saveRepo :: ( MonadReader env m, MonadBase IO m, HasRepoSaver env )
=> m ()
saveRepo = liftBase =<< view repoSaver