Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
17fbec42
Commit
17fbec42
authored
Apr 30, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] doc ngrams insertion (WIP)
parent
6507a2d2
Pipeline
#1456
failed with stage
Changes
5
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
41 additions
and
34 deletions
+41
-34
List.hs
src/Gargantext/API/Ngrams/List.hs
+16
-21
Update.hs
src/Gargantext/API/Node/Update.hs
+3
-3
Prelude.hs
src/Gargantext/API/Prelude.hs
+4
-4
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+16
-4
Node.hs
src/Gargantext/Database/Schema/Node.hs
+2
-2
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
17fbec42
...
...
@@ -27,18 +27,18 @@ import GHC.Generics (Generic)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
(
RepoCmdM
,
Versioned
(
..
),
NgramsList
,
NgramsTerm
(
..
))
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Prelude
(
GargServer
,
GargNoServer
)
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.Core.Text.Terms
(
ExtractedNgrams
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow
(
saveDocNgramsWith
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Flow.Utils
(
insertDocNgrams
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Ngrams
(
insertNgrams
)
import
Gargantext.Database.Query.Table.NodeNode
(
selectDocNodes
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Node
...
...
@@ -102,11 +102,14 @@ post l m = do
-----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here)
reIndexWith
::
CorpusId
reIndexWith
::
(
HasRepo
env
,
FlowCmdM
env
err
m
)
=>
CorpusId
->
ListId
->
NgramsType
->
Set
ListType
->
GargNoServer
()
->
m
()
reIndexWith
cId
lId
nt
lts
=
do
-- Getting [NgramsTerm]
ts
<-
List
.
concat
...
...
@@ -125,20 +128,15 @@ reIndexWith cId lId nt lts = do
Just
n
->
if
n
==
1
then
[
t
]
else
[ ]
)
ts
-- Getting the Id of orphan ngrams
mapTextNgramsId
<-
insertNgrams
(
map
(
text2ngrams
.
unNgramsTerm
)
orphans
)
printDebug
"orphans"
orphans
-- Get all documents of the corpus
docs
<-
selectDocNodes
cId
printDebug
"docs length"
(
List
.
length
docs
)
-- Checking Text documents where orphans match
-- TODO Tests here
let
ngramsByDoc
=
List
.
concat
ngramsByDoc
=
HashMap
.
fromList
$
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
))
$
List
.
concat
$
map
(
\
doc
->
List
.
zip
(
termsInText
(
buildPatterns
$
map
(
\
k
->
([
unNgramsTerm
k
],
[]
))
orphans
)
$
Text
.
unlines
$
catMaybes
...
...
@@ -152,12 +150,9 @@ reIndexWith cId lId nt lts = do
printDebug
"ngramsByDoc"
ngramsByDoc
-- Saving the indexation in database
_
<-
insertDocNgrams
lId
(
HashMap
.
fromList
$
catMaybes
$
map
(
\
(
t
,
d
)
->
(,)
<$>
toIndexedNgrams
mapTextNgramsId
t
<*>
Just
d
)
ngramsByDoc
)
pure
()
_
<-
saveDocNgramsWith
lId
ngramsByDoc
pure
()
-- ngramsByDoc
toIndexedNgrams
::
HashMap
Text
NgramsId
->
Text
->
Maybe
(
Indexed
Int
Ngrams
)
toIndexedNgrams
m
t
=
Indexed
<$>
i
<*>
n
...
...
src/Gargantext/API/Node/Update.hs
View file @
17fbec42
...
...
@@ -118,13 +118,13 @@ updateNode _uId nid1 (LinkNodeReq nt nid2) logStatus = do
,
_scst_events
=
Just
[]
}
updateNode
_uId
n
Id
(
UpdateNodeParamsList
_mode
)
logStatus
=
do
updateNode
_uId
l
Id
(
UpdateNodeParamsList
_mode
)
logStatus
=
do
logStatus
JobLog
{
_scst_succeeded
=
Just
1
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
2
,
_scst_events
=
Just
[]
}
corpusId
<-
view
node_parent_id
<$>
getNode
n
Id
corpusId
<-
view
node_parent_id
<$>
getNode
l
Id
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
...
...
@@ -133,7 +133,7 @@ updateNode _uId nId (UpdateNodeParamsList _mode) logStatus = do
}
_
<-
case
corpusId
of
Just
cId
->
reIndexWith
cId
n
Id
NgramsTerms
(
Set
.
singleton
MapTerm
)
Just
cId
->
reIndexWith
cId
l
Id
NgramsTerms
(
Set
.
singleton
MapTerm
)
Nothing
->
pure
()
pure
JobLog
{
_scst_succeeded
=
Just
3
...
...
src/Gargantext/API/Prelude.hs
View file @
17fbec42
...
...
@@ -11,6 +11,7 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MonoLocalBinds #-}
module
Gargantext.API.Prelude
(
module
Gargantext
.
API
.
Prelude
...
...
@@ -30,10 +31,6 @@ import Crypto.JOSE.Error as Jose
import
Data.Aeson.Types
import
Data.Typeable
import
Data.Validity
import
Servant
import
Servant.Job.Async
import
Servant.Job.Core
(
HasServerError
(
..
),
serverError
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Admin.Types
import
Gargantext.API.Ngrams.Types
...
...
@@ -42,6 +39,9 @@ import Gargantext.Database.Prelude
import
Gargantext.Database.Query.Table.Node.Error
(
NodeError
(
..
),
HasNodeError
(
..
))
import
Gargantext.Database.Query.Tree
import
Gargantext.Prelude
import
Servant
import
Servant.Job.Async
import
Servant.Job.Core
(
HasServerError
(
..
),
serverError
)
class
HasJoseError
e
where
_JoseError
::
Prism'
e
Jose
.
Error
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
17fbec42
...
...
@@ -33,6 +33,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
,
flowCorpus
,
flowAnnuaire
,
insertMasterDocs
,
saveDocNgramsWith
,
getOrMkRoot
,
getOrMk_RootWithCorpus
...
...
@@ -280,6 +281,17 @@ insertMasterDocs c lang hs = do
(
extractNgramsT
$
withLang
lang
documentsWithId
)
documentsWithId
lId
<-
getOrMkList
masterCorpusId
masterUserId
_
<-
saveDocNgramsWith
lId
mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure
ids'
saveDocNgramsWith
::
(
FlowCmdM
env
err
m
)
=>
ListId
->
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
Int
))
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
...
...
@@ -287,10 +299,10 @@ insertMasterDocs c lang hs = do
let
indexedNgrams
=
HashMap
.
mapKeys
(
indexNgrams
terms2id
)
mapNgramsDocs
-- new
lId
<-
getOrMkList
masterCorpusId
masterUserId
mapCgramsId
<-
listInsertDb
lId
toNodeNgramsW'
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
HashMap
.
toList
mapNgramsDocs
-- insertDocNgrams
_return
<-
insertNodeNodeNgrams2
$
catMaybes
[
NodeNodeNgrams2
<$>
Just
nId
...
...
@@ -300,11 +312,11 @@ insertMasterDocs c lang hs = do
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
nId
,
w
)
<-
Map
.
toList
mapNodeIdWeight
]
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
-- to be removed
_
<-
insertDocNgrams
lId
indexedNgrams
pure
ids'
pure
()
------------------------------------------------------------------------
-- TODO Type NodeDocumentUnicised
...
...
src/Gargantext/Database/Schema/Node.hs
View file @
17fbec42
...
...
@@ -138,8 +138,8 @@ type NodeSearchReadNull =
data
NodePolySearch
id
typename
user
I
d
parent
I
d
user
_i
d
parent
_i
d
name
date
hyperdata
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment