Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
175
Issues
175
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
17fbec42
Commit
17fbec42
authored
Apr 30, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] doc ngrams insertion (WIP)
parent
6507a2d2
Pipeline
#1456
failed with stage
Changes
5
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
41 additions
and
34 deletions
+41
-34
List.hs
src/Gargantext/API/Ngrams/List.hs
+16
-21
Update.hs
src/Gargantext/API/Node/Update.hs
+3
-3
Prelude.hs
src/Gargantext/API/Prelude.hs
+4
-4
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+16
-4
Node.hs
src/Gargantext/Database/Schema/Node.hs
+2
-2
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
17fbec42
...
...
@@ -27,18 +27,18 @@ import GHC.Generics (Generic)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
(
RepoCmdM
,
Versioned
(
..
),
NgramsList
,
NgramsTerm
(
..
))
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Prelude
(
GargServer
,
GargNoServer
)
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.Core.Text.Terms
(
ExtractedNgrams
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow
(
saveDocNgramsWith
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Flow.Utils
(
insertDocNgrams
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getOccByNgramsOnlyFast'
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Ngrams
(
insertNgrams
)
import
Gargantext.Database.Query.Table.NodeNode
(
selectDocNodes
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Node
...
...
@@ -102,11 +102,14 @@ post l m = do
-----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here)
reIndexWith
::
CorpusId
reIndexWith
::
(
HasRepo
env
,
FlowCmdM
env
err
m
)
=>
CorpusId
->
ListId
->
NgramsType
->
Set
ListType
->
GargNoServer
()
->
m
()
reIndexWith
cId
lId
nt
lts
=
do
-- Getting [NgramsTerm]
ts
<-
List
.
concat
...
...
@@ -125,20 +128,15 @@ reIndexWith cId lId nt lts = do
Just
n
->
if
n
==
1
then
[
t
]
else
[ ]
)
ts
-- Getting the Id of orphan ngrams
mapTextNgramsId
<-
insertNgrams
(
map
(
text2ngrams
.
unNgramsTerm
)
orphans
)
printDebug
"orphans"
orphans
-- Get all documents of the corpus
docs
<-
selectDocNodes
cId
printDebug
"docs length"
(
List
.
length
docs
)
-- Checking Text documents where orphans match
-- TODO Tests here
let
ngramsByDoc
=
List
.
concat
ngramsByDoc
=
HashMap
.
fromList
$
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
))
$
List
.
concat
$
map
(
\
doc
->
List
.
zip
(
termsInText
(
buildPatterns
$
map
(
\
k
->
([
unNgramsTerm
k
],
[]
))
orphans
)
$
Text
.
unlines
$
catMaybes
...
...
@@ -152,12 +150,9 @@ reIndexWith cId lId nt lts = do
printDebug
"ngramsByDoc"
ngramsByDoc
-- Saving the indexation in database
_
<-
insertDocNgrams
lId
(
HashMap
.
fromList
$
catMaybes
$
map
(
\
(
t
,
d
)
->
(,)
<$>
toIndexedNgrams
mapTextNgramsId
t
<*>
Just
d
)
ngramsByDoc
)
pure
()
_
<-
saveDocNgramsWith
lId
ngramsByDoc
pure
()
-- ngramsByDoc
toIndexedNgrams
::
HashMap
Text
NgramsId
->
Text
->
Maybe
(
Indexed
Int
Ngrams
)
toIndexedNgrams
m
t
=
Indexed
<$>
i
<*>
n
...
...
src/Gargantext/API/Node/Update.hs
View file @
17fbec42
...
...
@@ -118,13 +118,13 @@ updateNode _uId nid1 (LinkNodeReq nt nid2) logStatus = do
,
_scst_events
=
Just
[]
}
updateNode
_uId
n
Id
(
UpdateNodeParamsList
_mode
)
logStatus
=
do
updateNode
_uId
l
Id
(
UpdateNodeParamsList
_mode
)
logStatus
=
do
logStatus
JobLog
{
_scst_succeeded
=
Just
1
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
2
,
_scst_events
=
Just
[]
}
corpusId
<-
view
node_parent_id
<$>
getNode
n
Id
corpusId
<-
view
node_parent_id
<$>
getNode
l
Id
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
...
...
@@ -133,7 +133,7 @@ updateNode _uId nId (UpdateNodeParamsList _mode) logStatus = do
}
_
<-
case
corpusId
of
Just
cId
->
reIndexWith
cId
n
Id
NgramsTerms
(
Set
.
singleton
MapTerm
)
Just
cId
->
reIndexWith
cId
l
Id
NgramsTerms
(
Set
.
singleton
MapTerm
)
Nothing
->
pure
()
pure
JobLog
{
_scst_succeeded
=
Just
3
...
...
src/Gargantext/API/Prelude.hs
View file @
17fbec42
...
...
@@ -11,6 +11,7 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MonoLocalBinds #-}
module
Gargantext.API.Prelude
(
module
Gargantext
.
API
.
Prelude
...
...
@@ -30,10 +31,6 @@ import Crypto.JOSE.Error as Jose
import
Data.Aeson.Types
import
Data.Typeable
import
Data.Validity
import
Servant
import
Servant.Job.Async
import
Servant.Job.Core
(
HasServerError
(
..
),
serverError
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Admin.Types
import
Gargantext.API.Ngrams.Types
...
...
@@ -42,6 +39,9 @@ import Gargantext.Database.Prelude
import
Gargantext.Database.Query.Table.Node.Error
(
NodeError
(
..
),
HasNodeError
(
..
))
import
Gargantext.Database.Query.Tree
import
Gargantext.Prelude
import
Servant
import
Servant.Job.Async
import
Servant.Job.Core
(
HasServerError
(
..
),
serverError
)
class
HasJoseError
e
where
_JoseError
::
Prism'
e
Jose
.
Error
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
17fbec42
...
...
@@ -33,6 +33,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
,
flowCorpus
,
flowAnnuaire
,
insertMasterDocs
,
saveDocNgramsWith
,
getOrMkRoot
,
getOrMk_RootWithCorpus
...
...
@@ -280,6 +281,17 @@ insertMasterDocs c lang hs = do
(
extractNgramsT
$
withLang
lang
documentsWithId
)
documentsWithId
lId
<-
getOrMkList
masterCorpusId
masterUserId
_
<-
saveDocNgramsWith
lId
mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure
ids'
saveDocNgramsWith
::
(
FlowCmdM
env
err
m
)
=>
ListId
->
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
Int
))
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
...
...
@@ -287,10 +299,10 @@ insertMasterDocs c lang hs = do
let
indexedNgrams
=
HashMap
.
mapKeys
(
indexNgrams
terms2id
)
mapNgramsDocs
-- new
lId
<-
getOrMkList
masterCorpusId
masterUserId
mapCgramsId
<-
listInsertDb
lId
toNodeNgramsW'
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
HashMap
.
toList
mapNgramsDocs
-- insertDocNgrams
_return
<-
insertNodeNodeNgrams2
$
catMaybes
[
NodeNodeNgrams2
<$>
Just
nId
...
...
@@ -300,11 +312,11 @@ insertMasterDocs c lang hs = do
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
nId
,
w
)
<-
Map
.
toList
mapNodeIdWeight
]
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
-- to be removed
_
<-
insertDocNgrams
lId
indexedNgrams
pure
ids'
pure
()
------------------------------------------------------------------------
-- TODO Type NodeDocumentUnicised
...
...
src/Gargantext/Database/Schema/Node.hs
View file @
17fbec42
...
...
@@ -138,8 +138,8 @@ type NodeSearchReadNull =
data
NodePolySearch
id
typename
user
I
d
parent
I
d
user
_i
d
parent
_i
d
name
date
hyperdata
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment