Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
146
Issues
146
List
Board
Labels
Milestones
Merge Requests
4
Merge Requests
4
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
35c2d0b0
Verified
Commit
35c2d0b0
authored
Dec 23, 2024
by
Przemyslaw Kaminski
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[ngrams] small simplification to docNgrams function
parent
161ac077
Pipeline
#7171
passed with stages
in 47 minutes and 11 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
26 additions
and
25 deletions
+26
-25
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+16
-10
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+9
-13
Count.hs
test/Test/Ngrams/Count.hs
+1
-2
No files found.
src/Gargantext/Database/Action/Flow.hs
View file @
35c2d0b0
...
...
@@ -52,7 +52,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where
import
Conduit
import
Control.Lens
(
to
,
view
,
over
)
import
Control.Lens
(
to
,
view
)
import
Data.Bifunctor
qualified
as
B
import
Data.Conduit
qualified
as
C
import
Data.Conduit.Internal
(
zipSources
)
...
...
@@ -105,7 +105,8 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodesOnlyId
)
import
Gargantext.Database.Query.Table.NodeNgrams
(
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
..
),
getOrMkRoot
,
getOrMkRootWithCorpus
,
userFromMkCorpusUser
)
import
Gargantext.Database.Schema.Ngrams
(
indexNgrams
,
text2ngrams
)
import
Gargantext.Database.Schema.Context
(
context_oid_id
)
import
Gargantext.Database.Schema.Ngrams
(
indexNgrams
,
text2ngrams
,
NgramsId
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
DEBUG
),
MonadLogger
)
...
...
@@ -451,10 +452,12 @@ saveDocNgramsWith :: (IsDBCmd env err m)
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
--printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
let
mapNgramsDocsNoCount
=
over
(
traverse
.
traverse
.
traverse
)
fst
mapNgramsDocs'
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocsNoCount
-- let mapNgramsDocsNoCount :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
-- mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
(
terms2id
::
HashMap
.
HashMap
Text
NgramsId
)
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
let
mapNgramsDocs
::
HashMap
.
HashMap
Ngrams
(
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)))
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
-- new
mapCgramsId
<-
listInsertDb
lId
toNodeNgramsW'
...
...
@@ -505,15 +508,18 @@ reIndexWith cId lId nt lts = do
(
docs
::
[
ContextOnlyId
HyperdataDocument
])
<-
selectDocNodesOnlyId
cId
let
-- fromListWith (<>)
docNgramsMap
::
[[((
MatchedText
,
TermsCount
),
Map
NgramsType
(
Map
NodeId
Int
))]]
docNgramsMap
=
map
(
docNgrams
corpusLang
nt
ts
)
docs
docNgrams'
::
[([(
MatchedText
,
TermsCount
)],
NodeId
)]
docNgrams'
=
map
(
\
doc
->
(
docNgrams
corpusLang
ts
doc
,
doc
^.
context_oid_id
))
docs
withExtractedNgrams
::
[[(
ExtractedNgrams
,
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)))]]
withExtractedNgrams
=
map
(
map
(
\
((
k
,
cnt
),
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
over
(
traverse
.
traverse
)
(
\
p
->
(
p
,
cnt
))
v
)))
$
docNgramsMap
map
(
\
(
matched
,
nId
)
->
map
(
\
(
matchedText
,
cnt
)
->
(
SimpleNgrams
(
text2ngrams
matchedText
)
,
Map
.
singleton
nt
$
Map
.
singleton
nId
(
1
,
cnt
)
)
)
matched
)
$
docNgrams'
-- TODO Is this weight always equal to 1?
ngramsByDoc
::
[
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)))]
ngramsByDoc
=
map
(
HashMap
.
fromListWith
(
Map
.
unionWith
(
Map
.
unionWith
(
\
(
_a
,
b
)
(
_a'
,
b'
)
->
(
1
,
b
+
b'
)))))
$
withExtractedNgrams
...
...
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
35c2d0b0
...
...
@@ -19,7 +19,6 @@ where
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
DM
import
Data.Text
qualified
as
T
import
Gargantext.API.Ngrams.Types
qualified
as
NT
...
...
@@ -38,7 +37,7 @@ import Gargantext.Database.Query.Table.ContextNodeNgrams ( ContextNodeNgramsPoly
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Doc
(
add
)
import
Gargantext.Database.Query.Table.Node.Document.Insert
(
ReturnId
,
addUniqId
,
insertDb
,
reId
,
reInserted
,
reUniqId
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
(
..
))
import
Gargantext.Database.Schema.Context
(
context_oid_hyperdata
,
context_oid_id
)
import
Gargantext.Database.Schema.Context
(
context_oid_hyperdata
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsId
,
NgramsTypeId
(
..
))
import
Gargantext.Database.Types
(
Indexed
(
..
),
index
)
import
Gargantext.Prelude
...
...
@@ -73,20 +72,17 @@ insertDocNgrams lId m = do
-- HyperdataDocument, return ngrams that are in this text, with counts.
-- This is a pure function (doesn't use corenlp nor PostgreSQL FTS).
docNgrams
::
Lang
->
NgramsType
->
[
NT
.
NgramsTerm
]
->
ContextOnlyId
HyperdataDocument
->
[((
MatchedText
,
TermsCount
),
Map
NgramsType
(
Map
NodeId
Int
))]
docNgrams
lang
nt
ts
doc
=
List
.
zip
(
termsInText
lang
(
buildPatternsWith
lang
ts
)
$
T
.
unlines
$
catMaybes
[
doc
^.
context_oid_hyperdata
.
hd_title
,
doc
^.
context_oid_hyperdata
.
hd_abstract
]
->
[(
MatchedText
,
TermsCount
)]
docNgrams
lang
ts
doc
=
(
termsInText
lang
(
buildPatternsWith
lang
ts
)
$
T
.
unlines
$
catMaybes
[
doc
^.
context_oid_hyperdata
.
hd_title
,
doc
^.
context_oid_hyperdata
.
hd_abstract
]
)
(
List
.
cycle
[
DM
.
fromList
$
[(
nt
,
DM
.
singleton
(
doc
^.
context_oid_id
)
1
)]])
documentIdWithNgrams
::
HasNodeError
err
...
...
test/Test/Ngrams/Count.hs
View file @
35c2d0b0
...
...
@@ -6,7 +6,6 @@ module Test.Ngrams.Count (tests) where
import
Gargantext.API.Ngrams
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
buildPatternsWith
,
termsInText
,
Pattern
(
..
))
import
Gargantext.Database.Action.Flow.Utils
(
docNgrams
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
),
emptyHyperdataDocument
)
...
...
@@ -105,5 +104,5 @@ testDocNgrams01 = do
let
hd
=
emptyHyperdataDocument
{
_hd_title
=
Just
"hello world"
,
_hd_abstract
=
Nothing
}
let
ctx
=
ContextOnlyId
1
hd
let
dNgrams
=
docNgrams
EN
NgramsTerms
terms
ctx
let
dNgrams
=
docNgrams
EN
terms
ctx
length
dNgrams
@?=
2
Przemyslaw Kaminski
@cgenie
mentioned in commit
03b33383
·
Jan 30, 2025
mentioned in commit
03b33383
mentioned in commit 03b33383dd67c1821a4edb4628923cf7bd039d90
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment