Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
35c2d0b0
Verified
Commit
35c2d0b0
authored
Dec 23, 2024
by
Przemyslaw Kaminski
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[ngrams] small simplification to docNgrams function
parent
161ac077
Pipeline
#7171
passed with stages
in 47 minutes and 11 seconds
Changes
3
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
26 additions
and
25 deletions
+26
-25
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+16
-10
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+9
-13
Count.hs
test/Test/Ngrams/Count.hs
+1
-2
No files found.
src/Gargantext/Database/Action/Flow.hs
View file @
35c2d0b0
...
@@ -52,7 +52,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
...
@@ -52,7 +52,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where
where
import
Conduit
import
Conduit
import
Control.Lens
(
to
,
view
,
over
)
import
Control.Lens
(
to
,
view
)
import
Data.Bifunctor
qualified
as
B
import
Data.Bifunctor
qualified
as
B
import
Data.Conduit
qualified
as
C
import
Data.Conduit
qualified
as
C
import
Data.Conduit.Internal
(
zipSources
)
import
Data.Conduit.Internal
(
zipSources
)
...
@@ -105,7 +105,8 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
...
@@ -105,7 +105,8 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodesOnlyId
)
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodesOnlyId
)
import
Gargantext.Database.Query.Table.NodeNgrams
(
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Query.Table.NodeNgrams
(
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
..
),
getOrMkRoot
,
getOrMkRootWithCorpus
,
userFromMkCorpusUser
)
import
Gargantext.Database.Query.Tree.Root
(
MkCorpusUser
(
..
),
getOrMkRoot
,
getOrMkRootWithCorpus
,
userFromMkCorpusUser
)
import
Gargantext.Database.Schema.Ngrams
(
indexNgrams
,
text2ngrams
)
import
Gargantext.Database.Schema.Context
(
context_oid_id
)
import
Gargantext.Database.Schema.Ngrams
(
indexNgrams
,
text2ngrams
,
NgramsId
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
DEBUG
),
MonadLogger
)
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
DEBUG
),
MonadLogger
)
...
@@ -451,10 +452,12 @@ saveDocNgramsWith :: (IsDBCmd env err m)
...
@@ -451,10 +452,12 @@ saveDocNgramsWith :: (IsDBCmd env err m)
->
m
()
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
--printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
--printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
let
mapNgramsDocsNoCount
=
over
(
traverse
.
traverse
.
traverse
)
fst
mapNgramsDocs'
-- let mapNgramsDocsNoCount :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocsNoCount
-- mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
(
terms2id
::
HashMap
.
HashMap
Text
NgramsId
)
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
let
mapNgramsDocs
::
HashMap
.
HashMap
Ngrams
(
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)))
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
-- new
-- new
mapCgramsId
<-
listInsertDb
lId
toNodeNgramsW'
mapCgramsId
<-
listInsertDb
lId
toNodeNgramsW'
...
@@ -505,15 +508,18 @@ reIndexWith cId lId nt lts = do
...
@@ -505,15 +508,18 @@ reIndexWith cId lId nt lts = do
(
docs
::
[
ContextOnlyId
HyperdataDocument
])
<-
selectDocNodesOnlyId
cId
(
docs
::
[
ContextOnlyId
HyperdataDocument
])
<-
selectDocNodesOnlyId
cId
let
let
-- fromListWith (<>)
docNgrams'
::
[([(
MatchedText
,
TermsCount
)],
NodeId
)]
docNgramsMap
::
[[((
MatchedText
,
TermsCount
),
Map
NgramsType
(
Map
NodeId
Int
))]]
docNgrams'
=
map
(
\
doc
->
(
docNgrams
corpusLang
ts
doc
,
doc
^.
context_oid_id
))
docs
docNgramsMap
=
map
(
docNgrams
corpusLang
nt
ts
)
docs
withExtractedNgrams
::
[[(
ExtractedNgrams
,
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)))]]
withExtractedNgrams
::
[[(
ExtractedNgrams
,
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)))]]
withExtractedNgrams
=
withExtractedNgrams
=
map
(
map
(
\
((
k
,
cnt
),
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
over
(
traverse
.
traverse
)
(
\
p
->
(
p
,
cnt
))
v
)))
map
(
\
(
matched
,
nId
)
->
$
docNgramsMap
map
(
\
(
matchedText
,
cnt
)
->
(
SimpleNgrams
(
text2ngrams
matchedText
)
,
Map
.
singleton
nt
$
Map
.
singleton
nId
(
1
,
cnt
)
)
)
matched
)
$
docNgrams'
-- TODO Is this weight always equal to 1?
ngramsByDoc
::
[
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)))]
ngramsByDoc
::
[
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)))]
ngramsByDoc
=
map
(
HashMap
.
fromListWith
(
Map
.
unionWith
(
Map
.
unionWith
(
\
(
_a
,
b
)
(
_a'
,
b'
)
->
(
1
,
b
+
b'
)))))
ngramsByDoc
=
map
(
HashMap
.
fromListWith
(
Map
.
unionWith
(
Map
.
unionWith
(
\
(
_a
,
b
)
(
_a'
,
b'
)
->
(
1
,
b
+
b'
)))))
$
withExtractedNgrams
$
withExtractedNgrams
...
...
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
35c2d0b0
...
@@ -19,7 +19,6 @@ where
...
@@ -19,7 +19,6 @@ where
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
DM
import
Data.Map.Strict
qualified
as
DM
import
Data.Text
qualified
as
T
import
Data.Text
qualified
as
T
import
Gargantext.API.Ngrams.Types
qualified
as
NT
import
Gargantext.API.Ngrams.Types
qualified
as
NT
...
@@ -38,7 +37,7 @@ import Gargantext.Database.Query.Table.ContextNodeNgrams ( ContextNodeNgramsPoly
...
@@ -38,7 +37,7 @@ import Gargantext.Database.Query.Table.ContextNodeNgrams ( ContextNodeNgramsPoly
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Doc
(
add
)
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Doc
(
add
)
import
Gargantext.Database.Query.Table.Node.Document.Insert
(
ReturnId
,
addUniqId
,
insertDb
,
reId
,
reInserted
,
reUniqId
)
import
Gargantext.Database.Query.Table.Node.Document.Insert
(
ReturnId
,
addUniqId
,
insertDb
,
reId
,
reInserted
,
reUniqId
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
(
..
))
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
(
..
))
import
Gargantext.Database.Schema.Context
(
context_oid_hyperdata
,
context_oid_id
)
import
Gargantext.Database.Schema.Context
(
context_oid_hyperdata
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsId
,
NgramsTypeId
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsId
,
NgramsTypeId
(
..
))
import
Gargantext.Database.Types
(
Indexed
(
..
),
index
)
import
Gargantext.Database.Types
(
Indexed
(
..
),
index
)
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -73,20 +72,17 @@ insertDocNgrams lId m = do
...
@@ -73,20 +72,17 @@ insertDocNgrams lId m = do
-- HyperdataDocument, return ngrams that are in this text, with counts.
-- HyperdataDocument, return ngrams that are in this text, with counts.
-- This is a pure function (doesn't use corenlp nor PostgreSQL FTS).
-- This is a pure function (doesn't use corenlp nor PostgreSQL FTS).
docNgrams
::
Lang
docNgrams
::
Lang
->
NgramsType
->
[
NT
.
NgramsTerm
]
->
[
NT
.
NgramsTerm
]
->
ContextOnlyId
HyperdataDocument
->
ContextOnlyId
HyperdataDocument
->
[((
MatchedText
,
TermsCount
),
->
[(
MatchedText
,
TermsCount
)]
Map
NgramsType
(
Map
NodeId
Int
))]
docNgrams
lang
ts
doc
=
docNgrams
lang
nt
ts
doc
=
(
List
.
zip
termsInText
lang
(
buildPatternsWith
lang
ts
)
(
termsInText
lang
(
buildPatternsWith
lang
ts
)
$
T
.
unlines
$
catMaybes
$
T
.
unlines
$
catMaybes
[
doc
^.
context_oid_hyperdata
.
hd_title
[
doc
^.
context_oid_hyperdata
.
hd_title
,
doc
^.
context_oid_hyperdata
.
hd_abstract
,
doc
^.
context_oid_hyperdata
.
hd_abstract
]
]
)
)
(
List
.
cycle
[
DM
.
fromList
$
[(
nt
,
DM
.
singleton
(
doc
^.
context_oid_id
)
1
)]])
documentIdWithNgrams
::
HasNodeError
err
documentIdWithNgrams
::
HasNodeError
err
...
...
test/Test/Ngrams/Count.hs
View file @
35c2d0b0
...
@@ -6,7 +6,6 @@ module Test.Ngrams.Count (tests) where
...
@@ -6,7 +6,6 @@ module Test.Ngrams.Count (tests) where
import
Gargantext.API.Ngrams
import
Gargantext.API.Ngrams
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
buildPatternsWith
,
termsInText
,
Pattern
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
buildPatternsWith
,
termsInText
,
Pattern
(
..
))
import
Gargantext.Database.Action.Flow.Utils
(
docNgrams
)
import
Gargantext.Database.Action.Flow.Utils
(
docNgrams
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
),
emptyHyperdataDocument
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
),
emptyHyperdataDocument
)
...
@@ -105,5 +104,5 @@ testDocNgrams01 = do
...
@@ -105,5 +104,5 @@ testDocNgrams01 = do
let
hd
=
emptyHyperdataDocument
{
_hd_title
=
Just
"hello world"
let
hd
=
emptyHyperdataDocument
{
_hd_title
=
Just
"hello world"
,
_hd_abstract
=
Nothing
}
,
_hd_abstract
=
Nothing
}
let
ctx
=
ContextOnlyId
1
hd
let
ctx
=
ContextOnlyId
1
hd
let
dNgrams
=
docNgrams
EN
NgramsTerms
terms
ctx
let
dNgrams
=
docNgrams
EN
terms
ctx
length
dNgrams
@?=
2
length
dNgrams
@?=
2
Przemyslaw Kaminski
@cgenie
mentioned in commit
03b33383
·
Jan 30, 2025
mentioned in commit
03b33383
mentioned in commit 03b33383dd67c1821a4edb4628923cf7bd039d90
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment