Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
146
Issues
146
List
Board
Labels
Milestones
Merge Requests
4
Merge Requests
4
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
85f1dffe
Verified
Commit
85f1dffe
authored
Dec 30, 2024
by
Przemyslaw Kaminski
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[ngrams] refactor opaque Int into TermsWeight newtype
parent
a81ea049
Pipeline
#7176
passed with stages
in 47 minutes and 37 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
28 additions
and
43 deletions
+28
-43
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+2
-2
Types.hs
src/Gargantext/Core/Types.hs
+4
-1
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+5
-5
Extract.hs
src/Gargantext/Database/Action/Flow/Extract.hs
+4
-4
Types.hs
src/Gargantext/Database/Action/Flow/Types.hs
+2
-2
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+11
-29
No files found.
src/Gargantext/Core/Text/Terms.hs
View file @
85f1dffe
...
...
@@ -52,7 +52,7 @@ import Gargantext.Core.Text.Terms.Mono (monoTerms)
import
Gargantext.Core.Text.Terms.Mono.Stem
(
stem
,
StemmingAlgorithm
(
..
))
import
Gargantext.Core.Text.Terms.Mono.Token.En
(
tokenize
)
import
Gargantext.Core.Text.Terms.Multi
(
multiterms
)
import
Gargantext.Core.Types
(
TermsCount
,
POS
,
Terms
(
..
),
TermsWithCount
)
import
Gargantext.Core.Types
(
TermsCount
,
TermsWeight
,
POS
,
Terms
(
..
),
TermsWithCount
)
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Query.Table.Ngrams
(
insertNgrams
)
...
...
@@ -122,7 +122,7 @@ class ExtractNgramsT h
=>
NLPServerConfig
->
TermType
Lang
->
h
->
DBCmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
In
t
,
TermsCount
))
->
DBCmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeigh
t
,
TermsCount
))
------------------------------------------------------------------------
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
enrichedTerms
l
pa
po
(
Terms
{
..
})
=
...
...
src/Gargantext/Core/Types.hs
View file @
85f1dffe
...
...
@@ -20,7 +20,7 @@ commentary with @some markup@.
module
Gargantext.Core.Types
(
module
Gargantext
.
Core
.
Types
.
Main
,
module
Gargantext
.
Database
.
Admin
.
Types
.
Node
,
DebugMode
(
..
),
withDebugMode
,
Term
(
..
),
Terms
(
..
),
TermsCount
,
TermsWithCount
,
Term
(
..
),
Terms
(
..
),
TermsCount
,
TermsW
eight
(
..
),
TermsW
ithCount
,
TokenTag
(
..
),
POS
(
..
),
NER
(
..
)
,
Label
,
Stems
,
HasValidationError
(
..
),
assertValid
...
...
@@ -74,6 +74,9 @@ type TermsCount = Int
type
TermsWithCount
=
(
Terms
,
TermsCount
)
newtype
TermsWeight
=
TermsWeight
{
unTermsWeight
::
Int
}
deriving
newtype
(
Eq
,
Ord
,
Num
,
Show
)
------------------------------------------------------------------------
data
Tag
=
POS
|
NER
deriving
(
Show
,
Eq
)
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
85f1dffe
...
...
@@ -82,7 +82,7 @@ import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
NgramsTerms
),
Ngrams
(
_ngramsTerms
))
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms.Mono.Stem
(
stem
,
StemmingAlgorithm
(
..
))
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
)
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
,
TermsWeight
(
..
)
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
MapTerm
)
)
import
Gargantext.Database.Action.Flow.Extract
()
-- ExtractNgramsT instances
...
...
@@ -430,7 +430,7 @@ insertMasterDocs ncs c lang hs = do
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs'
::
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
In
t
,
TermsCount
)))
mapNgramsDocs'
::
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
TermsWeigh
t
,
TermsCount
)))
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
(
extractNgramsT
ncs
$
withLang
lang
documentsWithId
)
...
...
@@ -445,7 +445,7 @@ insertMasterDocs ncs c lang hs = do
saveDocNgramsWith
::
(
IsDBCmd
env
err
m
)
=>
ListId
->
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
In
t
,
TermsCount
)))
->
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
TermsWeigh
t
,
TermsCount
)))
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
--printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
...
...
@@ -453,7 +453,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
-- mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
(
terms2id
::
HashMap
.
HashMap
Text
NgramsId
)
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
let
mapNgramsDocs
::
HashMap
.
HashMap
Ngrams
(
Map
NgramsType
(
Map
NodeId
(
In
t
,
TermsCount
)))
let
mapNgramsDocs
::
HashMap
.
HashMap
Ngrams
(
Map
NgramsType
(
Map
NodeId
(
TermsWeigh
t
,
TermsCount
)))
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
-- new
...
...
@@ -465,7 +465,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
-- insertDocNgrams
let
ngrams2insert
=
catMaybes
[
ContextNodeNgrams2
(
nodeId2ContextId
nId
)
<$>
getCgramsId
mapCgramsId
ngrams_type
(
_ngramsTerms
terms''
)
<*>
Just
(
fromIntegral
w
::
Double
)
<*>
Just
(
fromIntegral
$
unTermsWeight
w
::
Double
)
|
(
terms''
,
mapNgramsTypes
)
<-
HashMap
.
toList
mapNgramsDocs
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
nId
,
(
w
,
_cnt
))
<-
Map
.
toList
mapNodeIdWeight
...
...
src/Gargantext/Database/Action/Flow/Extract.hs
View file @
85f1dffe
...
...
@@ -25,7 +25,7 @@ import Gargantext.Core.Text (HasText(..))
import
Gargantext.Core.Text.Corpus.Parsers
(
splitOn
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractNgramsT
,
ExtractedNgrams
(
..
),
TermType
,
cleanExtractedNgrams
,
enrichedTerms
,
extractNgramsT
,
extractTerms
,
tt_lang
)
import
Gargantext.Core.Types
(
POS
(
NP
),
TermsCount
)
import
Gargantext.Core.Types
(
POS
(
NP
),
TermsCount
,
TermsWeight
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
,
cw_lastName
,
hc_who
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
,
hd_authors
,
hd_bdd
,
hd_institutes
,
hd_source
)
import
Gargantext.Database.Admin.Types.Node
(
Node
)
...
...
@@ -42,7 +42,7 @@ instance ExtractNgramsT HyperdataContact
extractNgramsT
_ncs
l
hc
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extract
l
hc
where
extract
::
TermType
Lang
->
HyperdataContact
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
In
t
,
TermsCount
))
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeigh
t
,
TermsCount
))
extract
_l
hc'
=
do
let
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
\
a
->
[
a
])
...
...
@@ -59,11 +59,11 @@ instance ExtractNgramsT HyperdataDocument
extractNgramsT
::
NLPServerConfig
->
TermType
Lang
->
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
In
t
,
TermsCount
))
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeigh
t
,
TermsCount
))
extractNgramsT
ncs
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
hd
where
extractNgramsT'
::
HyperdataDocument
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
In
t
,
TermsCount
))
->
DBCmd
err
(
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
TermsWeigh
t
,
TermsCount
))
extractNgramsT'
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
...
...
src/Gargantext/Database/Action/Flow/Types.hs
View file @
85f1dffe
...
...
@@ -26,7 +26,7 @@ import Gargantext.Core.Text ( HasText )
import
Gargantext.API.Admin.Orchestrator.Types
qualified
as
API
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractNgramsT
)
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
)
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
,
TermsWeight
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
...
...
@@ -67,7 +67,7 @@ type FlowInsertDB a = ( AddUniqId a
data
DocumentIdWithNgrams
a
b
=
DocumentIdWithNgrams
{
documentWithId
::
Indexed
NodeId
a
,
documentNgrams
::
HashMap
b
(
Map
NgramsType
In
t
,
TermsCount
)
,
documentNgrams
::
HashMap
b
(
Map
NgramsType
TermsWeigh
t
,
TermsCount
)
}
deriving
(
Show
)
...
...
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
85f1dffe
...
...
@@ -28,7 +28,7 @@ import Gargantext.Core.Flow.Types (UniqId, uniqId)
import
Gargantext.Core.Text.Ngrams
(
Ngrams
,
NgramsType
)
import
Gargantext.Core.Text.Terms
(
ExtractedNgrams
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
MatchedText
,
buildPatternsWith
,
termsInText
)
import
Gargantext.Core.Types
(
TermsCount
)
import
Gargantext.Core.Types
(
TermsCount
,
TermsWeight
(
..
)
)
import
Gargantext.Core.Utils
(
addTuples
)
import
Gargantext.Data.HashMap.Strict.Utils
qualified
as
HashMap
import
Gargantext.Database.Action.Flow.Types
(
DocumentIdWithNgrams
(
..
),
FlowInsertDB
)
...
...
@@ -47,7 +47,7 @@ import Gargantext.Prelude.Crypto.Hash (Hash)
insertDocNgrams
::
ListId
->
HashMap
(
Indexed
NgramsId
Ngrams
)
(
Map
NgramsType
(
Map
DocId
(
In
t
,
TermsCount
)))
->
HashMap
(
Indexed
NgramsId
Ngrams
)
(
Map
NgramsType
(
Map
DocId
(
TermsWeigh
t
,
TermsCount
)))
->
DBCmd
err
Int
insertDocNgrams
lId
m
=
do
-- printDebug "[insertDocNgrams] ns" ns
...
...
@@ -56,11 +56,11 @@ insertDocNgrams lId m = do
ns
=
[
ContextNodeNgrams
(
nodeId2ContextId
docId
)
lId
(
ng
^.
index
)
(
NgramsTypeId
$
toDBid
t
)
(
fromIntegral
i
)
(
fromIntegral
$
unTermsWeight
w
)
cnt
|
(
ng
,
t2n2i
)
<-
HashMap
.
toList
m
,
(
t
,
n2i
)
<-
DM
.
toList
t2n2i
,
(
docId
,
(
i
,
cnt
))
<-
DM
.
toList
n2i
,
(
docId
,
(
w
,
cnt
))
<-
DM
.
toList
n2i
]
-- [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})]
...
...
@@ -88,8 +88,8 @@ docNgrams lang ts doc =
documentIdWithNgrams
::
HasNodeError
err
=>
(
a
->
DBCmd
err
(
HashMap
.
HashMap
b
(
Map
NgramsType
Int
,
TermsCount
))
)
=>
(
a
->
DBCmd
err
(
HashMap
.
HashMap
b
(
Map
NgramsType
TermsWeight
,
TermsCount
))
)
->
[
Indexed
NodeId
a
]
->
DBCmd
err
[
DocumentIdWithNgrams
a
b
]
documentIdWithNgrams
f
=
traverse
toDocumentIdWithNgrams
...
...
@@ -104,7 +104,7 @@ mapNodeIdNgrams :: (Ord b, Hashable b)
=>
[
DocumentIdWithNgrams
a
b
]
->
HashMap
.
HashMap
b
(
Map
NgramsType
(
Map
NodeId
(
In
t
,
TermsCount
))
(
Map
NodeId
(
TermsWeigh
t
,
TermsCount
))
)
mapNodeIdNgrams
=
HashMap
.
unionsWith
(
DM
.
unionWith
(
DM
.
unionWith
addTuples
))
.
fmap
f
where
...
...
@@ -113,8 +113,8 @@ mapNodeIdNgrams = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . f
-- for it (which is the number of times the terms appears in a
-- document) is copied over to all its types.
f
::
DocumentIdWithNgrams
a
b
->
HashMap
.
HashMap
b
(
Map
NgramsType
(
Map
NodeId
(
In
t
,
TermsCount
)))
f
d
=
fmap
(
\
(
ngramsTypeMap
,
cnt
)
->
fmap
(
\
i
->
DM
.
singleton
nId
(
i
,
cnt
))
ngramsTypeMap
)
$
documentNgrams
d
->
HashMap
.
HashMap
b
(
Map
NgramsType
(
Map
NodeId
(
TermsWeigh
t
,
TermsCount
)))
f
d
=
fmap
(
\
(
ngramsTypeMap
,
cnt
)
->
fmap
(
\
w
->
DM
.
singleton
nId
(
w
,
cnt
))
ngramsTypeMap
)
$
documentNgrams
d
where
nId
=
_index
$
documentWithId
d
...
...
@@ -192,35 +192,17 @@ ngramsByDoc :: Lang
->
NgramsType
->
[
NT
.
NgramsTerm
]
->
[
ContextOnlyId
HyperdataDocument
]
->
[
HashMap
.
HashMap
ExtractedNgrams
(
DM
.
Map
NgramsType
(
Map
NodeId
(
In
t
,
TermsCount
)))]
->
[
HashMap
.
HashMap
ExtractedNgrams
(
DM
.
Map
NgramsType
(
Map
NodeId
(
TermsWeigh
t
,
TermsCount
)))]
ngramsByDoc
l
nt
ts
docs
=
ngramsByDoc'
l
nt
ts
<$>
docs
-- | Given list of terms and a document, produce a map for this doc's
-- terms count and weights. Notice that the weight is always 1 here.
-- ngramsByDoc' :: Lang
-- -> NgramsType
-- -> [NT.NgramsTerm]
-- -> ContextOnlyId HyperdataDocument
-- -> HashMap.HashMap ExtractedNgrams (DM.Map NgramsType (Map NodeId (Int, TermsCount)))
-- ngramsByDoc' l nt ts doc =
-- HashMap.fromListWith (DM.unionWith (DM.unionWith (\(_a,b) (_a',b') -> (1,b+b')))) withExtractedNgrams
-- where
-- _docNgrams' :: ([(MatchedText, TermsCount)], NodeId)
-- _docNgrams'@(matched, nId) = (docNgrams l ts doc, doc ^. context_oid_id)
-- withExtractedNgrams :: [(ExtractedNgrams, Map NgramsType (Map NodeId (Int, TermsCount)))]
-- withExtractedNgrams =
-- map (\(matchedText, cnt) ->
-- ( SimpleNgrams (text2ngrams matchedText)
-- , DM.singleton nt $ DM.singleton nId (1, cnt) ) ) matched
ngramsByDoc'
::
Lang
->
NgramsType
->
[
NT
.
NgramsTerm
]
->
ContextOnlyId
HyperdataDocument
->
HashMap
.
HashMap
ExtractedNgrams
(
DM
.
Map
NgramsType
(
Map
NodeId
(
In
t
,
TermsCount
)))
->
HashMap
.
HashMap
ExtractedNgrams
(
DM
.
Map
NgramsType
(
Map
NodeId
(
TermsWeigh
t
,
TermsCount
)))
ngramsByDoc'
l
nt
ts
doc
=
HashMap
.
map
(
\
cnt
->
DM
.
singleton
nt
$
DM
.
singleton
nId
(
1
,
cnt
))
extractedMap
where
...
...
Przemyslaw Kaminski
@cgenie
mentioned in commit
03b33383
·
Jan 30, 2025
mentioned in commit
03b33383
mentioned in commit 03b33383dd67c1821a4edb4628923cf7bd039d90
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment