Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8f0fcd75
Commit
8f0fcd75
authored
Jan 07, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPTIM] HashMap Ngrams ...
parent
aaf4b338
Pipeline
#1332
failed with stage
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
63 additions
and
46 deletions
+63
-46
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+12
-10
Utils.hs
src/Gargantext/Data/HashMap/Strict/Utils.hs
+4
-0
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+36
-15
List.hs
src/Gargantext/Database/Action/Flow/List.hs
+0
-14
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+7
-7
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+2
-0
Types.hs
src/Gargantext/Database/Types.hs
+2
-0
No files found.
src/Gargantext/Core/Text/Terms.hs
View file @
8f0fcd75
...
...
@@ -35,15 +35,17 @@ module Gargantext.Core.Text.Terms
where
import
Control.Lens
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
Map
import
Data.Text
(
Text
)
import
Data.Traversable
import
GHC.Base
(
String
)
import
GHC.Generics
(
Generic
)
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
GHC.Base
(
String
)
import
GHC.Generics
(
Generic
)
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
import
Gargantext.Core
import
Gargantext.Core.Flow.Types
...
...
@@ -114,17 +116,17 @@ class ExtractNgramsT h
extractNgramsT
::
HasText
h
=>
TermType
Lang
->
h
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
Hash
Map
Ngrams
(
Map
NgramsType
Int
))
filterNgrams
T
::
Int
->
Map
Ngrams
(
Map
NgramsType
Int
)
->
Map
Ngrams
(
Map
NgramsType
Int
)
filterNgrams
T
s
ms
=
Map
.
fromList
$
map
filter'
$
Map
.
toList
ms
filterNgrams
::
Int
->
Hash
Map
Ngrams
(
Map
NgramsType
Int
)
->
Hash
Map
Ngrams
(
Map
NgramsType
Int
)
filterNgrams
s
=
HashMap
.
mapKeys
filter
where
filter
'
(
ng
,
y
)
|
Text
.
length
(
ng
^.
ngramsTerms
)
<
s
=
(
ng
,
y
)
|
otherwise
=
(
text2ngrams
(
Text
.
take
s
(
ng
^.
ngramsTerms
)),
y
)
filter
ng
|
Text
.
length
(
ng
^.
ngramsTerms
)
<
s
=
ng
|
otherwise
=
text2ngrams
(
Text
.
take
s
(
ng
^.
ngramsTerms
)
)
-- =======================================================
...
...
src/Gargantext/Data/HashMap/Strict/Utils.hs
View file @
8f0fcd75
...
...
@@ -22,6 +22,10 @@ partition p m = (HashMap.filter p m, HashMap.filter (not . p) m)
partitionWithKey
::
(
Ord
a
,
Hashable
k
)
=>
(
k
->
a
->
Bool
)
->
HashMap
k
a
->
(
HashMap
k
a
,
HashMap
k
a
)
partitionWithKey
p
m
=
(
HashMap
.
filterWithKey
p
m
,
HashMap
.
filterWithKey
(
\
k
->
not
.
p
k
)
m
)
mapKeys
::
(
Ord
k2
,
Hashable
k2
)
=>
(
k1
->
k2
)
->
HashMap
k1
a
->
HashMap
k2
a
mapKeys
f
=
HashMap
.
fromList
.
HashMap
.
foldrWithKey
(
\
k
x
xs
->
(
f
k
,
x
)
:
xs
)
[]
------------------------------------------------------------------------
-- getKeyWithMaxValue :: Hashable k => HashMap k a -> Maybe k
getKeysOrderedByValueMaxFirst
::
(
Ord
k
,
Hashable
k
,
Ord
a
)
=>
HashMap
k
a
->
[
k
]
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
8f0fcd75
...
...
@@ -47,8 +47,9 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
import
Control.Lens
((
^.
),
view
,
_Just
,
makeLenses
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Either
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
import
Data.List
(
concat
)
import
qualified
Data.Map
as
Map
import
Data.Map
(
Map
,
lookup
)
import
Data.Maybe
(
catMaybes
)
import
Data.Monoid
...
...
@@ -58,6 +59,9 @@ import Data.Traversable (traverse)
import
Data.Tuple.Extra
(
first
,
second
)
import
GHC.Generics
(
Generic
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
import
qualified
Data.Map
as
Map
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Ext.IMT
(
toSchoolName
)
...
...
@@ -259,27 +263,27 @@ insertMasterDocs c lang hs = do
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs
::
Map
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
))
mapNgramsDocs
::
Hash
Map
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
))
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
(
extractNgramsT
$
withLang
lang
documentsWithId
)
documentsWithId
terms2id
<-
insertNgrams
$
Map
.
keys
mapNgramsDocs
terms2id
<-
insertNgrams
$
Hash
Map
.
keys
mapNgramsDocs
-- to be removed
let
indexedNgrams
=
Map
.
mapKeys
(
indexNgrams
terms2id
)
mapNgramsDocs
let
indexedNgrams
=
Hash
Map
.
mapKeys
(
indexNgrams
terms2id
)
mapNgramsDocs
-- new
lId
<-
getOrMkList
masterCorpusId
masterUserId
mapCgramsId
<-
listInsertDb
lId
toNodeNgramsW'
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
Map
.
toList
mapNgramsDocs
$
Hash
Map
.
toList
mapNgramsDocs
-- insertDocNgrams
_return
<-
insertNodeNodeNgrams2
$
catMaybes
[
NodeNodeNgrams2
<$>
Just
nId
<*>
getCgramsId
mapCgramsId
ngrams_type
(
_ngramsTerms
terms''
)
<*>
Just
(
fromIntegral
w
::
Double
)
|
(
terms''
,
mapNgramsTypes
)
<-
Map
.
toList
mapNgramsDocs
|
(
terms''
,
mapNgramsTypes
)
<-
Hash
Map
.
toList
mapNgramsDocs
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
nId
,
w
)
<-
Map
.
toList
mapNodeIdWeight
]
...
...
@@ -339,40 +343,57 @@ mergeData rs = catMaybes . map toDocumentWithId . Map.toList
------------------------------------------------------------------------
documentIdWithNgrams
::
HasNodeError
err
=>
(
a
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
)))
->
Cmd
err
(
HashMap
b
(
Map
NgramsType
Int
)))
->
[
Indexed
NodeId
a
]
->
Cmd
err
[
DocumentIdWithNgrams
a
]
->
Cmd
err
[
DocumentIdWithNgrams
a
b
]
documentIdWithNgrams
f
=
traverse
toDocumentIdWithNgrams
where
toDocumentIdWithNgrams
d
=
do
e
<-
f
$
_unIndex
d
pure
$
DocumentIdWithNgrams
d
e
-- | TODO check optimization
mapNodeIdNgrams
::
(
Ord
b
,
Hashable
b
)
=>
[
DocumentIdWithNgrams
a
b
]
->
HashMap
b
(
Map
NgramsType
(
Map
NodeId
Int
)
)
mapNodeIdNgrams
=
HashMap
.
unionsWith
(
Map
.
unionWith
(
Map
.
unionWith
(
+
)))
.
fmap
f
where
f
::
DocumentIdWithNgrams
a
b
->
HashMap
b
(
Map
NgramsType
(
Map
NodeId
Int
))
f
d
=
fmap
(
fmap
(
Map
.
singleton
nId
))
$
documentNgrams
d
where
nId
=
_index
$
documentWithId
d
------------------------------------------------------------------------
instance
ExtractNgramsT
HyperdataContact
where
extractNgramsT
l
hc
=
filterNgrams
T
255
<$>
extract
l
hc
extractNgramsT
l
hc
=
filterNgrams
255
<$>
extract
l
hc
where
extract
::
TermType
Lang
->
HyperdataContact
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
Hash
Map
Ngrams
(
Map
NgramsType
Int
))
extract
_l
hc'
=
do
let
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
\
a
->
[
a
])
$
view
(
hc_who
.
_Just
.
cw_lastName
)
hc'
pure
$
Map
.
fromList
$
[(
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
pure
$
Hash
Map
.
fromList
$
[(
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
instance
ExtractNgramsT
HyperdataDocument
where
extractNgramsT
::
TermType
Lang
->
HyperdataDocument
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT
lang
hd
=
filterNgrams
T
255
<$>
extractNgramsT'
lang
hd
->
Cmd
err
(
Hash
Map
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT
lang
hd
=
filterNgrams
255
<$>
extractNgramsT'
lang
hd
where
extractNgramsT'
::
TermType
Lang
->
HyperdataDocument
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
Hash
Map
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT'
lang'
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
...
...
@@ -391,7 +412,7 @@ instance ExtractNgramsT HyperdataDocument
<$>
concat
<$>
liftBase
(
extractTerms
lang'
$
hasText
doc
)
pure
$
Map
.
fromList
$
[(
source
,
Map
.
singleton
Sources
1
)]
pure
$
Hash
Map
.
fromList
$
[(
source
,
Map
.
singleton
Sources
1
)]
<>
[(
i'
,
Map
.
singleton
Institutes
1
)
|
i'
<-
institutes
]
<>
[(
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
<>
[(
t'
,
Map
.
singleton
NgramsTerms
1
)
|
t'
<-
terms'
]
...
...
src/Gargantext/Database/Action/Flow/List.hs
View file @
8f0fcd75
...
...
@@ -82,20 +82,6 @@ flowList_Tficf' u m nt f = do
-}
-- | TODO check optimization
mapNodeIdNgrams
::
[
DocumentIdWithNgrams
a
]
->
Map
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
)
)
mapNodeIdNgrams
=
Map
.
unionsWith
(
Map
.
unionWith
(
Map
.
unionWith
(
+
)))
.
fmap
f
where
f
::
DocumentIdWithNgrams
a
->
Map
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
))
f
d
=
fmap
(
fmap
(
Map
.
singleton
nId
))
$
documentNgrams
d
where
nId
=
_index
$
documentWithId
d
------------------------------------------------------------------------
flowList_DbRepo
::
FlowCmdM
env
err
m
=>
ListId
...
...
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
8f0fcd75
...
...
@@ -14,6 +14,7 @@ module Gargantext.Database.Action.Flow.Utils
where
import
Data.Map
(
Map
)
import
Data.HashMap.Strict
(
HashMap
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Query.Table.NodeNodeNgrams
...
...
@@ -21,16 +22,15 @@ import Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Types
import
Gargantext.Prelude
import
qualified
Data.Map
as
DM
import
qualified
Data.HashMap.Strict
as
HashMap
type
DocumentWithId
a
=
Indexed
NodeId
a
data
DocumentIdWithNgrams
a
=
data
DocumentIdWithNgrams
a
b
=
DocumentIdWithNgrams
{
documentWithId
::
DocumentWith
Id
a
,
documentNgrams
::
Map
Ngrams
(
Map
NgramsType
Int
)
{
documentWithId
::
Indexed
Node
Id
a
,
documentNgrams
::
HashMap
b
(
Map
NgramsType
Int
)
}
deriving
(
Show
)
docNgrams2nodeNodeNgrams
::
CorpusId
->
DocNgrams
->
NodeNodeNgrams
...
...
@@ -51,11 +51,11 @@ insertDocNgramsOn cId dn =
$
(
map
(
docNgrams2nodeNodeNgrams
cId
)
dn
)
insertDocNgrams
::
CorpusId
->
Map
(
Indexed
Int
Ngrams
)
(
Map
NgramsType
(
Map
NodeId
Int
))
->
Hash
Map
(
Indexed
Int
Ngrams
)
(
Map
NgramsType
(
Map
NodeId
Int
))
->
Cmd
err
Int
insertDocNgrams
cId
m
=
insertDocNgramsOn
cId
[
DocNgrams
n
(
_index
ng
)
(
ngramsTypeId
t
)
(
fromIntegral
i
)
|
(
ng
,
t2n2i
)
<-
DM
.
toList
m
|
(
ng
,
t2n2i
)
<-
HashMap
.
toList
m
,
(
t
,
n2i
)
<-
DM
.
toList
t2n2i
,
(
n
,
i
)
<-
DM
.
toList
n2i
]
...
...
src/Gargantext/Database/Schema/Ngrams.hs
View file @
8f0fcd75
...
...
@@ -146,6 +146,8 @@ data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
}
deriving
(
Generic
,
Show
,
Eq
,
Ord
)
instance
Hashable
Ngrams
makeLenses
''
N
grams
instance
PGS
.
ToRow
Ngrams
where
toRow
(
UnsafeNgrams
t
s
)
=
[
toField
t
,
toField
s
]
...
...
src/Gargantext/Database/Types.hs
View file @
8f0fcd75
...
...
@@ -14,6 +14,7 @@ Portability : POSIX
module
Gargantext.Database.Types
where
import
Data.Hashable
(
Hashable
)
import
Gargantext.Core.Text
(
HasText
(
..
))
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Prelude
...
...
@@ -38,3 +39,4 @@ instance HasText a => HasText (Indexed i a)
where
hasText
(
Indexed
_
a
)
=
hasText
a
instance
(
Hashable
a
,
Hashable
b
)
=>
Hashable
(
Indexed
a
b
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment