Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
149
Issues
149
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8f0fcd75
Commit
8f0fcd75
authored
Jan 07, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPTIM] HashMap Ngrams ...
parent
aaf4b338
Pipeline
#1332
failed with stage
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
63 additions
and
46 deletions
+63
-46
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+12
-10
Utils.hs
src/Gargantext/Data/HashMap/Strict/Utils.hs
+4
-0
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+36
-15
List.hs
src/Gargantext/Database/Action/Flow/List.hs
+0
-14
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+7
-7
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+2
-0
Types.hs
src/Gargantext/Database/Types.hs
+2
-0
No files found.
src/Gargantext/Core/Text/Terms.hs
View file @
8f0fcd75
...
...
@@ -35,15 +35,17 @@ module Gargantext.Core.Text.Terms
where
import
Control.Lens
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
Map
import
Data.Text
(
Text
)
import
Data.Traversable
import
GHC.Base
(
String
)
import
GHC.Generics
(
Generic
)
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
GHC.Base
(
String
)
import
GHC.Generics
(
Generic
)
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
import
Gargantext.Core
import
Gargantext.Core.Flow.Types
...
...
@@ -114,17 +116,17 @@ class ExtractNgramsT h
extractNgramsT
::
HasText
h
=>
TermType
Lang
->
h
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
Hash
Map
Ngrams
(
Map
NgramsType
Int
))
filterNgrams
T
::
Int
->
Map
Ngrams
(
Map
NgramsType
Int
)
->
Map
Ngrams
(
Map
NgramsType
Int
)
filterNgrams
T
s
ms
=
Map
.
fromList
$
map
filter'
$
Map
.
toList
ms
filterNgrams
::
Int
->
Hash
Map
Ngrams
(
Map
NgramsType
Int
)
->
Hash
Map
Ngrams
(
Map
NgramsType
Int
)
filterNgrams
s
=
HashMap
.
mapKeys
filter
where
filter
'
(
ng
,
y
)
|
Text
.
length
(
ng
^.
ngramsTerms
)
<
s
=
(
ng
,
y
)
|
otherwise
=
(
text2ngrams
(
Text
.
take
s
(
ng
^.
ngramsTerms
)),
y
)
filter
ng
|
Text
.
length
(
ng
^.
ngramsTerms
)
<
s
=
ng
|
otherwise
=
text2ngrams
(
Text
.
take
s
(
ng
^.
ngramsTerms
)
)
-- =======================================================
...
...
src/Gargantext/Data/HashMap/Strict/Utils.hs
View file @
8f0fcd75
...
...
@@ -22,6 +22,10 @@ partition p m = (HashMap.filter p m, HashMap.filter (not . p) m)
partitionWithKey
::
(
Ord
a
,
Hashable
k
)
=>
(
k
->
a
->
Bool
)
->
HashMap
k
a
->
(
HashMap
k
a
,
HashMap
k
a
)
partitionWithKey
p
m
=
(
HashMap
.
filterWithKey
p
m
,
HashMap
.
filterWithKey
(
\
k
->
not
.
p
k
)
m
)
mapKeys
::
(
Ord
k2
,
Hashable
k2
)
=>
(
k1
->
k2
)
->
HashMap
k1
a
->
HashMap
k2
a
mapKeys
f
=
HashMap
.
fromList
.
HashMap
.
foldrWithKey
(
\
k
x
xs
->
(
f
k
,
x
)
:
xs
)
[]
------------------------------------------------------------------------
-- getKeyWithMaxValue :: Hashable k => HashMap k a -> Maybe k
getKeysOrderedByValueMaxFirst
::
(
Ord
k
,
Hashable
k
,
Ord
a
)
=>
HashMap
k
a
->
[
k
]
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
8f0fcd75
...
...
@@ -47,8 +47,9 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
import
Control.Lens
((
^.
),
view
,
_Just
,
makeLenses
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Either
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
import
Data.List
(
concat
)
import
qualified
Data.Map
as
Map
import
Data.Map
(
Map
,
lookup
)
import
Data.Maybe
(
catMaybes
)
import
Data.Monoid
...
...
@@ -58,6 +59,9 @@ import Data.Traversable (traverse)
import
Data.Tuple.Extra
(
first
,
second
)
import
GHC.Generics
(
Generic
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
import
qualified
Data.Map
as
Map
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Ext.IMT
(
toSchoolName
)
...
...
@@ -259,27 +263,27 @@ insertMasterDocs c lang hs = do
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs
::
Map
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
))
mapNgramsDocs
::
Hash
Map
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
))
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
(
extractNgramsT
$
withLang
lang
documentsWithId
)
documentsWithId
terms2id
<-
insertNgrams
$
Map
.
keys
mapNgramsDocs
terms2id
<-
insertNgrams
$
Hash
Map
.
keys
mapNgramsDocs
-- to be removed
let
indexedNgrams
=
Map
.
mapKeys
(
indexNgrams
terms2id
)
mapNgramsDocs
let
indexedNgrams
=
Hash
Map
.
mapKeys
(
indexNgrams
terms2id
)
mapNgramsDocs
-- new
lId
<-
getOrMkList
masterCorpusId
masterUserId
mapCgramsId
<-
listInsertDb
lId
toNodeNgramsW'
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
Map
.
toList
mapNgramsDocs
$
Hash
Map
.
toList
mapNgramsDocs
-- insertDocNgrams
_return
<-
insertNodeNodeNgrams2
$
catMaybes
[
NodeNodeNgrams2
<$>
Just
nId
<*>
getCgramsId
mapCgramsId
ngrams_type
(
_ngramsTerms
terms''
)
<*>
Just
(
fromIntegral
w
::
Double
)
|
(
terms''
,
mapNgramsTypes
)
<-
Map
.
toList
mapNgramsDocs
|
(
terms''
,
mapNgramsTypes
)
<-
Hash
Map
.
toList
mapNgramsDocs
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
nId
,
w
)
<-
Map
.
toList
mapNodeIdWeight
]
...
...
@@ -339,40 +343,57 @@ mergeData rs = catMaybes . map toDocumentWithId . Map.toList
------------------------------------------------------------------------
documentIdWithNgrams
::
HasNodeError
err
=>
(
a
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
)))
->
Cmd
err
(
HashMap
b
(
Map
NgramsType
Int
)))
->
[
Indexed
NodeId
a
]
->
Cmd
err
[
DocumentIdWithNgrams
a
]
->
Cmd
err
[
DocumentIdWithNgrams
a
b
]
documentIdWithNgrams
f
=
traverse
toDocumentIdWithNgrams
where
toDocumentIdWithNgrams
d
=
do
e
<-
f
$
_unIndex
d
pure
$
DocumentIdWithNgrams
d
e
-- | TODO check optimization
mapNodeIdNgrams
::
(
Ord
b
,
Hashable
b
)
=>
[
DocumentIdWithNgrams
a
b
]
->
HashMap
b
(
Map
NgramsType
(
Map
NodeId
Int
)
)
mapNodeIdNgrams
=
HashMap
.
unionsWith
(
Map
.
unionWith
(
Map
.
unionWith
(
+
)))
.
fmap
f
where
f
::
DocumentIdWithNgrams
a
b
->
HashMap
b
(
Map
NgramsType
(
Map
NodeId
Int
))
f
d
=
fmap
(
fmap
(
Map
.
singleton
nId
))
$
documentNgrams
d
where
nId
=
_index
$
documentWithId
d
------------------------------------------------------------------------
instance
ExtractNgramsT
HyperdataContact
where
extractNgramsT
l
hc
=
filterNgrams
T
255
<$>
extract
l
hc
extractNgramsT
l
hc
=
filterNgrams
255
<$>
extract
l
hc
where
extract
::
TermType
Lang
->
HyperdataContact
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
Hash
Map
Ngrams
(
Map
NgramsType
Int
))
extract
_l
hc'
=
do
let
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
\
a
->
[
a
])
$
view
(
hc_who
.
_Just
.
cw_lastName
)
hc'
pure
$
Map
.
fromList
$
[(
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
pure
$
Hash
Map
.
fromList
$
[(
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
instance
ExtractNgramsT
HyperdataDocument
where
extractNgramsT
::
TermType
Lang
->
HyperdataDocument
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT
lang
hd
=
filterNgrams
T
255
<$>
extractNgramsT'
lang
hd
->
Cmd
err
(
Hash
Map
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT
lang
hd
=
filterNgrams
255
<$>
extractNgramsT'
lang
hd
where
extractNgramsT'
::
TermType
Lang
->
HyperdataDocument
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
Hash
Map
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT'
lang'
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
...
...
@@ -391,7 +412,7 @@ instance ExtractNgramsT HyperdataDocument
<$>
concat
<$>
liftBase
(
extractTerms
lang'
$
hasText
doc
)
pure
$
Map
.
fromList
$
[(
source
,
Map
.
singleton
Sources
1
)]
pure
$
Hash
Map
.
fromList
$
[(
source
,
Map
.
singleton
Sources
1
)]
<>
[(
i'
,
Map
.
singleton
Institutes
1
)
|
i'
<-
institutes
]
<>
[(
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
<>
[(
t'
,
Map
.
singleton
NgramsTerms
1
)
|
t'
<-
terms'
]
...
...
src/Gargantext/Database/Action/Flow/List.hs
View file @
8f0fcd75
...
...
@@ -82,20 +82,6 @@ flowList_Tficf' u m nt f = do
-}
-- | TODO check optimization
mapNodeIdNgrams
::
[
DocumentIdWithNgrams
a
]
->
Map
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
)
)
mapNodeIdNgrams
=
Map
.
unionsWith
(
Map
.
unionWith
(
Map
.
unionWith
(
+
)))
.
fmap
f
where
f
::
DocumentIdWithNgrams
a
->
Map
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
))
f
d
=
fmap
(
fmap
(
Map
.
singleton
nId
))
$
documentNgrams
d
where
nId
=
_index
$
documentWithId
d
------------------------------------------------------------------------
flowList_DbRepo
::
FlowCmdM
env
err
m
=>
ListId
...
...
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
8f0fcd75
...
...
@@ -14,6 +14,7 @@ module Gargantext.Database.Action.Flow.Utils
where
import
Data.Map
(
Map
)
import
Data.HashMap.Strict
(
HashMap
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Query.Table.NodeNodeNgrams
...
...
@@ -21,16 +22,15 @@ import Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Types
import
Gargantext.Prelude
import
qualified
Data.Map
as
DM
import
qualified
Data.HashMap.Strict
as
HashMap
type
DocumentWithId
a
=
Indexed
NodeId
a
data
DocumentIdWithNgrams
a
=
data
DocumentIdWithNgrams
a
b
=
DocumentIdWithNgrams
{
documentWithId
::
DocumentWith
Id
a
,
documentNgrams
::
Map
Ngrams
(
Map
NgramsType
Int
)
{
documentWithId
::
Indexed
Node
Id
a
,
documentNgrams
::
HashMap
b
(
Map
NgramsType
Int
)
}
deriving
(
Show
)
docNgrams2nodeNodeNgrams
::
CorpusId
->
DocNgrams
->
NodeNodeNgrams
...
...
@@ -51,11 +51,11 @@ insertDocNgramsOn cId dn =
$
(
map
(
docNgrams2nodeNodeNgrams
cId
)
dn
)
insertDocNgrams
::
CorpusId
->
Map
(
Indexed
Int
Ngrams
)
(
Map
NgramsType
(
Map
NodeId
Int
))
->
Hash
Map
(
Indexed
Int
Ngrams
)
(
Map
NgramsType
(
Map
NodeId
Int
))
->
Cmd
err
Int
insertDocNgrams
cId
m
=
insertDocNgramsOn
cId
[
DocNgrams
n
(
_index
ng
)
(
ngramsTypeId
t
)
(
fromIntegral
i
)
|
(
ng
,
t2n2i
)
<-
DM
.
toList
m
|
(
ng
,
t2n2i
)
<-
HashMap
.
toList
m
,
(
t
,
n2i
)
<-
DM
.
toList
t2n2i
,
(
n
,
i
)
<-
DM
.
toList
n2i
]
...
...
src/Gargantext/Database/Schema/Ngrams.hs
View file @
8f0fcd75
...
...
@@ -146,6 +146,8 @@ data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
}
deriving
(
Generic
,
Show
,
Eq
,
Ord
)
instance
Hashable
Ngrams
makeLenses
''
N
grams
instance
PGS
.
ToRow
Ngrams
where
toRow
(
UnsafeNgrams
t
s
)
=
[
toField
t
,
toField
s
]
...
...
src/Gargantext/Database/Types.hs
View file @
8f0fcd75
...
...
@@ -14,6 +14,7 @@ Portability : POSIX
module
Gargantext.Database.Types
where
import
Data.Hashable
(
Hashable
)
import
Gargantext.Core.Text
(
HasText
(
..
))
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Prelude
...
...
@@ -38,3 +39,4 @@ instance HasText a => HasText (Indexed i a)
where
hasText
(
Indexed
_
a
)
=
hasText
a
instance
(
Hashable
a
,
Hashable
b
)
=>
Hashable
(
Indexed
a
b
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment