Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8852698b
Unverified
Commit
8852698b
authored
Jan 14, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FLOW] Use `Map a (Map NgramsType b)` instead of `Map (NgramsT a) b`
parent
46fafbe8
Pipeline
#126
canceled with stage
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
41 additions
and
34 deletions
+41
-34
Flow.hs
src/Gargantext/Database/Flow.hs
+26
-20
Pairing.hs
src/Gargantext/Database/Flow/Pairing.hs
+10
-10
Utils.hs
src/Gargantext/Database/Flow/Utils.hs
+5
-4
No files found.
src/Gargantext/Database/Flow.hs
View file @
8852698b
...
...
@@ -37,7 +37,7 @@ import Gargantext.Text.Terms (extractTerms)
import
Gargantext.Database.Node.Document.Add
(
add
)
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIdsDoc
,
addUniqIdsContact
,
ToDbData
(
..
))
import
Gargantext.Database.Root
(
getRoot
)
import
Gargantext.Database.Schema.Ngrams
(
insertNgrams
,
Ngrams
(
..
),
Ngrams
T
(
..
),
NgramsIndexed
(
..
),
indexNgramsT
,
NgramsType
(
..
),
text2ngrams
,
ngramsTypeId
)
import
Gargantext.Database.Schema.Ngrams
(
insertNgrams
,
Ngrams
(
..
),
Ngrams
Indexed
(
..
),
indexNgrams
,
NgramsType
(
..
),
text2ngrams
,
ngramsTypeId
)
import
Gargantext.Database.Schema.Node
(
mkRoot
,
mkCorpus
,
getOrMkList
,
mkGraph
,
mkDashboard
,
mkAnnuaire
,
getCorporaWithParentId
,
HasNodeError
,
NodeError
(
..
),
nodeError
)
import
Gargantext.Database.Schema.NodeNgram
(
NodeNgramPoly
(
..
),
insertNodeNgrams
)
import
Gargantext.Database.Schema.NodeNgramsNgrams
(
NodeNgramsNgramsPoly
(
..
),
insertNodeNgramsNgramsNew
)
...
...
@@ -51,7 +51,6 @@ import Gargantext.Prelude
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.Map
as
DM
import
qualified
Data.Set
as
DS
flowCorpus
::
HasNodeError
err
=>
FileFormat
->
FilePath
->
CorpusName
->
Cmd
err
CorpusId
...
...
@@ -114,8 +113,8 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user
let
maps
=
mapNodeIdNgrams
docsWithNgrams
-- printDebug "maps" (maps)
terms2id
<-
insertNgrams
(
DS
.
toList
$
DS
.
map
_ngramsT
(
DM
.
keysSet
maps
))
let
indexedNgrams
=
DM
.
mapKeys
(
indexNgrams
T
terms2id
)
maps
terms2id
<-
insertNgrams
$
DM
.
keys
maps
let
indexedNgrams
=
DM
.
mapKeys
(
indexNgrams
terms2id
)
maps
-- printDebug "inserted ngrams" indexedNgrams
_
<-
insertToNodeNgrams
indexedNgrams
...
...
@@ -226,27 +225,27 @@ mergeData rs = catMaybes . map toDocumentWithId . DM.toList
data
DocumentIdWithNgrams
=
DocumentIdWithNgrams
{
documentWithId
::
!
DocumentWithId
,
document_ngrams
::
!
(
Map
(
NgramsT
Ngrams
)
Int
)
,
document_ngrams
::
!
(
Map
Ngrams
(
Map
NgramsType
Int
)
)
}
deriving
(
Show
)
-- TODO group terms
extractNgramsT
::
HasNodeError
err
=>
HyperdataDocument
->
Cmd
err
(
Map
(
NgramsT
Ngrams
)
Int
)
extractNgramsT
::
HasNodeError
err
=>
HyperdataDocument
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
)
)
extractNgramsT
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
_hyperdataDocument_source
doc
let
institutes
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
splitOn
", "
))
$
_hyperdataDocument_institutes
doc
let
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
", "
)
$
_hyperdataDocument_authors
doc
let
leText
=
catMaybes
[
_hyperdataDocument_title
doc
,
_hyperdataDocument_abstract
doc
]
terms'
<-
map
text2ngrams
<$>
map
(
intercalate
" "
.
_terms_label
)
<$>
concat
<$>
liftIO
(
extractTerms
(
Multi
EN
)
leText
)
pure
$
DM
.
fromList
$
[(
NgramsT
Sources
source
,
1
)]
<>
[(
NgramsT
Institutes
i'
,
1
)
|
i'
<-
institutes
]
<>
[(
NgramsT
Authors
a'
,
1
)
|
a'
<-
authors
]
<>
[(
NgramsT
NgramsTerms
t'
,
1
)
|
t'
<-
terms'
]
pure
$
DM
.
fromList
$
[(
source
,
DM
.
singleton
Sources
1
)]
<>
[(
i'
,
DM
.
singleton
Institutes
1
)
|
i'
<-
institutes
]
<>
[(
a'
,
DM
.
singleton
Authors
1
)
|
a'
<-
authors
]
<>
[(
t'
,
DM
.
singleton
NgramsTerms
1
)
|
t'
<-
terms'
]
documentIdWithNgrams
::
HasNodeError
err
=>
(
HyperdataDocument
->
Cmd
err
(
Map
(
NgramsT
Ngrams
)
Int
))
documentIdWithNgrams
::
HasNodeError
err
=>
(
HyperdataDocument
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
)))
->
[
DocumentWithId
]
->
Cmd
err
[
DocumentIdWithNgrams
]
documentIdWithNgrams
f
=
mapM
toDocumentIdWithNgrams
where
...
...
@@ -255,14 +254,17 @@ documentIdWithNgrams f = mapM toDocumentIdWithNgrams
pure
$
DocumentIdWithNgrams
d
e
-- | TODO check optimization
mapNodeIdNgrams
::
[
DocumentIdWithNgrams
]
->
Map
(
NgramsT
Ngrams
)
(
Map
NodeId
Int
)
mapNodeIdNgrams
ds
=
DM
.
fromListWith
(
DM
.
unionWith
(
+
))
xs
mapNodeIdNgrams
::
[
DocumentIdWithNgrams
]
->
Map
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
)
)
mapNodeIdNgrams
=
DM
.
unionsWith
(
DM
.
unionWith
(
DM
.
unionWith
(
+
)))
.
fmap
f
where
xs
=
[(
ng
,
DM
.
singleton
nId
i
)
|
(
nId
,
n2i'
)
<-
ds'
,
(
ng
,
i
)
<-
DM
.
toList
n2i'
]
ds'
=
(
\
d
->
((
documentId
.
documentWithId
)
d
,
document_ngrams
d
))
<$>
ds
f
::
DocumentIdWithNgrams
->
Map
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
))
f
d
=
fmap
(
fmap
(
DM
.
singleton
nId
))
$
document_ngrams
d
where
nId
=
documentId
$
documentWithId
d
------------------------------------------------------------------------
flowList
::
HasNodeError
err
=>
UserId
->
CorpusId
->
Map
(
NgramsT
NgramsIndexed
)
(
Map
NodeId
Int
)
->
Cmd
err
ListId
flowList
::
HasNodeError
err
=>
UserId
->
CorpusId
->
Map
NgramsIndexed
(
Map
NgramsType
(
Map
NodeId
Int
))
->
Cmd
err
ListId
flowList
uId
cId
ngs
=
do
-- printDebug "ngs:" ngs
lId
<-
getOrMkList
cId
uId
...
...
@@ -303,9 +305,13 @@ insertGroups lId ngrs =
]
------------------------------------------------------------------------
-- TODO: verify NgramsT lost here
ngrams2list
::
Map
(
NgramsT
NgramsIndexed
)
(
Map
NodeId
Int
)
->
[(
ListType
,
(
NgramsType
,
NgramsIndexed
))]
ngrams2list
=
zip
(
repeat
GraphList
)
.
map
(
\
(
NgramsT
ngt
ng
)
->
(
ngt
,
ng
))
.
DM
.
keys
ngrams2list
::
Map
NgramsIndexed
(
Map
NgramsType
a
)
->
[(
ListType
,
(
NgramsType
,
NgramsIndexed
))]
ngrams2list
m
=
[
(
GraphList
,
(
t
,
ng
))
|
(
ng
,
tm
)
<-
DM
.
toList
m
,
t
<-
DM
.
keys
tm
]
-- | TODO: weight of the list could be a probability
insertLists
::
HasNodeError
err
=>
ListId
->
[(
ListType
,
(
NgramsType
,
NgramsIndexed
))]
->
Cmd
err
Int
...
...
src/Gargantext/Database/Flow/Pairing.hs
View file @
8852698b
...
...
@@ -37,7 +37,7 @@ import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
import
Gargantext.Database.Node.Contact
import
Gargantext.Database.Flow.Utils
import
Gargantext.Database.Utils
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Types.Node
(
AnnuaireId
,
CorpusId
,
ContactId
)
import
Gargantext.Database.Types.Node
(
AnnuaireId
,
CorpusId
)
import
Gargantext.Database.Node.Children
import
Gargantext.Core.Types
(
NodeType
(
..
))
...
...
@@ -79,16 +79,16 @@ extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors
authors
=
map
text2ngrams
$
catMaybes
[
view
(
hc_who
.
_Just
.
cw_lastName
)
contact
]
--}
pairMaps
::
Map
(
NgramsT
Ngrams
)
(
Map
ContactId
Int
)
-- NP: notice how this function is no longer specific to the ContactId type
pairMaps
::
Map
(
NgramsT
Ngrams
)
a
->
Map
(
NgramsT
Ngrams
)
NgramsId
->
Map
(
NgramsT
NgramsIndexed
)
(
Map
ContactId
Int
)
pairMaps
m1
m2
=
DM
.
fromList
$
catMaybes
$
map
(
\
(
k
,
n
)
->
(,)
<$>
lookup'
k
m2
<*>
Just
n
)
$
DM
.
toList
m1
where
lookup'
k
@
(
NgramsT
nt
ng
)
m
=
case
DM
.
lookup
k
m
of
Nothing
->
Nothing
Just
nId
->
Just
$
NgramsT
nt
(
NgramsIndexed
ng
nId
)
->
Map
NgramsIndexed
(
Map
NgramsType
a
)
pairMaps
m1
m2
=
DM
.
fromList
[
(
NgramsIndexed
ng
nId
,
DM
.
singleton
nt
n2i
)
|
(
k
@
(
NgramsT
nt
ng
),
n2i
)
<-
DM
.
toList
m1
,
Just
nId
<-
[
DM
.
lookup
k
m2
]
]
-----------------------------------------------------------------------
getNgramsTindexed
::
CorpusId
->
NgramsType
->
Cmd
err
(
Map
(
NgramsT
Ngrams
)
NgramsId
)
...
...
src/Gargantext/Database/Flow/Utils.hs
View file @
8852698b
...
...
@@ -55,9 +55,10 @@ data DocumentIdWithNgrams a =
}
deriving
(
Show
)
-- | TODO for now, list Type is CandidateList, why ?
insertToNodeNgrams
::
Map
(
NgramsT
NgramsIndexed
)
(
Map
NodeId
Int
)
->
Cmd
err
Int
insertToNodeNgrams
m
=
insertNodeNgrams
[
NodeNgram
nId
((
_ngramsId
.
_ngramsT
)
ng
)
((
ngramsTypeId
.
_ngramsType
)
ng
)
(
listTypeId
CandidateList
)
(
fromIntegral
n
)
|
(
ng
,
nId2int
)
<-
DM
.
toList
m
,
(
nId
,
n
)
<-
DM
.
toList
nId2int
insertToNodeNgrams
::
Map
NgramsIndexed
(
Map
NgramsType
(
Map
NodeId
Int
))
->
Cmd
err
Int
insertToNodeNgrams
m
=
insertNodeNgrams
[
NodeNgram
n
(
_ngramsId
ng
)
(
ngramsTypeId
t
)
(
listTypeId
CandidateList
)
(
fromIntegral
i
)
|
(
ng
,
t2n2i
)
<-
DM
.
toList
m
,
(
t
,
n2i
)
<-
DM
.
toList
t2n2i
,
(
n
,
i
)
<-
DM
.
toList
n2i
]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment