Przemyslaw Kaminski / haskell-gargantext / Commits / f57909bb

Commit f57909bb authored Feb 21, 2019 by Alexandre Delanoë

[Cosmetics] before new flow.

parent 93bf6e56

Showing 1 changed file with 66 additions and 22 deletions

src/Gargantext/Database/Flow.hs  (+66 / -22)
@@ -41,7 +41,7 @@ import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
  import Gargantext.Database.Metrics.TFICF (getTficf)
  import Gargantext.Text.Terms (extractTerms)
  import Gargantext.Text.Metrics.TFICF (Tficf(..))
- --import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
+ import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
  import Gargantext.Database.Node.Document.Add (add)
  import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
  import Gargantext.Database.Root (getRoot)
@@ -71,7 +71,6 @@ type FlowCmdM env err m =
      , HasRepoVar env
      )

  flowCorpus :: FlowCmdM env ServantErr m
             => FileFormat -> FilePath -> CorpusName -> m CorpusId
  flowCorpus ff fp cName = do
@@ -106,7 +105,12 @@ flowCorpus' :: FlowCmdM env err m
              => NodeType -> [HyperdataDocument]
              -> ([ReturnId], UserId, CorpusId, UserId, CorpusId)
              -> m CorpusId
  flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, userId,userCorpusId) = do
  --------------------------------------------------
    let documentsWithId = mergeData (toInserted ids) (toInsert hyperdataDocuments)
@@ -124,8 +128,8 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user
    -- List Ngrams Flow
    -- get elements
    -- filter by TFICF
-   let ngs = ngrams2list' indexedNgrams
-   --let ngs = getNgramsElementsWithParentNodeId masterCorpusId
+   --let ngs = ngrams2list' indexedNgrams
+   ngs <- getNgramsElementsWithParentNodeId masterCorpusId
    _masterListId <- flowList masterUserId masterCorpusId ngs
    _userListId <- flowListUser userId userCorpusId ngs 100
  --------------------------------------------------
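Note on this hunk: the ngrams list now comes from the master corpus via getNgramsElementsWithParentNodeId instead of the locally indexed ngrams, and the "filter by TFICF" step is still only a comment. A minimal, hypothetical sketch of what such a filter could look like (plain score-based top-n selection; the names and scores are illustrative, not the project's API — the real flow would presumably take its scores from getTficf):

import Data.List (sortOn)
import Data.Ord  (Down(..))

-- Hypothetical "filter by TFICF" step: keep the n terms with the highest score.
topNByScore :: Int -> [(String, Double)] -> [(String, Double)]
topNByScore n = take n . sortOn (Down . snd)

main :: IO ()
main = print (topNByScore 2 [("cell", 0.4), ("brain", 0.9), ("the", 0.1)])
-- [("brain",0.9),("cell",0.4)]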
@@ -138,13 +142,21 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user
    pure userCorpusId
    -- del [corpusId2, corpusId]

  flowCorpus' NodeAnnuaire _hyperdataDocuments (_ids,_masterUserId,_masterCorpusId,_userId,_userCorpusId) = undefined
  flowCorpus' _ _ _ = undefined

  type CorpusName = Text

  subFlowCorpus :: HasNodeError err => Username -> CorpusName -> Cmd err (UserId, RootId, CorpusId)
  subFlowCorpus username cName = do
    maybeUserId <- getUser username
    userId <- case maybeUserId of
@@ -187,7 +199,7 @@ subFlowCorpus username cName = do
  toInsert :: [HyperdataDocument] -> Map HashId HyperdataDocument
  toInsert = DM.fromList . map (\d -> (maybe err identity (_hyperdataDocument_uniqId d), d))
    where
-     err = "Database.Flow.toInsert"
+     err = "[ERROR] Database.Flow.toInsert"

  toInserted :: [ReturnId] -> Map HashId ReturnId
  toInserted = DM.fromList . map (\r -> (reUniqId r, r))
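For context, toInsert keys the incoming documents by their unique hash so they can later be matched with the ids the database returns; a missing hash falls back to the error string. A self-contained sketch of that keying pattern with simplified, hypothetical types (not the project's HyperdataDocument):

import qualified Data.Map.Strict as DM

-- Hypothetical, simplified stand-in for HyperdataDocument.
data Doc = Doc { docUniqId :: Maybe String, docTitle :: String } deriving Show

-- Key documents by their unique hash, falling back to an error marker when
-- the hash is missing (the `maybe err identity` pattern used by toInsert).
keyByUniqId :: [Doc] -> DM.Map String Doc
keyByUniqId = DM.fromList . map (\d -> (maybe "[ERROR] missing uniqId" id (docUniqId d), d))

main :: IO ()
main = print (keyByUniqId [Doc (Just "h1") "a", Doc Nothing "b"])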
@@ -198,7 +210,9 @@ data DocumentWithId =
    , documentData :: !HyperdataDocument
    } deriving (Show)

  mergeData :: Map HashId ReturnId -> Map HashId HyperdataDocument -> [DocumentWithId]
  mergeData rs = catMaybes . map toDocumentWithId . DM.toList
    where
      toDocumentWithId (hash,hpd) =
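mergeData then pairs the ids returned by the database with the documents through the shared hash; entries that cannot be matched are dropped via catMaybes. The same effect can be sketched with Data.Map.intersectionWith on simplified, hypothetical types:

import qualified Data.Map.Strict as DM

type HashId = String

-- Pair each returned id with the document that produced it; hashes missing
-- on either side are dropped, which is what mergeData's catMaybes achieves.
mergeByHash :: DM.Map HashId Int -> DM.Map HashId String -> [(Int, String)]
mergeByHash ids docs = DM.elems (DM.intersectionWith (,) ids docs)

main :: IO ()
main = print $ mergeByHash (DM.fromList [("h1", 1), ("h2", 2)])
                           (DM.fromList [("h1", "doc A"), ("h3", "doc C")])
-- [(1,"doc A")]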
@@ -213,13 +227,30 @@ data DocumentIdWithNgrams =
    } deriving (Show)

  -- TODO group terms
  extractNgramsT :: HasNodeError err => HyperdataDocument -> Cmd err (Map Ngrams (Map NgramsType Int))
  extractNgramsT doc = do
-   let source = text2ngrams $ maybe "Nothing" identity $ _hyperdataDocument_source doc
-   let institutes = map text2ngrams $ maybe ["Nothing"] (map toSchoolName . (splitOn ", ")) $ _hyperdataDocument_institutes doc
-   let authors = map text2ngrams $ maybe ["Nothing"] (splitOn ", ") $ _hyperdataDocument_authors doc
-   let leText = catMaybes [_hyperdataDocument_title doc, _hyperdataDocument_abstract doc]
+   let source = text2ngrams
+              $ maybe "Nothing" identity
+              $ _hyperdataDocument_source doc
+
+       institutes = map text2ngrams $ maybe ["Nothing"] (map toSchoolName . (splitOn ", ")) $ _hyperdataDocument_institutes doc
+
+       authors = map text2ngrams $ maybe ["Nothing"] (splitOn ", ") $ _hyperdataDocument_authors doc
+
+       leText = catMaybes [_hyperdataDocument_title doc, _hyperdataDocument_abstract doc]

    terms' <- map text2ngrams <$> map (intercalate " " . _terms_label) <$> concat <$> liftIO (extractTerms (Multi EN) leText)

    pure $ DM.fromList $ [(source, DM.singleton Sources 1)]
                      <> [(i', DM.singleton Institutes 1) | i' <- institutes]
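extractNgramsT turns the source, institutes, authors and extracted terms into a map from ngram to per-type counts, with authors and institutes split on ", ". A small, self-contained sketch of that assembly using plain Data.Map and Data.Text.splitOn (simplified, hypothetical types; fromListWith is used here to merge duplicate keys, whereas the original builds the list with DM.fromList and <>):

{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Map.Strict as DM
import           Data.Text (Text, splitOn)

-- Hypothetical, simplified stand-in for the project's NgramsType.
data NgramsKind = SourceNg | AuthorNg deriving (Eq, Ord, Show)

-- Build the (ngram -> kind -> count) map from a source field and a raw
-- authors field split on ", ", summing counts for duplicate ngrams.
toNgramsMap :: Text -> Text -> DM.Map Text (DM.Map NgramsKind Int)
toNgramsMap source rawAuthors =
  DM.fromListWith (DM.unionWith (+)) $
       [ (source, DM.singleton SourceNg 1) ]
    <> [ (a, DM.singleton AuthorNg 1) | a <- splitOn ", " rawAuthors ]

main :: IO ()
main = print (toNgramsMap "Some Journal" "Doe J., Smith A.")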
@@ -229,8 +260,10 @@ extractNgramsT doc = do
  documentIdWithNgrams :: HasNodeError err
-                      => (HyperdataDocument -> Cmd err (Map Ngrams (Map NgramsType Int)))
-                      -> [DocumentWithId] -> Cmd err [DocumentIdWithNgrams]
+                      => (HyperdataDocument
+                      -> Cmd err (Map Ngrams (Map NgramsType Int)))
+                      -> [DocumentWithId]
+                      -> Cmd err [DocumentIdWithNgrams]
  documentIdWithNgrams f = mapM toDocumentIdWithNgrams
    where
      toDocumentIdWithNgrams d = do
@@ -238,10 +271,12 @@ documentIdWithNgrams f = mapM toDocumentIdWithNgrams
      pure $ DocumentIdWithNgrams d e

  -- | TODO check optimization
  mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map Ngrams (Map NgramsType (Map NodeId Int))
  mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f
    where
      f :: DocumentIdWithNgrams -> Map Ngrams (Map NgramsType (Map NodeId Int))
      f d = fmap (fmap (DM.singleton nId)) $ document_ngrams d
        where
          nId = documentId $ documentWithId d
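mapNodeIdNgrams folds the per-document maps together with nested unionWith, so counts for the same (ngram, type, node) key add up. The nesting is easier to see on a tiny standalone example (generic key types, not the project's Ngrams/NgramsType/NodeId):

import qualified Data.Map.Strict as DM

-- Nested merge in the style of mapNodeIdNgrams: inner Int counts for the
-- same (outer, middle, inner) key path are summed across the input maps.
mergeCounts :: (Ord a, Ord b, Ord c)
            => [DM.Map a (DM.Map b (DM.Map c Int))]
            -> DM.Map a (DM.Map b (DM.Map c Int))
mergeCounts = DM.unionsWith (DM.unionWith (DM.unionWith (+)))

main :: IO ()
main = print $ mergeCounts
  [ DM.singleton "brain" (DM.singleton "terms" (DM.singleton (1 :: Int) 1))
  , DM.singleton "brain" (DM.singleton "terms" (DM.singleton 1 2))
  ]
-- fromList [("brain",fromList [("terms",fromList [(1,3)])])]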
@@ -271,7 +306,10 @@ flowList uId cId ngs = do
    pure lId

  flowListUser :: FlowCmdM env err m
               => UserId -> CorpusId -> Map NgramsType [NgramsElement] -> Int -> m ListId
  flowListUser uId cId ngsM n = do
    lId <- getOrMkList cId uId
@@ -301,7 +339,10 @@ groupNgramsBy isEqual cId = DM.fromList $ catMaybes [ isEqual n1 n2 | n1 <- DM.k
  -- TODO check: do not insert duplicates
  insertGroups :: HasNodeError err => ListId -> Map NgramsIndexed NgramsIndexed -> Cmd err Int
  insertGroups lId ngrs =
    insertNodeNgramsNgramsNew [ NodeNgramsNgrams lId ng1 ng2 (Just 1)
                              | (ng1, ng2) <- map (both _ngramsId) $ DM.toList ngrs
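insertGroups projects both sides of each (parent, child) pair in the grouping map down to ngram ids before inserting. The `both _ngramsId` step is simply "apply the same function to both components of a pair", which a hypothetical two-line helper makes explicit:

import qualified Data.Map.Strict as DM

-- Apply the same projection to both members of a pair, as insertGroups
-- does with `both _ngramsId` over DM.toList.
both' :: (a -> b) -> (a, a) -> (b, b)
both' f (x, y) = (f x, f y)

main :: IO ()
main = print $ map (both' length) (DM.toList (DM.fromList [("abc", "de"), ("f", "ghij")]))
-- [(3,2),(1,4)]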
@@ -329,7 +370,10 @@ ngrams2list' m = fromListWith (<>)
  -- | TODO: weight of the list could be a probability
  insertLists :: HasNodeError err => ListId -> [(ListType, (NgramsType, NgramsIndexed))] -> Cmd err Int
  insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (ngramsTypeId ngt) (fromIntegral $ listTypeId l) 1
                                          | (l,(ngt,ng)) <- lngs
                                          ]