Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
34fbf078
Commit
34fbf078
authored
Dec 13, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FLOW][Database] ngrams extraction and insertion.
parent
b2ef5a09
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
29 additions
and
16 deletions
+29
-16
Flow.hs
src/Gargantext/Database/Flow.hs
+28
-15
NodeNgram.hs
src/Gargantext/Database/Schema/NodeNgram.hs
+1
-1
No files found.
src/Gargantext/Database/Flow.hs
View file @
34fbf078
...
...
@@ -21,15 +21,18 @@ module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
--import Gargantext.Database.Node.Contact (HyperdataContact(..))
import
Data.Map
(
Map
,
lookup
)
import
Data.Maybe
(
Maybe
(
..
),
catMaybes
)
import
Data.Text
(
Text
,
splitOn
)
import
Data.Text
(
Text
,
splitOn
,
intercalate
)
import
Data.Tuple.Extra
(
both
,
second
)
import
Data.List
(
concat
)
import
GHC.Show
(
Show
)
import
Gargantext.Core.Types
(
NodePoly
(
..
),
ListType
(
..
),
listTypeId
)
import
Gargantext.Core.Types
(
NodePoly
(
..
),
ListType
(
..
),
listTypeId
,
Terms
(
..
)
)
import
Gargantext.Core.Types.Individu
(
Username
)
import
Gargantext.Core.Types.Main
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Bashql
(
runCmd'
)
-- , del)
import
Gargantext.Database.Config
(
userMaster
,
userArbitrary
,
corpusMasterName
)
import
Gargantext.Database.Flow.Utils
(
insertToNodeNgrams
)
import
Gargantext.Text.Terms
(
extractTerms
)
import
Gargantext.Database.Node.Document.Add
(
add
)
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIdsDoc
,
addUniqIdsContact
,
ToDbData
(
..
))
import
Gargantext.Database.Root
(
getRootCmd
)
...
...
@@ -41,6 +44,7 @@ import Gargantext.Database.Schema.User (getUser, UserLight(..))
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Types.Node
(
NodeType
(
..
),
NodeId
)
import
Gargantext.Database.Utils
(
Cmd
(
..
))
import
Gargantext.Text.Terms
(
TermType
(
..
))
import
Gargantext.Ext.IMT
(
toSchoolName
)
import
Gargantext.Ext.IMTUser
(
deserialiseImtUsersFromFile
)
import
Gargantext.Prelude
...
...
@@ -102,7 +106,7 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user
let
documentsWithId
=
mergeData
(
toInserted
ids
)
(
toInsert
hyperdataDocuments
)
-- printDebug "documentsWithId" documentsWithId
let
docsWithNgrams
=
documentIdWithNgrams
extractNgramsT
documentsWithId
docsWithNgrams
<-
documentIdWithNgrams
extractNgramsT
documentsWithId
-- printDebug "docsWithNgrams" docsWithNgrams
let
maps
=
mapNodeIdNgrams
docsWithNgrams
...
...
@@ -228,22 +232,31 @@ data DocumentIdWithNgrams =
-- TODO add Terms (Title + Abstract)
-- add f :: Text -> Text
-- newtype Ngrams = Ngrams Text
extractNgramsT
::
HyperdataDocument
->
Map
(
NgramsT
Ngrams
)
Int
extractNgramsT
doc
=
DM
.
fromList
$
[(
NgramsT
Sources
source
,
1
)]
<>
[(
NgramsT
Institutes
i'
,
1
)
|
i'
<-
institutes
]
<>
[(
NgramsT
Authors
a'
,
1
)
|
a'
<-
authors
]
where
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
_hyperdataDocument_source
doc
institutes
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
splitOn
", "
))
$
_hyperdataDocument_institute
s
doc
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
", "
)
$
_hyperdataDocument_authors
doc
-- TODO group terms
-- TODO group terms
extractNgramsT
::
HyperdataDocument
->
IO
(
Map
(
NgramsT
Ngrams
)
Int
)
extractNgramsT
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
_hyperdataDocument_source
doc
let
institutes
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
splitOn
", "
))
$
_hyperdataDocument_institutes
doc
let
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
", "
)
$
_hyperdataDocument_author
s
doc
let
leText
=
catMaybes
[
_hyperdataDocument_title
doc
,
_hyperdataDocument_abstract
doc
]
terms'
<-
map
text2ngrams
<$>
map
(
intercalate
" "
.
_terms_label
)
<$>
concat
<$>
extractTerms
(
Multi
EN
)
leText
pure
$
DM
.
fromList
$
[(
NgramsT
Sources
source
,
1
)]
<>
[(
NgramsT
Institutes
i'
,
1
)
|
i'
<-
institutes
]
<>
[(
NgramsT
Authors
a'
,
1
)
|
a'
<-
authors
]
<>
[(
NgramsT
NgramsTerms
t'
,
1
)
|
t'
<-
terms'
]
documentIdWithNgrams
::
(
HyperdataDocument
->
Map
(
NgramsT
Ngrams
)
Int
)
->
[
DocumentWithId
]
->
[
DocumentIdWithNgrams
]
documentIdWithNgrams
f
=
map
(
\
d
->
DocumentIdWithNgrams
d
((
f
.
documentData
)
d
))
documentIdWithNgrams
::
(
HyperdataDocument
->
IO
(
Map
(
NgramsT
Ngrams
)
Int
))
->
[
DocumentWithId
]
->
IO
[
DocumentIdWithNgrams
]
documentIdWithNgrams
f
=
mapM
toDocumentIdWithNgrams
where
toDocumentIdWithNgrams
d
=
do
e
<-
f
$
documentData
d
pure
$
DocumentIdWithNgrams
d
e
-- | TODO check optimization
mapNodeIdNgrams
::
[
DocumentIdWithNgrams
]
->
Map
(
NgramsT
Ngrams
)
(
Map
NodeId
Int
)
...
...
src/Gargantext/Database/Schema/NodeNgram.hs
View file @
34fbf078
...
...
@@ -108,7 +108,7 @@ insertNodeNgramW nns =
insertNothing
=
(
Insert
{
iTable
=
nodeNgramTable
,
iRows
=
nns
,
iReturning
=
rCount
,
iOnConflict
=
Nothing
,
iOnConflict
=
(
Just
DoNothing
)
})
type
NgramsText
=
Text
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment