Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e50ae8f8
Commit
e50ae8f8
authored
Nov 29, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[Annuaire] flow to insert in DB.
parent
28e68956
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
95 additions
and
65 deletions
+95
-65
Flow.hs
src/Gargantext/Database/Flow.hs
+64
-50
Contact.hs
src/Gargantext/Database/Node/Contact.hs
+28
-11
Insert.hs
src/Gargantext/Database/Node/Document/Insert.hs
+3
-4
No files found.
src/Gargantext/Database/Flow.hs
View file @
e50ae8f8
...
...
@@ -13,7 +13,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Database.Flow
(
flowDatabase
,
ngrams2list
)
module
Gargantext.Database.Flow
--
(flowDatabase, ngrams2list)
where
import
GHC.Show
(
Show
)
...
...
@@ -29,88 +29,99 @@ import Gargantext.Database.Bashql (runCmd') -- , del)
import
Gargantext.Database.Config
(
userMaster
,
userArbitrary
,
corpusMasterName
)
import
Gargantext.Database.Ngrams
(
insertNgrams
,
Ngrams
(
..
),
NgramsT
(
..
),
NgramsIndexed
(
..
),
indexNgramsT
,
ngramsTypeId
,
NgramsType
(
..
),
text2ngrams
)
import
Gargantext.Database.Node
(
getRoot
,
mkRoot
,
mkCorpus
,
Cmd
(
..
),
mkList
,
mkGraph
,
mkDashboard
)
--, mkAnnuaire)
import
Gargantext.Database.Types.Node
(
NodeType
(
..
))
import
Gargantext.Database.Node.Document.Add
(
add
)
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIdsDoc
,
ToDbData
(
..
))
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIdsDoc
,
addUniqIdsContact
,
ToDbData
(
..
))
import
Gargantext.Database.NodeNgram
(
NodeNgramPoly
(
..
),
insertNodeNgrams
)
import
Gargantext.Database.NodeNgramsNgrams
(
NodeNgramsNgramsPoly
(
..
),
insertNodeNgramsNgramsNew
)
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
--
import Gargantext.Database.Node.Contact (HyperdataContact(..))
import
Gargantext.Database.Node.Contact
(
HyperdataContact
(
..
))
import
Gargantext.Database.User
(
getUser
,
UserLight
(
..
),
Username
)
import
Gargantext.Ext.IMT
(
toSchoolName
)
import
Gargantext.Prelude
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
)
type
UserId
=
Int
type
RootId
=
Int
type
UserId
=
Int
type
MasterUserId
=
Int
type
RootId
=
Int
type
CorpusId
=
Int
{-
flowCorpus :: [ToDbData] -> CorpusName -> IO CorpusId
flowCorpus = undefined
--}
flowDatabase
::
FileFormat
->
FilePath
->
CorpusName
->
IO
Int
flowDatabase
::
FileFormat
->
FilePath
->
CorpusName
->
IO
CorpusId
flowDatabase
ff
fp
cName
=
do
-- Corpus Flow
(
masterUserId
,
_
,
corpusId
)
<-
subFlowCorpus
userMaster
corpusMasterName
-- Documents Flow
hyperdataDocuments
<-
map
addUniqIdsDoc
<$>
parseDocs
ff
fp
let
hyperdataDocuments'
=
map
(
\
h
->
ToDbDocument
h
)
hyperdataDocuments
printDebug
"hyperdataDocuments"
(
length
hyperdataDocuments
)
params
<-
flowInsert
NodeCorpus
hyperdataDocuments
cName
flowCorpus
NodeCorpus
hyperdataDocuments
params
flowInsert
::
NodeType
->
[
HyperdataDocument
]
->
CorpusName
->
IO
([
ReturnId
],
MasterUserId
,
UserId
,
CorpusId
)
flowInsert
nt
hyperdataDocuments
cName
=
do
let
hyperdataDocuments'
=
case
nt
of
NodeCorpus
->
map
(
\
h
->
ToDbDocument
h
)
hyperdataDocuments
-- NodeAnnuaire -> map (\h -> ToDbContact h) hyperdataDocuments
(
masterUserId
,
_
,
masterCorpusId
)
<-
subFlowCorpus
userMaster
corpusMasterName
ids
<-
runCmd'
$
insertDocuments
masterUserId
masterCorpusId
hyperdataDocuments'
(
userId
,
_
,
userCorpusId
)
<-
subFlowCorpus
userArbitrary
cName
_
<-
runCmd'
$
add
userCorpusId
(
map
reId
ids
)
pure
(
ids
,
masterUserId
,
userId
,
userCorpusId
)
ids
<-
runCmd'
$
insertDocuments
masterUserId
corpusId
hyperdataDocuments'
-- printDebug "Docs IDs : " (ids)
-- idsRepeat <- runCmd' $ insertDocuments masterUserId corpusId hyperdataDocuments'
-- printDebug "Repeated Docs IDs : " (length idsRepeat)
let
idsNotRepeated
=
filter
(
\
r
->
reInserted
r
==
True
)
ids
--{-
-- Ngrams Flow
-- todo: flow for new documents only
let
tids
=
toInserted
ids
printDebug
"toInserted ids"
(
length
tids
)
flowInsertAnnuaire
children
name
=
do
(
masterUserId
,
_
,
masterCorpusId
)
<-
subFlowCorpus
userMaster
corpusMasterName
ids
<-
runCmd'
$
insertDocuments
masterUserId
masterCorpusId
children
(
userId
,
_
,
userCorpusId
)
<-
subFlowCorpus
userArbitrary
name
_
<-
runCmd'
$
add
userCorpusId
(
map
reId
ids
)
pure
(
ids
,
masterUserId
,
userId
,
userCorpusId
)
let
tihs
=
toInsert
hyperdataDocuments
printDebug
"toInsert hyperdataDocuments"
(
length
tihs
)
let
documentsWithId
=
mergeData
(
toInserted
idsNotRepeated
)
(
toInsert
hyperdataDocuments
)
-- printDebug "documentsWithId" documentsWithId
--}
-- docsWithNgrams <- documentIdWithNgrams documentsWithId extractNgramsT
--{-
-- flowCorpus :: NodeType -> [HyperdataDocument] -> ([ReturnId],MasterUserId,UserId,CorpusId) -> IO CorpusId
flowCorpus
NodeCorpus
hyperdataDocuments
(
ids
,
masterUserId
,
userId
,
userCorpusId
)
=
do
--}
--------------------------------------------------
-- List Ngrams Flow
userListId
<-
runCmd'
$
listFlowUser
userId
userCorpusId
printDebug
"Working on User ListId : "
userListId
let
documentsWithId
=
mergeData
(
toInserted
ids
)
(
toInsert
hyperdataDocuments
)
-- printDebug "documentsWithId" documentsWithId
let
docsWithNgrams
=
documentIdWithNgrams
extractNgramsT
documentsWithId
-- printDebug "docsWithNgrams" docsWithNgrams
let
maps
=
mapNodeIdNgrams
docsWithNgrams
-- printDebug "maps" (maps)
-- printDebug "maps" (maps)
indexedNgrams
<-
runCmd'
$
indexNgrams
maps
-- printDebug "inserted ngrams" indexedNgrams
_
<-
runCmd'
$
insertToNodeNgrams
indexedNgrams
-- List Flow
listId2
<-
runCmd'
$
listFlow
masterUserId
corpusId
indexedNgrams
printDebug
"list id : "
listId2
--}
(
userId
,
_
,
corpusId2
)
<-
subFlowCorpus
userArbitrary
cName
userListId
<-
runCmd'
$
listFlowUser
userId
corpusId2
printDebug
"UserList : "
userListId
inserted
<-
runCmd'
$
add
corpusId2
(
map
reId
ids
)
printDebug
"Added : "
(
length
inserted
)
_
<-
runCmd'
$
mkDashboard
corpusId2
userId
_
<-
runCmd'
$
mkGraph
corpusId2
userId
listId2
<-
runCmd'
$
listFlow
masterUserId
userCorpusId
indexedNgrams
printDebug
"Working on ListId : "
listId2
--}
--------------------------------------------------
_
<-
runCmd'
$
mkDashboard
userCorpusId
userId
_
<-
runCmd'
$
mkGraph
userCorpusId
userId
-- Annuaire Flow
-- _ <- runCmd' $ mkAnnuaire rootUserId userId
pure
corpusId2
pure
userCorpusId
-- runCmd' $ del [corpusId2, corpusId]
flowCorpus
NodeAnnuaire
_hyperdataDocuments
(
_ids
,
_masterUserId
,
_userId
,
_userCorpusId
)
=
undefined
flowCorpus
_
_
_
=
undefined
type
CorpusName
=
Text
subFlowCorpus
::
Username
->
CorpusName
->
IO
(
UserId
,
RootId
,
CorpusId
)
...
...
@@ -145,13 +156,13 @@ type NodeId = Int
type
ListId
=
Int
toInsert
::
[
HyperdataDocument
]
->
Map
HashId
HyperdataDocument
toInsert
=
DM
.
fromList
.
map
(
\
d
->
(
hash
(
_hyperdataDocument_uniqId
d
),
d
))
toInsert
=
DM
.
fromList
.
map
(
\
d
->
(
maybe
err
identity
(
_hyperdataDocument_uniqId
d
),
d
))
where
hash
=
maybe
"Error"
identity
err
=
"Database.Flow.toInsert"
toInserted
::
[
ReturnId
]
->
Map
HashId
ReturnId
toInserted
rs
=
DM
.
fromList
$
map
(
\
r
->
(
reUniqId
r
,
r
)
)
$
filter
(
\
r
->
reInserted
r
==
True
)
rs
toInserted
=
DM
.
fromList
.
map
(
\
r
->
(
reUniqId
r
,
r
)
)
.
filter
(
\
r
->
reInserted
r
==
True
)
data
DocumentWithId
=
DocumentWithId
{
documentId
::
NodeId
...
...
@@ -186,6 +197,9 @@ extractNgramsT doc = DM.fromList $ [(NgramsT Sources source, 1)]
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
", "
)
$
_hyperdataDocument_authors
doc
-- TODO group terms
documentIdWithNgrams
::
(
HyperdataDocument
->
Map
(
NgramsT
Ngrams
)
Int
)
->
[
DocumentWithId
]
->
[
DocumentIdWithNgrams
]
documentIdWithNgrams
f
=
map
(
\
d
->
DocumentIdWithNgrams
d
((
f
.
documentData
)
d
))
...
...
src/Gargantext/Database/Node/Contact.hs
View file @
e50ae8f8
...
...
@@ -38,17 +38,22 @@ import Opaleye (QueryRunnerColumnDefault, queryRunnerColumnDefault, PGJsonb, fie
type
NodeContact
=
Node
HyperdataContact
data
HyperdataContact
=
HyperdataContact
{
_hc_bdd
::
Maybe
Text
-- ID of Database source
,
_hc_who
::
Maybe
ContactWho
,
_hc_where
::
Maybe
[
ContactWhere
]
,
_hc_lastValidation
::
Maybe
Text
,
_hc_uniqIdBdd
::
Maybe
Text
,
_hc_uniqId
::
Maybe
Text
HyperdataContact
{
_hc_who
::
Maybe
ContactWho
,
_hc_where
::
Maybe
[
ContactWhere
]
,
_hc_metaData
::
Maybe
ContactMetaData
}
deriving
(
Eq
,
Show
,
Generic
)
data
ContactMetaData
=
ContactMetaData
{
_cm_bdd
::
Maybe
Text
,
_cm_lastValidation
::
Maybe
Text
,
_cm_uniqIdBdd
::
Maybe
Text
,
_cm_uniqId
::
Maybe
Text
}
deriving
(
Eq
,
Show
,
Generic
)
arbitraryHyperdataContact
::
HyperdataContact
arbitraryHyperdataContact
=
HyperdataContact
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
arbitraryHyperdataContact
=
HyperdataContact
Nothing
Nothing
Nothing
data
ContactWho
=
ContactWho
{
_cw_id
::
Maybe
Int
...
...
@@ -61,13 +66,17 @@ data ContactWho =
data
ContactWhere
=
ContactWhere
{
_cw_organization
::
Maybe
[
Text
]
,
_cw_labTeamDepts
::
Maybe
[
Text
]
,
_cw_role
::
Maybe
Text
,
_cw_office
::
Maybe
Text
,
_cw_country
::
Maybe
Text
,
_cw_city
::
Maybe
Text
,
_cw_touch
::
Maybe
ContactTouch
,
_cw_start
::
Maybe
UTCTime
,
_cw_end
::
Maybe
UTCTime
,
_cw_entry
::
Maybe
UTCTime
,
_cw_exit
::
Maybe
UTCTime
}
deriving
(
Eq
,
Show
,
Generic
)
data
ContactTouch
=
...
...
@@ -86,21 +95,29 @@ nodeContactW maybeName maybeContact aId =
contact
=
maybe
arbitraryHyperdataContact
identity
maybeContact
-- | Main instances of Contact
-- | Specific Gargantext instance
instance
Hyperdata
HyperdataContact
-- | Database (Posgresql-simple instance)
instance
FromField
HyperdataContact
where
fromField
=
fromField'
-- | Database (Opaleye instance)
instance
QueryRunnerColumnDefault
PGJsonb
HyperdataContact
where
queryRunnerColumnDefault
=
fieldQueryRunnerColumn
-- | All lenses
makeLenses
''
C
ontactWho
makeLenses
''
C
ontactWhere
makeLenses
''
C
ontactTouch
makeLenses
''
C
ontactMetaData
makeLenses
''
H
yperdataContact
-- | All Json instances
$
(
deriveJSON
(
unPrefix
"_cw_"
)
''
C
ontactWho
)
$
(
deriveJSON
(
unPrefix
"_cw_"
)
''
C
ontactWhere
)
$
(
deriveJSON
(
unPrefix
"_ct_"
)
''
C
ontactTouch
)
$
(
deriveJSON
(
unPrefix
"_cm_"
)
''
C
ontactMetaData
)
$
(
deriveJSON
(
unPrefix
"_hc_"
)
''
H
yperdataContact
)
src/Gargantext/Database/Node/Document/Insert.hs
View file @
e50ae8f8
...
...
@@ -62,7 +62,6 @@ module Gargantext.Database.Node.Document.Insert where
import
Control.Lens
(
set
,
view
)
import
Control.Lens.Prism
import
Control.Lens.Cons
import
Control.Monad
(
join
)
import
Data.Aeson
(
toJSON
,
Value
)
import
Data.Maybe
(
maybe
)
import
Data.Text
(
Text
)
...
...
@@ -232,11 +231,11 @@ hashParametersDoc = [ \d -> maybe' (_hyperdataDocument_title d)
-- * Uniqueness of document definition
-- TODO factorize with above (use the function below for tests)
addUniqIdsContact
::
HyperdataContact
->
HyperdataContact
addUniqIdsContact
hc
=
set
hc_uniqIdBdd
(
Just
hashBdd
)
$
set
hc_uniqId
(
Just
hash
)
hc
addUniqIdsContact
hc
=
set
(
hc_metaData
.
_Just
.
cm_uniqIdBdd
.
_Just
)
hashBdd
$
set
(
hc_metaData
.
_Just
.
cm_uniqId
.
_Just
)
hash
hc
where
hash
=
uniqId
$
DT
.
concat
$
map
(
$
hc
)
hashParametersContact
hashBdd
=
uniqId
$
DT
.
concat
$
map
(
$
hc
)
([
(
\
d
->
maybe'
(
view
hc_bdd
d
)
)]
<>
hashParametersContact
)
hashBdd
=
uniqId
$
DT
.
concat
$
map
(
$
hc
)
([
\
d
->
maybe'
(
view
(
hc_metaData
.
_Just
.
cm_bdd
)
d
)]
<>
hashParametersContact
)
uniqId
::
Text
->
Text
uniqId
=
DT
.
pack
.
SHA
.
showDigest
.
SHA
.
sha256
.
DC
.
pack
.
DT
.
unpack
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment