Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
96920cfd
Commit
96920cfd
authored
Nov 28, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DB][FLOW] fix duplicate ngrams insertion.
parent
714462cc
Pipeline
#26
canceled with stage
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
84 additions
and
89 deletions
+84
-89
Flow.hs
src/Gargantext/Database/Flow.hs
+25
-23
Ngrams.hs
src/Gargantext/Database/Ngrams.hs
+27
-19
Node.hs
src/Gargantext/Database/Node.hs
+1
-8
Contact.hs
src/Gargantext/Database/Node/Contact.hs
+4
-13
Insert.hs
src/Gargantext/Database/Node/Document/Insert.hs
+27
-25
Utils.hs
src/Gargantext/Database/Utils.hs
+0
-1
No files found.
src/Gargantext/Database/Flow.hs
View file @
96920cfd
...
...
@@ -28,13 +28,13 @@ import Gargantext.Core.Types (NodePoly(..), ListType(..), listTypeId)
import
Gargantext.Database.Bashql
(
runCmd'
)
-- , del)
import
Gargantext.Database.Config
(
userMaster
,
userArbitrary
,
corpusMasterName
)
import
Gargantext.Database.Ngrams
(
insertNgrams
,
Ngrams
(
..
),
NgramsT
(
..
),
NgramsIndexed
(
..
),
indexNgramsT
,
ngramsTypeId
,
NgramsType
(
..
),
text2ngrams
)
import
Gargantext.Database.Node
(
getRoot
,
mkRoot
,
mkCorpus
,
Cmd
(
..
),
mkList
,
mkGraph
,
mkDashboard
,
mkAnnuaire
)
import
Gargantext.Database.Node
(
getRoot
,
mkRoot
,
mkCorpus
,
Cmd
(
..
),
mkList
)
--, mkGraph, mkDashboard)--
, mkAnnuaire)
import
Gargantext.Database.Node.Document.Add
(
add
)
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIds
,
Hyper
(
HyperDocument
))
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIds
,
ToDbData
(
..
))
import
Gargantext.Database.NodeNgram
(
NodeNgramPoly
(
..
),
insertNodeNgrams
)
import
Gargantext.Database.NodeNgramsNgrams
(
NodeNgramsNgramsPoly
(
..
),
insertNodeNgramsNgramsNew
)
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Node.Contact
(
HyperdataContact
(
..
))
--
import Gargantext.Database.Node.Contact (HyperdataContact(..))
import
Gargantext.Database.User
(
getUser
,
UserLight
(
..
),
Username
)
import
Gargantext.Ext.IMT
(
toSchoolName
)
import
Gargantext.Prelude
...
...
@@ -47,19 +47,20 @@ type CorpusId = Int
flowDatabase
::
FileFormat
->
FilePath
->
CorpusName
->
IO
Int
flowDatabase
ff
fp
cName
=
do
-- Corus Flow
-- Cor
p
us Flow
(
masterUserId
,
_
,
corpusId
)
<-
subFlowCorpus
userMaster
corpusMasterName
-- Documents Flow
hyperdataDocuments
<-
map
addUniqIds
<$>
parseDocs
ff
fp
let
hyperdataDocuments'
=
map
(
\
h
->
Hyper
Document
h
)
hyperdataDocuments
printDebug
"hyperdataDocuments"
hyperdataDocuments
let
hyperdataDocuments'
=
map
(
\
h
->
ToDb
Document
h
)
hyperdataDocuments
printDebug
"hyperdataDocuments"
(
length
hyperdataDocuments
)
ids
<-
runCmd'
$
insertDocuments
masterUserId
corpusId
hyperdataDocuments'
--printDebug "Docs IDs : " (ids)
idsRepeat
<-
runCmd'
$
insertDocuments
masterUserId
corpusId
hyperdataDocuments'
printDebug
"Repeated Docs IDs : "
(
length
idsRepeat
)
-- printDebug "Docs IDs : " (ids)
-- idsRepeat <- runCmd' $ insertDocuments masterUserId corpusId hyperdataDocuments'
-- printDebug "Repeated Docs IDs : " (length idsRepeat)
let
idsNotRepeated
=
filter
(
\
r
->
reInserted
r
==
True
)
ids
-- {-
-- Ngrams Flow
-- todo: flow for new documents only
let
tids
=
toInserted
ids
...
...
@@ -68,37 +69,38 @@ flowDatabase ff fp cName = do
let
tihs
=
toInsert
hyperdataDocuments
printDebug
"toInsert hyperdataDocuments"
(
length
tihs
)
let
documentsWithId
=
mergeData
(
toInserted
ids
)
(
toInsert
hyperdataDocuments
)
printDebug
"documentsWithId"
documentsWithId
let
documentsWithId
=
mergeData
(
toInserted
ids
NotRepeated
)
(
toInsert
hyperdataDocuments
)
--
printDebug "documentsWithId" documentsWithId
-- docsWithNgrams <- documentIdWithNgrams documentsWithId extractNgramsT
let
docsWithNgrams
=
documentIdWithNgrams
extractNgramsT
documentsWithId
printDebug
"docsWithNgrams"
docsWithNgrams
--
printDebug "docsWithNgrams" docsWithNgrams
let
maps
=
mapNodeIdNgrams
docsWithNgrams
printDebug
"maps"
(
maps
)
--
printDebug "maps" (maps)
indexedNgrams
<-
runCmd'
$
indexNgrams
maps
printDebug
"inserted ngrams"
indexedNgrams
--
printDebug "inserted ngrams" indexedNgrams
_
<-
runCmd'
$
insertToNodeNgrams
indexedNgrams
-- List Flow
listId2
<-
runCmd'
$
listFlow
masterUserId
corpusId
indexedNgrams
printDebug
"list id : "
listId2
(
userId
,
rootUserId
,
corpusId2
)
<-
subFlowCorpus
userArbitrary
cName
--(userId, rootUserId, corpusId2) <- subFlowCorpus userArbitrary cName
--}
(
userId
,
_
,
corpusId2
)
<-
subFlowCorpus
userArbitrary
cName
userListId
<-
runCmd'
$
listFlowUser
userId
corpusId2
printDebug
"UserList : "
userListId
inserted
<-
runCmd'
$
add
corpusId2
(
map
reId
ids
)
printDebug
"Inserted : "
(
length
inserted
)
_
<-
runCmd'
$
mkDashboard
corpusId2
userId
_
<-
runCmd'
$
mkGraph
corpusId2
userId
--
_
<-
runCmd'
$
mkDashboard
corpusId2
userId
--
_
<-
runCmd'
$
mkGraph
corpusId2
userId
-- Annuaire Flow
annuaireId
<-
runCmd'
$
mkAnnuaire
rootUserId
userId
-- _
<- runCmd' $ mkAnnuaire rootUserId userId
pure
corpusId2
-- runCmd' $ del [corpusId2, corpusId]
...
...
@@ -208,16 +210,16 @@ insertToNodeNgrams m = insertNodeNgrams [ NodeNgram Nothing nId ((_ngramsId
------------------------------------------------------------------------
listFlow
::
UserId
->
CorpusId
->
Map
(
NgramsT
NgramsIndexed
)
(
Map
NodeId
Int
)
->
Cmd
ListId
listFlow
uId
cId
ngs
=
do
printDebug
"ngs:"
ngs
--
printDebug "ngs:" ngs
lId
<-
maybe
(
panic
"mkList error"
)
identity
<$>
head
<$>
mkList
cId
uId
printDebug
"ngs"
(
DM
.
keys
ngs
)
--
printDebug "ngs" (DM.keys ngs)
-- TODO add stemming equivalence of 2 ngrams
let
groupEd
=
groupNgramsBy
(
\
(
NgramsT
t1
n1
)
(
NgramsT
t2
n2
)
->
if
(((
==
)
t1
t2
)
&&
((
==
)
n1
n2
))
then
(
Just
(
n1
,
n2
))
else
Nothing
)
ngs
_
<-
insertGroups
lId
groupEd
-- compute Candidate / Map
let
lists
=
ngrams2list
ngs
printDebug
"lists:"
lists
--
printDebug "lists:" lists
is
<-
insertLists
lId
lists
printDebug
"listNgrams inserted :"
is
...
...
src/Gargantext/Database/Ngrams.hs
View file @
96920cfd
...
...
@@ -24,6 +24,7 @@ Ngrams connection to the Database.
module
Gargantext.Database.Ngrams
where
-- import Opaleye
import
Debug.Trace
(
trace
)
import
Prelude
(
Enum
,
Bounded
,
minBound
,
maxBound
)
import
Control.Lens
(
makeLenses
,
view
)
import
Data.ByteString.Internal
(
ByteString
)
...
...
@@ -223,9 +224,11 @@ getNgramsTableData :: DPS.Connection
->
NodeType
->
NgramsType
->
NgramsTableParamUser
->
NgramsTableParamMaster
->
IO
[
NgramsTableData
]
getNgramsTableData
conn
nodeT
ngrmT
(
NgramsTableParam
ul
uc
)
(
NgramsTableParam
ml
mc
)
=
getNgramsTableData
conn
nodeT
ngrmT
(
NgramsTableParam
ul
uc
)
(
NgramsTableParam
ml
mc
)
=
do
_
<-
trace
$
show
(
ul
,
uc
,
nodeTId
,
ngrmTId
,
ml
,
mc
,
nodeTId
,
ngrmTId
,
uc
)
map
(
\
(
t
,
n
,
nt
,
w
)
->
NgramsTableData
t
n
(
fromListTypeId
nt
)
w
)
<$>
DPS
.
query
conn
querySelectTableNgrams
(
ul
,
uc
,
nodeTId
,
ngrmTId
,
ml
,
mc
,
nodeTId
,
ngrmTId
)
-- <$> DPS.query conn querySelectTableNgrams (ul,uc,nodeTId,ngrmTId,ml,mc,nodeTId,ngrmTId)
<$>
DPS
.
query
conn
querySelectTableNgrams
(
ul
,
uc
,
nodeTId
,
ngrmTId
,
ml
,
mc
,
nodeTId
,
ngrmTId
,
uc
)
where
nodeTId
=
nodeTypeId
nodeT
ngrmTId
=
ngramsTypeId
ngrmT
...
...
@@ -235,23 +238,28 @@ querySelectTableNgrams :: DPS.Query
querySelectTableNgrams
=
[
sql
|
WITH tableUser AS (
SELECT ngs.terms, ngs.n, nn1.ngrams_type,nn2.weight FROM ngrams ngs
JOIN nodes_ngrams nn1 ON nn1.ngram_id = ngs.id
JOIN nodes_ngrams nn2 ON nn2.ngram_id = ngs.id
JOIN nodes n ON n.id = nn2.node_id
WHERE nn1.node_id = ? -- User listId
AND n.parent_id = ? -- User CorpusId or AnnuaireId
AND n.typename = ? -- both type of childs (Documents or Contacts)
AND nn2.ngrams_type = ? -- both type of ngrams (Authors or Terms?)
), tableMaster AS (
SELECT ngs.terms, ngs.n, nn1.ngrams_type,nn2.weight FROM ngrams ngs
JOIN nodes_ngrams nn1 ON nn1.ngram_id = ngs.id
JOIN nodes_ngrams nn2 ON nn2.ngram_id = ngs.id
JOIN nodes n ON n.id = nn2.node_id
WHERE nn1.node_id = ? -- Master listId
AND n.parent_id = ? -- Master CorpusId or AnnuaireId
AND n.typename = ? -- both type of childs (Documents or Contacts)
AND nn2.ngrams_type = ? -- both type of ngrams (Authors or Terms?)
SELECT ngs.terms, ngs.n, list.ngrams_type, corp.weight FROM ngrams ngs
JOIN nodes_ngrams list ON list.ngram_id = ngs.id
JOIN nodes_ngrams corp ON corp.ngram_id = ngs.id
JOIN nodes n ON n.id = corp.node_id
WHERE list.node_id = ? -- User listId
AND n.parent_id = ? -- User CorpusId or AnnuaireId
AND n.typename = ? -- both type of childs (Documents or Contacts)
AND corp.ngrams_type = ? -- both type of ngrams (Authors or Terms or...)
)
, tableMaster AS (
SELECT ngs.terms, ngs.n, list.ngrams_type, corp.weight FROM ngrams ngs
JOIN nodes_ngrams list ON list.ngram_id = ngs.id
JOIN nodes_ngrams corp ON corp.ngram_id = ngs.id
JOIN nodes n ON n.id = corp.node_id
JOIN nodes_nodes nn ON nn.node2_id = n.id
WHERE list.node_id = ? -- Master listId
AND n.parent_id = ? -- Master CorpusId or AnnuaireId
AND n.typename = ? -- Master childs (Documents or Contacts)
AND corp.ngrams_type = ? -- both type of ngrams (Authors or Terms?)
AND nn.node1_id = ? -- User CorpusId or AnnuaireId
)
SELECT COALESCE(tu.terms,tm.terms) AS terms
...
...
src/Gargantext/Database/Node.hs
View file @
96920cfd
...
...
@@ -30,12 +30,7 @@ import GHC.Int (Int64)
import
Control.Lens
(
set
)
import
Data.Maybe
import
Data.Time
(
UTCTime
)
import
Database.PostgreSQL.Simple.FromField
(
Conversion
,
ResultError
(
ConversionFailed
)
,
FromField
,
fromField
,
returnError
)
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
)
import
Prelude
hiding
(
null
,
id
,
map
,
sum
)
import
Gargantext.Core
(
Lang
(
..
))
...
...
@@ -46,7 +41,6 @@ import Gargantext.Database.Queries
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Prelude
hiding
(
sum
)
import
Database.PostgreSQL.Simple.Internal
(
Field
)
import
Control.Applicative
(
Applicative
)
import
Control.Arrow
(
returnA
)
import
Control.Lens.TH
(
makeLensesWith
,
abbreviatedFields
)
...
...
@@ -56,7 +50,6 @@ import Data.Aeson
import
Data.Maybe
(
Maybe
,
fromMaybe
)
import
Data.Text
(
Text
)
import
Data.Profunctor.Product.TH
(
makeAdaptorAndInstance
)
import
Data.Typeable
(
Typeable
)
import
qualified
Data.ByteString
as
DB
import
qualified
Data.ByteString.Lazy
as
DBL
...
...
src/Gargantext/Database/Node/Contact.hs
View file @
96920cfd
...
...
@@ -17,30 +17,21 @@ Portability : POSIX
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Node.Contact
(
NodeContact
,
HyperdataContact
,
ContactWho
,
ContactWhere
,
ContactTouch
)
module
Gargantext.Database.Node.Contact
where
import
GHC.Generics
(
Generic
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Text
(
Text
)
import
Data.Time
(
UTCTime
)
import
qualified
Data.Text
as
DT
import
Control.Lens
(
makeLenses
)
import
Database.PostgreSQL.Simple
import
Opaleye
(
QueryRunnerColumnDefault
,
queryRunnerColumnDefault
,
PGJsonb
,
fieldQueryRunnerColumn
)
-- import Control.Lens (makeLenses)
import
Opaleye
(
QueryRunnerColumnDefault
,
queryRunnerColumnDefault
,
PGJsonb
,
fieldQueryRunnerColumn
)
import
Gargantext.Database.Utils
(
fromField'
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Node
(
NodeWrite
'
,
AnnuaireId
,
UserId
,
Name
,
node
)
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
(
Node
,
Hyperdata
,
NodeType
(
..
))
import
Data.Aeson
(
Result
(
Error
,
Success
),
fromJSON
,
FromJSON
,
ToJSON
)
import
Database.PostgreSQL.Simple.FromField
(
Conversion
,
ResultError
(
ConversionFailed
)
,
FromField
,
fromField
,
returnError
)
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
)
------------------------------------------------------------------------
...
...
src/Gargantext/Database/Node/Document/Insert.hs
View file @
96920cfd
...
...
@@ -60,27 +60,26 @@ the concatenation of the parameters defined by @hashParameters@.
module
Gargantext.Database.Node.Document.Insert
where
import
Control.Lens
(
set
)
import
Data.Aeson
(
toJSON
,
Value
,
ToJSON
)
import
Data.ByteString.Internal
(
ByteString
)
import
Data.Aeson
(
toJSON
,
Value
)
import
Data.Maybe
(
maybe
)
import
Data.Text
(
Text
)
import
Data.Typeable
(
Typeable
)
import
Database.PostgreSQL.Simple
(
FromRow
,
Query
,
formatQuery
,
query
,
Only
(
..
))
import
Database.PostgreSQL.Simple
(
FromRow
,
Query
,
query
,
Only
(
..
))
import
Database.PostgreSQL.Simple.FromRow
(
fromRow
,
field
)
import
Database.PostgreSQL.Simple.SqlQQ
import
Database.PostgreSQL.Simple.ToField
(
toField
)
import
Database.PostgreSQL.Simple.ToRow
(
ToRow
(
..
))
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
DT
(
pack
,
unpack
,
concat
,
take
)
import
qualified
Data.Digest.Pure.SHA
as
SHA
(
sha256
,
showDigest
)
import
qualified
Data.ByteString.Lazy.Char8
as
DC
(
pack
)
import
GHC.Generics
(
Generic
)
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Database.Node
(
mkCmd
,
Cmd
(
..
))
import
Gargantext.Database.Node.Contact
(
HyperdataContact
(
..
))
import
Gargantext.Database.Types.Node
import
Gargantext.Prelude
import
qualified
Data.ByteString.Lazy.Char8
as
DC
(
pack
)
import
qualified
Data.Digest.Pure.SHA
as
SHA
(
sha256
,
showDigest
)
import
qualified
Data.Text
as
DT
(
pack
,
unpack
,
concat
,
take
)
-- TODO : the import of Document constructor below does not work
-- import Gargantext.Database.Types.Node (Document)
--import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..)
...
...
@@ -93,9 +92,11 @@ import Gargantext.Database.Types.Node
-- , node_parentId, node_name, node_hyperdata, hyperdataDocuments
-- , NodeTypeId
-- )
import
Gargantext.Prelude
{-| To Print result query
import Data.ByteString.Internal (ByteString)
import Database.PostgreSQL.Simple (formatQuery)
-}
import
GHC.Generics
(
Generic
)
---------------------------------------------------------------------------
-- * Main Insert functions
...
...
@@ -108,16 +109,12 @@ import GHC.Generics (Generic)
-- ParentId : folder ID which is parent of the inserted documents
data
Hyper
=
HyperDocument
HyperdataDocument
|
Hyper
Contact
HyperdataContact
data
ToDbData
=
ToDbDocument
HyperdataDocument
|
ToDb
Contact
HyperdataContact
insertDocuments
::
UserId
->
ParentId
->
[
Hyper
]
->
Cmd
[
ReturnId
]
insertDocuments
uId
pId
hs
=
mkCmd
$
\
c
->
query
c
queryInsert
(
Only
$
Values
fields
inputData
)
insertDocuments
::
UserId
->
ParentId
->
[
ToDbData
]
->
Cmd
[
ReturnId
]
insertDocuments
uId
pId
hs
=
mkCmd
$
\
c
->
query
c
queryInsert
(
Only
$
Values
fields
$
prepare
uId
pId
hs
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
inputSqlTypes
inputData
=
case
hs
of
[
HyperDocument
_
]
->
prepare
_hyperdataDocument_title
uId
pId
$
map
(
\
(
HyperDocument
h
)
->
h
)
hs
[
HyperContact
_
]
->
prepare
(
\
_
->
Just
"name"
)
uId
pId
$
map
(
\
(
HyperContact
h
)
->
h
)
hs
_
->
panic
"error"
-- | Debug SQL function
--
...
...
@@ -159,13 +156,18 @@ queryInsert = [sql|
JOIN nodes c USING (hyperdata); -- columns of unique index
|]
prepare
::
(
Hyperdata
a
,
ToJSON
a
)
=>
(
a
->
Maybe
Text
)
->
UserId
->
ParentId
->
[
a
]
->
[
InputData
]
prepare
f
uId
pId
=
map
(
\
h
->
InputData
tId
uId
pId
(
DT
.
take
255
<$>
maybe
"No Title"
identity
$
f
h
)
(
toJSON
h
)
)
prepare
::
UserId
->
ParentId
->
[
ToDbData
]
->
[
InputData
]
prepare
uId
pId
=
map
(
\
h
->
InputData
tId
uId
pId
(
name
h
)
(
toJSON'
h
))
where
tId
=
nodeTypeId
NodeDocument
tId
=
nodeTypeId
NodeDocument
toJSON'
(
ToDbDocument
hd
)
=
toJSON
hd
toJSON'
(
ToDbContact
hc
)
=
toJSON
hc
name
h
=
DT
.
take
255
<$>
maybe
"No Title"
identity
$
f
h
where
f
(
ToDbDocument
hd
)
=
_hyperdataDocument_title
hd
f
(
ToDbContact
_
)
=
Just
"Contact"
-- TODO view FirstName . LastName
------------------------------------------------------------------------
-- * Main Types used
...
...
src/Gargantext/Database/Utils.hs
View file @
96920cfd
...
...
@@ -27,7 +27,6 @@ import Database.PostgreSQL.Simple.Internal (Field)
import
qualified
Data.ByteString
as
DB
import
Database.PostgreSQL.Simple.FromField
(
Conversion
,
ResultError
(
ConversionFailed
)
,
FromField
,
fromField
,
returnError
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment