Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
96920cfd
Commit
96920cfd
authored
Nov 28, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DB][FLOW] fix duplicate ngrams insertion.
parent
714462cc
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
84 additions
and
89 deletions
+84
-89
Flow.hs
src/Gargantext/Database/Flow.hs
+25
-23
Ngrams.hs
src/Gargantext/Database/Ngrams.hs
+27
-19
Node.hs
src/Gargantext/Database/Node.hs
+1
-8
Contact.hs
src/Gargantext/Database/Node/Contact.hs
+4
-13
Insert.hs
src/Gargantext/Database/Node/Document/Insert.hs
+27
-25
Utils.hs
src/Gargantext/Database/Utils.hs
+0
-1
No files found.
src/Gargantext/Database/Flow.hs
View file @
96920cfd
...
...
@@ -28,13 +28,13 @@ import Gargantext.Core.Types (NodePoly(..), ListType(..), listTypeId)
import
Gargantext.Database.Bashql
(
runCmd'
)
-- , del)
import
Gargantext.Database.Config
(
userMaster
,
userArbitrary
,
corpusMasterName
)
import
Gargantext.Database.Ngrams
(
insertNgrams
,
Ngrams
(
..
),
NgramsT
(
..
),
NgramsIndexed
(
..
),
indexNgramsT
,
ngramsTypeId
,
NgramsType
(
..
),
text2ngrams
)
import
Gargantext.Database.Node
(
getRoot
,
mkRoot
,
mkCorpus
,
Cmd
(
..
),
mkList
,
mkGraph
,
mkDashboard
,
mkAnnuaire
)
import
Gargantext.Database.Node
(
getRoot
,
mkRoot
,
mkCorpus
,
Cmd
(
..
),
mkList
)
--, mkGraph, mkDashboard)--
, mkAnnuaire)
import
Gargantext.Database.Node.Document.Add
(
add
)
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIds
,
Hyper
(
HyperDocument
))
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIds
,
ToDbData
(
..
))
import
Gargantext.Database.NodeNgram
(
NodeNgramPoly
(
..
),
insertNodeNgrams
)
import
Gargantext.Database.NodeNgramsNgrams
(
NodeNgramsNgramsPoly
(
..
),
insertNodeNgramsNgramsNew
)
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Node.Contact
(
HyperdataContact
(
..
))
--
import Gargantext.Database.Node.Contact (HyperdataContact(..))
import
Gargantext.Database.User
(
getUser
,
UserLight
(
..
),
Username
)
import
Gargantext.Ext.IMT
(
toSchoolName
)
import
Gargantext.Prelude
...
...
@@ -47,19 +47,20 @@ type CorpusId = Int
flowDatabase
::
FileFormat
->
FilePath
->
CorpusName
->
IO
Int
flowDatabase
ff
fp
cName
=
do
-- Corus Flow
-- Cor
p
us Flow
(
masterUserId
,
_
,
corpusId
)
<-
subFlowCorpus
userMaster
corpusMasterName
-- Documents Flow
hyperdataDocuments
<-
map
addUniqIds
<$>
parseDocs
ff
fp
let
hyperdataDocuments'
=
map
(
\
h
->
Hyper
Document
h
)
hyperdataDocuments
printDebug
"hyperdataDocuments"
hyperdataDocuments
let
hyperdataDocuments'
=
map
(
\
h
->
ToDb
Document
h
)
hyperdataDocuments
printDebug
"hyperdataDocuments"
(
length
hyperdataDocuments
)
ids
<-
runCmd'
$
insertDocuments
masterUserId
corpusId
hyperdataDocuments'
--printDebug "Docs IDs : " (ids)
idsRepeat
<-
runCmd'
$
insertDocuments
masterUserId
corpusId
hyperdataDocuments'
printDebug
"Repeated Docs IDs : "
(
length
idsRepeat
)
-- printDebug "Docs IDs : " (ids)
-- idsRepeat <- runCmd' $ insertDocuments masterUserId corpusId hyperdataDocuments'
-- printDebug "Repeated Docs IDs : " (length idsRepeat)
let
idsNotRepeated
=
filter
(
\
r
->
reInserted
r
==
True
)
ids
-- {-
-- Ngrams Flow
-- todo: flow for new documents only
let
tids
=
toInserted
ids
...
...
@@ -68,37 +69,38 @@ flowDatabase ff fp cName = do
let
tihs
=
toInsert
hyperdataDocuments
printDebug
"toInsert hyperdataDocuments"
(
length
tihs
)
let
documentsWithId
=
mergeData
(
toInserted
ids
)
(
toInsert
hyperdataDocuments
)
printDebug
"documentsWithId"
documentsWithId
let
documentsWithId
=
mergeData
(
toInserted
ids
NotRepeated
)
(
toInsert
hyperdataDocuments
)
--
printDebug "documentsWithId" documentsWithId
-- docsWithNgrams <- documentIdWithNgrams documentsWithId extractNgramsT
let
docsWithNgrams
=
documentIdWithNgrams
extractNgramsT
documentsWithId
printDebug
"docsWithNgrams"
docsWithNgrams
--
printDebug "docsWithNgrams" docsWithNgrams
let
maps
=
mapNodeIdNgrams
docsWithNgrams
printDebug
"maps"
(
maps
)
--
printDebug "maps" (maps)
indexedNgrams
<-
runCmd'
$
indexNgrams
maps
printDebug
"inserted ngrams"
indexedNgrams
--
printDebug "inserted ngrams" indexedNgrams
_
<-
runCmd'
$
insertToNodeNgrams
indexedNgrams
-- List Flow
listId2
<-
runCmd'
$
listFlow
masterUserId
corpusId
indexedNgrams
printDebug
"list id : "
listId2
(
userId
,
rootUserId
,
corpusId2
)
<-
subFlowCorpus
userArbitrary
cName
--(userId, rootUserId, corpusId2) <- subFlowCorpus userArbitrary cName
--}
(
userId
,
_
,
corpusId2
)
<-
subFlowCorpus
userArbitrary
cName
userListId
<-
runCmd'
$
listFlowUser
userId
corpusId2
printDebug
"UserList : "
userListId
inserted
<-
runCmd'
$
add
corpusId2
(
map
reId
ids
)
printDebug
"Inserted : "
(
length
inserted
)
_
<-
runCmd'
$
mkDashboard
corpusId2
userId
_
<-
runCmd'
$
mkGraph
corpusId2
userId
--
_
<-
runCmd'
$
mkDashboard
corpusId2
userId
--
_
<-
runCmd'
$
mkGraph
corpusId2
userId
-- Annuaire Flow
annuaireId
<-
runCmd'
$
mkAnnuaire
rootUserId
userId
-- _
<- runCmd' $ mkAnnuaire rootUserId userId
pure
corpusId2
-- runCmd' $ del [corpusId2, corpusId]
...
...
@@ -208,16 +210,16 @@ insertToNodeNgrams m = insertNodeNgrams [ NodeNgram Nothing nId ((_ngramsId
------------------------------------------------------------------------
listFlow
::
UserId
->
CorpusId
->
Map
(
NgramsT
NgramsIndexed
)
(
Map
NodeId
Int
)
->
Cmd
ListId
listFlow
uId
cId
ngs
=
do
printDebug
"ngs:"
ngs
--
printDebug "ngs:" ngs
lId
<-
maybe
(
panic
"mkList error"
)
identity
<$>
head
<$>
mkList
cId
uId
printDebug
"ngs"
(
DM
.
keys
ngs
)
--
printDebug "ngs" (DM.keys ngs)
-- TODO add stemming equivalence of 2 ngrams
let
groupEd
=
groupNgramsBy
(
\
(
NgramsT
t1
n1
)
(
NgramsT
t2
n2
)
->
if
(((
==
)
t1
t2
)
&&
((
==
)
n1
n2
))
then
(
Just
(
n1
,
n2
))
else
Nothing
)
ngs
_
<-
insertGroups
lId
groupEd
-- compute Candidate / Map
let
lists
=
ngrams2list
ngs
printDebug
"lists:"
lists
--
printDebug "lists:" lists
is
<-
insertLists
lId
lists
printDebug
"listNgrams inserted :"
is
...
...
src/Gargantext/Database/Ngrams.hs
View file @
96920cfd
...
...
@@ -24,6 +24,7 @@ Ngrams connection to the Database.
module
Gargantext.Database.Ngrams
where
-- import Opaleye
import
Debug.Trace
(
trace
)
import
Prelude
(
Enum
,
Bounded
,
minBound
,
maxBound
)
import
Control.Lens
(
makeLenses
,
view
)
import
Data.ByteString.Internal
(
ByteString
)
...
...
@@ -223,9 +224,11 @@ getNgramsTableData :: DPS.Connection
->
NodeType
->
NgramsType
->
NgramsTableParamUser
->
NgramsTableParamMaster
->
IO
[
NgramsTableData
]
getNgramsTableData
conn
nodeT
ngrmT
(
NgramsTableParam
ul
uc
)
(
NgramsTableParam
ml
mc
)
=
getNgramsTableData
conn
nodeT
ngrmT
(
NgramsTableParam
ul
uc
)
(
NgramsTableParam
ml
mc
)
=
do
_
<-
trace
$
show
(
ul
,
uc
,
nodeTId
,
ngrmTId
,
ml
,
mc
,
nodeTId
,
ngrmTId
,
uc
)
map
(
\
(
t
,
n
,
nt
,
w
)
->
NgramsTableData
t
n
(
fromListTypeId
nt
)
w
)
<$>
DPS
.
query
conn
querySelectTableNgrams
(
ul
,
uc
,
nodeTId
,
ngrmTId
,
ml
,
mc
,
nodeTId
,
ngrmTId
)
-- <$> DPS.query conn querySelectTableNgrams (ul,uc,nodeTId,ngrmTId,ml,mc,nodeTId,ngrmTId)
<$>
DPS
.
query
conn
querySelectTableNgrams
(
ul
,
uc
,
nodeTId
,
ngrmTId
,
ml
,
mc
,
nodeTId
,
ngrmTId
,
uc
)
where
nodeTId
=
nodeTypeId
nodeT
ngrmTId
=
ngramsTypeId
ngrmT
...
...
@@ -235,23 +238,28 @@ querySelectTableNgrams :: DPS.Query
querySelectTableNgrams
=
[
sql
|
WITH tableUser AS (
SELECT ngs.terms, ngs.n, nn1.ngrams_type,nn2.weight FROM ngrams ngs
JOIN nodes_ngrams nn1 ON nn1.ngram_id = ngs.id
JOIN nodes_ngrams nn2 ON nn2.ngram_id = ngs.id
JOIN nodes n ON n.id = nn2.node_id
WHERE nn1.node_id = ? -- User listId
AND n.parent_id = ? -- User CorpusId or AnnuaireId
AND n.typename = ? -- both type of childs (Documents or Contacts)
AND nn2.ngrams_type = ? -- both type of ngrams (Authors or Terms?)
), tableMaster AS (
SELECT ngs.terms, ngs.n, nn1.ngrams_type,nn2.weight FROM ngrams ngs
JOIN nodes_ngrams nn1 ON nn1.ngram_id = ngs.id
JOIN nodes_ngrams nn2 ON nn2.ngram_id = ngs.id
JOIN nodes n ON n.id = nn2.node_id
WHERE nn1.node_id = ? -- Master listId
AND n.parent_id = ? -- Master CorpusId or AnnuaireId
AND n.typename = ? -- both type of childs (Documents or Contacts)
AND nn2.ngrams_type = ? -- both type of ngrams (Authors or Terms?)
SELECT ngs.terms, ngs.n, list.ngrams_type, corp.weight FROM ngrams ngs
JOIN nodes_ngrams list ON list.ngram_id = ngs.id
JOIN nodes_ngrams corp ON corp.ngram_id = ngs.id
JOIN nodes n ON n.id = corp.node_id
WHERE list.node_id = ? -- User listId
AND n.parent_id = ? -- User CorpusId or AnnuaireId
AND n.typename = ? -- both type of childs (Documents or Contacts)
AND corp.ngrams_type = ? -- both type of ngrams (Authors or Terms or...)
)
, tableMaster AS (
SELECT ngs.terms, ngs.n, list.ngrams_type, corp.weight FROM ngrams ngs
JOIN nodes_ngrams list ON list.ngram_id = ngs.id
JOIN nodes_ngrams corp ON corp.ngram_id = ngs.id
JOIN nodes n ON n.id = corp.node_id
JOIN nodes_nodes nn ON nn.node2_id = n.id
WHERE list.node_id = ? -- Master listId
AND n.parent_id = ? -- Master CorpusId or AnnuaireId
AND n.typename = ? -- Master childs (Documents or Contacts)
AND corp.ngrams_type = ? -- both type of ngrams (Authors or Terms?)
AND nn.node1_id = ? -- User CorpusId or AnnuaireId
)
SELECT COALESCE(tu.terms,tm.terms) AS terms
...
...
src/Gargantext/Database/Node.hs
View file @
96920cfd
...
...
@@ -30,12 +30,7 @@ import GHC.Int (Int64)
import
Control.Lens
(
set
)
import
Data.Maybe
import
Data.Time
(
UTCTime
)
import
Database.PostgreSQL.Simple.FromField
(
Conversion
,
ResultError
(
ConversionFailed
)
,
FromField
,
fromField
,
returnError
)
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
)
import
Prelude
hiding
(
null
,
id
,
map
,
sum
)
import
Gargantext.Core
(
Lang
(
..
))
...
...
@@ -46,7 +41,6 @@ import Gargantext.Database.Queries
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Prelude
hiding
(
sum
)
import
Database.PostgreSQL.Simple.Internal
(
Field
)
import
Control.Applicative
(
Applicative
)
import
Control.Arrow
(
returnA
)
import
Control.Lens.TH
(
makeLensesWith
,
abbreviatedFields
)
...
...
@@ -56,7 +50,6 @@ import Data.Aeson
import
Data.Maybe
(
Maybe
,
fromMaybe
)
import
Data.Text
(
Text
)
import
Data.Profunctor.Product.TH
(
makeAdaptorAndInstance
)
import
Data.Typeable
(
Typeable
)
import
qualified
Data.ByteString
as
DB
import
qualified
Data.ByteString.Lazy
as
DBL
...
...
src/Gargantext/Database/Node/Contact.hs
View file @
96920cfd
...
...
@@ -17,30 +17,21 @@ Portability : POSIX
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Node.Contact
(
NodeContact
,
HyperdataContact
,
ContactWho
,
ContactWhere
,
ContactTouch
)
module
Gargantext.Database.Node.Contact
where
import
GHC.Generics
(
Generic
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Text
(
Text
)
import
Data.Time
(
UTCTime
)
import
qualified
Data.Text
as
DT
import
Control.Lens
(
makeLenses
)
import
Database.PostgreSQL.Simple
import
Opaleye
(
QueryRunnerColumnDefault
,
queryRunnerColumnDefault
,
PGJsonb
,
fieldQueryRunnerColumn
)
-- import Control.Lens (makeLenses)
import
Opaleye
(
QueryRunnerColumnDefault
,
queryRunnerColumnDefault
,
PGJsonb
,
fieldQueryRunnerColumn
)
import
Gargantext.Database.Utils
(
fromField'
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Node
(
NodeWrite
'
,
AnnuaireId
,
UserId
,
Name
,
node
)
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
(
Node
,
Hyperdata
,
NodeType
(
..
))
import
Data.Aeson
(
Result
(
Error
,
Success
),
fromJSON
,
FromJSON
,
ToJSON
)
import
Database.PostgreSQL.Simple.FromField
(
Conversion
,
ResultError
(
ConversionFailed
)
,
FromField
,
fromField
,
returnError
)
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
)
------------------------------------------------------------------------
...
...
src/Gargantext/Database/Node/Document/Insert.hs
View file @
96920cfd
...
...
@@ -60,27 +60,26 @@ the concatenation of the parameters defined by @hashParameters@.
module
Gargantext.Database.Node.Document.Insert
where
import
Control.Lens
(
set
)
import
Data.Aeson
(
toJSON
,
Value
,
ToJSON
)
import
Data.ByteString.Internal
(
ByteString
)
import
Data.Aeson
(
toJSON
,
Value
)
import
Data.Maybe
(
maybe
)
import
Data.Text
(
Text
)
import
Data.Typeable
(
Typeable
)
import
Database.PostgreSQL.Simple
(
FromRow
,
Query
,
formatQuery
,
query
,
Only
(
..
))
import
Database.PostgreSQL.Simple
(
FromRow
,
Query
,
query
,
Only
(
..
))
import
Database.PostgreSQL.Simple.FromRow
(
fromRow
,
field
)
import
Database.PostgreSQL.Simple.SqlQQ
import
Database.PostgreSQL.Simple.ToField
(
toField
)
import
Database.PostgreSQL.Simple.ToRow
(
ToRow
(
..
))
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
DT
(
pack
,
unpack
,
concat
,
take
)
import
qualified
Data.Digest.Pure.SHA
as
SHA
(
sha256
,
showDigest
)
import
qualified
Data.ByteString.Lazy.Char8
as
DC
(
pack
)
import
GHC.Generics
(
Generic
)
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Database.Node
(
mkCmd
,
Cmd
(
..
))
import
Gargantext.Database.Node.Contact
(
HyperdataContact
(
..
))
import
Gargantext.Database.Types.Node
import
Gargantext.Prelude
import
qualified
Data.ByteString.Lazy.Char8
as
DC
(
pack
)
import
qualified
Data.Digest.Pure.SHA
as
SHA
(
sha256
,
showDigest
)
import
qualified
Data.Text
as
DT
(
pack
,
unpack
,
concat
,
take
)
-- TODO : the import of Document constructor below does not work
-- import Gargantext.Database.Types.Node (Document)
--import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..)
...
...
@@ -93,9 +92,11 @@ import Gargantext.Database.Types.Node
-- , node_parentId, node_name, node_hyperdata, hyperdataDocuments
-- , NodeTypeId
-- )
import
Gargantext.Prelude
{-| To Print result query
import Data.ByteString.Internal (ByteString)
import Database.PostgreSQL.Simple (formatQuery)
-}
import
GHC.Generics
(
Generic
)
---------------------------------------------------------------------------
-- * Main Insert functions
...
...
@@ -108,16 +109,12 @@ import GHC.Generics (Generic)
-- ParentId : folder ID which is parent of the inserted documents
data
Hyper
=
HyperDocument
HyperdataDocument
|
Hyper
Contact
HyperdataContact
data
ToDbData
=
ToDbDocument
HyperdataDocument
|
ToDb
Contact
HyperdataContact
insertDocuments
::
UserId
->
ParentId
->
[
Hyper
]
->
Cmd
[
ReturnId
]
insertDocuments
uId
pId
hs
=
mkCmd
$
\
c
->
query
c
queryInsert
(
Only
$
Values
fields
inputData
)
insertDocuments
::
UserId
->
ParentId
->
[
ToDbData
]
->
Cmd
[
ReturnId
]
insertDocuments
uId
pId
hs
=
mkCmd
$
\
c
->
query
c
queryInsert
(
Only
$
Values
fields
$
prepare
uId
pId
hs
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
inputSqlTypes
inputData
=
case
hs
of
[
HyperDocument
_
]
->
prepare
_hyperdataDocument_title
uId
pId
$
map
(
\
(
HyperDocument
h
)
->
h
)
hs
[
HyperContact
_
]
->
prepare
(
\
_
->
Just
"name"
)
uId
pId
$
map
(
\
(
HyperContact
h
)
->
h
)
hs
_
->
panic
"error"
-- | Debug SQL function
--
...
...
@@ -159,13 +156,18 @@ queryInsert = [sql|
JOIN nodes c USING (hyperdata); -- columns of unique index
|]
prepare
::
(
Hyperdata
a
,
ToJSON
a
)
=>
(
a
->
Maybe
Text
)
->
UserId
->
ParentId
->
[
a
]
->
[
InputData
]
prepare
f
uId
pId
=
map
(
\
h
->
InputData
tId
uId
pId
(
DT
.
take
255
<$>
maybe
"No Title"
identity
$
f
h
)
(
toJSON
h
)
)
prepare
::
UserId
->
ParentId
->
[
ToDbData
]
->
[
InputData
]
prepare
uId
pId
=
map
(
\
h
->
InputData
tId
uId
pId
(
name
h
)
(
toJSON'
h
))
where
tId
=
nodeTypeId
NodeDocument
tId
=
nodeTypeId
NodeDocument
toJSON'
(
ToDbDocument
hd
)
=
toJSON
hd
toJSON'
(
ToDbContact
hc
)
=
toJSON
hc
name
h
=
DT
.
take
255
<$>
maybe
"No Title"
identity
$
f
h
where
f
(
ToDbDocument
hd
)
=
_hyperdataDocument_title
hd
f
(
ToDbContact
_
)
=
Just
"Contact"
-- TODO view FirstName . LastName
------------------------------------------------------------------------
-- * Main Types used
...
...
src/Gargantext/Database/Utils.hs
View file @
96920cfd
...
...
@@ -27,7 +27,6 @@ import Database.PostgreSQL.Simple.Internal (Field)
import
qualified
Data.ByteString
as
DB
import
Database.PostgreSQL.Simple.FromField
(
Conversion
,
ResultError
(
ConversionFailed
)
,
FromField
,
fromField
,
returnError
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment