Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
purescript-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
152
Issues
152
List
Board
Labels
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
purescript-gargantext
Commits
f6a473f2
Commit
f6a473f2
authored
Oct 24, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DBFLOW] Ngrams / NodeId / count
parent
871b48ee
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
169 additions
and
66 deletions
+169
-66
Flow.hs
src/Gargantext/Database/Flow.hs
+80
-10
Ngram.hs
src/Gargantext/Database/Ngram.hs
+83
-51
Insert.hs
src/Gargantext/Database/Node/Document/Insert.hs
+6
-5
No files found.
src/Gargantext/Database/Flow.hs
View file @
f6a473f2
...
...
@@ -25,12 +25,17 @@ module Gargantext.Database.Flow
where
import
System.FilePath
(
FilePath
)
import
Data.Maybe
(
Maybe
(
..
))
import
Data.Text
(
Text
,
unpack
)
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
DM
import
Gargantext.Core.Types
(
NodePoly
(
..
))
import
Gargantext.Prelude
import
Gargantext.Database.Bashql
(
runCmd'
,
del
)
import
Gargantext.Database.Types.Node
(
Node
(
..
),
HyperdataDocument
(
..
))
import
Gargantext.Database.Node
(
getRoot
,
mkRoot
,
mkCorpus
)
import
Gargantext.Database.User
(
getUser
,
UserLight
(
..
),
Username
)
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
reId
)
)
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIds
)
import
Gargantext.Database.Node.Document.Add
(
add
)
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
(
WOS
))
...
...
@@ -43,7 +48,8 @@ subFlow username = do
maybeUserId
<-
runCmd'
(
getUser
username
)
let
userId
=
case
maybeUserId
of
Nothing
->
panic
"Error: User does not exist (yet)"
-- mk NodeUser gargantua_id "Node Gargantua"
Nothing
->
panic
"Error: User does not exist (yet)"
-- mk NodeUser gargantua_id "Node Gargantua"
Just
user
->
userLight_id
user
rootId'
<-
map
_node_id
<$>
runCmd'
(
getRoot
userId
)
...
...
@@ -68,7 +74,7 @@ flow fp = do
(
masterUserId
,
_
,
corpusId
)
<-
subFlow
"gargantua"
docs
<-
parseDocs
WOS
fp
docs
<-
map
addUniqIds
<$>
parseDocs
WOS
fp
ids
<-
runCmd'
$
insertDocuments
masterUserId
corpusId
docs
printDebug
"Docs IDs : "
ids
...
...
@@ -80,5 +86,69 @@ flow fp = do
inserted
<-
runCmd'
$
add
corpusId2
(
map
reId
ids
)
printDebug
"Inserted : "
inserted
runCmd'
(
del
[
corpusId2
,
corpusId
])
runCmd'
$
del
[
corpusId2
,
corpusId
]
----------------------------------------------------------------
type
HashId
=
Text
type
ToInsert
=
Map
HashId
HyperdataDocument
type
Inserted
=
Map
HashId
ReturnId
toInsert
::
[
HyperdataDocument
]
->
Map
HashId
HyperdataDocument
toInsert
=
DM
.
fromList
.
map
(
\
d
->
(
hash
(
_hyperdataDocument_uniqIdBdd
d
),
d
))
where
hash
=
maybe
"Error"
identity
toInserted
::
[
ReturnId
]
->
Map
HashId
ReturnId
toInserted
rs
=
DM
.
fromList
$
map
(
\
r
->
(
reUniqId
r
,
r
)
)
$
filter
(
\
r
->
reInserted
r
==
True
)
rs
data
DocumentWithId
=
DocumentWithId
{
documentId
::
NodeId
,
documentData
::
HyperdataDocument
}
type
NodeId
=
Int
mergeData
::
Map
HashId
ReturnId
->
Map
HashId
HyperdataDocument
->
[
DocumentWithId
]
mergeData
rs
hs
=
map
(
\
(
hash
,
r
)
->
DocumentWithId
(
reId
r
)
(
lookup'
hash
hs
))
$
DM
.
toList
rs
where
lookup'
h
xs
=
maybe
(
panic
$
"Error with "
<>
h
)
identity
(
DM
.
lookup
h
xs
)
data
DocumentIdWithNgrams
=
DocumentIdWithNgrams
{
documentWithId
::
DocumentWithId
,
document_ngrams
::
Map
Ngram
Int
}
data
NgramsType
=
Sources
|
Authors
|
Terms
-- | Typed Ngrams
data
Ngrams
=
Ngrams
{
ngramsType
::
NgramsType
,
ngramsText
::
Text
,
ngramsSize
::
Int
}
type
Ngram
=
Text
type
NgramId
=
Int
documentIdWithNgrams
::
(
HyperdataDocument
->
Map
Ngram
Int
)
->
[
DocumentWithId
]
->
[
DocumentIdWithNgrams
]
documentIdWithNgrams
f
=
map
(
\
d
->
DocumentIdWithNgrams
d
((
f
.
documentData
)
d
))
-- | TODO check optimization
mapNodeIdNgrams
::
[
DocumentIdWithNgrams
]
->
Map
Ngram
[(
NodeId
,
Int
)]
mapNodeIdNgrams
ds
=
DM
.
fromListWith
(
<>
)
xs
where
xs
=
[(
ngId
,
[(
nId
,
i
)])
|
(
nId
,
n2i'
)
<-
n2i
ds
,
(
ngId
,
i
)
<-
DM
.
toList
n2i'
]
n2i
=
map
(
\
d
->
((
documentId
.
documentWithId
)
d
,
document_ngrams
d
))
--indexNgram :: [Ngram] ->
---- insert to NodeNgram
---- using insertNgrams from
--indexNgram :: Map Ngram (Map NodeId Int) -> Map NgramId (Map NodeId Int)
--indexNgram = undefined
-- grouping here
src/Gargantext/Database/Ngram.hs
View file @
f6a473f2
{-|
Module : Gargantext.Databse.Ngram
Description :
Module : Gargantext.Datab
a
se.Ngram
Description :
Ngram connection to the Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
Ngrams connection to the Database.
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Ngram
where
import
Prelude
import
Data.
Text
(
Text
)
import
Control.Lens.TH
(
makeLensesWith
,
abbreviatedFields
)
import
Data.
List
(
find
)
import
Data.Maybe
(
Maybe
)
import
Data.Profunctor.Product.TH
(
makeAdaptorAndInstance
)
import
Control.Lens.TH
(
makeLensesWith
,
abbreviatedFields
)
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
GHC.Generics
(
Generic
)
import
Data.ByteString.Internal
(
ByteString
)
import
Data.Text
(
Text
)
import
Database.PostgreSQL.Simple.FromRow
(
fromRow
,
field
)
import
Database.PostgreSQL.Simple.SqlQQ
import
Gargantext.Database.Node
(
mkCmd
,
Cmd
(
..
))
-- import Opaleye
import
Prelude
import
Opaleye
import
qualified
Database.PostgreSQL.Simple
as
DPS
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
-- Functions only
import
Data.List
(
find
)
--data NgramPoly id terms n = NgramDb { ngram_id :: id
-- , ngram_terms :: terms
-- , ngram_n :: n
-- } deriving (Show)
--
--type NgramWrite = NgramPoly (Maybe (Column PGInt4))
-- (Column PGText)
-- (Column PGInt4)
--
--type NgramRead = NgramPoly (Column PGInt4)
-- (Column PGText)
-- (Column PGInt4)
--
----type Ngram = NgramPoly Int Text Int
--
-- $(makeAdaptorAndInstance "pNgram" ''NgramPoly)
-- $(makeLensesWith abbreviatedFields ''NgramPoly)
--
--ngramTable :: Table NgramWrite NgramRead
--ngramTable = Table "ngrams" (pNgram NgramDb { ngram_id = optional "id"
-- , ngram_terms = required "terms"
-- , ngram_n = required "n"
-- }
-- )
--
--queryNgramTable :: Query NgramRead
--queryNgramTable = queryTable ngramTable
--
--dbGetNgrams :: PGS.Connection -> IO [NgramDb]
--dbGetNgrams conn = runQuery conn queryNgramTable
data
NgramPoly
id
terms
n
=
Ngram
{
ngram_id
::
id
,
ngram_terms
::
terms
,
ngram_n
::
n
}
deriving
(
Show
)
type
NgramWrite
=
NgramPoly
(
Maybe
(
Column
PGInt4
))
(
Column
PGText
)
(
Column
PGInt4
)
type
Ngram
=
Text
type
NgramId
=
Int
type
SizeN
=
Int
type
NgramRead
=
NgramPoly
(
Column
PGInt4
)
(
Column
PGText
)
(
Column
PGInt4
)
data
NgramIds
=
NgramIds
{
ngramId
::
Int
,
ngramTerms
::
Text
}
deriving
(
Show
,
Generic
)
type
Ngram
=
NgramPoly
Int
Text
Int
instance
DPS
.
FromRow
NgramIds
where
fromRow
=
NgramIds
<$>
field
<*>
field
$
(
makeAdaptorAndInstance
"pNgram"
''
N
gramPoly
)
$
(
makeLensesWith
abbreviatedFields
''
N
gramPoly
)
----------------------
insertNgrams
::
[(
Ngram
,
SizeN
)]
->
Cmd
[
DPS
.
Only
Int
]
insertNgrams
ns
=
mkCmd
$
\
conn
->
DPS
.
query
conn
queryInsertNgrams
(
DPS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
]
ngramTable
::
Table
NgramWrite
NgramRead
ngramTable
=
Table
"ngrams"
(
pNgram
Ngram
{
ngram_id
=
optional
"id"
,
ngram_terms
=
required
"terms"
,
ngram_n
=
required
"n"
}
)
queryNgramTable
::
Query
NgramRead
queryNgramTable
=
queryTable
ngramTable
insertNgrams_Debug
::
[(
Ngram
,
SizeN
)]
->
Cmd
ByteString
insertNgrams_Debug
ns
=
mkCmd
$
\
conn
->
DPS
.
formatQuery
conn
queryInsertNgrams
(
DPS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
]
--selectUsers :: Query UserRead
--selectUsers = proc () -> do
-- --user@(i, p, ll, is, un, fn, ln, m, iff, ive, dj) <- queryUserTable -< ()
-- row@(User i p ll is un fn ln m iff ive dj) <- queryUserTable -< ()
-- O.restrict -< i .== 1
-- --returnA -< User i p ll is un fn ln m iff ive dj
-- returnA -< row
--
findWith
::
(
Eq
a1
,
Foldable
t
)
=>
(
a
->
a1
)
->
a1
->
t
a
->
Maybe
a
findWith
f
t
=
find
(
\
x
->
f
x
==
t
)
----------------------
queryInsertNgrams
::
DPS
.
Query
queryInsertNgrams
=
[
sql
|
WITH input_rows(terms,n) AS (?)
, ins AS (
INSERT INTO ngrams (terms,n)
SELECT * FROM input_rows
ON CONFLICT (terms) DO NOTHING -- unique index created here
RETURNING id,terms
)
--userWithUsername :: Text -> [User] -> Maybe User
--userWithUsername t xs = userWith userUsername t xs
--
--userWithId :: Integer -> [User] -> Maybe User
--userWithId t xs = userWith userUserId t xs
SELECT id, terms
FROM ins
UNION ALL
SELECT c.id, terms
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
|]
-- | not optimized (get all ngrams without filters)
dbGetNgrams
::
PGS
.
Connection
->
IO
[
Ngram
]
dbGetNgrams
conn
=
runQuery
conn
queryNgramTable
src/Gargantext/Database/Node/Document/Insert.hs
View file @
f6a473f2
...
...
@@ -150,8 +150,9 @@ queryInsert = [sql|
|]
prepare
::
UserId
->
ParentId
->
[
HyperdataDocument
]
->
[
InputData
]
prepare
uId
pId
=
map
(
\
h
->
InputData
tId
uId
pId
(
maybe
"No Title of Document"
identity
$
_hyperdataDocument_title
h
)
(
toJSON
$
addUniqId
h
)
prepare
uId
pId
=
map
(
\
h
->
InputData
tId
uId
pId
(
maybe
"No Title of Document"
identity
$
_hyperdataDocument_title
h
)
(
toJSON
h
)
)
where
tId
=
nodeTypeId
NodeDocument
...
...
@@ -166,7 +167,7 @@ prepare uId pId = map (\h -> InputData tId uId pId (maybe "No Title of Document"
data
ReturnId
=
ReturnId
{
reInserted
::
Bool
-- ^ if the document is inserted (True: is new, False: is not new)
,
reId
::
Int
-- ^ always return the id of the document (even new or not new)
-- this is the uniq id in the database
,
reUniqId
::
Maybe
Text
-- ^ Hash Id with concatenation of hash parameters
,
reUniqId
::
Text
-- ^ Hash Id with concatenation of hash parameters
}
deriving
(
Show
,
Generic
)
instance
FromRow
ReturnId
where
...
...
@@ -195,8 +196,8 @@ instance ToRow InputData where
---------------------------------------------------------------------------
-- * Uniqueness of document definition
addUniqId
::
HyperdataDocument
->
HyperdataDocument
addUniqId
doc
=
set
hyperdataDocument_uniqIdBdd
(
Just
hashBdd
)
addUniqId
s
::
HyperdataDocument
->
HyperdataDocument
addUniqId
s
doc
=
set
hyperdataDocument_uniqIdBdd
(
Just
hashBdd
)
$
set
hyperdataDocument_uniqId
(
Just
hash
)
doc
where
hash
=
uniqId
$
DT
.
concat
$
map
(
$
doc
)
hashParameters
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment