Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f6a473f2
Commit
f6a473f2
authored
Oct 24, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DBFLOW] Ngrams / NodeId / count
parent
871b48ee
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
169 additions
and
66 deletions
+169
-66
Flow.hs
src/Gargantext/Database/Flow.hs
+80
-10
Ngram.hs
src/Gargantext/Database/Ngram.hs
+83
-51
Insert.hs
src/Gargantext/Database/Node/Document/Insert.hs
+6
-5
No files found.
src/Gargantext/Database/Flow.hs
View file @
f6a473f2
...
...
@@ -25,12 +25,17 @@ module Gargantext.Database.Flow
where
import
System.FilePath
(
FilePath
)
import
Data.Maybe
(
Maybe
(
..
))
import
Data.Text
(
Text
,
unpack
)
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
DM
import
Gargantext.Core.Types
(
NodePoly
(
..
))
import
Gargantext.Prelude
import
Gargantext.Database.Bashql
(
runCmd'
,
del
)
import
Gargantext.Database.Types.Node
(
Node
(
..
),
HyperdataDocument
(
..
))
import
Gargantext.Database.Node
(
getRoot
,
mkRoot
,
mkCorpus
)
import
Gargantext.Database.User
(
getUser
,
UserLight
(
..
),
Username
)
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
reId
)
)
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIds
)
import
Gargantext.Database.Node.Document.Add
(
add
)
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
(
WOS
))
...
...
@@ -43,7 +48,8 @@ subFlow username = do
maybeUserId
<-
runCmd'
(
getUser
username
)
let
userId
=
case
maybeUserId
of
Nothing
->
panic
"Error: User does not exist (yet)"
-- mk NodeUser gargantua_id "Node Gargantua"
Nothing
->
panic
"Error: User does not exist (yet)"
-- mk NodeUser gargantua_id "Node Gargantua"
Just
user
->
userLight_id
user
rootId'
<-
map
_node_id
<$>
runCmd'
(
getRoot
userId
)
...
...
@@ -68,7 +74,7 @@ flow fp = do
(
masterUserId
,
_
,
corpusId
)
<-
subFlow
"gargantua"
docs
<-
parseDocs
WOS
fp
docs
<-
map
addUniqIds
<$>
parseDocs
WOS
fp
ids
<-
runCmd'
$
insertDocuments
masterUserId
corpusId
docs
printDebug
"Docs IDs : "
ids
...
...
@@ -80,5 +86,69 @@ flow fp = do
inserted
<-
runCmd'
$
add
corpusId2
(
map
reId
ids
)
printDebug
"Inserted : "
inserted
runCmd'
(
del
[
corpusId2
,
corpusId
])
runCmd'
$
del
[
corpusId2
,
corpusId
]
----------------------------------------------------------------
type
HashId
=
Text
type
ToInsert
=
Map
HashId
HyperdataDocument
type
Inserted
=
Map
HashId
ReturnId
toInsert
::
[
HyperdataDocument
]
->
Map
HashId
HyperdataDocument
toInsert
=
DM
.
fromList
.
map
(
\
d
->
(
hash
(
_hyperdataDocument_uniqIdBdd
d
),
d
))
where
hash
=
maybe
"Error"
identity
toInserted
::
[
ReturnId
]
->
Map
HashId
ReturnId
toInserted
rs
=
DM
.
fromList
$
map
(
\
r
->
(
reUniqId
r
,
r
)
)
$
filter
(
\
r
->
reInserted
r
==
True
)
rs
data
DocumentWithId
=
DocumentWithId
{
documentId
::
NodeId
,
documentData
::
HyperdataDocument
}
type
NodeId
=
Int
mergeData
::
Map
HashId
ReturnId
->
Map
HashId
HyperdataDocument
->
[
DocumentWithId
]
mergeData
rs
hs
=
map
(
\
(
hash
,
r
)
->
DocumentWithId
(
reId
r
)
(
lookup'
hash
hs
))
$
DM
.
toList
rs
where
lookup'
h
xs
=
maybe
(
panic
$
"Error with "
<>
h
)
identity
(
DM
.
lookup
h
xs
)
data
DocumentIdWithNgrams
=
DocumentIdWithNgrams
{
documentWithId
::
DocumentWithId
,
document_ngrams
::
Map
Ngram
Int
}
data
NgramsType
=
Sources
|
Authors
|
Terms
-- | Typed Ngrams
data
Ngrams
=
Ngrams
{
ngramsType
::
NgramsType
,
ngramsText
::
Text
,
ngramsSize
::
Int
}
type
Ngram
=
Text
type
NgramId
=
Int
documentIdWithNgrams
::
(
HyperdataDocument
->
Map
Ngram
Int
)
->
[
DocumentWithId
]
->
[
DocumentIdWithNgrams
]
documentIdWithNgrams
f
=
map
(
\
d
->
DocumentIdWithNgrams
d
((
f
.
documentData
)
d
))
-- | TODO check optimization
mapNodeIdNgrams
::
[
DocumentIdWithNgrams
]
->
Map
Ngram
[(
NodeId
,
Int
)]
mapNodeIdNgrams
ds
=
DM
.
fromListWith
(
<>
)
xs
where
xs
=
[(
ngId
,
[(
nId
,
i
)])
|
(
nId
,
n2i'
)
<-
n2i
ds
,
(
ngId
,
i
)
<-
DM
.
toList
n2i'
]
n2i
=
map
(
\
d
->
((
documentId
.
documentWithId
)
d
,
document_ngrams
d
))
--indexNgram :: [Ngram] ->
---- insert to NodeNgram
---- using insertNgrams from
--indexNgram :: Map Ngram (Map NodeId Int) -> Map NgramId (Map NodeId Int)
--indexNgram = undefined
-- grouping here
src/Gargantext/Database/Ngram.hs
View file @
f6a473f2
{-|
Module : Gargantext.Databse.Ngram
Description :
Module : Gargantext.Datab
a
se.Ngram
Description :
Ngram connection to the Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
Ngrams connection to the Database.
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Ngram
where
import
Prelude
import
Data.
Text
(
Text
)
import
Control.Lens.TH
(
makeLensesWith
,
abbreviatedFields
)
import
Data.
List
(
find
)
import
Data.Maybe
(
Maybe
)
import
Data.Profunctor.Product.TH
(
makeAdaptorAndInstance
)
import
Control.Lens.TH
(
makeLensesWith
,
abbreviatedFields
)
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
GHC.Generics
(
Generic
)
import
Data.ByteString.Internal
(
ByteString
)
import
Data.Text
(
Text
)
import
Database.PostgreSQL.Simple.FromRow
(
fromRow
,
field
)
import
Database.PostgreSQL.Simple.SqlQQ
import
Gargantext.Database.Node
(
mkCmd
,
Cmd
(
..
))
-- import Opaleye
import
Prelude
import
Opaleye
import
qualified
Database.PostgreSQL.Simple
as
DPS
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
-- Functions only
import
Data.List
(
find
)
--data NgramPoly id terms n = NgramDb { ngram_id :: id
-- , ngram_terms :: terms
-- , ngram_n :: n
-- } deriving (Show)
--
--type NgramWrite = NgramPoly (Maybe (Column PGInt4))
-- (Column PGText)
-- (Column PGInt4)
--
--type NgramRead = NgramPoly (Column PGInt4)
-- (Column PGText)
-- (Column PGInt4)
--
----type Ngram = NgramPoly Int Text Int
--
-- $(makeAdaptorAndInstance "pNgram" ''NgramPoly)
-- $(makeLensesWith abbreviatedFields ''NgramPoly)
--
--ngramTable :: Table NgramWrite NgramRead
--ngramTable = Table "ngrams" (pNgram NgramDb { ngram_id = optional "id"
-- , ngram_terms = required "terms"
-- , ngram_n = required "n"
-- }
-- )
--
--queryNgramTable :: Query NgramRead
--queryNgramTable = queryTable ngramTable
--
--dbGetNgrams :: PGS.Connection -> IO [NgramDb]
--dbGetNgrams conn = runQuery conn queryNgramTable
data
NgramPoly
id
terms
n
=
Ngram
{
ngram_id
::
id
,
ngram_terms
::
terms
,
ngram_n
::
n
}
deriving
(
Show
)
type
NgramWrite
=
NgramPoly
(
Maybe
(
Column
PGInt4
))
(
Column
PGText
)
(
Column
PGInt4
)
type
Ngram
=
Text
type
NgramId
=
Int
type
SizeN
=
Int
type
NgramRead
=
NgramPoly
(
Column
PGInt4
)
(
Column
PGText
)
(
Column
PGInt4
)
data
NgramIds
=
NgramIds
{
ngramId
::
Int
,
ngramTerms
::
Text
}
deriving
(
Show
,
Generic
)
type
Ngram
=
NgramPoly
Int
Text
Int
instance
DPS
.
FromRow
NgramIds
where
fromRow
=
NgramIds
<$>
field
<*>
field
$
(
makeAdaptorAndInstance
"pNgram"
''
N
gramPoly
)
$
(
makeLensesWith
abbreviatedFields
''
N
gramPoly
)
----------------------
insertNgrams
::
[(
Ngram
,
SizeN
)]
->
Cmd
[
DPS
.
Only
Int
]
insertNgrams
ns
=
mkCmd
$
\
conn
->
DPS
.
query
conn
queryInsertNgrams
(
DPS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
]
ngramTable
::
Table
NgramWrite
NgramRead
ngramTable
=
Table
"ngrams"
(
pNgram
Ngram
{
ngram_id
=
optional
"id"
,
ngram_terms
=
required
"terms"
,
ngram_n
=
required
"n"
}
)
queryNgramTable
::
Query
NgramRead
queryNgramTable
=
queryTable
ngramTable
insertNgrams_Debug
::
[(
Ngram
,
SizeN
)]
->
Cmd
ByteString
insertNgrams_Debug
ns
=
mkCmd
$
\
conn
->
DPS
.
formatQuery
conn
queryInsertNgrams
(
DPS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
]
--selectUsers :: Query UserRead
--selectUsers = proc () -> do
-- --user@(i, p, ll, is, un, fn, ln, m, iff, ive, dj) <- queryUserTable -< ()
-- row@(User i p ll is un fn ln m iff ive dj) <- queryUserTable -< ()
-- O.restrict -< i .== 1
-- --returnA -< User i p ll is un fn ln m iff ive dj
-- returnA -< row
--
findWith
::
(
Eq
a1
,
Foldable
t
)
=>
(
a
->
a1
)
->
a1
->
t
a
->
Maybe
a
findWith
f
t
=
find
(
\
x
->
f
x
==
t
)
----------------------
queryInsertNgrams
::
DPS
.
Query
queryInsertNgrams
=
[
sql
|
WITH input_rows(terms,n) AS (?)
, ins AS (
INSERT INTO ngrams (terms,n)
SELECT * FROM input_rows
ON CONFLICT (terms) DO NOTHING -- unique index created here
RETURNING id,terms
)
--userWithUsername :: Text -> [User] -> Maybe User
--userWithUsername t xs = userWith userUsername t xs
--
--userWithId :: Integer -> [User] -> Maybe User
--userWithId t xs = userWith userUserId t xs
SELECT id, terms
FROM ins
UNION ALL
SELECT c.id, terms
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
|]
-- | not optimized (get all ngrams without filters)
dbGetNgrams
::
PGS
.
Connection
->
IO
[
Ngram
]
dbGetNgrams
conn
=
runQuery
conn
queryNgramTable
src/Gargantext/Database/Node/Document/Insert.hs
View file @
f6a473f2
...
...
@@ -150,8 +150,9 @@ queryInsert = [sql|
|]
prepare
::
UserId
->
ParentId
->
[
HyperdataDocument
]
->
[
InputData
]
prepare
uId
pId
=
map
(
\
h
->
InputData
tId
uId
pId
(
maybe
"No Title of Document"
identity
$
_hyperdataDocument_title
h
)
(
toJSON
$
addUniqId
h
)
prepare
uId
pId
=
map
(
\
h
->
InputData
tId
uId
pId
(
maybe
"No Title of Document"
identity
$
_hyperdataDocument_title
h
)
(
toJSON
h
)
)
where
tId
=
nodeTypeId
NodeDocument
...
...
@@ -166,7 +167,7 @@ prepare uId pId = map (\h -> InputData tId uId pId (maybe "No Title of Document"
data
ReturnId
=
ReturnId
{
reInserted
::
Bool
-- ^ if the document is inserted (True: is new, False: is not new)
,
reId
::
Int
-- ^ always return the id of the document (even new or not new)
-- this is the uniq id in the database
,
reUniqId
::
Maybe
Text
-- ^ Hash Id with concatenation of hash parameters
,
reUniqId
::
Text
-- ^ Hash Id with concatenation of hash parameters
}
deriving
(
Show
,
Generic
)
instance
FromRow
ReturnId
where
...
...
@@ -195,8 +196,8 @@ instance ToRow InputData where
---------------------------------------------------------------------------
-- * Uniqueness of document definition
addUniqId
::
HyperdataDocument
->
HyperdataDocument
addUniqId
doc
=
set
hyperdataDocument_uniqIdBdd
(
Just
hashBdd
)
addUniqId
s
::
HyperdataDocument
->
HyperdataDocument
addUniqId
s
doc
=
set
hyperdataDocument_uniqIdBdd
(
Just
hashBdd
)
$
set
hyperdataDocument_uniqId
(
Just
hash
)
doc
where
hash
=
uniqId
$
DT
.
concat
$
map
(
$
doc
)
hashParameters
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment