Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
cd3c7b82
Commit
cd3c7b82
authored
Oct 24, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DBFLOW] Ngrams indexed, compilation ok.
parent
986a253c
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
79 additions
and
61 deletions
+79
-61
Flow.hs
src/Gargantext/Database/Flow.hs
+9
-45
Ngram.hs
src/Gargantext/Database/Ngram.hs
+70
-16
No files found.
src/Gargantext/Database/Flow.hs
View file @
cd3c7b82
...
...
@@ -21,12 +21,10 @@ authors
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Flow
where
import
System.FilePath
(
FilePath
)
import
Control.Lens
(
makeLenses
)
import
Data.Maybe
(
Maybe
(
..
))
import
Data.Text
(
Text
,
unpack
)
import
Data.Map
(
Map
)
...
...
@@ -37,12 +35,12 @@ import Gargantext.Core.Types (NodePoly(..))
import
Gargantext.Prelude
import
Gargantext.Database.Bashql
(
runCmd'
,
del
)
import
Gargantext.Database.Types.Node
(
Node
(
..
),
HyperdataDocument
(
..
))
import
Gargantext.Database.Node
(
getRoot
,
mkRoot
,
mkCorpus
)
import
Gargantext.Database.Node
(
getRoot
,
mkRoot
,
mkCorpus
,
Cmd
(
..
)
)
import
Gargantext.Database.User
(
getUser
,
UserLight
(
..
),
Username
)
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIds
)
import
Gargantext.Database.Node.Document.Add
(
add
)
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
(
WOS
))
import
Gargantext.Database.Ngram
(
insertNgrams
,
Ngram
Ids
(
..
)
)
import
Gargantext.Database.Ngram
(
insertNgrams
,
Ngram
s
(
..
),
NgramsT
(
..
),
NgramsIndexed
(
..
),
indexNgramsT
)
type
UserId
=
Int
type
RootId
=
Int
...
...
@@ -119,48 +117,14 @@ mergeData rs hs = map (\(hash,r) -> DocumentWithId (reId r) (lookup' hash hs)) $
where
lookup'
h
xs
=
maybe
(
panic
$
"Error with "
<>
h
)
identity
(
DM
.
lookup
h
xs
)
-- | Main Ngrams Types
-- | Typed Ngrams
-- Typed Ngrams localize the context of the ngrams
-- ngrams in source field of document has Sources Type
-- ngrams in authors field of document has Authors Type
-- ngrams in text (title or abstract) of documents has Terms Type
data
NgramsType
=
Sources
|
Authors
|
Terms
type
NgramId
=
Int
data
Ngrams
=
Ngrams
{
_ngramsTerms
::
Text
,
_ngramsSize
::
Int
}
deriving
(
Generic
)
instance
Eq
Ngrams
where
(
==
)
=
(
==
)
instance
Ord
Ngrams
where
compare
=
compare
makeLenses
''
N
grams
data
NgramsIndexed
=
NgramsIndexed
{
_ngrams
::
Ngrams
,
_ngramsId
::
NgramId
}
deriving
(
Generic
)
instance
Eq
NgramsIndexed
where
(
==
)
=
(
==
)
instance
Ord
NgramsIndexed
where
compare
=
compare
makeLenses
''
N
gramsIndexed
data
NgramsT
a
=
NgramsT
{
_ngramsType
::
NgramsType
,
_ngramsT
::
a
}
deriving
(
Generic
)
instance
Eq
(
NgramsT
a
)
where
(
==
)
=
(
==
)
instance
Ord
(
NgramsT
a
)
where
compare
=
compare
makeLenses
''
N
gramsT
data
DocumentIdWithNgrams
=
DocumentIdWithNgrams
{
documentWithId
::
DocumentWithId
,
document_ngrams
::
Map
(
NgramsT
Ngrams
)
Int
}
documentIdWithNgrams
::
(
HyperdataDocument
->
Map
(
NgramsT
Ngrams
)
Int
)
->
[
DocumentWithId
]
->
[
DocumentIdWithNgrams
]
documentIdWithNgrams
::
(
HyperdataDocument
->
Map
(
NgramsT
Ngrams
)
Int
)
->
[
DocumentWithId
]
->
[
DocumentIdWithNgrams
]
documentIdWithNgrams
f
=
map
(
\
d
->
DocumentIdWithNgrams
d
((
f
.
documentData
)
d
))
-- | TODO check optimization
...
...
@@ -171,10 +135,10 @@ mapNodeIdNgrams ds = DM.fromListWith (<>) xs
n2i
=
map
(
\
d
->
((
documentId
.
documentWithId
)
d
,
document_ngrams
d
))
indexNgrams
::
Map
(
NgramsT
Ngrams
)
[(
NodeId
,
Int
)]
->
IO
(
Map
(
NgramsT
NgramsIndexed
)
[(
NodeId
,
Int
)])
indexNgrams
ng2nId
=
undefined
--let keys = DM.keys ng2nId
->
Cmd
(
Map
(
NgramsT
NgramsIndexed
)
[(
NodeId
,
Int
)])
indexNgrams
ng2nId
=
do
terms2id
<-
insertNgrams
(
map
_ngramsT
$
DM
.
keys
ng2nId
)
pure
$
DM
.
mapKeys
(
indexNgramsT
terms2id
)
ng2nId
---- insert to NodeNgram
...
...
@@ -183,6 +147,6 @@ indexNgrams ng2nId = undefined
--indexNgram = undefined
-- group Ngrams
-- insert Group
s
-- insert Group
Id
src/Gargantext/Database/Ngram.hs
View file @
cd3c7b82
...
...
@@ -23,22 +23,25 @@ Ngrams connection to the Database.
module
Gargantext.Database.Ngram
where
-- import Opaleye
import
Control.Lens
(
makeLenses
)
import
Control.Lens.TH
(
makeLensesWith
,
abbreviatedFields
)
import
Data.ByteString.Internal
(
ByteString
)
import
Data.List
(
find
)
import
Data.Map
(
Map
,
fromList
,
lookup
)
import
Data.Maybe
(
Maybe
)
import
Data.Profunctor.Product.TH
(
makeAdaptorAndInstance
)
import
GHC.Generics
(
Generic
)
import
Data.ByteString.Internal
(
ByteString
)
import
Data.Text
(
Text
)
import
Database.PostgreSQL.Simple.FromRow
(
fromRow
,
field
)
import
Database.PostgreSQL.Simple.SqlQQ
import
Gargantext.Database.Node
(
mkCmd
,
Cmd
(
..
))
-- import Opaleye
import
Prelude
import
qualified
Database.PostgreSQL.Simple
as
DPS
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
)
import
Database.PostgreSQL.Simple.FromRow
(
fromRow
,
field
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.ToField
(
toField
)
import
Database.PostgreSQL.Simple.ToRow
(
toRow
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
GHC.Generics
(
Generic
)
import
Gargantext.Database.Node
(
runCmd
,
mkCmd
,
Cmd
(
..
))
import
Gargantext.Prelude
import
qualified
Database.PostgreSQL.Simple
as
DPS
--data NgramPoly id terms n = NgramDb { ngram_id :: id
...
...
@@ -72,10 +75,50 @@ import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
--dbGetNgrams :: PGS.Connection -> IO [NgramDb]
--dbGetNgrams conn = runQuery conn queryNgramTable
type
Ngram
=
Text
type
NgramId
=
Int
type
Size
=
Int
-- | Main Ngrams Types
-- | Typed Ngrams
-- Typed Ngrams localize the context of the ngrams
-- ngrams in source field of document has Sources Type
-- ngrams in authors field of document has Authors Type
-- ngrams in text (title or abstract) of documents has Terms Type
data
NgramsType
=
Sources
|
Authors
|
Terms
type
NgramsTerms
=
Text
type
NgramsId
=
Int
type
Size
=
Int
------------------------------------------------------------------------
-- | TODO put it in Gargantext.Text.Ngrams
data
Ngrams
=
Ngrams
{
_ngramsTerms
::
Text
,
_ngramsSize
::
Int
}
deriving
(
Generic
)
instance
Eq
Ngrams
where
(
==
)
=
(
==
)
instance
Ord
Ngrams
where
compare
=
compare
makeLenses
''
N
grams
instance
DPS
.
ToRow
Ngrams
where
toRow
(
Ngrams
t
s
)
=
[
toField
t
,
toField
s
]
-------------------------------------------------------------------------
-- | TODO put it in Gargantext.Text.Ngrams
-- Named entity are typed ngrams of Terms Ngrams
data
NgramsT
a
=
NgramsT
{
_ngramsType
::
NgramsType
,
_ngramsT
::
a
}
deriving
(
Generic
)
instance
Eq
(
NgramsT
a
)
where
(
==
)
=
(
==
)
instance
Ord
(
NgramsT
a
)
where
compare
=
compare
makeLenses
''
N
gramsT
-----------------------------------------------------------------------
data
NgramsIndexed
=
NgramsIndexed
{
_ngrams
::
Ngrams
,
_ngramsId
::
NgramsId
}
deriving
(
Generic
)
instance
Eq
NgramsIndexed
where
(
==
)
=
(
==
)
instance
Ord
NgramsIndexed
where
compare
=
compare
makeLenses
''
N
gramsIndexed
------------------------------------------------------------------------
data
NgramIds
=
NgramIds
{
ngramId
::
Int
,
ngramTerms
::
Text
}
deriving
(
Show
,
Generic
)
...
...
@@ -84,13 +127,24 @@ instance DPS.FromRow NgramIds where
fromRow
=
NgramIds
<$>
field
<*>
field
----------------------
insertNgrams
::
[(
Ngram
,
Size
)]
->
Cmd
[
NgramIds
]
insertNgrams
ns
=
mkCmd
$
\
conn
->
DPS
.
query
conn
queryInsertNgrams
(
DPS
.
Only
$
Values
fields
ns
)
indexNgramsT
::
Map
NgramsTerms
NgramsId
->
NgramsT
Ngrams
->
NgramsT
NgramsIndexed
indexNgramsT
m
n
=
indexNgramsTWith
f
n
where
f
n
=
maybe
(
panic
"indexNgramsT: should not happen"
)
identity
(
lookup
n
m
)
indexNgramsTWith
::
(
NgramsTerms
->
NgramsId
)
->
NgramsT
Ngrams
->
NgramsT
NgramsIndexed
indexNgramsTWith
f
(
NgramsT
t
n
)
=
NgramsT
t
(
NgramsIndexed
n
((
f
.
_ngramsTerms
)
n
))
----------------------
insertNgrams
::
[
Ngrams
]
->
Cmd
(
Map
NgramsTerms
NgramsId
)
insertNgrams
ns
=
fromList
<$>
map
(
\
(
NgramIds
i
t
)
->
(
t
,
i
))
<$>
(
insertNgrams'
ns
)
insertNgrams'
::
[
Ngrams
]
->
Cmd
[
NgramIds
]
insertNgrams'
ns
=
mkCmd
$
\
conn
->
DPS
.
query
conn
queryInsertNgrams
(
DPS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
]
insertNgrams_Debug
::
[(
Ngram
,
Size
)]
->
Cmd
ByteString
insertNgrams_Debug
::
[(
Ngram
sTerms
,
Size
)]
->
Cmd
ByteString
insertNgrams_Debug
ns
=
mkCmd
$
\
conn
->
DPS
.
formatQuery
conn
queryInsertNgrams
(
DPS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment