Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
986a253c
Commit
986a253c
authored
Oct 24, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DBFLOW] Ngrams, NgramsIndexed, NgramsT a.
parent
f6a473f2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
57 additions
and
25 deletions
+57
-25
Flow.hs
src/Gargantext/Database/Flow.hs
+52
-18
Ngram.hs
src/Gargantext/Database/Ngram.hs
+5
-7
No files found.
src/Gargantext/Database/Flow.hs
View file @
986a253c
...
...
@@ -18,16 +18,20 @@ authors
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Flow
where
import
System.FilePath
(
FilePath
)
import
Control.Lens
(
makeLenses
)
import
Data.Maybe
(
Maybe
(
..
))
import
Data.Text
(
Text
,
unpack
)
import
Data.Map
(
Map
)
import
qualified
Data.Map
as
DM
import
GHC.Generics
(
Generic
)
import
Gargantext.Core.Types
(
NodePoly
(
..
))
import
Gargantext.Prelude
...
...
@@ -38,6 +42,7 @@ import Gargantext.Database.User (getUser, UserLight(..), Username)
import
Gargantext.Database.Node.Document.Insert
(
insertDocuments
,
ReturnId
(
..
),
addUniqIds
)
import
Gargantext.Database.Node.Document.Add
(
add
)
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
(
WOS
))
import
Gargantext.Database.Ngram
(
insertNgrams
,
NgramIds
(
..
))
type
UserId
=
Int
type
RootId
=
Int
...
...
@@ -109,39 +114,66 @@ data DocumentWithId = DocumentWithId { documentId :: NodeId
type
NodeId
=
Int
mergeData
::
Map
HashId
ReturnId
->
Map
HashId
HyperdataDocument
->
[
DocumentWithId
]
mergeData
::
Map
HashId
ReturnId
->
Map
HashId
HyperdataDocument
->
[
DocumentWithId
]
mergeData
rs
hs
=
map
(
\
(
hash
,
r
)
->
DocumentWithId
(
reId
r
)
(
lookup'
hash
hs
))
$
DM
.
toList
rs
where
lookup'
h
xs
=
maybe
(
panic
$
"Error with "
<>
h
)
identity
(
DM
.
lookup
h
xs
)
-- | Main Ngrams Types
-- | Typed Ngrams
-- Typed Ngrams localize the context of the ngrams
-- ngrams in source field of document has Sources Type
-- ngrams in authors field of document has Authors Type
-- ngrams in text (title or abstract) of documents has Terms Type
data
NgramsType
=
Sources
|
Authors
|
Terms
data
DocumentIdWithNgrams
=
DocumentIdWithNgrams
{
documentWithId
::
DocumentWithId
,
document_ngrams
::
Map
Ngram
Int
}
type
NgramId
=
Int
data
NgramsType
=
Sources
|
Authors
|
Terms
data
Ngrams
=
Ngrams
{
_ngramsTerms
::
Text
,
_ngramsSize
::
Int
}
deriving
(
Generic
)
instance
Eq
Ngrams
where
(
==
)
=
(
==
)
instance
Ord
Ngrams
where
compare
=
compare
makeLenses
''
N
grams
data
NgramsIndexed
=
NgramsIndexed
{
_ngrams
::
Ngrams
,
_ngramsId
::
NgramId
}
deriving
(
Generic
)
instance
Eq
NgramsIndexed
where
(
==
)
=
(
==
)
instance
Ord
NgramsIndexed
where
compare
=
compare
makeLenses
''
N
gramsIndexed
data
NgramsT
a
=
NgramsT
{
_ngramsType
::
NgramsType
,
_ngramsT
::
a
}
deriving
(
Generic
)
instance
Eq
(
NgramsT
a
)
where
(
==
)
=
(
==
)
instance
Ord
(
NgramsT
a
)
where
compare
=
compare
makeLenses
''
N
gramsT
data
DocumentIdWithNgrams
=
DocumentIdWithNgrams
{
documentWithId
::
DocumentWithId
,
document_ngrams
::
Map
(
NgramsT
Ngrams
)
Int
}
-- | Typed Ngrams
data
Ngrams
=
Ngrams
{
ngramsType
::
NgramsType
,
ngramsText
::
Text
,
ngramsSize
::
Int
}
type
Ngram
=
Text
type
NgramId
=
Int
documentIdWithNgrams
::
(
HyperdataDocument
->
Map
Ngram
Int
)
->
[
DocumentWithId
]
->
[
DocumentIdWithNgrams
]
documentIdWithNgrams
::
(
HyperdataDocument
->
Map
(
NgramsT
Ngrams
)
Int
)
->
[
DocumentWithId
]
->
[
DocumentIdWithNgrams
]
documentIdWithNgrams
f
=
map
(
\
d
->
DocumentIdWithNgrams
d
((
f
.
documentData
)
d
))
-- | TODO check optimization
mapNodeIdNgrams
::
[
DocumentIdWithNgrams
]
->
Map
Ngram
[(
NodeId
,
Int
)]
mapNodeIdNgrams
::
[
DocumentIdWithNgrams
]
->
Map
(
NgramsT
Ngrams
)
[(
NodeId
,
Int
)]
mapNodeIdNgrams
ds
=
DM
.
fromListWith
(
<>
)
xs
where
xs
=
[(
ng
Id
,
[(
nId
,
i
)])
|
(
nId
,
n2i'
)
<-
n2i
ds
,
(
ngId
,
i
)
<-
DM
.
toList
n2i'
]
xs
=
[(
ng
,
[(
nId
,
i
)])
|
(
nId
,
n2i'
)
<-
n2i
ds
,
(
ng
,
i
)
<-
DM
.
toList
n2i'
]
n2i
=
map
(
\
d
->
((
documentId
.
documentWithId
)
d
,
document_ngrams
d
))
--indexNgram :: [Ngram] ->
indexNgrams
::
Map
(
NgramsT
Ngrams
)
[(
NodeId
,
Int
)]
->
IO
(
Map
(
NgramsT
NgramsIndexed
)
[(
NodeId
,
Int
)])
indexNgrams
ng2nId
=
undefined
--let keys = DM.keys ng2nId
...
...
@@ -150,5 +182,7 @@ mapNodeIdNgrams ds = DM.fromListWith (<>) xs
--indexNgram :: Map Ngram (Map NodeId Int) -> Map NgramId (Map NodeId Int)
--indexNgram = undefined
-- grouping here
-- group Ngrams
-- insert Groups
src/Gargantext/Database/Ngram.hs
View file @
986a253c
...
...
@@ -37,6 +37,7 @@ import Gargantext.Database.Node (mkCmd, Cmd(..))
import
Prelude
import
qualified
Database.PostgreSQL.Simple
as
DPS
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
...
...
@@ -73,9 +74,9 @@ import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
type
Ngram
=
Text
type
NgramId
=
Int
type
Size
N
=
Int
type
Size
=
Int
data
NgramIds
=
NgramIds
{
ngramId
::
Int
data
NgramIds
=
NgramIds
{
ngramId
::
Int
,
ngramTerms
::
Text
}
deriving
(
Show
,
Generic
)
...
...
@@ -83,19 +84,17 @@ instance DPS.FromRow NgramIds where
fromRow
=
NgramIds
<$>
field
<*>
field
----------------------
insertNgrams
::
[(
Ngram
,
Size
N
)]
->
Cmd
[
DPS
.
Only
Int
]
insertNgrams
::
[(
Ngram
,
Size
)]
->
Cmd
[
NgramIds
]
insertNgrams
ns
=
mkCmd
$
\
conn
->
DPS
.
query
conn
queryInsertNgrams
(
DPS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
]
insertNgrams_Debug
::
[(
Ngram
,
Size
N
)]
->
Cmd
ByteString
insertNgrams_Debug
::
[(
Ngram
,
Size
)]
->
Cmd
ByteString
insertNgrams_Debug
ns
=
mkCmd
$
\
conn
->
DPS
.
formatQuery
conn
queryInsertNgrams
(
DPS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
]
----------------------
queryInsertNgrams
::
DPS
.
Query
queryInsertNgrams
=
[
sql
|
...
...
@@ -114,4 +113,3 @@ queryInsertNgrams = [sql|
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
|]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment