Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
157
Issues
157
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
5a2df841
Commit
5a2df841
authored
Nov 19, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DB][Flow] question about the map of ngramsT.
parent
2d561cb0
Pipeline
#7
failed with stage
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
58 additions
and
26 deletions
+58
-26
Flow.hs
src/Gargantext/Database/Flow.hs
+44
-17
Ngrams.hs
src/Gargantext/Database/Ngrams.hs
+9
-8
IMT.hs
src/Gargantext/Ext/IMT.hs
+4
-0
CSV.hs
src/Gargantext/Text/Parsers/CSV.hs
+1
-1
No files found.
src/Gargantext/Database/Flow.hs
View file @
5a2df841
...
...
@@ -24,9 +24,11 @@ authors
module
Gargantext.Database.Flow
(
flowDatabase
)
where
import
GHC.Show
(
Show
)
import
System.FilePath
(
FilePath
)
import
Data.Maybe
(
Maybe
(
..
),
catMaybes
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
,
splitOn
)
import
Data.Map
(
Map
)
import
Data.Tuple.Extra
(
both
,
second
)
import
qualified
Data.Map
as
DM
...
...
@@ -43,6 +45,7 @@ import Gargantext.Database.Types.Node (HyperdataDocument(..))
import
Gargantext.Database.User
(
getUser
,
UserLight
(
..
),
Username
)
import
Gargantext.Prelude
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
)
import
Gargantext.Ext.IMT
(
toSchoolName
)
type
UserId
=
Int
type
RootId
=
Int
...
...
@@ -56,16 +59,34 @@ flowDatabase ff fp cName = do
-- Documents Flow
hyperdataDocuments
<-
map
addUniqIds
<$>
parseDocs
ff
fp
--printDebug "hyperdataDocuments" hyperdataDocuments
ids
<-
runCmd'
$
insertDocuments
masterUserId
corpusId
hyperdataDocuments
printDebug
"Docs IDs : "
(
length
ids
)
--printDebug "Docs IDs : " (
ids)
idsRepeat
<-
runCmd'
$
insertDocuments
masterUserId
corpusId
hyperdataDocuments
printDebug
"Repeated Docs IDs : "
(
length
ids
)
--
printDebug "Repeated Docs IDs : " (length ids)
-- Ngrams Flow
-- todo: flow for new documents only
let
tids
=
toInserted
ids
--printDebug "toInserted ids" (length tids, tids)
let
tihs
=
toInsert
hyperdataDocuments
--printDebug "toInsert hyperdataDocuments" (length tihs, tihs)
let
documentsWithId
=
mergeData
(
toInserted
ids
)
(
toInsert
hyperdataDocuments
)
-- printDebug "documentsWithId" documentsWithId
let
docsWithNgrams
=
documentIdWithNgrams
extractNgramsT
documentsWithId
printDebug
"docsWithNgrams"
docsWithNgrams
{-
let maps = mapNodeIdNgrams docsWithNgrams
printDebug "maps" (maps)
indexedNgrams <- runCmd' $ indexNgrams maps
printDebug "inserted ngrams" indexedNgrams
_ <- runCmd' $ insertToNodeNgrams indexedNgrams
-- List Flow
...
...
@@ -74,10 +95,12 @@ flowDatabase ff fp cName = do
printDebug "Docs IDs : " (length idsRepeat)
(
_
,
_
,
corpusId2
)
<-
subFlow
"alexandre"
cName
-}
(
_
,
_
,
corpusId2
)
<-
subFlow
"user1"
cName
{-
inserted <- runCmd' $ add corpusId2 (map reId ids)
printDebug "Inserted : " (length inserted)
-}
pure
[
corpusId2
,
corpusId
]
--runCmd' $ del [corpusId2, corpusId]
...
...
@@ -116,7 +139,7 @@ type NodeId = Int
type
ListId
=
Int
toInsert
::
[
HyperdataDocument
]
->
Map
HashId
HyperdataDocument
toInsert
=
DM
.
fromList
.
map
(
\
d
->
(
hash
(
_hyperdataDocument_uniqId
Bdd
d
),
d
))
toInsert
=
DM
.
fromList
.
map
(
\
d
->
(
hash
(
_hyperdataDocument_uniqId
d
),
d
))
where
hash
=
maybe
"Error"
identity
...
...
@@ -127,12 +150,12 @@ toInserted rs = DM.fromList $ map (\r -> (reUniqId r, r) )
data
DocumentWithId
=
DocumentWithId
{
documentId
::
NodeId
,
documentData
::
HyperdataDocument
}
}
deriving
(
Show
)
mergeData
::
Map
HashId
ReturnId
->
Map
HashId
HyperdataDocument
->
[
DocumentWithId
]
mergeData
rs
hs
=
map
(
\
(
hash
,
r
)
->
DocumentWithId
(
reId
r
)
(
lookup'
hash
hs
))
$
DM
.
toList
r
s
where
lookup'
h
xs
=
maybe
(
panic
$
"Error with "
<>
h
)
identity
(
DM
.
lookup
h
x
s
)
mergeData
rs
hs
=
map
(
\
(
hash
,
hpd
)
->
DocumentWithId
(
lookup'
hash
rs
)
hpd
)
$
DM
.
toList
h
s
where
lookup'
h
xs
=
maybe
(
panic
$
"Database.Flow.mergeData: Error with "
<>
h
)
reId
(
DM
.
lookup
h
r
s
)
------------------------------------------------------------------------
...
...
@@ -140,16 +163,20 @@ data DocumentIdWithNgrams =
DocumentIdWithNgrams
{
documentWithId
::
DocumentWithId
,
document_ngrams
::
Map
(
NgramsT
Ngrams
)
Int
}
}
deriving
(
Show
)
-- TODO add
Authors and
Terms (Title + Abstract)
-- TODO add Terms (Title + Abstract)
-- add f :: Text -> Text
-- newtype Ngrams = Ngrams Text
extractNgramsT
::
HyperdataDocument
->
Map
(
NgramsT
Ngrams
)
Int
extractNgramsT
doc
=
DM
.
fromList
$
[(
NgramsT
Sources
ngrams
,
1
)]
extractNgramsT
doc
=
DM
.
fromList
$
[(
NgramsT
Sources
source
,
1
)]
<>
[(
NgramsT
Institutes
i'
,
1
)
|
i'
<-
institutes
]
<>
[(
NgramsT
Authors
a'
,
1
)
|
a'
<-
authors
]
where
ngrams
=
text2ngrams
$
maybe
"Nothing"
identity
maybeNgrams
maybeNgrams
=
_hyperdataDocument_source
doc
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
_hyperdataDocument_source
doc
institutes
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
map
toSchoolName
.
(
splitOn
", "
))
$
_hyperdataDocument_institutes
doc
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
splitOn
", "
)
$
_hyperdataDocument_authors
doc
-- TODO group terms
documentIdWithNgrams
::
(
HyperdataDocument
->
Map
(
NgramsT
Ngrams
)
Int
)
->
[
DocumentWithId
]
->
[
DocumentIdWithNgrams
]
...
...
@@ -171,9 +198,9 @@ indexNgrams ng2nId = do
insertToNodeNgrams
::
Map
(
NgramsT
NgramsIndexed
)
(
Map
NodeId
Int
)
->
Cmd
Int
insertToNodeNgrams
m
=
insertNodeNgrams
[
NodeNgram
Nothing
nId
((
_ngramsId
.
_ngramsT
)
ng
)
(
fromIntegral
n
)
((
ngramsTypeId
.
_ngramsType
)
ng
)
(
fromIntegral
n
)
((
ngramsTypeId
.
_ngramsType
)
ng
)
|
(
ng
,
nId2int
)
<-
DM
.
toList
m
,
(
nId
,
n
)
<-
DM
.
toList
nId2int
,
(
nId
,
n
)
<-
DM
.
toList
nId2int
]
...
...
src/Gargantext/Database/Ngrams.hs
View file @
5a2df841
...
...
@@ -76,13 +76,14 @@ import qualified Database.PostgreSQL.Simple as DPS
-- ngrams in source field of document has Sources Type
-- ngrams in authors field of document has Authors Type
-- ngrams in text (title or abstract) of documents has Terms Type
data
NgramsType
=
Sources
|
Author
s
|
Terms
deriving
(
Eq
)
data
NgramsType
=
Authors
|
Institutes
|
Source
s
|
Terms
deriving
(
Eq
,
Show
)
ngramsTypeId
::
NgramsType
->
Int
ngramsTypeId
Terms
=
1
ngramsTypeId
Authors
=
2
ngramsTypeId
Sources
=
3
ngramsTypeId
Authors
=
1
ngramsTypeId
Institutes
=
2
ngramsTypeId
Sources
=
3
ngramsTypeId
Terms
=
4
type
NgramsTerms
=
Text
type
NgramsId
=
Int
...
...
@@ -92,7 +93,7 @@ type Size = Int
-- | TODO put it in Gargantext.Text.Ngrams
data
Ngrams
=
Ngrams
{
_ngramsTerms
::
Text
,
_ngramsSize
::
Int
}
deriving
(
Generic
)
}
deriving
(
Generic
,
Show
)
instance
Eq
Ngrams
where
(
==
)
=
(
==
)
instance
Ord
Ngrams
where
...
...
@@ -110,7 +111,7 @@ text2ngrams txt = Ngrams txt $ length $ splitOn " " txt
data
NgramsT
a
=
NgramsT
{
_ngramsType
::
NgramsType
,
_ngramsT
::
a
}
deriving
(
Generic
)
}
deriving
(
Generic
,
Show
)
instance
Eq
(
NgramsT
a
)
where
(
==
)
=
(
==
)
...
...
@@ -127,7 +128,7 @@ data NgramsIndexed =
NgramsIndexed
{
_ngrams
::
Ngrams
,
_ngramsId
::
NgramsId
}
deriving
(
Generic
)
}
deriving
(
Show
,
Generic
)
instance
Eq
NgramsIndexed
where
(
==
)
=
(
==
)
...
...
src/Gargantext/Ext/IMT.hs
View file @
5a2df841
...
...
@@ -94,6 +94,10 @@ hal_data = snd <$> CSV.readHal "doc/corpus_imt/Gargantext_Corpus.csv"
names
::
S
.
Set
Text
names
=
S
.
fromList
$
Gargantext
.
Prelude
.
map
(
\
s
->
school_id
s
)
schools
toSchoolName
::
Text
->
Text
toSchoolName
t
=
case
M
.
lookup
t
mapIdSchool
of
Nothing
->
t
Just
t'
->
t'
publisBySchool
::
DV
.
Vector
CsvHal
->
[(
Maybe
Text
,
Int
)]
publisBySchool
hal_data'
=
Gargantext
.
Prelude
.
map
(
\
(
i
,
n
)
->
(
M
.
lookup
i
mapIdSchool
,
n
))
...
...
src/Gargantext/Text/Parsers/CSV.hs
View file @
5a2df841
...
...
@@ -294,8 +294,8 @@ csvHal2doc (CsvHal title source
Nothing
Nothing
(
Just
title
)
(
Just
inst
)
(
Just
authors
)
(
Just
inst
)
(
Just
source
)
(
Just
abstract
)
(
Just
$
pack
.
show
$
jour
pub_year
pub_month
pub_day
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment