Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
148
Issues
148
List
Board
Labels
Milestones
Merge Requests
7
Merge Requests
7
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
3fe6eba0
Verified
Commit
3fe6eba0
authored
Mar 04, 2024
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[hyperdata] some fixes to uniqId
parent
ea8965c4
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
39 additions
and
39 deletions
+39
-39
Arxiv.hs
src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
+5
-8
EPO.hs
src/Gargantext/Core/Text/Corpus/API/EPO.hs
+5
-5
OpenAlex.hs
src/Gargantext/Core/Text/Corpus/API/OpenAlex.hs
+6
-6
Pubmed.hs
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
+1
-1
Document.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Document.hs
+3
-0
Insert.hs
src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
+19
-19
No files found.
src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
View file @
3fe6eba0
...
...
@@ -99,7 +99,7 @@ toDoc l (Arxiv.Result { abstract
)
=
HyperdataDocument
{
_hd_bdd
=
Just
"Arxiv"
,
_hd_doi
=
Just
$
Text
.
pack
doi
,
_hd_url
=
Just
$
Text
.
pack
url
,
_hd_uniqId
=
Just
$
Text
.
pack
id
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
Just
$
Text
.
pack
title
...
...
@@ -118,13 +118,10 @@ toDoc l (Arxiv.Result { abstract
where
authors
::
[
Ax
.
Author
]
->
Maybe
Text
authors
[]
=
Nothing
authors
aus'
=
Just
$
(
Text
.
intercalate
", "
)
$
map
Text
.
pack
$
map
Ax
.
auName
aus'
authors
aus'
=
Just
$
Text
.
intercalate
", "
$
map
(
Text
.
pack
.
Ax
.
auName
)
aus'
institutes
::
[
Ax
.
Author
]
->
Maybe
Text
institutes
[]
=
Nothing
institutes
aus'
=
Just
$
(
Text
.
intercalate
", "
)
$
(
map
(
Text
.
replace
", "
" - "
))
$
map
Text
.
pack
$
map
Ax
.
auFil
aus'
institutes
aus'
=
Just
$
Text
.
intercalate
", "
$
map
((
Text
.
replace
", "
" - "
.
Text
.
pack
)
.
Ax
.
auFil
)
aus'
src/Gargantext/Core/Text/Corpus/API/EPO.hs
View file @
3fe6eba0
...
...
@@ -39,7 +39,7 @@ get (Just authKey) epoAPIUrl q lang mLimit = do
Just
apiUrl
->
do
eRes
<-
EPO
.
searchEPOAPIC
apiUrl
authKey
Nothing
limit
(
Corpus
.
getRawQuery
q
)
pure
$
(
\
(
total
,
itemsC
)
->
(
Just
total
,
itemsC
.|
mapC
(
toDoc
lang
)))
<$>
eRes
-- EPO.Paginated { .. } <- EPO.searchEPOAPI apiUrl authKey 1 20 (Corpus.getRawQuery q)
-- pure $ Right ( Just $ fromIntegral total, yieldMany items .| mapC (toDoc lang) )
...
...
@@ -48,8 +48,8 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
HyperdataDocument
{
_hd_bdd
=
Just
"EPO"
,
_hd_doi
=
Nothing
,
_hd_url
=
Nothing
,
_hd_uniqId
=
id
,
_hd_uniqIdBdd
=
id
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
Map
.
lookup
lang
titles
,
_hd_authors
=
authors_
...
...
@@ -66,10 +66,10 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
,
_hd_language_iso2
=
Just
$
iso639ToText
lang
}
where
authors_
=
if
authors
==
[]
authors_
=
if
null
authors
then
Nothing
else
Just
(
T
.
intercalate
", "
authors
)
-- EPO.withAuthKey authKey $ \token -> do
-- let range = EPO.Range { rBegin = 1, rEnd = limit }
-- (len, docsC) <- EPO.searchPublishedDataWithFetchC token (Just $ Corpus.getRawQuery q) (Just range)
...
...
src/Gargantext/Core/Text/Corpus/API/OpenAlex.hs
View file @
3fe6eba0
...
...
@@ -37,8 +37,8 @@ toDoc (OA.Work { .. } ) =
HyperdataDocument
{
_hd_bdd
=
Just
"OpenAlex"
,
_hd_doi
=
doi
,
_hd_url
=
url
,
_hd_uniqId
=
Just
id
,
_hd_uniqIdBdd
=
Just
id
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
firstPage
biblio
,
_hd_title
=
title
,
_hd_authors
=
authors
authorships
...
...
@@ -55,25 +55,25 @@ toDoc (OA.Work { .. } ) =
,
_hd_language_iso2
=
language
}
where
firstPage
::
OA
.
Biblio
->
Maybe
Int
firstPage
OA
.
Biblio
{
first_page
}
=
maybe
Nothing
readMaybe
$
T
.
unpack
<$>
first_page
firstPage
OA
.
Biblio
{
first_page
}
=
(
readMaybe
.
T
.
unpack
)
=<<
first_page
authors
::
[
OA
.
Authorship
]
->
Maybe
Text
authors
[]
=
Nothing
authors
aus
=
Just
$
T
.
intercalate
", "
$
catMaybes
(
getDisplayName
<$>
aus
)
authors
aus
=
Just
$
T
.
intercalate
", "
$
mapMaybe
getDisplayName
aus
where
getDisplayName
::
OA
.
Authorship
->
Maybe
Text
getDisplayName
OA
.
Authorship
{
author
=
OA
.
DehydratedAuthor
{
display_name
=
dn
}
}
=
dn
institutes
::
[
OA
.
Authorship
]
->
Maybe
Text
institutes
[]
=
Nothing
institutes
aus
=
Just
$
T
.
intercalate
", "
(
(
T
.
replace
", "
" - "
)
.
getInstitutesNames
<$>
aus
)
institutes
aus
=
Just
$
T
.
intercalate
", "
(
T
.
replace
", "
" - "
.
getInstitutesNames
<$>
aus
)
where
getInstitutesNames
OA
.
Authorship
{
institutions
}
=
T
.
intercalate
", "
$
getDisplayName
<$>
institutions
getDisplayName
::
OA
.
DehydratedInstitution
->
Text
getDisplayName
OA
.
DehydratedInstitution
{
display_name
=
dn
}
=
dn
source
::
Maybe
Text
source
=
maybe
Nothing
getSource
primary_location
source
=
getSource
=<<
primary_location
where
getSource
OA
.
Location
{
source
=
s
}
=
getSourceDisplayName
<$>
s
getSourceDisplayName
OA
.
DehydratedSource
{
display_name
=
dn
}
=
dn
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
View file @
3fe6eba0
...
...
@@ -114,7 +114,7 @@ toDoc l (PubMedDoc.PubMed { pubmed_id
)
=
HyperdataDocument
{
_hd_bdd
=
Just
"PubMed"
,
_hd_doi
=
Nothing
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Just
$
Text
.
pack
$
show
pubmed_id
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
t
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/Document.hs
View file @
3fe6eba0
...
...
@@ -29,7 +29,10 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude
data
HyperdataDocument
=
HyperdataDocument
{
_hd_bdd
::
!
(
Maybe
Text
)
,
_hd_doi
::
!
(
Maybe
Text
)
,
_hd_url
::
!
(
Maybe
Text
)
-- | Unique MD5 hash of the document
,
_hd_uniqId
::
!
(
Maybe
Text
)
-- | Used as unique ID per source (can be same doc in Openalex, HAL, etc)
-- I think it's currently not used.
,
_hd_uniqIdBdd
::
!
(
Maybe
Text
)
,
_hd_page
::
!
(
Maybe
Int
)
,
_hd_title
::
!
(
Maybe
Text
)
...
...
src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
View file @
3fe6eba0
...
...
@@ -58,14 +58,14 @@ module Gargantext.Database.Query.Table.Node.Document.Insert
where
import
Control.Lens
(
set
,
view
)
import
Control.Lens.Cons
import
Control.Lens.Prism
import
Control.Lens.Cons
(
_head
)
import
Control.Lens.Prism
(
_Just
)
import
Data.Aeson
(
toJSON
,
ToJSON
)
import
Data.Text
qualified
as
DT
(
pack
,
concat
,
take
,
filter
,
toLower
)
import
Data.Time.Segment
(
jour
)
import
Database.PostgreSQL.Simple
(
FromRow
,
Query
,
Only
(
..
))
import
Database.PostgreSQL.Simple.FromRow
(
fromRow
,
field
)
import
Database.PostgreSQL.Simple.SqlQQ
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.ToField
(
toField
,
Action
{-, ToField-}
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Gargantext.Core
(
HasDBid
(
toDBid
))
...
...
@@ -93,7 +93,7 @@ import Database.PostgreSQL.Simple (formatQuery)
insertDb
::
(
InsertDb
a
,
HasDBid
NodeType
)
=>
UserId
->
Maybe
ParentId
->
[
a
]
->
DBCmd
err
[
ReturnId
]
insertDb
u
p
=
runPGSQuery
queryInsert
.
Only
.
Values
fields
.
map
(
insertDb'
u
p
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
inputSqlTypes
fields
=
map
(
QualifiedIdentifier
Nothing
)
inputSqlTypes
class
InsertDb
a
where
...
...
@@ -207,12 +207,12 @@ instance AddUniqId HyperdataDocument
$
set
hd_uniqId
(
Just
shaUni
)
doc
where
shaUni
=
hash
$
DT
.
concat
$
map
(
$
doc
)
shaParametersDoc
shaBdd
=
hash
$
DT
.
concat
$
map
(
$
doc
)
([
(
\
d
->
maybeText
(
_hd_bdd
d
))
]
<>
shaParametersDoc
)
shaBdd
=
hash
$
DT
.
concat
$
map
(
$
doc
)
([
maybeText
.
_hd_bdd
]
<>
shaParametersDoc
)
shaParametersDoc
::
[
(
HyperdataDocument
->
Text
)
]
shaParametersDoc
=
[
\
d
->
filterText
$
maybeText
(
_hd_title
d
)
,
\
d
->
filterText
$
maybeText
(
_hd_abstract
d
)
,
\
d
->
filterText
$
maybeText
(
_hd_source
d
)
shaParametersDoc
::
[
HyperdataDocument
->
Text
]
shaParametersDoc
=
[
filterText
.
maybeText
.
_hd_title
,
filterText
.
maybeText
.
_hd_abstract
,
filterText
.
maybeText
.
_hd_source
-- , \d -> maybeText (_hd_publication_date d)
]
...
...
@@ -230,14 +230,14 @@ instance UniqParameters (Node a)
filterText
::
Text
->
Text
filterText
=
DT
.
toLower
.
(
DT
.
filter
isAlphaNum
)
filterText
=
DT
.
toLower
.
DT
.
filter
isAlphaNum
instance
(
UniqParameters
a
,
ToJSON
a
,
HasDBid
NodeType
)
=>
AddUniqId
(
Node
a
)
where
addUniqId
(
Node
nid
_
t
u
p
n
d
h
)
=
Node
nid
(
Just
newHash
)
t
u
p
n
d
h
where
newHash
=
"
\\
x"
<>
(
hash
$
uniqParameters
(
fromMaybe
0
p
)
h
)
newHash
=
"
\\
x"
<>
hash
(
uniqParameters
(
fromMaybe
0
p
)
h
)
---------------------------------------------------------------------------
...
...
@@ -249,17 +249,17 @@ instance AddUniqId HyperdataContact
addUniqId
=
addUniqIdsContact
addUniqIdsContact
::
HyperdataContact
->
HyperdataContact
addUniqIdsContact
hc
=
set
(
hc_uniqIdBdd
)
(
Just
shaBdd
)
$
set
(
hc_uniqId
)
(
Just
shaUni
)
hc
addUniqIdsContact
hc
=
set
hc_uniqIdBdd
(
Just
shaBdd
)
$
set
hc_uniqId
(
Just
shaUni
)
hc
where
shaUni
=
hash
$
DT
.
concat
$
map
(
$
hc
)
shaParametersContact
shaBdd
=
hash
$
DT
.
concat
$
map
(
$
hc
)
([
\
d
->
maybeText
(
view
hc_bdd
d
)
]
<>
shaParametersContact
)
shaBdd
=
hash
$
DT
.
concat
$
map
(
$
hc
)
([
maybeText
.
view
hc_bdd
]
<>
shaParametersContact
)
-- | TODO add more shaparameters
shaParametersContact
::
[
(
HyperdataContact
->
Text
)
]
shaParametersContact
=
[
\
d
->
maybeText
$
view
(
hc_who
.
_Just
.
cw_firstName
)
d
,
\
d
->
maybeText
$
view
(
hc_who
.
_Just
.
cw_lastName
)
d
,
\
d
->
maybeText
$
view
(
hc_where
.
_head
.
cw_touch
.
_Just
.
ct_mail
)
d
shaParametersContact
::
[
HyperdataContact
->
Text
]
shaParametersContact
=
[
maybeText
.
view
(
hc_who
.
_Just
.
cw_firstName
)
,
maybeText
.
view
(
hc_who
.
_Just
.
cw_lastName
)
,
maybeText
.
view
(
hc_where
.
_head
.
cw_touch
.
_Just
.
ct_mail
)
]
...
...
@@ -286,7 +286,7 @@ instance ToNode HyperdataDocument where
-- TODO better Node
instance
ToNode
HyperdataContact
where
toNode
u
p
h
=
Node
0
Nothing
(
toDBid
NodeContact
)
u
p
"Contact"
date
h
toNode
u
p
=
Node
0
Nothing
(
toDBid
NodeContact
)
u
p
"Contact"
date
where
date
=
jour
2020
01
01
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment