Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
150
Issues
150
List
Board
Labels
Milestones
Merge Requests
5
Merge Requests
5
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
96a23581
Commit
96a23581
authored
Aug 28, 2024
by
mzheng
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add grouping when creating a corpus from HAL
parent
9144bad9
Pipeline
#6552
passed with stages
in 48 minutes and 58 seconds
Changes
25
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
25 changed files
with
191 additions
and
29 deletions
+191
-29
update-project-dependencies
bin/update-project-dependencies
+1
-1
cabal.project
cabal.project
+1
-1
Searx.hs
src/Gargantext/API/Node/Corpus/Searx.hs
+2
-1
DocumentUpload.hs
src/Gargantext/API/Node/DocumentUpload.hs
+2
-1
DocumentsFromWriteNodes.hs
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
+2
-1
Arxiv.hs
src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
+2
-1
EPO.hs
src/Gargantext/Core/Text/Corpus/API/EPO.hs
+2
-1
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+3
-2
Isidore.hs
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+1
-0
OpenAlex.hs
src/Gargantext/Core/Text/Corpus/API/OpenAlex.hs
+2
-1
Pubmed.hs
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
+2
-1
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+2
-1
Book.hs
src/Gargantext/Core/Text/Corpus/Parsers/Book.hs
+1
-0
Gitlab.hs
src/Gargantext/Core/Text/Corpus/Parsers/Gitlab.hs
+1
-0
GrandDebat.hs
src/Gargantext/Core/Text/Corpus/Parsers/GrandDebat.hs
+2
-1
Isidore.hs
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
+2
-1
Istex.hs
src/Gargantext/Core/Text/Corpus/Parsers/JSON/Istex.hs
+1
-0
TSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/TSV.hs
+6
-3
Wikidata.hs
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
+2
-1
List.hs
src/Gargantext/Core/Text/List.hs
+50
-4
Group.hs
src/Gargantext/Core/Text/List/Group.hs
+11
-2
WithScores.hs
src/Gargantext/Core/Text/List/Group/WithScores.hs
+59
-1
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+30
-1
Document.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Document.hs
+3
-2
stack.yaml
stack.yaml
+1
-1
No files found.
bin/update-project-dependencies
View file @
96a23581
...
...
@@ -18,7 +18,7 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
expected_cabal_project_hash
=
"
9c487a789f77d9a96b4ac6a4b6268a075a72b8a391d987ba12194a46d96f6ee8
"
expected_cabal_project_hash
=
"
500b0d7b6c0b95937096a27c9a21fcaeeb6c0933d6f0db5e2ead9e69fa25b63f
"
expected_cabal_project_freeze_hash
=
"50f3ccea242400c48bd9cec7286bd07c8223c87c043e09576dd5fef0949f982a"
...
...
cabal.project
View file @
96a23581
...
...
@@ -93,7 +93,7 @@ source-repository-package
source
-
repository
-
package
type
:
git
location
:
https
://
gitlab
.
iscpif
.
fr
/
gargantext
/
crawlers
/
hal
.
git
tag
:
1
dbd939257d33126e49d2679375553df1f2eebc5
tag
:
d54812d52c9d1f86d331a991b3a87c9a8b4379cf
source
-
repository
-
package
type
:
git
...
...
src/Gargantext/API/Node/Corpus/Searx.hs
View file @
96a23581
...
...
@@ -229,4 +229,5 @@ hyperdataDocumentFromSearxResult l (SearxResult { _sr_content, _sr_engine, _sr_p
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
l
}
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
l
,
_hd_institutes_tree
=
Nothing
}
src/Gargantext/API/Node/DocumentUpload.hs
View file @
96a23581
...
...
@@ -85,7 +85,8 @@ documentUpload nId doc = do
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
view
du_language
doc
}
,
_hd_language_iso2
=
Just
$
view
du_language
doc
,
_hd_institutes_tree
=
Nothing
}
let
lang
=
EN
ncs
<-
view
$
nlpServerGet
lang
...
...
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
View file @
96a23581
...
...
@@ -156,6 +156,7 @@ hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
lang
}
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
lang
,
_hd_institutes_tree
=
Nothing
}
)
(
text2titleParagraphs
paragraphSize
ctxts
)
)
src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
View file @
96a23581
...
...
@@ -119,7 +119,8 @@ toDoc l (Arxiv.Result { abstract
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
}
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
,
_hd_institutes_tree
=
Nothing
}
where
authors
::
[
Ax
.
Author
]
->
Maybe
Text
authors
[]
=
Nothing
...
...
src/Gargantext/Core/Text/Corpus/API/EPO.hs
View file @
96a23581
...
...
@@ -61,7 +61,8 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
iso639ToText
lang
}
,
_hd_language_iso2
=
Just
$
iso639ToText
lang
,
_hd_institutes_tree
=
Nothing
}
where
authors_
=
if
null
authors
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
96a23581
...
...
@@ -53,7 +53,7 @@ toDoc' la (HAL.Document { .. }) = do
,
_hd_page
=
Nothing
,
_hd_title
=
Just
$
unwords
_document_title
,
_hd_authors
=
Just
$
foldl'
(
\
x
y
->
if
x
==
""
then
y
else
x
<>
", "
<>
y
)
""
_document_authors_names
,
_hd_institutes
=
Just
$
foldl'
(
\
x
y
->
if
x
==
""
then
y
else
x
<>
", "
<>
y
)
""
$
_document_authors_affiliations
<>
map
show
_document_struct_id
,
_hd_institutes
=
Just
$
foldl'
(
\
x
y
->
if
x
==
""
then
y
else
x
<>
", "
<>
y
)
""
$
zipWith
(
\
affialition
structId
->
affialition
<>
" | "
<>
structId
)
_document_authors_affiliations
$
map
show
_document_struct_id
,
_hd_source
=
Just
$
maybe
"Nothing"
identity
_document_source
,
_hd_abstract
=
Just
abstract
,
_hd_publication_date
=
fmap
show
utctime
...
...
@@ -63,4 +63,5 @@ toDoc' la (HAL.Document { .. }) = do
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
show
la
}
,
_hd_language_iso2
=
Just
$
show
la
,
_hd_institutes_tree
=
Just
_document_institutes_tree
}
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
View file @
96a23581
...
...
@@ -94,4 +94,5 @@ isidoreToDoc lang (IsidoreDoc t a d u s as) = do
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
.
Text
.
pack
.
show
$
lang
,
_hd_institutes_tree
=
Nothing
}
src/Gargantext/Core/Text/Corpus/API/OpenAlex.hs
View file @
96a23581
...
...
@@ -50,7 +50,8 @@ toDoc (OA.Work { .. } ) =
,
_hd_publication_hour
=
Nothing
-- TODO
,
_hd_publication_minute
=
Nothing
-- TODO
,
_hd_publication_second
=
Nothing
-- TODO
,
_hd_language_iso2
=
language
}
,
_hd_language_iso2
=
language
,
_hd_institutes_tree
=
Nothing
}
where
firstPage
::
OA
.
Biblio
->
Maybe
Int
firstPage
OA
.
Biblio
{
first_page
}
=
(
readMaybe
.
T
.
unpack
)
=<<
first_page
...
...
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
View file @
96a23581
...
...
@@ -131,7 +131,8 @@ toDoc l (PubMedDoc.PubMed { pubmed_article = PubMedDoc.PubMedArticle t j as aus
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
}
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
,
_hd_institutes_tree
=
Nothing
}
where
authors
::
[
PubMedDoc
.
Author
]
->
Maybe
Text
authors
[]
=
Nothing
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
96a23581
...
...
@@ -267,7 +267,8 @@ toDoc ff d = do
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
DT
.
pack
.
show
)
lang
}
,
_hd_language_iso2
=
Just
$
(
DT
.
pack
.
show
)
lang
,
_hd_institutes_tree
=
Nothing
}
-- printDebug "[G.C.T.C.Parsers] HyperdataDocument" hd
pure
hd
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Book.hs
View file @
96a23581
...
...
@@ -95,6 +95,7 @@ publiToHyperdata y (Publi a s t txt) =
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
DT
.
pack
$
show
FR
,
_hd_institutes_tree
=
Nothing
}
-------------------------------------------------------------
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Gitlab.hs
View file @
96a23581
...
...
@@ -56,6 +56,7 @@ gitlabIssue2hyperdataDocument issue = HyperdataDocument
,
_hd_publication_minute
=
Just
(
todMin
tod
)
,
_hd_publication_second
=
Just
(
round
$
todSec
tod
)
,
_hd_language_iso2
=
Just
$
(
DT
.
pack
.
show
)
lang
,
_hd_institutes_tree
=
Nothing
}
where
lang
=
EN
date
=
_issue_created
issue
...
...
src/Gargantext/Core/Text/Corpus/Parsers/GrandDebat.hs
View file @
96a23581
...
...
@@ -89,7 +89,8 @@ instance ToHyperdataDocument GrandDebatReference
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
Text
.
pack
$
show
FR
}
,
_hd_language_iso2
=
Just
$
Text
.
pack
$
show
FR
,
_hd_institutes_tree
=
Nothing
}
where
toAbstract
=
Text
.
intercalate
" . "
.
(
filter
(
/=
""
)
.
map
toSentence
)
toSentence
(
GrandDebatResponse
_id
_qtitle
_qvalue
r
)
=
case
r
of
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
View file @
96a23581
...
...
@@ -135,6 +135,7 @@ bind2doc l [ link', date, langDoc, authors, _source, publisher, title, abstract
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
unbound
l
langDoc
}
,
_hd_language_iso2
=
unbound
l
langDoc
,
_hd_institutes_tree
=
Nothing
}
bind2doc
_
_
=
undefined
src/Gargantext/Core/Text/Corpus/Parsers/JSON/Istex.hs
View file @
96a23581
...
...
@@ -50,5 +50,6 @@ toDoc la (ISTEX.Document i t a ab d s) = do
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
T
.
pack
.
show
)
la
,
_hd_institutes_tree
=
Nothing
}
src/Gargantext/Core/Text/Corpus/Parsers/TSV.hs
View file @
96a23581
...
...
@@ -75,7 +75,8 @@ toDoc (TsvGargV3 did dt _ dpy dpm dpd dab dau) =
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Nothing
}
,
_hd_language_iso2
=
Nothing
,
_hd_institutes_tree
=
Nothing
}
---------------------------------------------------------------
-- | Types Conversions
...
...
@@ -537,7 +538,8 @@ tsvHal2doc (TsvHal { .. }) =
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Nothing
}
,
_hd_language_iso2
=
Nothing
,
_hd_institutes_tree
=
Nothing
}
tsv2doc
::
TsvDoc
->
HyperdataDocument
...
...
@@ -560,7 +562,8 @@ tsv2doc (TsvDoc { .. })
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Nothing
}
,
_hd_language_iso2
=
Nothing
,
_hd_institutes_tree
=
Nothing
}
where
pubYear
=
fromMIntOrDec
defaultYear
tsv_publication_year
pubMonth
=
fromMaybe
defaultMonth
tsv_publication_month
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
View file @
96a23581
...
...
@@ -93,7 +93,8 @@ wikiPageToDocument m wr = do
,
_hd_publication_hour
=
hour
,
_hd_publication_minute
=
minute
,
_hd_publication_second
=
sec
,
_hd_language_iso2
=
iso2
}
,
_hd_language_iso2
=
iso2
,
_hd_institutes_tree
=
Nothing
}
wikidataSelect
::
Int
->
IO
[
WikiResult
]
...
...
src/Gargantext/Core/Text/List.hs
View file @
96a23581
...
...
@@ -28,7 +28,7 @@ import Gargantext.API.Ngrams.Types (NgramsElement, NgramsTerm(..))
import
Gargantext.Core.NLP
(
HasNLPServer
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Text.List.Group
(
toGroupedTree
,
setScoresWithMap
)
import
Gargantext.Core.Text.List.Group
(
toGroupedTree
,
setScoresWithMap
,
toGroupedTreeInstitutes
)
import
Gargantext.Core.Text.List.Group.Prelude
import
Gargantext.Core.Text.List.Group.WithStem
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
,
flowSocialList
)
...
...
@@ -38,7 +38,7 @@ import Gargantext.Core.Text.Ngrams (NgramsType(..), Ngrams(..))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
)
)
import
Gargantext.Data.HashMap.Strict.Utils
qualified
as
HashMap
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getContextsByNgramsUser
,
getContextsByNgramsOnlyUser
)
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getContextsByNgramsUser
,
getContextsByNgramsOnlyUser
,
getTreeInstitutesUser
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf_withSample
)
import
Gargantext.Database.Admin.Types.Node
(
MasterCorpusId
,
UserCorpusId
,
ContextId
)
import
Gargantext.Database.Prelude
(
DBCmd
)
...
...
@@ -77,18 +77,64 @@ buildNgramsLists :: ( HasNodeStory env err m
->
m
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
user
uCid
mCid
mfslw
gp
=
do
ngTerms
<-
buildNgramsTermsList
user
uCid
mCid
mfslw
gp
(
NgramsTerms
,
MapListSize
goodMapListSize
)
instTerms
<-
buildNgramsInstList
user
uCid
mfslw
GroupIdentity
(
Institutes
,
MapListSize
300
,
MaxListSize
1000
)
othersTerms
<-
mapM
(
buildNgramsOthersList
user
uCid
mfslw
GroupIdentity
)
[
(
Authors
,
MapListSize
9
,
MaxListSize
1000
)
,
(
Sources
,
MapListSize
9
,
MaxListSize
1000
)
,
(
Institutes
,
MapListSize
9
,
MaxListSize
1000
)
]
pure
$
Map
.
unions
$
[
ngTerms
]
<>
othersTerms
pure
$
Map
.
unions
$
[
ngTerms
]
<>
othersTerms
<>
[
instTerms
]
newtype
MapListSize
=
MapListSize
{
unMapListSize
::
Int
}
newtype
MaxListSize
=
MaxListSize
{
unMaxListSize
::
Int
}
buildNgramsInstList
::
(
HasNodeError
err
,
HasNLPServer
env
,
HasNodeStory
env
err
m
,
HasTreeError
err
)
=>
User
->
UserCorpusId
->
Maybe
FlowSocialListWith
->
GroupParams
->
(
NgramsType
,
MapListSize
,
MaxListSize
)
->
m
(
Map
NgramsType
[
NgramsElement
])
buildNgramsInstList
user
uCid
mfslw
_groupParams
(
nt
,
MapListSize
mapListSize
,
MaxListSize
maxListSize
)
=
do
allTerms
::
HashMap
NgramsTerm
(
Set
ContextId
)
<-
getContextsByNgramsUser
uCid
nt
institutesTree
::
HashMap
Text
[
Text
]
<-
getTreeInstitutesUser
uCid
nt
-- PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists
::
FlowCont
NgramsTerm
FlowListScores
<-
flowSocialList
mfslw
user
nt
(
FlowCont
HashMap
.
empty
$
HashMap
.
fromList
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
repeat
mempty
)
)
let
groupedWithList
=
toGroupedTreeInstitutes
{- groupParams -}
socialLists
allTerms
institutesTree
(
stopTerms
,
tailTerms
)
=
HashMap
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
$
view
flc_scores
groupedWithList
(
mapTerms
,
tailTerms'
)
=
HashMap
.
partition
((
==
Just
MapTerm
)
.
viewListType
)
tailTerms
listSize
=
mapListSize
-
List
.
length
mapTerms
(
mapTerms'
,
candiTerms
)
=
both
HashMap
.
fromList
$
List
.
splitAt
listSize
$
List
.
take
maxListSize
$
List
.
sortOn
(
Down
.
viewScore
.
snd
)
$
HashMap
.
toList
tailTerms'
pure
$
Map
.
fromList
[(
nt
,
List
.
take
maxListSize
$
toNgramsElement
stopTerms
<>
toNgramsElement
mapTerms
<>
toNgramsElement
(
setListType
(
Just
MapTerm
)
mapTerms'
)
<>
toNgramsElement
(
setListType
(
Just
CandidateTerm
)
candiTerms
)
)]
buildNgramsOthersList
::
(
HasNodeError
err
,
HasNLPServer
env
,
HasNodeStory
env
err
m
...
...
src/Gargantext/Core/Text/List/Group.hs
View file @
96a23581
...
...
@@ -20,20 +20,29 @@ import Data.HashMap.Strict (HashMap)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core.Text.List.Group.Prelude
import
Gargantext.Core.Text.List.Group.WithScores
(
groupWithScores'
)
import
Gargantext.Core.Text.List.Group.WithScores
(
groupWithScores'
,
groupWithScoresInstitutes'
)
import
Gargantext.Core.Text.List.Social.Prelude
(
FlowListScores
,
FlowCont
)
import
Gargantext.Prelude
------------------------------------------------------------------------
toGroupedTree
::
(
Ord
a
,
Monoid
a
,
HasSize
a
)
=>
FlowCont
NgramsTerm
FlowListScores
->
HashMap
NgramsTerm
a
->
FlowCont
NgramsTerm
(
GroupedTreeScores
a
)
->
FlowCont
NgramsTerm
(
GroupedTreeScores
a
)
-- a = (Set ContextId)
toGroupedTree
flc
scores
=
groupWithScores'
flc
scoring
where
scoring
t
=
fromMaybe
mempty
$
HashMap
.
lookup
t
scores
toGroupedTreeInstitutes
::
(
Ord
a
,
Monoid
a
,
HasSize
a
)
=>
FlowCont
NgramsTerm
FlowListScores
->
HashMap
NgramsTerm
a
->
HashMap
Text
[
Text
]
->
FlowCont
NgramsTerm
(
GroupedTreeScores
a
)
-- a = (Set ContextId)
toGroupedTreeInstitutes
flc
scores
institutesTree
=
groupWithScoresInstitutes'
flc
scoring
institutesTree
where
scoring
t
=
fromMaybe
mempty
$
HashMap
.
lookup
t
scores
------------------------------------------------------------------------
setScoresWithMap
::
(
Ord
a
,
Ord
b
,
Monoid
b
)
=>
HashMap
NgramsTerm
b
->
HashMap
NgramsTerm
(
GroupedTreeScores
a
)
...
...
src/Gargantext/Core/Text/List/Group/WithScores.hs
View file @
96a23581
...
...
@@ -40,7 +40,23 @@ groupWithScores' flc scores = FlowCont groups orphans
-- orphans should be filtered already then becomes empty
orphans
=
mempty
groupWithScoresInstitutes'
::
(
Eq
a
,
Ord
a
,
Monoid
a
,
HasSize
a
)
=>
FlowCont
NgramsTerm
FlowListScores
->
(
NgramsTerm
->
a
)
->
HashMap
Text
[
Text
]
->
FlowCont
NgramsTerm
(
GroupedTreeScores
a
)
groupWithScoresInstitutes'
flc
scores
institutesTree
=
FlowCont
(
groups
institutesTree
)
orphans
where
-- parent/child relation is inherited from social lists
groups
institutesTree'
=
HashMap
.
filter
((
0
<
)
.
viewScore
)
$
toGroupedTreeInstitutes'
institutesTree'
$
toMapMaybeParent
scores
$
view
flc_scores
flc
<>
view
flc_cont
flc
-- orphans should be filtered already then becomes empty
orphans
=
mempty
------------------------------------------------------------------------
toMapMaybeParent
::
(
Eq
a
,
Ord
a
,
Monoid
a
)
...
...
@@ -72,6 +88,14 @@ toGroupedTree' m = case HashMap.lookup Nothing m of
Nothing
->
mempty
Just
m'
->
toGroupedTree''
m
m'
toGroupedTreeInstitutes'
::
Eq
a
=>
HashMap
Text
[
Text
]
->
HashMap
(
Maybe
Parent
)
(
HashMap
NgramsTerm
(
GroupedTreeScores
a
))
->
HashMap
Parent
(
GroupedTreeScores
a
)
toGroupedTreeInstitutes'
institutesTree
m
=
case
HashMap
.
lookup
Nothing
m
of
Nothing
->
mempty
Just
m'
->
toGroupedTreeInstitutes''
m
m'
institutesTree
filterGroupedTree
::
(
GroupedTreeScores
a
->
Bool
)
->
HashMap
Parent
(
GroupedTreeScores
a
)
->
HashMap
Parent
(
GroupedTreeScores
a
)
...
...
@@ -93,3 +117,37 @@ toGroupedTree'' m notEmpty
)
v
toGroupedTreeInstitutes''
::
Eq
a
=>
HashMap
(
Maybe
Parent
)
(
HashMap
NgramsTerm
(
GroupedTreeScores
a
))
->
HashMap
NgramsTerm
(
GroupedTreeScores
a
)
->
HashMap
Text
[
Text
]
->
HashMap
Parent
(
GroupedTreeScores
a
)
toGroupedTreeInstitutes''
m
notEmpty
institutesTree
|
notEmpty
==
mempty
=
mempty
|
otherwise
=
HashMap
.
mapWithKey
(
addGroup
institutesTree
m
)
notEmpty
where
addGroup
::
(
Eq
score
)
=>
HashMap
Text
[
Text
]
->
HashMap
(
Maybe
Parent
)
(
HashMap
NgramsTerm
(
GroupedTreeScores
score
))
->
Parent
->
GroupedTreeScores
score
->
GroupedTreeScores
score
addGroup
institutesTree'
m'
k
v
=
over
gts'_children
(
toGroupedTree''
m'
.
case
HashMap
.
lookup
(
unNgramsTerm
k
)
institutesTree'
of
Nothing
->
HashMap
.
union
(
fromMaybe
mempty
$
HashMap
.
lookup
(
Just
k
)
m'
)
Just
children
->
HashMap
.
union
(
foldl
(
\
acc
child
->
HashMap
.
union
acc
$
HashMap
.
singleton
(
NgramsTerm
child
)
GroupedTreeScores
{
_gts'_score
=
_gts'_score
v
,
_gts'_listType
=
_gts'_listType
v
,
_gts'_children
=
HashMap
.
empty
})
HashMap
.
empty
children
)
.
HashMap
.
union
(
fromMaybe
mempty
$
HashMap
.
lookup
(
Just
k
)
m'
)
)
v
\ No newline at end of file
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
96a23581
...
...
@@ -30,6 +30,7 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
(
unionsWith
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
CorpusId
,
NodeId
(
..
),
ContextId
(
..
),
MasterCorpusId
,
NodeType
(
NodeDocument
),
UserCorpusId
,
DocId
)
import
Gargantext.Database.Prelude
(
DBCmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
()
-- toDBid instance
...
...
@@ -91,6 +92,34 @@ getContextsByNgramsUser cId nt =
GROUP BY cng.context_id, ng.terms
|]
getTreeInstitutesUser
::
HasDBid
NodeType
=>
CorpusId
->
NgramsType
->
DBCmd
err
(
HashMap
Text
[
Text
])
getTreeInstitutesUser
cId
nt
=
HM
.
unionsWith
(
++
)
.
map
(
\
(
_
,
hd
)
->
HM
.
fromList
$
map
(
\
(
p
,
c
)
->
(
p
,
[
c
]))
$
Map
.
toList
$
fromMaybe
Map
.
empty
(
_hd_institutes_tree
hd
))
<$>
selectHyperDataByContextUser
cId
nt
selectHyperDataByContextUser
::
HasDBid
NodeType
=>
CorpusId
->
NgramsType
->
DBCmd
err
[(
ContextId
,
HyperdataDocument
)]
selectHyperDataByContextUser
cId'
nt'
=
runPGSQuery
queryHyperDataByContextUser
(
cId'
,
toDBid
nt'
)
queryHyperDataByContextUser
::
DPS
.
Query
queryHyperDataByContextUser
=
[
sql
|
SELECT cng.context_id, c.hyperdata FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN nodes_contexts nc ON nc.context_id = cng.context_id
JOIN contexts c ON nc.context_id = c.id
WHERE nc.node_id = ? -- CorpusId
AND cng.ngrams_type = ? -- NgramsTypeId
AND nc.category > 0 -- is not in Trash
GROUP BY cng.context_id, c.hyperdata
|]
------------------------------------------------------------------------
getOccByNgramsOnlyFast_withSample
::
HasDBid
NodeType
...
...
@@ -215,7 +244,7 @@ queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
GROUP BY cng.node_id, ir.terms
|]
-- queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
-- queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
-- WITH nodes_sample AS (SELECT c.id FROM contexts c TABLESAMPLE SYSTEM_ROWS (?)
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/Document.hs
View file @
96a23581
...
...
@@ -36,6 +36,7 @@ data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe T
,
_hd_publication_minute
::
!
(
Maybe
Int
)
,
_hd_publication_second
::
!
(
Maybe
Int
)
,
_hd_language_iso2
::
!
(
Maybe
Text
)
,
_hd_institutes_tree
::
!
(
Maybe
(
Map
Text
Text
))
}
deriving
(
Show
,
Generic
)
...
...
@@ -53,7 +54,7 @@ defaultHyperdataDocument = case decode docExample of
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
where
docExample
::
ByteString
...
...
@@ -120,7 +121,7 @@ arbitraryHyperdataDocuments =
toHyperdataDocument'
(
t1
,
t2
)
=
HyperdataDocument
Nothing
Nothing
Nothing
Nothing
(
Just
t1
)
Nothing
Nothing
(
Just
t2
)
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
...
...
stack.yaml
View file @
96a23581
...
...
@@ -134,7 +134,7 @@
git
:
"
https://gitlab.iscpif.fr/gargantext/crawlers/epo-proxy-api.git"
subdirs
:
-
.
-
commit
:
1dbd939257d33126e49d2679375553df1f2eebc5
-
commit
:
d54812d52c9d1f86d331a991b3a87c9a8b4379cf
git
:
"
https://gitlab.iscpif.fr/gargantext/crawlers/hal.git"
subdirs
:
-
.
...
...
Przemyslaw Kaminski
@cgenie
mentioned in commit
5660aec0
·
Oct 08, 2024
mentioned in commit
5660aec0
mentioned in commit 5660aec07ec5a0a0a5468f440092c1a8f57a864e
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment