Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
14
Merge Requests
14
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
96a23581
Commit
96a23581
authored
Aug 28, 2024
by
mzheng
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add grouping when creating a corpus from HAL
parent
9144bad9
Pipeline
#6552
passed with stages
in 48 minutes and 58 seconds
Changes
25
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
25 changed files
with
191 additions
and
29 deletions
+191
-29
update-project-dependencies
bin/update-project-dependencies
+1
-1
cabal.project
cabal.project
+1
-1
Searx.hs
src/Gargantext/API/Node/Corpus/Searx.hs
+2
-1
DocumentUpload.hs
src/Gargantext/API/Node/DocumentUpload.hs
+2
-1
DocumentsFromWriteNodes.hs
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
+2
-1
Arxiv.hs
src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
+2
-1
EPO.hs
src/Gargantext/Core/Text/Corpus/API/EPO.hs
+2
-1
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+3
-2
Isidore.hs
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+1
-0
OpenAlex.hs
src/Gargantext/Core/Text/Corpus/API/OpenAlex.hs
+2
-1
Pubmed.hs
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
+2
-1
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+2
-1
Book.hs
src/Gargantext/Core/Text/Corpus/Parsers/Book.hs
+1
-0
Gitlab.hs
src/Gargantext/Core/Text/Corpus/Parsers/Gitlab.hs
+1
-0
GrandDebat.hs
src/Gargantext/Core/Text/Corpus/Parsers/GrandDebat.hs
+2
-1
Isidore.hs
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
+2
-1
Istex.hs
src/Gargantext/Core/Text/Corpus/Parsers/JSON/Istex.hs
+1
-0
TSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/TSV.hs
+6
-3
Wikidata.hs
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
+2
-1
List.hs
src/Gargantext/Core/Text/List.hs
+50
-4
Group.hs
src/Gargantext/Core/Text/List/Group.hs
+11
-2
WithScores.hs
src/Gargantext/Core/Text/List/Group/WithScores.hs
+59
-1
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+30
-1
Document.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Document.hs
+3
-2
stack.yaml
stack.yaml
+1
-1
No files found.
bin/update-project-dependencies
View file @
96a23581
...
...
@@ -18,7 +18,7 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
expected_cabal_project_hash
=
"
9c487a789f77d9a96b4ac6a4b6268a075a72b8a391d987ba12194a46d96f6ee8
"
expected_cabal_project_hash
=
"
500b0d7b6c0b95937096a27c9a21fcaeeb6c0933d6f0db5e2ead9e69fa25b63f
"
expected_cabal_project_freeze_hash
=
"50f3ccea242400c48bd9cec7286bd07c8223c87c043e09576dd5fef0949f982a"
...
...
cabal.project
View file @
96a23581
...
...
@@ -93,7 +93,7 @@ source-repository-package
source
-
repository
-
package
type
:
git
location
:
https
://
gitlab
.
iscpif
.
fr
/
gargantext
/
crawlers
/
hal
.
git
tag
:
1
dbd939257d33126e49d2679375553df1f2eebc5
tag
:
d54812d52c9d1f86d331a991b3a87c9a8b4379cf
source
-
repository
-
package
type
:
git
...
...
src/Gargantext/API/Node/Corpus/Searx.hs
View file @
96a23581
...
...
@@ -229,4 +229,5 @@ hyperdataDocumentFromSearxResult l (SearxResult { _sr_content, _sr_engine, _sr_p
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
l
}
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
l
,
_hd_institutes_tree
=
Nothing
}
src/Gargantext/API/Node/DocumentUpload.hs
View file @
96a23581
...
...
@@ -85,7 +85,8 @@ documentUpload nId doc = do
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
view
du_language
doc
}
,
_hd_language_iso2
=
Just
$
view
du_language
doc
,
_hd_institutes_tree
=
Nothing
}
let
lang
=
EN
ncs
<-
view
$
nlpServerGet
lang
...
...
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
View file @
96a23581
...
...
@@ -156,6 +156,7 @@ hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
lang
}
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
lang
,
_hd_institutes_tree
=
Nothing
}
)
(
text2titleParagraphs
paragraphSize
ctxts
)
)
src/Gargantext/Core/Text/Corpus/API/Arxiv.hs
View file @
96a23581
...
...
@@ -119,7 +119,8 @@ toDoc l (Arxiv.Result { abstract
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
}
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
,
_hd_institutes_tree
=
Nothing
}
where
authors
::
[
Ax
.
Author
]
->
Maybe
Text
authors
[]
=
Nothing
...
...
src/Gargantext/Core/Text/Corpus/API/EPO.hs
View file @
96a23581
...
...
@@ -61,7 +61,8 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
iso639ToText
lang
}
,
_hd_language_iso2
=
Just
$
iso639ToText
lang
,
_hd_institutes_tree
=
Nothing
}
where
authors_
=
if
null
authors
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
96a23581
...
...
@@ -53,7 +53,7 @@ toDoc' la (HAL.Document { .. }) = do
,
_hd_page
=
Nothing
,
_hd_title
=
Just
$
unwords
_document_title
,
_hd_authors
=
Just
$
foldl'
(
\
x
y
->
if
x
==
""
then
y
else
x
<>
", "
<>
y
)
""
_document_authors_names
,
_hd_institutes
=
Just
$
foldl'
(
\
x
y
->
if
x
==
""
then
y
else
x
<>
", "
<>
y
)
""
$
_document_authors_affiliations
<>
map
show
_document_struct_id
,
_hd_institutes
=
Just
$
foldl'
(
\
x
y
->
if
x
==
""
then
y
else
x
<>
", "
<>
y
)
""
$
zipWith
(
\
affialition
structId
->
affialition
<>
" | "
<>
structId
)
_document_authors_affiliations
$
map
show
_document_struct_id
,
_hd_source
=
Just
$
maybe
"Nothing"
identity
_document_source
,
_hd_abstract
=
Just
abstract
,
_hd_publication_date
=
fmap
show
utctime
...
...
@@ -63,4 +63,5 @@ toDoc' la (HAL.Document { .. }) = do
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
show
la
}
,
_hd_language_iso2
=
Just
$
show
la
,
_hd_institutes_tree
=
Just
_document_institutes_tree
}
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
View file @
96a23581
...
...
@@ -94,4 +94,5 @@ isidoreToDoc lang (IsidoreDoc t a d u s as) = do
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
.
Text
.
pack
.
show
$
lang
,
_hd_institutes_tree
=
Nothing
}
src/Gargantext/Core/Text/Corpus/API/OpenAlex.hs
View file @
96a23581
...
...
@@ -50,7 +50,8 @@ toDoc (OA.Work { .. } ) =
,
_hd_publication_hour
=
Nothing
-- TODO
,
_hd_publication_minute
=
Nothing
-- TODO
,
_hd_publication_second
=
Nothing
-- TODO
,
_hd_language_iso2
=
language
}
,
_hd_language_iso2
=
language
,
_hd_institutes_tree
=
Nothing
}
where
firstPage
::
OA
.
Biblio
->
Maybe
Int
firstPage
OA
.
Biblio
{
first_page
}
=
(
readMaybe
.
T
.
unpack
)
=<<
first_page
...
...
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
View file @
96a23581
...
...
@@ -131,7 +131,8 @@ toDoc l (PubMedDoc.PubMed { pubmed_article = PubMedDoc.PubMedArticle t j as aus
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
}
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
,
_hd_institutes_tree
=
Nothing
}
where
authors
::
[
PubMedDoc
.
Author
]
->
Maybe
Text
authors
[]
=
Nothing
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
96a23581
...
...
@@ -267,7 +267,8 @@ toDoc ff d = do
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
DT
.
pack
.
show
)
lang
}
,
_hd_language_iso2
=
Just
$
(
DT
.
pack
.
show
)
lang
,
_hd_institutes_tree
=
Nothing
}
-- printDebug "[G.C.T.C.Parsers] HyperdataDocument" hd
pure
hd
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Book.hs
View file @
96a23581
...
...
@@ -95,6 +95,7 @@ publiToHyperdata y (Publi a s t txt) =
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
DT
.
pack
$
show
FR
,
_hd_institutes_tree
=
Nothing
}
-------------------------------------------------------------
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Gitlab.hs
View file @
96a23581
...
...
@@ -56,6 +56,7 @@ gitlabIssue2hyperdataDocument issue = HyperdataDocument
,
_hd_publication_minute
=
Just
(
todMin
tod
)
,
_hd_publication_second
=
Just
(
round
$
todSec
tod
)
,
_hd_language_iso2
=
Just
$
(
DT
.
pack
.
show
)
lang
,
_hd_institutes_tree
=
Nothing
}
where
lang
=
EN
date
=
_issue_created
issue
...
...
src/Gargantext/Core/Text/Corpus/Parsers/GrandDebat.hs
View file @
96a23581
...
...
@@ -89,7 +89,8 @@ instance ToHyperdataDocument GrandDebatReference
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
Text
.
pack
$
show
FR
}
,
_hd_language_iso2
=
Just
$
Text
.
pack
$
show
FR
,
_hd_institutes_tree
=
Nothing
}
where
toAbstract
=
Text
.
intercalate
" . "
.
(
filter
(
/=
""
)
.
map
toSentence
)
toSentence
(
GrandDebatResponse
_id
_qtitle
_qvalue
r
)
=
case
r
of
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
View file @
96a23581
...
...
@@ -135,6 +135,7 @@ bind2doc l [ link', date, langDoc, authors, _source, publisher, title, abstract
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
unbound
l
langDoc
}
,
_hd_language_iso2
=
unbound
l
langDoc
,
_hd_institutes_tree
=
Nothing
}
bind2doc
_
_
=
undefined
src/Gargantext/Core/Text/Corpus/Parsers/JSON/Istex.hs
View file @
96a23581
...
...
@@ -50,5 +50,6 @@ toDoc la (ISTEX.Document i t a ab d s) = do
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
T
.
pack
.
show
)
la
,
_hd_institutes_tree
=
Nothing
}
src/Gargantext/Core/Text/Corpus/Parsers/TSV.hs
View file @
96a23581
...
...
@@ -75,7 +75,8 @@ toDoc (TsvGargV3 did dt _ dpy dpm dpd dab dau) =
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Nothing
}
,
_hd_language_iso2
=
Nothing
,
_hd_institutes_tree
=
Nothing
}
---------------------------------------------------------------
-- | Types Conversions
...
...
@@ -537,7 +538,8 @@ tsvHal2doc (TsvHal { .. }) =
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Nothing
}
,
_hd_language_iso2
=
Nothing
,
_hd_institutes_tree
=
Nothing
}
tsv2doc
::
TsvDoc
->
HyperdataDocument
...
...
@@ -560,7 +562,8 @@ tsv2doc (TsvDoc { .. })
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Nothing
}
,
_hd_language_iso2
=
Nothing
,
_hd_institutes_tree
=
Nothing
}
where
pubYear
=
fromMIntOrDec
defaultYear
tsv_publication_year
pubMonth
=
fromMaybe
defaultMonth
tsv_publication_month
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Wikidata.hs
View file @
96a23581
...
...
@@ -93,7 +93,8 @@ wikiPageToDocument m wr = do
,
_hd_publication_hour
=
hour
,
_hd_publication_minute
=
minute
,
_hd_publication_second
=
sec
,
_hd_language_iso2
=
iso2
}
,
_hd_language_iso2
=
iso2
,
_hd_institutes_tree
=
Nothing
}
wikidataSelect
::
Int
->
IO
[
WikiResult
]
...
...
src/Gargantext/Core/Text/List.hs
View file @
96a23581
...
...
@@ -28,7 +28,7 @@ import Gargantext.API.Ngrams.Types (NgramsElement, NgramsTerm(..))
import
Gargantext.Core.NLP
(
HasNLPServer
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Text.List.Group
(
toGroupedTree
,
setScoresWithMap
)
import
Gargantext.Core.Text.List.Group
(
toGroupedTree
,
setScoresWithMap
,
toGroupedTreeInstitutes
)
import
Gargantext.Core.Text.List.Group.Prelude
import
Gargantext.Core.Text.List.Group.WithStem
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
,
flowSocialList
)
...
...
@@ -38,7 +38,7 @@ import Gargantext.Core.Text.Ngrams (NgramsType(..), Ngrams(..))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
)
)
import
Gargantext.Data.HashMap.Strict.Utils
qualified
as
HashMap
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getContextsByNgramsUser
,
getContextsByNgramsOnlyUser
)
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getContextsByNgramsUser
,
getContextsByNgramsOnlyUser
,
getTreeInstitutesUser
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf_withSample
)
import
Gargantext.Database.Admin.Types.Node
(
MasterCorpusId
,
UserCorpusId
,
ContextId
)
import
Gargantext.Database.Prelude
(
DBCmd
)
...
...
@@ -77,18 +77,64 @@ buildNgramsLists :: ( HasNodeStory env err m
->
m
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
user
uCid
mCid
mfslw
gp
=
do
ngTerms
<-
buildNgramsTermsList
user
uCid
mCid
mfslw
gp
(
NgramsTerms
,
MapListSize
goodMapListSize
)
instTerms
<-
buildNgramsInstList
user
uCid
mfslw
GroupIdentity
(
Institutes
,
MapListSize
300
,
MaxListSize
1000
)
othersTerms
<-
mapM
(
buildNgramsOthersList
user
uCid
mfslw
GroupIdentity
)
[
(
Authors
,
MapListSize
9
,
MaxListSize
1000
)
,
(
Sources
,
MapListSize
9
,
MaxListSize
1000
)
,
(
Institutes
,
MapListSize
9
,
MaxListSize
1000
)
]
pure
$
Map
.
unions
$
[
ngTerms
]
<>
othersTerms
pure
$
Map
.
unions
$
[
ngTerms
]
<>
othersTerms
<>
[
instTerms
]
newtype
MapListSize
=
MapListSize
{
unMapListSize
::
Int
}
newtype
MaxListSize
=
MaxListSize
{
unMaxListSize
::
Int
}
buildNgramsInstList
::
(
HasNodeError
err
,
HasNLPServer
env
,
HasNodeStory
env
err
m
,
HasTreeError
err
)
=>
User
->
UserCorpusId
->
Maybe
FlowSocialListWith
->
GroupParams
->
(
NgramsType
,
MapListSize
,
MaxListSize
)
->
m
(
Map
NgramsType
[
NgramsElement
])
buildNgramsInstList
user
uCid
mfslw
_groupParams
(
nt
,
MapListSize
mapListSize
,
MaxListSize
maxListSize
)
=
do
allTerms
::
HashMap
NgramsTerm
(
Set
ContextId
)
<-
getContextsByNgramsUser
uCid
nt
institutesTree
::
HashMap
Text
[
Text
]
<-
getTreeInstitutesUser
uCid
nt
-- PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists
::
FlowCont
NgramsTerm
FlowListScores
<-
flowSocialList
mfslw
user
nt
(
FlowCont
HashMap
.
empty
$
HashMap
.
fromList
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
repeat
mempty
)
)
let
groupedWithList
=
toGroupedTreeInstitutes
{- groupParams -}
socialLists
allTerms
institutesTree
(
stopTerms
,
tailTerms
)
=
HashMap
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
$
view
flc_scores
groupedWithList
(
mapTerms
,
tailTerms'
)
=
HashMap
.
partition
((
==
Just
MapTerm
)
.
viewListType
)
tailTerms
listSize
=
mapListSize
-
List
.
length
mapTerms
(
mapTerms'
,
candiTerms
)
=
both
HashMap
.
fromList
$
List
.
splitAt
listSize
$
List
.
take
maxListSize
$
List
.
sortOn
(
Down
.
viewScore
.
snd
)
$
HashMap
.
toList
tailTerms'
pure
$
Map
.
fromList
[(
nt
,
List
.
take
maxListSize
$
toNgramsElement
stopTerms
<>
toNgramsElement
mapTerms
<>
toNgramsElement
(
setListType
(
Just
MapTerm
)
mapTerms'
)
<>
toNgramsElement
(
setListType
(
Just
CandidateTerm
)
candiTerms
)
)]
buildNgramsOthersList
::
(
HasNodeError
err
,
HasNLPServer
env
,
HasNodeStory
env
err
m
...
...
src/Gargantext/Core/Text/List/Group.hs
View file @
96a23581
...
...
@@ -20,20 +20,29 @@ import Data.HashMap.Strict (HashMap)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core.Text.List.Group.Prelude
import
Gargantext.Core.Text.List.Group.WithScores
(
groupWithScores'
)
import
Gargantext.Core.Text.List.Group.WithScores
(
groupWithScores'
,
groupWithScoresInstitutes'
)
import
Gargantext.Core.Text.List.Social.Prelude
(
FlowListScores
,
FlowCont
)
import
Gargantext.Prelude
------------------------------------------------------------------------
toGroupedTree
::
(
Ord
a
,
Monoid
a
,
HasSize
a
)
=>
FlowCont
NgramsTerm
FlowListScores
->
HashMap
NgramsTerm
a
->
FlowCont
NgramsTerm
(
GroupedTreeScores
a
)
->
FlowCont
NgramsTerm
(
GroupedTreeScores
a
)
-- a = (Set ContextId)
toGroupedTree
flc
scores
=
groupWithScores'
flc
scoring
where
scoring
t
=
fromMaybe
mempty
$
HashMap
.
lookup
t
scores
toGroupedTreeInstitutes
::
(
Ord
a
,
Monoid
a
,
HasSize
a
)
=>
FlowCont
NgramsTerm
FlowListScores
->
HashMap
NgramsTerm
a
->
HashMap
Text
[
Text
]
->
FlowCont
NgramsTerm
(
GroupedTreeScores
a
)
-- a = (Set ContextId)
toGroupedTreeInstitutes
flc
scores
institutesTree
=
groupWithScoresInstitutes'
flc
scoring
institutesTree
where
scoring
t
=
fromMaybe
mempty
$
HashMap
.
lookup
t
scores
------------------------------------------------------------------------
setScoresWithMap
::
(
Ord
a
,
Ord
b
,
Monoid
b
)
=>
HashMap
NgramsTerm
b
->
HashMap
NgramsTerm
(
GroupedTreeScores
a
)
...
...
src/Gargantext/Core/Text/List/Group/WithScores.hs
View file @
96a23581
...
...
@@ -41,6 +41,22 @@ groupWithScores' flc scores = FlowCont groups orphans
-- orphans should be filtered already then becomes empty
orphans
=
mempty
groupWithScoresInstitutes'
::
(
Eq
a
,
Ord
a
,
Monoid
a
,
HasSize
a
)
=>
FlowCont
NgramsTerm
FlowListScores
->
(
NgramsTerm
->
a
)
->
HashMap
Text
[
Text
]
->
FlowCont
NgramsTerm
(
GroupedTreeScores
a
)
groupWithScoresInstitutes'
flc
scores
institutesTree
=
FlowCont
(
groups
institutesTree
)
orphans
where
-- parent/child relation is inherited from social lists
groups
institutesTree'
=
HashMap
.
filter
((
0
<
)
.
viewScore
)
$
toGroupedTreeInstitutes'
institutesTree'
$
toMapMaybeParent
scores
$
view
flc_scores
flc
<>
view
flc_cont
flc
-- orphans should be filtered already then becomes empty
orphans
=
mempty
------------------------------------------------------------------------
toMapMaybeParent
::
(
Eq
a
,
Ord
a
,
Monoid
a
)
...
...
@@ -72,6 +88,14 @@ toGroupedTree' m = case HashMap.lookup Nothing m of
Nothing
->
mempty
Just
m'
->
toGroupedTree''
m
m'
toGroupedTreeInstitutes'
::
Eq
a
=>
HashMap
Text
[
Text
]
->
HashMap
(
Maybe
Parent
)
(
HashMap
NgramsTerm
(
GroupedTreeScores
a
))
->
HashMap
Parent
(
GroupedTreeScores
a
)
toGroupedTreeInstitutes'
institutesTree
m
=
case
HashMap
.
lookup
Nothing
m
of
Nothing
->
mempty
Just
m'
->
toGroupedTreeInstitutes''
m
m'
institutesTree
filterGroupedTree
::
(
GroupedTreeScores
a
->
Bool
)
->
HashMap
Parent
(
GroupedTreeScores
a
)
->
HashMap
Parent
(
GroupedTreeScores
a
)
...
...
@@ -93,3 +117,37 @@ toGroupedTree'' m notEmpty
)
v
toGroupedTreeInstitutes''
::
Eq
a
=>
HashMap
(
Maybe
Parent
)
(
HashMap
NgramsTerm
(
GroupedTreeScores
a
))
->
HashMap
NgramsTerm
(
GroupedTreeScores
a
)
->
HashMap
Text
[
Text
]
->
HashMap
Parent
(
GroupedTreeScores
a
)
toGroupedTreeInstitutes''
m
notEmpty
institutesTree
|
notEmpty
==
mempty
=
mempty
|
otherwise
=
HashMap
.
mapWithKey
(
addGroup
institutesTree
m
)
notEmpty
where
addGroup
::
(
Eq
score
)
=>
HashMap
Text
[
Text
]
->
HashMap
(
Maybe
Parent
)
(
HashMap
NgramsTerm
(
GroupedTreeScores
score
))
->
Parent
->
GroupedTreeScores
score
->
GroupedTreeScores
score
addGroup
institutesTree'
m'
k
v
=
over
gts'_children
(
toGroupedTree''
m'
.
case
HashMap
.
lookup
(
unNgramsTerm
k
)
institutesTree'
of
Nothing
->
HashMap
.
union
(
fromMaybe
mempty
$
HashMap
.
lookup
(
Just
k
)
m'
)
Just
children
->
HashMap
.
union
(
foldl
(
\
acc
child
->
HashMap
.
union
acc
$
HashMap
.
singleton
(
NgramsTerm
child
)
GroupedTreeScores
{
_gts'_score
=
_gts'_score
v
,
_gts'_listType
=
_gts'_listType
v
,
_gts'_children
=
HashMap
.
empty
})
HashMap
.
empty
children
)
.
HashMap
.
union
(
fromMaybe
mempty
$
HashMap
.
lookup
(
Just
k
)
m'
)
)
v
\ No newline at end of file
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
96a23581
...
...
@@ -30,6 +30,7 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
(
unionsWith
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
CorpusId
,
NodeId
(
..
),
ContextId
(
..
),
MasterCorpusId
,
NodeType
(
NodeDocument
),
UserCorpusId
,
DocId
)
import
Gargantext.Database.Prelude
(
DBCmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
()
-- toDBid instance
...
...
@@ -91,6 +92,34 @@ getContextsByNgramsUser cId nt =
GROUP BY cng.context_id, ng.terms
|]
getTreeInstitutesUser
::
HasDBid
NodeType
=>
CorpusId
->
NgramsType
->
DBCmd
err
(
HashMap
Text
[
Text
])
getTreeInstitutesUser
cId
nt
=
HM
.
unionsWith
(
++
)
.
map
(
\
(
_
,
hd
)
->
HM
.
fromList
$
map
(
\
(
p
,
c
)
->
(
p
,
[
c
]))
$
Map
.
toList
$
fromMaybe
Map
.
empty
(
_hd_institutes_tree
hd
))
<$>
selectHyperDataByContextUser
cId
nt
selectHyperDataByContextUser
::
HasDBid
NodeType
=>
CorpusId
->
NgramsType
->
DBCmd
err
[(
ContextId
,
HyperdataDocument
)]
selectHyperDataByContextUser
cId'
nt'
=
runPGSQuery
queryHyperDataByContextUser
(
cId'
,
toDBid
nt'
)
queryHyperDataByContextUser
::
DPS
.
Query
queryHyperDataByContextUser
=
[
sql
|
SELECT cng.context_id, c.hyperdata FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN nodes_contexts nc ON nc.context_id = cng.context_id
JOIN contexts c ON nc.context_id = c.id
WHERE nc.node_id = ? -- CorpusId
AND cng.ngrams_type = ? -- NgramsTypeId
AND nc.category > 0 -- is not in Trash
GROUP BY cng.context_id, c.hyperdata
|]
------------------------------------------------------------------------
getOccByNgramsOnlyFast_withSample
::
HasDBid
NodeType
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/Document.hs
View file @
96a23581
...
...
@@ -36,6 +36,7 @@ data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe T
,
_hd_publication_minute
::
!
(
Maybe
Int
)
,
_hd_publication_second
::
!
(
Maybe
Int
)
,
_hd_language_iso2
::
!
(
Maybe
Text
)
,
_hd_institutes_tree
::
!
(
Maybe
(
Map
Text
Text
))
}
deriving
(
Show
,
Generic
)
...
...
@@ -53,7 +54,7 @@ defaultHyperdataDocument = case decode docExample of
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
where
docExample
::
ByteString
...
...
@@ -120,7 +121,7 @@ arbitraryHyperdataDocuments =
toHyperdataDocument'
(
t1
,
t2
)
=
HyperdataDocument
Nothing
Nothing
Nothing
Nothing
(
Just
t1
)
Nothing
Nothing
(
Just
t2
)
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
...
...
stack.yaml
View file @
96a23581
...
...
@@ -134,7 +134,7 @@
git
:
"
https://gitlab.iscpif.fr/gargantext/crawlers/epo-proxy-api.git"
subdirs
:
-
.
-
commit
:
1dbd939257d33126e49d2679375553df1f2eebc5
-
commit
:
d54812d52c9d1f86d331a991b3a87c9a8b4379cf
git
:
"
https://gitlab.iscpif.fr/gargantext/crawlers/hal.git"
subdirs
:
-
.
...
...
Przemyslaw Kaminski
@cgenie
mentioned in commit
5660aec0
·
Oct 08, 2024
mentioned in commit
5660aec0
mentioned in commit 5660aec07ec5a0a0a5468f440092c1a8f57a864e
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment