Commit 96a23581 authored by mzheng's avatar mzheng

add grouping when creating a corpus from HAL

parent 9144bad9
Pipeline #6552 passed with stages
in 48 minutes and 58 seconds
...@@ -18,7 +18,7 @@ fi ...@@ -18,7 +18,7 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and # with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI # `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in. # cache can kick in.
expected_cabal_project_hash="9c487a789f77d9a96b4ac6a4b6268a075a72b8a391d987ba12194a46d96f6ee8" expected_cabal_project_hash="500b0d7b6c0b95937096a27c9a21fcaeeb6c0933d6f0db5e2ead9e69fa25b63f"
expected_cabal_project_freeze_hash="50f3ccea242400c48bd9cec7286bd07c8223c87c043e09576dd5fef0949f982a" expected_cabal_project_freeze_hash="50f3ccea242400c48bd9cec7286bd07c8223c87c043e09576dd5fef0949f982a"
......
...@@ -93,7 +93,7 @@ source-repository-package ...@@ -93,7 +93,7 @@ source-repository-package
source-repository-package source-repository-package
type: git type: git
location: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git location: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
tag: 1dbd939257d33126e49d2679375553df1f2eebc5 tag: d54812d52c9d1f86d331a991b3a87c9a8b4379cf
source-repository-package source-repository-package
type: git type: git
......
...@@ -229,4 +229,5 @@ hyperdataDocumentFromSearxResult l (SearxResult { _sr_content, _sr_engine, _sr_p ...@@ -229,4 +229,5 @@ hyperdataDocumentFromSearxResult l (SearxResult { _sr_content, _sr_engine, _sr_p
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ T.pack $ show l } , _hd_language_iso2 = Just $ T.pack $ show l
, _hd_institutes_tree = Nothing}
...@@ -85,7 +85,8 @@ documentUpload nId doc = do ...@@ -85,7 +85,8 @@ documentUpload nId doc = do
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ view du_language doc } , _hd_language_iso2 = Just $ view du_language doc
, _hd_institutes_tree = Nothing }
let lang = EN let lang = EN
ncs <- view $ nlpServerGet lang ncs <- view $ nlpServerGet lang
......
...@@ -156,6 +156,7 @@ hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) = ...@@ -156,6 +156,7 @@ hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ T.pack $ show lang } , _hd_language_iso2 = Just $ T.pack $ show lang
, _hd_institutes_tree = Nothing }
) (text2titleParagraphs paragraphSize ctxts) ) (text2titleParagraphs paragraphSize ctxts)
) )
...@@ -119,7 +119,8 @@ toDoc l (Arxiv.Result { abstract ...@@ -119,7 +119,8 @@ toDoc l (Arxiv.Result { abstract
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (Text.pack . show) l } , _hd_language_iso2 = Just $ (Text.pack . show) l
, _hd_institutes_tree = Nothing }
where where
authors :: [Ax.Author] -> Maybe Text authors :: [Ax.Author] -> Maybe Text
authors [] = Nothing authors [] = Nothing
......
...@@ -61,7 +61,8 @@ toDoc lang (EPO.HyperdataDocument { .. }) = ...@@ -61,7 +61,8 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ iso639ToText lang } , _hd_language_iso2 = Just $ iso639ToText lang
, _hd_institutes_tree = Nothing }
where where
authors_ = if null authors authors_ = if null authors
......
...@@ -53,7 +53,7 @@ toDoc' la (HAL.Document { .. }) = do ...@@ -53,7 +53,7 @@ toDoc' la (HAL.Document { .. }) = do
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just $ unwords _document_title , _hd_title = Just $ unwords _document_title
, _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" _document_authors_names , _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" _document_authors_names
, _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" $ _document_authors_affiliations <> map show _document_struct_id , _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" $ zipWith (\affialition structId -> affialition <> " | " <> structId) _document_authors_affiliations $ map show _document_struct_id
, _hd_source = Just $ maybe "Nothing" identity _document_source , _hd_source = Just $ maybe "Nothing" identity _document_source
, _hd_abstract = Just abstract , _hd_abstract = Just abstract
, _hd_publication_date = fmap show utctime , _hd_publication_date = fmap show utctime
...@@ -63,4 +63,5 @@ toDoc' la (HAL.Document { .. }) = do ...@@ -63,4 +63,5 @@ toDoc' la (HAL.Document { .. }) = do
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ show la } , _hd_language_iso2 = Just $ show la
, _hd_institutes_tree = Just _document_institutes_tree }
...@@ -94,4 +94,5 @@ isidoreToDoc lang (IsidoreDoc t a d u s as) = do ...@@ -94,4 +94,5 @@ isidoreToDoc lang (IsidoreDoc t a d u s as) = do
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just . Text.pack . show $ lang , _hd_language_iso2 = Just . Text.pack . show $ lang
, _hd_institutes_tree = Nothing
} }
...@@ -50,7 +50,8 @@ toDoc (OA.Work { .. } ) = ...@@ -50,7 +50,8 @@ toDoc (OA.Work { .. } ) =
, _hd_publication_hour = Nothing -- TODO , _hd_publication_hour = Nothing -- TODO
, _hd_publication_minute = Nothing -- TODO , _hd_publication_minute = Nothing -- TODO
, _hd_publication_second = Nothing -- TODO , _hd_publication_second = Nothing -- TODO
, _hd_language_iso2 = language } , _hd_language_iso2 = language
, _hd_institutes_tree = Nothing }
where where
firstPage :: OA.Biblio -> Maybe Int firstPage :: OA.Biblio -> Maybe Int
firstPage OA.Biblio { first_page } = (readMaybe . T.unpack) =<< first_page firstPage OA.Biblio { first_page } = (readMaybe . T.unpack) =<< first_page
......
...@@ -131,7 +131,8 @@ toDoc l (PubMedDoc.PubMed { pubmed_article = PubMedDoc.PubMedArticle t j as aus ...@@ -131,7 +131,8 @@ toDoc l (PubMedDoc.PubMed { pubmed_article = PubMedDoc.PubMedArticle t j as aus
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (Text.pack . show) l } , _hd_language_iso2 = Just $ (Text.pack . show) l
, _hd_institutes_tree = Nothing }
where where
authors :: [PubMedDoc.Author] -> Maybe Text authors :: [PubMedDoc.Author] -> Maybe Text
authors [] = Nothing authors [] = Nothing
......
...@@ -267,7 +267,8 @@ toDoc ff d = do ...@@ -267,7 +267,8 @@ toDoc ff d = do
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (DT.pack . show) lang } , _hd_language_iso2 = Just $ (DT.pack . show) lang
, _hd_institutes_tree = Nothing }
-- printDebug "[G.C.T.C.Parsers] HyperdataDocument" hd -- printDebug "[G.C.T.C.Parsers] HyperdataDocument" hd
pure hd pure hd
......
...@@ -95,6 +95,7 @@ publiToHyperdata y (Publi a s t txt) = ...@@ -95,6 +95,7 @@ publiToHyperdata y (Publi a s t txt) =
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ DT.pack $ show FR , _hd_language_iso2 = Just $ DT.pack $ show FR
, _hd_institutes_tree = Nothing
} }
------------------------------------------------------------- -------------------------------------------------------------
......
...@@ -56,6 +56,7 @@ gitlabIssue2hyperdataDocument issue = HyperdataDocument ...@@ -56,6 +56,7 @@ gitlabIssue2hyperdataDocument issue = HyperdataDocument
, _hd_publication_minute = Just (todMin tod) , _hd_publication_minute = Just (todMin tod)
, _hd_publication_second = Just (round $ todSec tod) , _hd_publication_second = Just (round $ todSec tod)
, _hd_language_iso2 = Just $ (DT.pack . show) lang , _hd_language_iso2 = Just $ (DT.pack . show) lang
, _hd_institutes_tree = Nothing
} }
where lang = EN where lang = EN
date = _issue_created issue date = _issue_created issue
......
...@@ -89,7 +89,8 @@ instance ToHyperdataDocument GrandDebatReference ...@@ -89,7 +89,8 @@ instance ToHyperdataDocument GrandDebatReference
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ Text.pack $ show FR } , _hd_language_iso2 = Just $ Text.pack $ show FR
, _hd_institutes_tree = Nothing }
where where
toAbstract = Text.intercalate " . " . (filter (/= "") . map toSentence) toAbstract = Text.intercalate " . " . (filter (/= "") . map toSentence)
toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of
......
...@@ -135,6 +135,7 @@ bind2doc l [ link', date, langDoc, authors, _source, publisher, title, abstract ...@@ -135,6 +135,7 @@ bind2doc l [ link', date, langDoc, authors, _source, publisher, title, abstract
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = unbound l langDoc } , _hd_language_iso2 = unbound l langDoc
, _hd_institutes_tree = Nothing }
bind2doc _ _ = undefined bind2doc _ _ = undefined
...@@ -50,5 +50,6 @@ toDoc la (ISTEX.Document i t a ab d s) = do ...@@ -50,5 +50,6 @@ toDoc la (ISTEX.Document i t a ab d s) = do
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (T.pack . show) la , _hd_language_iso2 = Just $ (T.pack . show) la
, _hd_institutes_tree = Nothing
} }
...@@ -75,7 +75,8 @@ toDoc (TsvGargV3 did dt _ dpy dpm dpd dab dau) = ...@@ -75,7 +75,8 @@ toDoc (TsvGargV3 did dt _ dpy dpm dpd dab dau) =
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Nothing } , _hd_language_iso2 = Nothing
, _hd_institutes_tree = Nothing }
--------------------------------------------------------------- ---------------------------------------------------------------
-- | Types Conversions -- | Types Conversions
...@@ -537,7 +538,8 @@ tsvHal2doc (TsvHal { .. }) = ...@@ -537,7 +538,8 @@ tsvHal2doc (TsvHal { .. }) =
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Nothing } , _hd_language_iso2 = Nothing
, _hd_institutes_tree = Nothing }
tsv2doc :: TsvDoc -> HyperdataDocument tsv2doc :: TsvDoc -> HyperdataDocument
...@@ -560,7 +562,8 @@ tsv2doc (TsvDoc { .. }) ...@@ -560,7 +562,8 @@ tsv2doc (TsvDoc { .. })
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Nothing } , _hd_language_iso2 = Nothing
, _hd_institutes_tree = Nothing }
where where
pubYear = fromMIntOrDec defaultYear tsv_publication_year pubYear = fromMIntOrDec defaultYear tsv_publication_year
pubMonth = fromMaybe defaultMonth tsv_publication_month pubMonth = fromMaybe defaultMonth tsv_publication_month
......
...@@ -93,7 +93,8 @@ wikiPageToDocument m wr = do ...@@ -93,7 +93,8 @@ wikiPageToDocument m wr = do
, _hd_publication_hour = hour , _hd_publication_hour = hour
, _hd_publication_minute = minute , _hd_publication_minute = minute
, _hd_publication_second = sec , _hd_publication_second = sec
, _hd_language_iso2 = iso2 } , _hd_language_iso2 = iso2
, _hd_institutes_tree = Nothing }
wikidataSelect :: Int -> IO [WikiResult] wikidataSelect :: Int -> IO [WikiResult]
......
...@@ -28,7 +28,7 @@ import Gargantext.API.Ngrams.Types (NgramsElement, NgramsTerm(..)) ...@@ -28,7 +28,7 @@ import Gargantext.API.Ngrams.Types (NgramsElement, NgramsTerm(..))
import Gargantext.Core.NLP (HasNLPServer) import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Core.NodeStory.Types ( HasNodeStory ) import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text (size) import Gargantext.Core.Text (size)
import Gargantext.Core.Text.List.Group ( toGroupedTree, setScoresWithMap ) import Gargantext.Core.Text.List.Group ( toGroupedTree, setScoresWithMap, toGroupedTreeInstitutes )
import Gargantext.Core.Text.List.Group.Prelude import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Group.WithStem import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.List.Social ( FlowSocialListWith, flowSocialList ) import Gargantext.Core.Text.List.Social ( FlowSocialListWith, flowSocialList )
...@@ -38,7 +38,7 @@ import Gargantext.Core.Text.Ngrams (NgramsType(..), Ngrams(..)) ...@@ -38,7 +38,7 @@ import Gargantext.Core.Text.Ngrams (NgramsType(..), Ngrams(..))
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main ( ListType(..) ) import Gargantext.Core.Types.Main ( ListType(..) )
import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsUser, getContextsByNgramsOnlyUser) import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsUser, getContextsByNgramsOnlyUser, getTreeInstitutesUser)
import Gargantext.Database.Action.Metrics.TFICF (getTficf_withSample) import Gargantext.Database.Action.Metrics.TFICF (getTficf_withSample)
import Gargantext.Database.Admin.Types.Node ( MasterCorpusId, UserCorpusId, ContextId ) import Gargantext.Database.Admin.Types.Node ( MasterCorpusId, UserCorpusId, ContextId )
import Gargantext.Database.Prelude (DBCmd) import Gargantext.Database.Prelude (DBCmd)
...@@ -77,18 +77,64 @@ buildNgramsLists :: ( HasNodeStory env err m ...@@ -77,18 +77,64 @@ buildNgramsLists :: ( HasNodeStory env err m
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsLists user uCid mCid mfslw gp = do buildNgramsLists user uCid mCid mfslw gp = do
ngTerms <- buildNgramsTermsList user uCid mCid mfslw gp (NgramsTerms, MapListSize goodMapListSize) ngTerms <- buildNgramsTermsList user uCid mCid mfslw gp (NgramsTerms, MapListSize goodMapListSize)
instTerms <- buildNgramsInstList user uCid mfslw GroupIdentity (Institutes, MapListSize 300, MaxListSize 1000)
othersTerms <- mapM (buildNgramsOthersList user uCid mfslw GroupIdentity) othersTerms <- mapM (buildNgramsOthersList user uCid mfslw GroupIdentity)
[ (Authors , MapListSize 9, MaxListSize 1000) [ (Authors , MapListSize 9, MaxListSize 1000)
, (Sources , MapListSize 9, MaxListSize 1000) , (Sources , MapListSize 9, MaxListSize 1000)
, (Institutes, MapListSize 9, MaxListSize 1000)
] ]
pure $ Map.unions $ [ngTerms] <> othersTerms pure $ Map.unions $ [ngTerms] <> othersTerms <> [instTerms]
newtype MapListSize = MapListSize { unMapListSize :: Int } newtype MapListSize = MapListSize { unMapListSize :: Int }
newtype MaxListSize = MaxListSize { unMaxListSize :: Int } newtype MaxListSize = MaxListSize { unMaxListSize :: Int }
buildNgramsInstList :: ( HasNodeError err
, HasNLPServer env
, HasNodeStory env err m
, HasTreeError err
)
=> User
-> UserCorpusId
-> Maybe FlowSocialListWith
-> GroupParams
-> (NgramsType, MapListSize, MaxListSize)
-> m (Map NgramsType [NgramsElement])
buildNgramsInstList user uCid mfslw _groupParams (nt, MapListSize mapListSize, MaxListSize maxListSize) = do
allTerms :: HashMap NgramsTerm (Set ContextId) <- getContextsByNgramsUser uCid nt
institutesTree :: HashMap Text [Text] <- getTreeInstitutesUser uCid nt
-- PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists :: FlowCont NgramsTerm FlowListScores
<- flowSocialList mfslw user nt ( FlowCont HashMap.empty
$ HashMap.fromList
$ List.zip (HashMap.keys allTerms)
(repeat mempty)
)
let
groupedWithList = toGroupedTreeInstitutes {- groupParams -} socialLists allTerms institutesTree
(stopTerms, tailTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
$ view flc_scores groupedWithList
(mapTerms, tailTerms') = HashMap.partition ((== Just MapTerm) . viewListType) tailTerms
listSize = mapListSize - List.length mapTerms
(mapTerms', candiTerms) = both HashMap.fromList
$ List.splitAt listSize
$ List.take maxListSize
$ List.sortOn (Down . viewScore . snd)
$ HashMap.toList tailTerms'
pure $ Map.fromList [( nt, List.take maxListSize $ toNgramsElement stopTerms
<> toNgramsElement mapTerms
<> toNgramsElement (setListType (Just MapTerm ) mapTerms')
<> toNgramsElement (setListType (Just CandidateTerm) candiTerms)
)]
buildNgramsOthersList :: ( HasNodeError err buildNgramsOthersList :: ( HasNodeError err
, HasNLPServer env , HasNLPServer env
, HasNodeStory env err m , HasNodeStory env err m
......
...@@ -20,20 +20,29 @@ import Data.HashMap.Strict (HashMap) ...@@ -20,20 +20,29 @@ import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap import Data.HashMap.Strict qualified as HashMap
import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Text.List.Group.Prelude import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Group.WithScores ( groupWithScores' ) import Gargantext.Core.Text.List.Group.WithScores ( groupWithScores', groupWithScoresInstitutes' )
import Gargantext.Core.Text.List.Social.Prelude ( FlowListScores, FlowCont ) import Gargantext.Core.Text.List.Social.Prelude ( FlowListScores, FlowCont )
import Gargantext.Prelude import Gargantext.Prelude
------------------------------------------------------------------------ ------------------------------------------------------------------------
toGroupedTree :: (Ord a, Monoid a, HasSize a) toGroupedTree :: (Ord a, Monoid a, HasSize a)
=> FlowCont NgramsTerm FlowListScores => FlowCont NgramsTerm FlowListScores
-> HashMap NgramsTerm a -> HashMap NgramsTerm a
-> FlowCont NgramsTerm (GroupedTreeScores a) -> FlowCont NgramsTerm (GroupedTreeScores a) -- a = (Set ContextId)
toGroupedTree flc scores = toGroupedTree flc scores =
groupWithScores' flc scoring groupWithScores' flc scoring
where where
scoring t = fromMaybe mempty $ HashMap.lookup t scores scoring t = fromMaybe mempty $ HashMap.lookup t scores
toGroupedTreeInstitutes :: (Ord a, Monoid a, HasSize a)
=> FlowCont NgramsTerm FlowListScores
-> HashMap NgramsTerm a
-> HashMap Text [Text]
-> FlowCont NgramsTerm (GroupedTreeScores a) -- a = (Set ContextId)
toGroupedTreeInstitutes flc scores institutesTree =
groupWithScoresInstitutes' flc scoring institutesTree
where
scoring t = fromMaybe mempty $ HashMap.lookup t scores
------------------------------------------------------------------------ ------------------------------------------------------------------------
setScoresWithMap :: (Ord a, Ord b, Monoid b) => HashMap NgramsTerm b setScoresWithMap :: (Ord a, Ord b, Monoid b) => HashMap NgramsTerm b
-> HashMap NgramsTerm (GroupedTreeScores a) -> HashMap NgramsTerm (GroupedTreeScores a)
......
...@@ -41,6 +41,22 @@ groupWithScores' flc scores = FlowCont groups orphans ...@@ -41,6 +41,22 @@ groupWithScores' flc scores = FlowCont groups orphans
-- orphans should be filtered already then becomes empty -- orphans should be filtered already then becomes empty
orphans = mempty orphans = mempty
groupWithScoresInstitutes':: (Eq a, Ord a, Monoid a, HasSize a)
=> FlowCont NgramsTerm FlowListScores
-> (NgramsTerm -> a)
-> HashMap Text [Text]
-> FlowCont NgramsTerm (GroupedTreeScores a)
groupWithScoresInstitutes' flc scores institutesTree = FlowCont (groups institutesTree) orphans
where
-- parent/child relation is inherited from social lists
groups institutesTree' = HashMap.filter ((0 <) . viewScore)
$ toGroupedTreeInstitutes' institutesTree'
$ toMapMaybeParent scores
$ view flc_scores flc <> view flc_cont flc
-- orphans should be filtered already then becomes empty
orphans = mempty
------------------------------------------------------------------------ ------------------------------------------------------------------------
toMapMaybeParent :: (Eq a, Ord a, Monoid a) toMapMaybeParent :: (Eq a, Ord a, Monoid a)
...@@ -72,6 +88,14 @@ toGroupedTree' m = case HashMap.lookup Nothing m of ...@@ -72,6 +88,14 @@ toGroupedTree' m = case HashMap.lookup Nothing m of
Nothing -> mempty Nothing -> mempty
Just m' -> toGroupedTree'' m m' Just m' -> toGroupedTree'' m m'
toGroupedTreeInstitutes' :: Eq a
=> HashMap Text [Text]
-> HashMap (Maybe Parent) (HashMap NgramsTerm (GroupedTreeScores a))
-> HashMap Parent (GroupedTreeScores a)
toGroupedTreeInstitutes' institutesTree m = case HashMap.lookup Nothing m of
Nothing -> mempty
Just m' -> toGroupedTreeInstitutes'' m m' institutesTree
filterGroupedTree :: (GroupedTreeScores a -> Bool) filterGroupedTree :: (GroupedTreeScores a -> Bool)
-> HashMap Parent (GroupedTreeScores a) -> HashMap Parent (GroupedTreeScores a)
-> HashMap Parent (GroupedTreeScores a) -> HashMap Parent (GroupedTreeScores a)
...@@ -93,3 +117,37 @@ toGroupedTree'' m notEmpty ...@@ -93,3 +117,37 @@ toGroupedTree'' m notEmpty
) )
v v
toGroupedTreeInstitutes'' :: Eq a => HashMap (Maybe Parent) (HashMap NgramsTerm (GroupedTreeScores a))
-> HashMap NgramsTerm (GroupedTreeScores a)
-> HashMap Text [Text]
-> HashMap Parent (GroupedTreeScores a)
toGroupedTreeInstitutes'' m notEmpty institutesTree
| notEmpty == mempty = mempty
| otherwise = HashMap.mapWithKey (addGroup institutesTree m) notEmpty
where
addGroup :: (Eq score)
=> HashMap Text [Text]
-> HashMap (Maybe Parent) (HashMap NgramsTerm (GroupedTreeScores score))
-> Parent
-> GroupedTreeScores score
-> GroupedTreeScores score
addGroup institutesTree' m' k v =
over gts'_children ( toGroupedTree'' m'
. case HashMap.lookup (unNgramsTerm k) institutesTree' of
Nothing -> HashMap.union ( fromMaybe mempty
$ HashMap.lookup (Just k) m'
)
Just children -> HashMap.union (foldl (\acc child -> HashMap.union acc $
HashMap.singleton
(NgramsTerm child)
GroupedTreeScores
{ _gts'_score= _gts'_score v
, _gts'_listType= _gts'_listType v
, _gts'_children= HashMap.empty
}) HashMap.empty children
)
. HashMap.union ( fromMaybe mempty
$ HashMap.lookup (Just k) m'
)
)
v
\ No newline at end of file
...@@ -30,6 +30,7 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..)) ...@@ -30,6 +30,7 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core ( HasDBid(toDBid) ) import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Data.HashMap.Strict.Utils as HM ( unionsWith ) import Gargantext.Data.HashMap.Strict.Utils as HM ( unionsWith )
import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId (..), MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId) import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId (..), MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId)
import Gargantext.Database.Prelude (DBCmd, runPGSQuery) import Gargantext.Database.Prelude (DBCmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams () -- toDBid instance import Gargantext.Database.Schema.Ngrams () -- toDBid instance
...@@ -91,6 +92,34 @@ getContextsByNgramsUser cId nt = ...@@ -91,6 +92,34 @@ getContextsByNgramsUser cId nt =
GROUP BY cng.context_id, ng.terms GROUP BY cng.context_id, ng.terms
|] |]
getTreeInstitutesUser :: HasDBid NodeType
=> CorpusId
-> NgramsType
-> DBCmd err (HashMap Text [Text])
getTreeInstitutesUser cId nt =
HM.unionsWith (++) . map (\(_, hd) -> HM.fromList $ map (\(p, c) -> (p, [c])) $ Map.toList $ fromMaybe Map.empty (_hd_institutes_tree hd)) <$> selectHyperDataByContextUser cId nt
selectHyperDataByContextUser :: HasDBid NodeType
=> CorpusId
-> NgramsType
-> DBCmd err [(ContextId, HyperdataDocument)]
selectHyperDataByContextUser cId' nt' =
runPGSQuery queryHyperDataByContextUser
( cId'
, toDBid nt'
)
queryHyperDataByContextUser :: DPS.Query
queryHyperDataByContextUser = [sql|
SELECT cng.context_id, c.hyperdata FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN nodes_contexts nc ON nc.context_id = cng.context_id
JOIN contexts c ON nc.context_id = c.id
WHERE nc.node_id = ? -- CorpusId
AND cng.ngrams_type = ? -- NgramsTypeId
AND nc.category > 0 -- is not in Trash
GROUP BY cng.context_id, c.hyperdata
|]
------------------------------------------------------------------------ ------------------------------------------------------------------------
getOccByNgramsOnlyFast_withSample :: HasDBid NodeType getOccByNgramsOnlyFast_withSample :: HasDBid NodeType
......
...@@ -36,6 +36,7 @@ data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe T ...@@ -36,6 +36,7 @@ data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe T
, _hd_publication_minute :: !(Maybe Int) , _hd_publication_minute :: !(Maybe Int)
, _hd_publication_second :: !(Maybe Int) , _hd_publication_second :: !(Maybe Int)
, _hd_language_iso2 :: !(Maybe Text) , _hd_language_iso2 :: !(Maybe Text)
, _hd_institutes_tree :: !(Maybe (Map Text Text))
} }
deriving (Show, Generic) deriving (Show, Generic)
...@@ -53,7 +54,7 @@ defaultHyperdataDocument = case decode docExample of ...@@ -53,7 +54,7 @@ defaultHyperdataDocument = case decode docExample of
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing
where where
docExample :: ByteString docExample :: ByteString
...@@ -120,7 +121,7 @@ arbitraryHyperdataDocuments = ...@@ -120,7 +121,7 @@ arbitraryHyperdataDocuments =
toHyperdataDocument' (t1,t2) = toHyperdataDocument' (t1,t2) =
HyperdataDocument Nothing Nothing Nothing Nothing (Just t1) HyperdataDocument Nothing Nothing Nothing Nothing (Just t1)
Nothing Nothing (Just t2) Nothing Nothing Nothing Nothing Nothing Nothing Nothing (Just t2) Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
......
...@@ -134,7 +134,7 @@ ...@@ -134,7 +134,7 @@
git: "https://gitlab.iscpif.fr/gargantext/crawlers/epo-proxy-api.git" git: "https://gitlab.iscpif.fr/gargantext/crawlers/epo-proxy-api.git"
subdirs: subdirs:
- . - .
- commit: 1dbd939257d33126e49d2679375553df1f2eebc5 - commit: d54812d52c9d1f86d331a991b3a87c9a8b4379cf
git: "https://gitlab.iscpif.fr/gargantext/crawlers/hal.git" git: "https://gitlab.iscpif.fr/gargantext/crawlers/hal.git"
subdirs: subdirs:
- . - .
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment