Verified Commit 3305c248 authored by Przemyslaw Kaminski

Merge branch '79-dev-rewrite-better-record-syntax' into dev-corpora-from-write-nodes

parents 112ea7af 477a7fdc
Pipeline #1851 failed in 7 minutes and 10 seconds
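This merge rewrites positional record construction and pattern matching into named-field syntax across the codebase, relying on the NamedFieldPuns and RecordWildCards extensions that the first hunk below adds to the default extensions. A minimal sketch of the three idioms involved, using a trimmed, hypothetical variant of the Metric type touched in this diff (not the real one):

{-# LANGUAGE NamedFieldPuns  #-}
{-# LANGUAGE RecordWildCards #-}

data Metric = Metric { m_label :: String
                     , m_x     :: Double
                     , m_y     :: Double }

-- Positional style (before): argument order is implicit and easy to get wrong.
oldStyle :: Metric
oldStyle = Metric "terms" 0.1 0.2

-- Named-field style (after): every argument is labelled at the use site.
newStyle :: Metric
newStyle = Metric { m_label = "terms", m_x = 0.1, m_y = 0.2 }

-- NamedFieldPuns: a bare field name in a pattern binds a variable of that name.
label :: Metric -> String
label (Metric { m_label }) = m_label

-- RecordWildCards: { .. } brings all remaining fields into scope at once.
norm :: Metric -> Double
norm (Metric { .. }) = sqrt (m_x * m_x + m_y * m_y)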
@@ -20,9 +20,11 @@ default-extensions:
- FlexibleInstances
- GeneralizedNewtypeDeriving
- MultiParamTypeClasses
- NamedFieldPuns
- NoImplicitPrelude
- OverloadedStrings
- RankNTypes
- RecordWildCards
library:
source-dirs: src
ghc-options:
@@ -59,7 +59,7 @@ import System.IO (FilePath)
data Mode = Dev | Mock | Prod
deriving (Show, Read, Generic)
deriving (Show, Read, Generic)
-- | startGargantext takes a port number and an Ini file as parameters.
startGargantext :: Mode -> PortNumber -> FilePath -> IO ()
@@ -111,7 +111,10 @@ updateScatter' cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics cId maybeListId tabType maybeLimit
let
metrics = fmap (\(Scored t s1 s2) -> Metric (unNgramsTerm t) s1 s2 (listType t ngs'))
metrics = fmap (\(Scored t s1 s2) -> Metric { m_label = unNgramsTerm t
, m_x = s1
, m_y = s2
, m_cat = listType t ngs' })
$ fmap normalizeLocal scores
listType t m = maybe (panic errorMsg) fst $ HashMap.lookup t m
errorMsg = "API.Node.metrics: key absent"
@@ -315,7 +315,10 @@ type PairWith = Summary "Pair a Corpus with an Annuaire"
pairWith :: CorpusId -> GargServer PairWith
pairWith cId aId lId = do
r <- pairing cId aId lId
_ <- insertNodeNode [ NodeNode cId aId Nothing Nothing]
_ <- insertNodeNode [ NodeNode { _nn_node1_id = cId
, _nn_node2_id = aId
, _nn_score = Nothing
, _nn_category = Nothing }]
pure r
@@ -74,18 +74,18 @@ addToAnnuaireWithForm :: FlowCmdM env err m
-> AnnuaireWithForm
-> (JobLog -> m ())
-> m JobLog
addToAnnuaireWithForm _cid (AnnuaireWithForm ft _d _l) logStatus = do
addToAnnuaireWithForm _cid (AnnuaireWithForm { _wf_filetype }) logStatus = do
printDebug "ft" ft
printDebug "ft" _wf_filetype
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
, _scst_failed = Just 0
, _scst_remaining = Just 0
, _scst_events = Just []
}
@@ -63,15 +63,17 @@ getCorpus cId lId nt' = do
repo <- getRepo' [fromMaybe (panic "[Gargantext.API.Node.Corpus.Export]") lId]
ngs <- getNodeNgrams cId lId nt repo
let -- uniqId is the hash already computed for each document imported into the database
r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (hash b)) (d_hash a b)
) ns (Map.map (Set.map unNgramsTerm) ngs)
r = Map.intersectionWith
(\a b -> Document { _d_document = a
, _d_ngrams = Ngrams (Set.toList b) (hash b)
, _d_hash = d_hash a b }
) ns (Map.map (Set.map unNgramsTerm) ngs)
where
d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
, hash b
]
pure $ Corpus (Map.elems r) (hash $ List.map _d_hash
$ Map.elems r
)
pure $ Corpus { _c_corpus = Map.elems r
, _c_hash = hash $ List.map _d_hash $ Map.elems r }
getNodeNgrams :: HasNodeError err
=> CorpusId
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Node.Corpus.Searx where
@@ -21,7 +21,7 @@ import Data.Maybe (catMaybes)
import Data.Text (Text)
import Data.List (replicate, null)
import Data.Aeson
import Data.Swagger
import Data.Swagger hiding (title, url)
import GHC.Generics (Generic)
import Servant
import Test.QuickCheck (elements)
@@ -107,13 +107,15 @@ publicNodes = do
-- http://localhost:8008/api/v1.0/node/23543/file/download<Paste>
-- http://localhost:8000/images/Gargantextuel-212x300.jpg
toPublicData :: Text -> (Node HyperdataFolder, [NodeId]) -> Maybe PublicData
toPublicData base (n , mn) = PublicData <$> (hd ^? (_Just . hf_data . cf_title))
<*> (hd ^? (_Just . hf_data . cf_desc ))
<*> (Just $ url' mn) -- "images/Gargantextuel-212x300.jpg"
<*> (Just $ url' mn)
<*> Just (cs $ show $ utc2year (n^.node_date))
<*> (hd ^? (_Just . hf_data . cf_query))
<*> (hd ^? (_Just . hf_data . cf_authors))
toPublicData base (n , mn) = do
title <- (hd ^? (_Just . hf_data . cf_title))
abstract <- (hd ^? (_Just . hf_data . cf_desc ))
img <- (Just $ url' mn) -- "images/Gargantextuel-212x300.jpg"
url <- (Just $ url' mn)
date <- Just (cs $ show $ utc2year (n^.node_date))
database <- (hd ^? (_Just . hf_data . cf_query))
author <- (hd ^? (_Just . hf_data . cf_authors))
pure $ PublicData { .. }
where
hd = head
$ filter (\(HyperdataField cd _ _) -> cd == JSON)
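The do-notation version of toPublicData above behaves exactly like the applicative chain it replaces: each binding runs in Maybe, so the first Nothing aborts the whole construction, and PublicData { .. } then fills the record from the local bindings. The same idiom in miniature (a hypothetical two-field record, for illustration only):

{-# LANGUAGE RecordWildCards #-}

data P = P { x :: Int, y :: Int }

build :: Maybe Int -> Maybe Int -> Maybe P
build mx my = do
  x <- mx          -- a Nothing here short-circuits the whole result
  y <- my
  pure $ P { .. }  -- fields x and y are taken from the bindings above

-- build (Just 1) (Just 2) == Just (P { x = 1, y = 2 })
-- build (Just 1) Nothing  == Nothing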
@@ -150,13 +152,13 @@ instance Arbitrary PublicData where
defaultPublicData :: PublicData
defaultPublicData =
PublicData "Title"
(foldl (<>) "" $ replicate 100 "abstract ")
"images/Gargantextuel-212x300.jpg"
"https://.."
"YY/MM/DD"
"database"
"Author"
PublicData { title = "Title"
, abstract = foldl (<>) "" $ replicate 100 "abstract "
, img = "images/Gargantextuel-212x300.jpg"
, url = "https://.."
, date = "YY/MM/DD"
, database = "database"
, author = "Author" }
@@ -10,7 +10,6 @@ Portability : POSIX
Count API part of Gargantext.
-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE DeriveAnyClass #-}
@@ -86,7 +86,11 @@ instance ToSchema TableQuery where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "tq_")
instance Arbitrary TableQuery where
arbitrary = elements [TableQuery 0 10 DateAsc Docs "electrodes"]
arbitrary = elements [TableQuery { tq_offset = 0
, tq_limit = 10
, tq_orderBy = DateAsc
, tq_view = Docs
, tq_query = "electrodes" }]
tableApi :: NodeId -> GargServer TableApi
@@ -33,73 +33,74 @@ data School = School { school_shortName :: Text
schools :: [School]
schools = [ School
("Mines Albi-Carmaux")
("Mines Albi-Carmaux - École nationale supérieure des Mines d'Albi‐Carmaux")
("469216")
{ school_shortName = "Mines Albi-Carmaux"
, school_longName = "Mines Albi-Carmaux - École nationale supérieure des Mines d'Albi‐Carmaux"
, school_id = "469216" }
, School
("Mines Alès")
("EMA - École des Mines d'Alès")
("6279")
{ school_shortName = "Mines Alès"
, school_longName = "EMA - École des Mines d'Alès"
, school_id = "6279" }
, School
("Mines Douai")
("Mines Douai EMD - École des Mines de Douai")
("224096")
{ school_shortName = "Mines Douai"
, school_longName = "Mines Douai EMD - École des Mines de Douai"
, school_id = "224096" }
, School
("Mines Lille")
("Mines Lille - École des Mines de Lille")
("144103")
{ school_shortName = "Mines Lille"
, school_longName = "Mines Lille - École des Mines de Lille"
, school_id = "144103" }
, School
("IMT Lille Douai")
("IMT Lille Douai")
("497330")
{ school_shortName = "IMT Lille Douai"
, school_longName = "IMT Lille Douai"
, school_id = "497330" }
, School
("Mines Nantes")
("Mines Nantes - Mines Nantes")
("84538")
{ school_shortName = "Mines Nantes"
, school_longName = "Mines Nantes - Mines Nantes"
, school_id = "84538" }
, School
("Télécom Bretagne")
("Télécom Bretagne")
("301262")
{ school_shortName = "Télécom Bretagne"
, school_longName = "Télécom Bretagne"
, school_id = "301262" }
, School
("IMT Atlantique")
("IMT Atlantique - IMT Atlantique Bretagne-Pays de la Loire")
("481355")
{ school_shortName = "IMT Atlantique"
, school_longName = "IMT Atlantique - IMT Atlantique Bretagne-Pays de la Loire"
, school_id = "481355" }
, School
("Mines Saint-Étienne")
("Mines Saint-Étienne MSE - École des Mines de Saint-Étienne")
("29212")
{ school_shortName = "Mines Saint-Étienne"
, school_longName = "Mines Saint-Étienne MSE - École des Mines de Saint-Étienne"
, school_id = "29212" }
, School
("Télécom École de Management")
("TEM - Télécom Ecole de Management")
("301442")
{ school_shortName = "Télécom École de Management"
, school_longName = "TEM - Télécom Ecole de Management"
, school_id = "301442" }
, School
("IMT Business School")
("IMT Business School")
("542824")
{ school_shortName = "IMT Business School"
, school_longName = "IMT Business School"
, school_id = "542824" }
, School
("Télécom ParisTech")
("Télécom ParisTech")
("300362")
{ school_shortName = "Télécom ParisTech"
, school_longName = "Télécom ParisTech"
, school_id = "300362" }
, School
("Télécom SudParis")
("TSP - Télécom SudParis")
("352124")
{ school_shortName = "Télécom SudParis"
, school_longName = "TSP - Télécom SudParis"
, school_id = "352124" }
, School
("ARMINES")
("ARMINES")
("300362")
{ school_shortName = "ARMINES"
, school_longName = "ARMINES"
, school_id = "300362" }
, School
("Eurecom")
("Eurecom")
("421532")
{ school_shortName = "Eurecom"
, school_longName = "Eurecom"
, school_id = "421532" }
, School
("Mines ParisTech")
("MINES ParisTech - École nationale supérieure des mines de Paris")
("301492")
{ school_shortName = "Mines ParisTech"
, school_longName = "MINES ParisTech - École nationale supérieure des mines de Paris"
, school_id = "301492" }
]
mapIdSchool :: Map Text Text
mapIdSchool = M.fromList $ Gargantext.Prelude.map (\(School n _ i) -> (i,n)) schools
mapIdSchool = M.fromList $ Gargantext.Prelude.map
(\(School { school_shortName, school_id }) -> (school_id, school_shortName)) schools
hal_data :: IO (Either Prelude.String (DV.Vector CsvHal))
hal_data = do
@@ -75,37 +75,39 @@ data IMTUser = IMTUser
-- | CSV instance
instance FromNamedRecord IMTUser where
parseNamedRecord r = IMTUser <$> r .: "id"
<*> r .: "entite"
<*> r .: "mail"
<*> r .: "nom"
<*> r .: "prenom"
<*> r .: "fonction"
<*> r .: "fonction2"
<*> r .: "tel"
<*> r .: "fax"
<*> r .: "service"
<*> r .: "groupe"
<*> r .: "entite2"
<*> r .: "service2"
<*> r .: "groupe2"
<*> r .: "bureau"
<*> r .: "url"
<*> r .: "pservice"
<*> r .: "pfonction"
<*> r .: "afonction"
<*> r .: "afonction2"
<*> r .: "grprech"
<*> r .: "appellation"
<*> r .: "lieu"
<*> r .: "aprecision"
<*> r .: "atel"
<*> r .: "sexe"
<*> r .: "statut"
<*> r .: "idutilentite"
<*> r .: "actif"
<*> r .: "idutilsiecoles"
<*> r .: "date_modification"
parseNamedRecord r = do
id <- r .: "id"
entite <- r .: "entite"
mail <- r .: "mail"
nom <- r .: "nom"
prenom <- r .: "prenom"
fonction <- r .: "fonction"
fonction2 <- r .: "fonction2"
tel <- r .: "tel"
fax <- r .: "fax"
service <- r .: "service"
groupe <- r .: "groupe"
entite2 <- r .: "entite2"
service2 <- r .: "service2"
groupe2 <- r .: "groupe2"
bureau <- r .: "bureau"
url <- r .: "url"
pservice <- r .: "pservice"
pfonction <- r .: "pfonction"
afonction <- r .: "afonction"
afonction2 <- r .: "afonction2"
grprech <- r .: "grprech"
appellation <- r .: "appellation"
lieu <- r .: "lieu"
aprecision <- r .: "aprecision"
atel <- r .: "atel"
sexe <- r .: "sexe"
statut <- r .: "statut"
idutilentite <- r .: "idutilentite"
actif <- r .: "actif"
idutilsiecoles <- r .: "idutilsiecoles"
date_modification <- r .: "date_modification"
pure $ IMTUser {..}
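This is the standard cassava pattern: each (.:) lookup runs in the Parser monad, a missing column fails the whole record, and RecordWildCards assembles the result from the bindings. A self-contained sketch of the same idiom on a hypothetical two-column record:

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards   #-}

import Data.Csv (FromNamedRecord (..), (.:))

data Row = Row { name :: String, age :: Int }

instance FromNamedRecord Row where
  parseNamedRecord r = do
    name <- r .: "name"   -- fails the parse if the "name" column is absent
    age  <- r .: "age"
    pure $ Row { .. }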
headerCSVannuaire :: Header
headerCSVannuaire =
@@ -136,15 +138,54 @@ deserialiseFromFile' filepath = deserialise <$> BL.readFile filepath
------------------------------------------------------------------------
imtUser2gargContact :: IMTUser -> HyperdataContact
imtUser2gargContact (IMTUser id' entite' mail' nom' prenom' fonction' _fonction2' tel' _fax'
service' _groupe' _entite2 _service2 _group2 bureau' url' _pservice' _pfonction' _afonction' _afonction2'
_grprech' _appellation' lieu' _aprecision' _atel' _sexe' _statut' _idutilentite'
_actif' _idutilsiecoles' date_modification')
= HyperdataContact (Just "IMT Annuaire") (Just qui) [ou] ((<>) <$> (fmap (\p -> p <> " ") prenom') <*> nom') entite' date_modification' Nothing Nothing
where
qui = ContactWho id' prenom' nom' (catMaybes [service']) []
ou = ContactWhere (toList entite') (toList service') fonction' bureau' (Just "France") lieu' contact Nothing Nothing
contact = Just $ ContactTouch mail' tel' url'
-- meta = ContactMetaData (Just "IMT annuaire") date_modification'
toList Nothing = []
toList (Just x) = [x]
--imtUser2gargContact (IMTUser id' entite' mail' nom' prenom' fonction' _fonction2' tel' _fax'
-- service' _groupe' _entite2 _service2 _group2 bureau' url' _pservice' _pfonction' _afonction' _afonction2'
-- _grprech' _appellation' lieu' _aprecision' _atel' _sexe' _statut' _idutilentite'
-- _actif' _idutilsiecoles' date_modification')
-- = HyperdataContact (Just "IMT Annuaire") (Just qui) [ou] ((<>) <$> (fmap (\p -> p <> " ") prenom') <*> nom') entite' date_modification' Nothing Nothing
imtUser2gargContact (IMTUser { id
, entite
, mail
, nom
, prenom
, fonction
, tel
, service
, bureau
, url
, lieu
, date_modification }) =
HyperdataContact { _hc_bdd = Just "IMT Annuaire"
, _hc_who = Just qui
, _hc_where = [ou]
, _hc_title = title
, _hc_source = entite
, _hc_lastValidation = date_modification
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
where
title = (<>) <$> (fmap (\p -> p <> " ") prenom) <*> nom
qui = ContactWho { _cw_id = id
, _cw_firstName = prenom
, _cw_lastName = nom
, _cw_keywords = catMaybes [service]
, _cw_freetags = [] }
ou = ContactWhere { _cw_organization = toList entite
, _cw_labTeamDepts = toList service
, _cw_role = fonction
, _cw_office = bureau
, _cw_country = Just "France"
, _cw_city = lieu
, _cw_touch = contact
, _cw_entry = Nothing
, _cw_exit = Nothing }
contact = Just $ ContactTouch { _ct_mail = mail
, _ct_phone = tel
, _ct_url = url }
-- meta = ContactMetaData (Just "IMT annuaire") date_modification'
toList Nothing = []
toList (Just x) = [x]
@@ -50,3 +50,6 @@ instance Arbitrary GraphMetric where
arbitrary = elements [ minBound .. maxBound ]
------------------------------------------------------------------------
@@ -82,7 +82,9 @@ readNodeStoryEnv :: NodeStoryDir -> IO NodeStoryEnv
readNodeStoryEnv nsd = do
mvar <- nodeStoryVar nsd Nothing [0]
saver <- mkNodeStorySaver nsd mvar
pure $ NodeStoryEnv mvar saver (nodeStoryVar nsd (Just mvar))
pure $ NodeStoryEnv { _nse_var = mvar
, _nse_saver = saver
, _nse_getter = nodeStoryVar nsd (Just mvar) }
------------------------------------------------------------------------
mkNodeStorySaver :: NodeStoryDir -> MVar NodeListStory -> IO (IO ())
@@ -212,7 +214,9 @@ repoToNodeListStory (Repo _v s h) = NodeStory $ Map.fromList ns
h' = ngramsStatePatch_migration h
ns = List.map (\(n,ns')
-> (n, let hs = fromMaybe [] (Map.lookup n h') in
Archive (List.length hs) ns' hs
Archive { _a_version = List.length hs
, _a_state = ns'
, _a_history = hs }
)
) $ Map.toList s'
@@ -276,10 +280,17 @@ instance Serialise NgramsStatePatch'
-- TODO Semigroup instance for unions
-- TODO check this
instance (Semigroup s, Semigroup p) => Semigroup (Archive s p) where
(<>) (Archive _v _s p) (Archive v' s' p') = Archive v' s' (p' <> p)
(<>) (Archive { _a_history = p }) (Archive { _a_version = v'
, _a_state = s'
, _a_history = p'}) =
Archive { _a_version = v'
, _a_state = s'
, _a_history = p' <> p }
instance Monoid (Archive NgramsState' NgramsStatePatch') where
mempty = Archive 0 mempty []
mempty = Archive { _a_version = 0
, _a_state = mempty
, _a_history = [] }
instance (FromJSON s, FromJSON p) => FromJSON (Archive s p) where
parseJSON = genericParseJSON $ unPrefix "_a_"
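To make the combined Semigroup instance above concrete: (<>) keeps the right-hand version and state and concatenates the histories with the right-hand one in front. A worked example with invented field values, taking lists for both type parameters:

a1, a2 :: Archive [Int] [String]
a1 = Archive { _a_version = 1, _a_state = [1], _a_history = ["p1"] }
a2 = Archive { _a_version = 2, _a_state = [2], _a_history = ["p2"] }

-- a1 <> a2 == Archive { _a_version = 2               -- right-hand version wins
--                     , _a_state   = [2]             -- right-hand state wins
--                     , _a_history = ["p2", "p1"] }  -- p' <> p: right-hand history first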
@@ -293,13 +304,17 @@ initNodeStory :: Monoid s => NodeId -> NodeStory s p
initNodeStory ni = NodeStory $ Map.singleton ni initArchive
initArchive :: Monoid s => Archive s p
initArchive = Archive 0 mempty []
initArchive = Archive { _a_version = 0
, _a_state = mempty
, _a_history = [] }
initNodeListStoryMock :: NodeListStory
initNodeListStoryMock = NodeStory $ Map.singleton nodeListId archive
where
nodeListId = 0
archive = Archive 0 ngramsTableMap []
archive = Archive { _a_version = 0
, _a_state = ngramsTableMap
, _a_history = [] }
ngramsTableMap = Map.singleton TableNgrams.NgramsTerms
$ Map.fromList
[ (n ^. ne_ngrams, ngramsElementToRepo n)
@@ -31,23 +31,23 @@ get la q ml = do
toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") Just d)
pure $ HyperdataDocument (Just "Hal")
(Just $ pack $ show i)
Nothing
Nothing
Nothing
Nothing
(Just $ intercalate " " t)
(Just $ foldl (\x y -> x <> ", " <> y) "" aus)
(Just $ foldl (\x y -> x <> ", " <> y) "" $ affs <> map (cs . show) struct_id)
(Just $ maybe "Nothing" identity s)
(Just $ intercalate " " ab)
(fmap (pack . show) utctime)
pub_year
pub_month
pub_day
Nothing
Nothing
Nothing
(Just $ (pack . show) la)
pure $ HyperdataDocument { _hd_bdd = Just "Hal"
, _hd_doi = Just $ pack $ show i
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ intercalate " " t
, _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" aus
, _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" $ affs <> map (cs . show) struct_id
, _hd_source = Just $ maybe "Nothing" identity s
, _hd_abstract = Just $ intercalate " " ab
, _hd_publication_date = fmap (pack . show) utctime
, _hd_publication_year = pub_year
, _hd_publication_month = pub_month
, _hd_publication_day = pub_day
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (pack . show) la }
@@ -39,22 +39,23 @@ toDoc' la docs' = do
toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
toDoc la (ISTEX.Document i t a ab d s) = do
(utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") (Just . pack . show) d)
pure $ HyperdataDocument (Just "Istex")
(Just i)
Nothing
Nothing
Nothing
Nothing
t
(Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a))
(Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a))
(Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s))
ab
(fmap (pack . show) utctime)
pub_year
pub_month
pub_day
Nothing
Nothing
Nothing
(Just $ (pack . show) la)
pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
, _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a)
, _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a)
, _hd_source = Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s)
, _hd_abstract = ab
, _hd_publication_date = fmap (pack . show) utctime
, _hd_publication_year = pub_year
, _hd_publication_month = pub_month
, _hd_publication_day = pub_day
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (pack . show) la }
@@ -38,25 +38,25 @@ get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
(PubMedDoc.PubMedDate a y m d)
) = HyperdataDocument (Just "PubMed")
Nothing
Nothing
Nothing
Nothing
Nothing
t
(authors aus)
(institutes aus)
j
(abstract as)
(Just $ Text.pack $ show a)
(Just $ fromIntegral y)
(Just m)
(Just d)
Nothing
Nothing
Nothing
(Just $ (Text.pack . show) l)
) = HyperdataDocument { _hd_bdd = Just "PubMed"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
, _hd_authors = authors aus
, _hd_institutes = institutes aus
, _hd_source = j
, _hd_abstract = abstract as
, _hd_publication_date = Just $ Text.pack $ show a
, _hd_publication_year = Just $ fromIntegral y
, _hd_publication_month = Just m
, _hd_publication_day = Just d
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (Text.pack . show) l }
where
authors :: Maybe [PubMedDoc.Author] -> Maybe Text
authors aus' = case aus' of
@@ -122,25 +122,25 @@ toDoc ff d = do
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit lang dateToParse
pure $ HyperdataDocument (Just $ DT.pack $ show ff)
(lookup "doi" d)
(lookup "URL" d)
Nothing
Nothing
Nothing
(lookup "title" d)
Nothing
(lookup "authors" d)
(lookup "source" d)
(lookup "abstract" d)
(fmap (DT.pack . show) utcTime)
(pub_year)
(pub_month)
(pub_day)
Nothing
Nothing
Nothing
(Just $ (DT.pack . show) lang)
pure $ HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
, _hd_doi = lookup "doi" d
, _hd_url = lookup "URL" d
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = lookup "title" d
, _hd_authors = Nothing
, _hd_institutes = lookup "authors" d
, _hd_source = lookup "source" d
, _hd_abstract = lookup "abstract" d
, _hd_publication_date = fmap (DT.pack . show) utcTime
, _hd_publication_year = pub_year
, _hd_publication_month = pub_month
, _hd_publication_day = pub_day
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (DT.pack . show) lang }
enrichWith :: FileFormat
-> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]])
@@ -75,18 +75,26 @@ instance ToJSON GrandDebatReference
instance ToHyperdataDocument GrandDebatReference
where
toHyperdataDocument (GrandDebatReference id' _ref title'
_createdAt' publishedAt' _updatedAt
_trashed _trashedStatus
_authorId authorType' authorZipCode'
responses') =
HyperdataDocument (Just "GrandDebat") id'
Nothing Nothing Nothing Nothing
title' authorType' authorType' authorZipCode'
(toAbstract <$> responses')
publishedAt'
Nothing Nothing Nothing Nothing Nothing Nothing
(Just $ Text.pack $ show FR)
toHyperdataDocument (GrandDebatReference { id, title, publishedAt, authorType, authorZipCode, responses }) =
HyperdataDocument { _hd_bdd = Just "GrandDebat"
, _hd_doi = id
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = title
, _hd_authors = authorType
, _hd_institutes = authorType
, _hd_source = authorZipCode
, _hd_abstract = toAbstract <$> responses
, _hd_publication_date = publishedAt
, _hd_publication_year = Nothing
, _hd_publication_month = Nothing
, _hd_publication_day = Nothing
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ Text.pack $ show FR }
where
toAbstract = (Text.intercalate " . ") . ((filter (/= "")) . (map toSentence))
toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of
@@ -119,17 +119,24 @@ unbound _ _ = Nothing
bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
bind2doc l [ link, date, langDoc, authors, _source, publisher, title, abstract ] =
HyperdataDocument (Just "Isidore")
Nothing
(unbound l link)
Nothing Nothing Nothing
(unbound l title)
(unbound l authors)
Nothing
(unbound l publisher)
(unbound l abstract)
(unbound l date)
Nothing Nothing Nothing Nothing Nothing Nothing
(unbound l langDoc)
HyperdataDocument { _hd_bdd = Just "Isidore"
, _hd_doi = Nothing
, _hd_url = unbound l link
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = unbound l title
, _hd_authors = unbound l authors
, _hd_institutes = Nothing
, _hd_source = unbound l publisher
, _hd_abstract = unbound l abstract
, _hd_publication_date = unbound l date
, _hd_publication_year = Nothing
, _hd_publication_month = Nothing
, _hd_publication_day = Nothing
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = unbound l langDoc }
bind2doc _ _ = undefined
@@ -48,8 +48,14 @@ json2csv fin fout = do
writeFile fout (headerCsvGargV3, fromList $ map patent2csvDoc patents)
patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc (Patent title abstract year _) =
CsvDoc title "Source" (Just $ read (unpack year)) (Just 1) (Just 1) abstract "Authors"
patent2csvDoc (Patent { .. }) =
CsvDoc { csv_title = _patent_title
, csv_source = "Source"
, csv_publication_year = Just $ read (unpack _patent_year)
, csv_publication_month = Just 1
, csv_publication_day = Just 1
, csv_abstract = _patent_abstract
, csv_authors = "Authors" }
@@ -70,3 +70,5 @@ onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
-> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
onField k f m = m <> ( maybe [] f (lookup k m) )
@@ -68,5 +68,3 @@ fixFields ns = map (first fixFields'') ns
| champs == "UR" = "url"
| champs == "N2" = abstract
| otherwise = champs
@@ -95,7 +95,9 @@ parsePage =
revision <-
parseRevision
many_ $ ignoreAnyTreeContent
return $ Page Mediawiki title revision
return $ Page { _markupFormat = Mediawiki
, _title = title
, _text = revision }
parseMediawiki :: MonadThrow m => ConduitT Event Page m (Maybe ())
parseMediawiki =
@@ -108,7 +110,7 @@ mediawikiPageToPlain :: Page -> IO Page
mediawikiPageToPlain page = do
title <- mediaToPlain $ _title page
revision <- mediaToPlain $ _text page
return $ Page Plaintext title revision
return $ Page { _markupFormat = Plaintext, _title = title, _text = revision }
where mediaToPlain media =
case media of
(Nothing) -> return Nothing
......
@@ -86,17 +86,17 @@ buildNgramsLists user uCid mCid mfslw gp = do
data MapListSize = MapListSize { unMapListSize :: !Int }
buildNgramsOthersList ::( HasNodeError err
, CmdM env err m
, HasNodeStory env err m
, HasTreeError err
)
=> User
-> UserCorpusId
-> Maybe FlowSocialListWith
-> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
buildNgramsOthersList :: ( HasNodeError err
, CmdM env err m
, HasNodeStory env err m
, HasTreeError err
)
=> User
-> UserCorpusId
-> Maybe FlowSocialListWith
-> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize) = do
allTerms :: HashMap NgramsTerm (Set NodeId) <- getNodesByNgramsUser uCid nt
@@ -106,7 +106,7 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize)
$ HashMap.fromList
$ List.zip (HashMap.keys allTerms)
(List.cycle [mempty])
)
)
let
groupedWithList = toGroupedTree {- groupParams -} socialLists allTerms
@@ -148,13 +148,13 @@ buildNgramsTermsList :: ( HasNodeError err
, HasNodeStory env err m
, HasTreeError err
)
=> User
-> UserCorpusId
-> MasterCorpusId
-> Maybe FlowSocialListWith
-> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
=> User
-> UserCorpusId
-> MasterCorpusId
-> Maybe FlowSocialListWith
-> GroupParams
-> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement])
buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
-- Filter 0 With Double
@@ -170,7 +170,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
$ HashMap.fromList
$ List.zip (HashMap.keys allTerms)
(List.cycle [mempty])
)
)
printDebug "[buldNgramsTermsList: Flow Social List / end]" nt
let ngramsKeys = HashMap.keysSet allTerms
@@ -72,7 +72,7 @@ groupWith :: GroupParams
-> NgramsTerm
-> NgramsTerm
groupWith GroupIdentity t = identity t
groupWith (GroupParams l _m _n _) t =
groupWith (GroupParams { unGroupParams_lang = l }) t =
NgramsTerm
$ Text.intercalate " "
$ map (stem l)
@@ -86,7 +86,7 @@ groupWith (GroupParams l _m _n _) t =
$ unNgramsTerm t
-- | This lemmatization group done with CoreNLP algo (or others)
groupWith (GroupWithPosTag _ _ m) t =
groupWith (GroupWithPosTag { _gwl_map = m }) t =
case HashMap.lookup (unNgramsTerm t) m of
Nothing -> clean t
Just t' -> clean $ NgramsTerm t'
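Matching only the fields a function needs, as groupWith now does, is plain record-pattern syntax: unmentioned fields are simply ignored, which also keeps the pattern stable if the record later grows. For instance (hypothetical type):

data G = G { g_lang :: String, g_size :: Int, g_stop :: Bool }

langOf :: G -> String
langOf (G { g_lang = l }) = l   -- g_size and g_stop are ignored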
@@ -70,9 +70,11 @@ Children are not modified in this specific case.
-- | The old list gets a -1 score,
-- the new list gets a +1 score,
-- hence other lists stay around a 0 score
addScorePatch fl (t, (NgramsPatch children' (Patch.Replace old_list new_list))) =
addScorePatch fl (t, (NgramsPatch { _patch_children
, _patch_list = Patch.Replace old_list new_list })) =
-- Adding new 'Children' score
addScorePatch fl' (t, NgramsPatch children' Patch.Keep)
addScorePatch fl' (t, NgramsPatch { _patch_children
, _patch_list = Patch.Keep })
where
-- | Adding new 'ListType' score
fl' = fl & flc_scores . at t %~ (score fls_listType old_list (-1))
@@ -80,8 +82,9 @@ addScorePatch fl (t, (NgramsPatch children' (Patch.Replace old_list new_list)))
& flc_cont %~ (HashMap.delete t)
-- | Patching existing Ngrams with children
addScorePatch fl (p, NgramsPatch children' Patch.Keep) =
foldl' addChild fl $ patchMSet_toList children'
addScorePatch fl (p, NgramsPatch { _patch_children
, _patch_list = Patch.Keep }) =
foldl' addChild fl $ patchMSet_toList _patch_children
where
-- | Adding a child
addChild fl' (t, Patch.Replace Nothing (Just _)) = doLink ( 1) p t fl'
@@ -92,20 +95,24 @@ addScorePatch fl (p, NgramsPatch children' Patch.Keep) =
addChild fl' _ = fl'
-- | Inserting a new Ngrams
addScorePatch fl (t, NgramsReplace Nothing (Just nre)) =
addScorePatch fl (t, NgramsReplace { _patch_old = Nothing
, _patch_new = Just nre }) =
childrenScore 1 t (nre ^. nre_children)
$ fl & flc_scores . at t %~ (score fls_listType $ nre ^. nre_list) 1
& flc_cont %~ (HashMap.delete t)
addScorePatch fl (t, NgramsReplace (Just old_nre) maybe_new_nre) =
addScorePatch fl (t, NgramsReplace { _patch_old = Just old_nre
, _patch_new = maybe_new_nre }) =
let fl' = childrenScore (-1) t (old_nre ^. nre_children)
$ fl & flc_scores . at t %~ (score fls_listType $ old_nre ^. nre_list) (-1)
& flc_cont %~ (HashMap.delete t)
in case maybe_new_nre of
Nothing -> fl'
Just new_nre -> addScorePatch fl' (t, NgramsReplace Nothing (Just new_nre))
Just new_nre -> addScorePatch fl' (t, NgramsReplace { _patch_old = Nothing
, _patch_new = Just new_nre })
addScorePatch fl (_, NgramsReplace Nothing Nothing) = fl
addScorePatch fl (_, NgramsReplace { _patch_old = Nothing
, _patch_new = Nothing }) = fl
-------------------------------------------------------------------------------
-- | Utils
@@ -13,8 +13,6 @@ Starting from this model, a specific Gargantext engine will be made
(using more metrics scores/features).
-}
{-# LANGUAGE NamedFieldPuns #-}
module Gargantext.Core.Text.Search where
import Data.SearchEngine
@@ -82,11 +82,11 @@ makeLenses ''TermType
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms :: TermType Lang -> [Text] -> IO [[Terms]]
extractTerms (Unsupervised l n s m) xs = mapM (terms (Unsupervised l n s (Just m'))) xs
extractTerms (Unsupervised {..}) xs = mapM (terms (Unsupervised { _tt_model = Just m', .. })) xs
where
m' = case m of
m' = case _tt_model of
Just m''-> m''
Nothing -> newTries n (Text.intercalate " " xs)
Nothing -> newTries _tt_windowSize (Text.intercalate " " xs)
extractTerms termTypeLang xs = mapM (terms termTypeLang) xs
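Note the construction side of RecordWildCards used in extractTerms: the pattern (Unsupervised { .. }) captures every field, and the expression (Unsupervised { _tt_model = Just m', .. }) overrides one field while { .. } re-supplies the rest from scope. The idiom in isolation (hypothetical type):

{-# LANGUAGE RecordWildCards #-}

data T = T { a :: Int, b :: Int }

-- Override one field, carry the rest over from the matched record.
bumpA :: T -> T
bumpA (T { .. }) = T { a = a + 1, .. }  -- explicit a wins; .. fills in b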
@@ -96,15 +96,16 @@ withLang :: (Foldable t, Functor t, HasText h)
=> TermType Lang
-> t h
-> TermType Lang
withLang (Unsupervised l n s m) ns = Unsupervised l n s m'
withLang (Unsupervised {..}) ns = Unsupervised { _tt_model = m', .. }
where
m' = case m of
m' = case _tt_model of
Nothing -> -- trace ("buildTries here" :: String)
Just $ buildTries n $ fmap toToken
$ uniText
$ Text.intercalate " . "
$ List.concat
$ map hasText ns
Just $ buildTries _tt_ngramsSize
$ fmap toToken
$ uniText
$ Text.intercalate " . "
$ List.concat
$ map hasText ns
just_m -> just_m
withLang l _ = l
@@ -171,9 +172,9 @@ terms :: TermType Lang -> Text -> IO [Terms]
terms (Mono lang) txt = pure $ monoTerms lang txt
terms (Multi lang) txt = multiterms lang txt
terms (MonoMulti lang) txt = terms (Multi lang) txt
terms (Unsupervised lang n s m) txt = termsUnsupervised (Unsupervised lang n s (Just m')) txt
terms (Unsupervised { .. }) txt = termsUnsupervised (Unsupervised { _tt_model = Just m', .. }) txt
where
m' = maybe (newTries n txt) identity m
m' = maybe (newTries _tt_ngramsSize txt) identity _tt_model
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
@@ -93,7 +93,8 @@ flowList_DbRepo lId ngs = do
let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> (unNgramsTerm <$> parent))
<*> getCgramsId mapCgramsId ntype ngram
| (ntype, ngs') <- Map.toList ngs
, NgramsElement (NgramsTerm ngram) _ _ _ _ parent _ <- ngs'
, NgramsElement { _ne_ngrams = NgramsTerm ngram
, _ne_parent = parent } <- ngs'
]
-- Inserting groups of ngrams
_r <- insert_Node_NodeNgrams_NodeNgrams
@@ -115,15 +116,37 @@ toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
-> (NgramsType, [NgramsElement])
-> [NodeNgramsW]
toNodeNgramsW'' l' (ngrams_type, elms) =
[ NodeNgrams Nothing l' list_type ngrams_terms' ngrams_type Nothing Nothing Nothing 0 |
(NgramsElement (NgramsTerm ngrams_terms') _size list_type _occ _root _parent _children) <- elms
[ NodeNgrams { _nng_id = Nothing
, _nng_node_id = l'
, _nng_node_subtype = list_type
, _nng_ngrams_id = ngrams_terms'
, _nng_ngrams_type = ngrams_type
, _nng_ngrams_field = Nothing
, _nng_ngrams_tag = Nothing
, _nng_ngrams_class = Nothing
, _nng_ngrams_weight = 0 } |
(NgramsElement { _ne_ngrams = NgramsTerm ngrams_terms'
, _ne_size = _size
, _ne_list = list_type
, _ne_occurrences = _occ
, _ne_root = _root
, _ne_parent = _parent
, _ne_children = _children }) <- elms
]
toNodeNgramsW' :: ListId
-> [(Text, [NgramsType])]
-> [NodeNgramsW]
toNodeNgramsW' l'' ngs = [ NodeNgrams Nothing l'' CandidateTerm terms ngrams_type Nothing Nothing Nothing 0
toNodeNgramsW' l'' ngs = [ NodeNgrams { _nng_id = Nothing
, _nng_node_id = l''
, _nng_node_subtype = CandidateTerm
, _nng_ngrams_id = terms
, _nng_ngrams_type = ngrams_type
, _nng_ngrams_field = Nothing
, _nng_ngrams_tag = Nothing
, _nng_ngrams_class = Nothing
, _nng_ngrams_weight = 0 }
| (terms, ngrams_types) <- ngs
, ngrams_type <- ngrams_types
]
@@ -72,7 +72,10 @@ pairing a c l' = do
Just l'' -> pure l''
dataPaired <- dataPairing a (c,l,Authors) takeName takeName
r <- insertDB $ prepareInsert dataPaired
_ <- insertNodeNode [ NodeNode c a Nothing Nothing]
_ <- insertNodeNode [ NodeNode { _nn_node1_id = c
, _nn_node2_id = a
, _nn_score = Nothing
, _nn_category = Nothing }]
pure r
@@ -96,7 +99,10 @@ dataPairing aId (cId, lId, ngt) fc fa = do
prepareInsert :: HashMap ContactId (Set DocId) -> [NodeNode]
prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
prepareInsert m = map (\(n1,n2) -> NodeNode { _nn_node1_id = n1
, _nn_node2_id = n2
, _nn_score = Nothing
, _nn_category = Nothing })
$ List.concat
$ map (\(contactId, setDocIds)
-> map (\setDocId
@@ -54,7 +54,10 @@ insertDocNgrams :: CorpusId
-> HashMap (Indexed Int Ngrams) (Map NgramsType (Map NodeId Int))
-> Cmd err Int
insertDocNgrams cId m =
insertDocNgramsOn cId [ DocNgrams n (_index ng) (ngramsTypeId t) (fromIntegral i)
insertDocNgramsOn cId [ DocNgrams { dn_doc_id = n
, dn_ngrams_id = _index ng
, dn_ngrams_type = ngramsTypeId t
, dn_weight = fromIntegral i }
| (ng, t2n2i) <- HashMap.toList m
, (t, n2i) <- DM.toList t2n2i
, (n, i) <- DM.toList n2i
@@ -29,5 +29,6 @@ sendMail :: HasNodeError err => User -> Cmd err ()
sendMail u = do
server <- view $ hasConfig . gc_url
userLight <- getUserLightDB u
liftBase $ mail server (MailInfo (userLight_username userLight) (userLight_email userLight))
liftBase $ mail server (MailInfo { mailInfo_username = userLight_username userLight
, mailInfo_address = userLight_email userLight })
@@ -92,13 +92,13 @@ queryInCorpus cId t q = proc () -> do
else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
restrict -< (n ^. ns_search) @@ (pgTSQuery (unpack q))
restrict -< (n ^. ns_typename ) .== (pgInt4 $ toDBid NodeDocument)
returnA -< FacetDoc (n^.ns_id )
(n^.ns_date )
(n^.ns_name )
(n^.ns_hyperdata )
(nn^.nn_category )
(nn^.nn_score )
(nn^.nn_score )
returnA -< FacetDoc { facetDoc_id = n^.ns_id
, facetDoc_created = n^.ns_date
, facetDoc_title = n^.ns_name
, facetDoc_hyperdata = n^.ns_hyperdata
, facetDoc_category = nn^.nn_category
, facetDoc_ngramCount = nn^.nn_score
, facetDoc_score = nn^.nn_score }
joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
@@ -156,7 +156,10 @@ selectGroup :: HasDBid NodeType
selectGroup cId aId q = proc () -> do
(a, b, c, d) <- aggregate (p4 (groupBy, groupBy, groupBy, O.sum))
(selectContactViaDoc cId aId q) -< ()
returnA -< FacetPaired a b c d
returnA -< FacetPaired { _fp_id = a
, _fp_date = b
, _fp_hyperdata = c
, _fp_score = d }
queryContactViaDoc :: O.Query ( NodeSearchRead
@@ -270,7 +273,7 @@ textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
textSearch :: HasDBid NodeType
=> TSQuery -> ParentId
-> Limit -> Offset -> Order
-> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
-> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
where
typeId = toDBid NodeDocument
@@ -53,7 +53,10 @@ shareNodeWith (ShareNodeWith_User NodeFolderShared u) n = do
then errorWith "[G.D.A.S.shareNodeWith] Can share to others only"
else do
folderSharedId <- getFolderId u NodeFolderShared
insertDB ([NodeNode folderSharedId n Nothing Nothing]:: [NodeNode])
insertDB ([NodeNode { _nn_node1_id = folderSharedId
, _nn_node2_id = n
, _nn_score = Nothing
, _nn_category = Nothing }]:: [NodeNode])
shareNodeWith (ShareNodeWith_Node NodeFolderPublic nId) n = do
nodeToCheck <- getNode n
@@ -63,7 +66,10 @@ shareNodeWith (ShareNodeWith_Node NodeFolderPublic nId) n = do
else do
folderToCheck <- getNode nId
if hasNodeType folderToCheck NodeFolderPublic
then insertDB ([NodeNode nId n Nothing Nothing] :: [NodeNode])
then insertDB ([NodeNode { _nn_node1_id = nId
, _nn_node2_id = n
, _nn_score = Nothing
, _nn_category = Nothing }] :: [NodeNode])
else errorWith "[G.D.A.S.shareNodeWith] Can share NodeWith NodeFolderPublic only"
shareNodeWith _ _ = errorWith "[G.D.A.S.shareNodeWith] Not implemented for this NodeType"
@@ -118,11 +118,11 @@ instance (Arbitrary i, Arbitrary l) => Arbitrary (Pair i l) where
arbitrary = Pair <$> arbitrary <*> arbitrary
data FacetPaired id date hyperdata score =
FacetPaired {_fp_id :: id
,_fp_date :: date
,_fp_hyperdata :: hyperdata
,_fp_score :: score
} deriving (Show, Generic)
FacetPaired { _fp_id :: id
, _fp_date :: date
, _fp_hyperdata :: hyperdata
, _fp_score :: score }
deriving (Show, Generic)
$(deriveJSON (unPrefix "_fp_") ''FacetPaired)
$(makeAdaptorAndInstance "pFacetPaired" ''FacetPaired)