[refactoring] more record syntax refactoring

parent 37a36aba
@@ -31,23 +31,23 @@ get la q ml = do
 toDoc' :: Lang -> HAL.Corpus -> IO HyperdataDocument
 toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
   (utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") Just d)
-  pure $ HyperdataDocument (Just "Hal")
-                           (Just $ pack $ show i)
-                           Nothing
-                           Nothing
-                           Nothing
-                           Nothing
-                           (Just $ intercalate " " t)
-                           (Just $ foldl (\x y -> x <> ", " <> y) "" aus)
-                           (Just $ foldl (\x y -> x <> ", " <> y) "" $ affs <> map (cs . show) struct_id)
-                           (Just $ maybe "Nothing" identity s)
-                           (Just $ intercalate " " ab)
-                           (fmap (pack . show) utctime)
-                           pub_year
-                           pub_month
-                           pub_day
-                           Nothing
-                           Nothing
-                           Nothing
-                           (Just $ (pack . show) la)
+  pure $ HyperdataDocument { _hd_bdd = Just "Hal"
+                           , _hd_doi = Just $ pack $ show i
+                           , _hd_url = Nothing
+                           , _hd_uniqId = Nothing
+                           , _hd_uniqIdBdd = Nothing
+                           , _hd_page = Nothing
+                           , _hd_title = Just $ intercalate " " t
+                           , _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" aus
+                           , _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" $ affs <> map (cs . show) struct_id
+                           , _hd_source = Just $ maybe "Nothing" identity s
+                           , _hd_abstract = Just $ intercalate " " ab
+                           , _hd_publication_date = fmap (pack . show) utctime
+                           , _hd_publication_year = pub_year
+                           , _hd_publication_month = pub_month
+                           , _hd_publication_day = pub_day
+                           , _hd_publication_hour = Nothing
+                           , _hd_publication_minute = Nothing
+                           , _hd_publication_second = Nothing
+                           , _hd_language_iso2 = Just $ (pack . show) la }
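Most hunks in this commit apply the same mechanical change: constructor arguments passed by position become labelled record fields, so each value is tied to a field name instead of to its slot in the argument list. A minimal, self-contained sketch of the pattern, using a hypothetical Doc type rather than the project's HyperdataDocument:

{-# LANGUAGE OverloadedStrings #-}
module RecordSyntaxSketch where

import Data.Text (Text)

-- Illustrative record; the field names are hypothetical.
data Doc = Doc
  { docSource   :: Maybe Text
  , docTitle    :: Maybe Text
  , docAbstract :: Maybe Text
  } deriving (Show)

-- Positional construction: correctness depends on argument order, which is
-- fragile for constructors with many fields of the same type.
positional :: Doc
positional = Doc (Just "Hal") (Just "A title") Nothing

-- Record construction: reordering or adding fields cannot silently shift a
-- value into the wrong slot.
labelled :: Doc
labelled = Doc { docSource   = Just "Hal"
               , docTitle    = Just "A title"
               , docAbstract = Nothing
               }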
@@ -39,22 +39,23 @@ toDoc' la docs' = do
 toDoc :: Lang -> ISTEX.Document -> IO HyperdataDocument
 toDoc la (ISTEX.Document i t a ab d s) = do
   (utctime, (pub_year, pub_month, pub_day)) <- Date.dateSplit la (maybe (Just "2019") (Just . pack . show) d)
-  pure $ HyperdataDocument (Just "Istex")
-                           (Just i)
-                           Nothing
-                           Nothing
-                           Nothing
-                           Nothing
-                           t
-                           (Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a))
-                           (Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a))
-                           (Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s))
-                           ab
-                           (fmap (pack . show) utctime)
-                           pub_year
-                           pub_month
-                           pub_day
-                           Nothing
-                           Nothing
-                           Nothing
-                           (Just $ (pack . show) la)
+  pure $ HyperdataDocument { _hd_bdd = Just "Istex"
+                           , _hd_doi = Just i
+                           , _hd_url = Nothing
+                           , _hd_uniqId = Nothing
+                           , _hd_uniqIdBdd = Nothing
+                           , _hd_page = Nothing
+                           , _hd_title = t
+                           , _hd_authors = Just $ foldl (\x y -> x <> ", " <> y) "" (map ISTEX._author_name a)
+                           , _hd_institutes = Just $ foldl (\x y -> x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a)
+                           , _hd_source = Just $ foldl (\x y -> x <> ", " <> y) "" (catMaybes $ map ISTEX._source_title s)
+                           , _hd_abstract = ab
+                           , _hd_publication_date = fmap (pack . show) utctime
+                           , _hd_publication_year = pub_year
+                           , _hd_publication_month = pub_month
+                           , _hd_publication_day = pub_day
+                           , _hd_publication_hour = Nothing
+                           , _hd_publication_minute = Nothing
+                           , _hd_publication_second = Nothing
+                           , _hd_language_iso2 = Just $ (pack . show) la }
@@ -38,25 +38,25 @@ get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
 toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
 toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
                           (PubMedDoc.PubMedDate a y m d)
-         ) = HyperdataDocument (Just "PubMed")
-                               Nothing
-                               Nothing
-                               Nothing
-                               Nothing
-                               Nothing
-                               t
-                               (authors aus)
-                               (institutes aus)
-                               j
-                               (abstract as)
-                               (Just $ Text.pack $ show a)
-                               (Just $ fromIntegral y)
-                               (Just m)
-                               (Just d)
-                               Nothing
-                               Nothing
-                               Nothing
-                               (Just $ (Text.pack . show) l)
+         ) = HyperdataDocument { _hd_bdd = Just "PubMed"
+                               , _hd_doi = Nothing
+                               , _hd_url = Nothing
+                               , _hd_uniqId = Nothing
+                               , _hd_uniqIdBdd = Nothing
+                               , _hd_page = Nothing
+                               , _hd_title = t
+                               , _hd_authors = authors aus
+                               , _hd_institutes = institutes aus
+                               , _hd_source = j
+                               , _hd_abstract = abstract as
+                               , _hd_publication_date = Just $ Text.pack $ show a
+                               , _hd_publication_year = Just $ fromIntegral y
+                               , _hd_publication_month = Just m
+                               , _hd_publication_day = Just d
+                               , _hd_publication_hour = Nothing
+                               , _hd_publication_minute = Nothing
+                               , _hd_publication_second = Nothing
+                               , _hd_language_iso2 = Just $ (Text.pack . show) l }
   where
     authors :: Maybe [PubMedDoc.Author] -> Maybe Text
     authors aus' = case aus' of
@@ -122,25 +122,25 @@ toDoc ff d = do
   (utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit lang dateToParse
-  pure $ HyperdataDocument (Just $ DT.pack $ show ff)
-                           (lookup "doi" d)
-                           (lookup "URL" d)
-                           Nothing
-                           Nothing
-                           Nothing
-                           (lookup "title" d)
-                           Nothing
-                           (lookup "authors" d)
-                           (lookup "source" d)
-                           (lookup "abstract" d)
-                           (fmap (DT.pack . show) utcTime)
-                           (pub_year)
-                           (pub_month)
-                           (pub_day)
-                           Nothing
-                           Nothing
-                           Nothing
-                           (Just $ (DT.pack . show) lang)
+  pure $ HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
+                           , _hd_doi = lookup "doi" d
+                           , _hd_url = lookup "URL" d
+                           , _hd_uniqId = Nothing
+                           , _hd_uniqIdBdd = Nothing
+                           , _hd_page = Nothing
+                           , _hd_title = lookup "title" d
+                           , _hd_authors = Nothing
+                           , _hd_institutes = lookup "authors" d
+                           , _hd_source = lookup "source" d
+                           , _hd_abstract = lookup "abstract" d
+                           , _hd_publication_date = fmap (DT.pack . show) utcTime
+                           , _hd_publication_year = pub_year
+                           , _hd_publication_month = pub_month
+                           , _hd_publication_day = pub_day
+                           , _hd_publication_hour = Nothing
+                           , _hd_publication_minute = Nothing
+                           , _hd_publication_second = Nothing
+                           , _hd_language_iso2 = Just $ (DT.pack . show) lang }
 enrichWith :: FileFormat
            -> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]])
@@ -75,18 +75,26 @@ instance ToJSON GrandDebatReference
 instance ToHyperdataDocument GrandDebatReference
   where
-    toHyperdataDocument (GrandDebatReference id' _ref title'
-                                             _createdAt' publishedAt' _updatedAt
-                                             _trashed _trashedStatus
-                                             _authorId authorType' authorZipCode'
-                                             responses') =
-      HyperdataDocument (Just "GrandDebat") id'
-                        Nothing Nothing Nothing Nothing
-                        title' authorType' authorType' authorZipCode'
-                        (toAbstract <$> responses')
-                        publishedAt'
-                        Nothing Nothing Nothing Nothing Nothing Nothing
-                        (Just $ Text.pack $ show FR)
+    toHyperdataDocument (GrandDebatReference { id, title, publishedAt, authorType, authorZipCode, responses }) =
+      HyperdataDocument { _hd_bdd = Just "GrandDebat"
+                        , _hd_doi = id
+                        , _hd_url = Nothing
+                        , _hd_uniqId = Nothing
+                        , _hd_uniqIdBdd = Nothing
+                        , _hd_page = Nothing
+                        , _hd_title = title
+                        , _hd_authors = authorType
+                        , _hd_institutes = authorType
+                        , _hd_source = authorZipCode
+                        , _hd_abstract = toAbstract <$> responses
+                        , _hd_publication_date = publishedAt
+                        , _hd_publication_year = Nothing
+                        , _hd_publication_month = Nothing
+                        , _hd_publication_day = Nothing
+                        , _hd_publication_hour = Nothing
+                        , _hd_publication_minute = Nothing
+                        , _hd_publication_second = Nothing
+                        , _hd_language_iso2 = Just $ Text.pack $ show FR }
       where
         toAbstract = (Text.intercalate " . ") . ((filter (/= "")) . (map toSentence))
        toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of
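The new toHyperdataDocument head matches its argument with bare field names ({ id, title, ... }), a style that relies on GHC's NamedFieldPuns extension (or on writing each binding out as field = field). A small illustrative sketch of that pattern, using a made-up Ref type rather than GrandDebatReference:

{-# LANGUAGE NamedFieldPuns #-}
module FieldPunSketch where

-- Illustrative record; only the pun syntax matters here.
data Ref = Ref { refId :: Int, refTitle :: String }

-- `Ref { refId, refTitle }` binds refId and refTitle to the fields of the
-- same name; without NamedFieldPuns this would be written
-- `Ref { refId = refId, refTitle = refTitle }`.
describe :: Ref -> String
describe (Ref { refId, refTitle }) = show refId ++ ": " ++ refTitle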
@@ -119,17 +119,24 @@ unbound _ _ = Nothing
 bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
 bind2doc l [ link, date, langDoc, authors, _source, publisher, title, abstract ] =
-  HyperdataDocument (Just "Isidore")
-                    Nothing
-                    (unbound l link)
-                    Nothing Nothing Nothing
-                    (unbound l title)
-                    (unbound l authors)
-                    Nothing
-                    (unbound l publisher)
-                    (unbound l abstract)
-                    (unbound l date)
-                    Nothing Nothing Nothing Nothing Nothing Nothing
-                    (unbound l langDoc)
+  HyperdataDocument { _hd_bdd = Just "Isidore"
+                    , _hd_doi = Nothing
+                    , _hd_url = unbound l link
+                    , _hd_uniqId = Nothing
+                    , _hd_uniqIdBdd = Nothing
+                    , _hd_page = Nothing
+                    , _hd_title = unbound l title
+                    , _hd_authors = unbound l authors
+                    , _hd_institutes = Nothing
+                    , _hd_source = unbound l publisher
+                    , _hd_abstract = unbound l abstract
+                    , _hd_publication_date = unbound l date
+                    , _hd_publication_year = Nothing
+                    , _hd_publication_month = Nothing
+                    , _hd_publication_day = Nothing
+                    , _hd_publication_hour = Nothing
+                    , _hd_publication_minute = Nothing
+                    , _hd_publication_second = Nothing
+                    , _hd_language_iso2 = unbound l langDoc }
 bind2doc _ _ = undefined
@@ -48,8 +48,14 @@ json2csv fin fout = do
   writeFile fout (headerCsvGargV3, fromList $ map patent2csvDoc patents)
 patent2csvDoc :: Patent -> CsvDoc
-patent2csvDoc (Patent title abstract year _) =
-  CsvDoc title "Source" (Just $ read (unpack year)) (Just 1) (Just 1) abstract "Authors"
+patent2csvDoc (Patent { .. }) =
+  CsvDoc { csv_title = _patent_title
+         , csv_source = "Source"
+         , csv_publication_year = Just $ read (unpack _patent_year)
+         , csv_publication_month = Just 1
+         , csv_publication_day = Just 1
+         , csv_abstract = _patent_abstract
+         , csv_authors = "Authors" }
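patent2csvDoc now matches with Patent { .. }, i.e. GHC's RecordWildCards extension: the wildcard brings every field of the record into scope under its field name. A minimal sketch of that pattern, using a trimmed, illustrative Patent with only the fields this hunk touches:

{-# LANGUAGE RecordWildCards #-}
module WildcardPatternSketch where

import Data.Text (Text, unpack)

-- Trimmed, illustrative version of the record; field names follow the diff.
data Patent = Patent
  { _patent_title    :: Text
  , _patent_abstract :: Text
  , _patent_year     :: Text
  }

-- `Patent { .. }` binds _patent_title, _patent_abstract and _patent_year,
-- so the body can refer to them directly instead of using a positional
-- pattern that has to name (or ignore) every argument.
patentYear :: Patent -> Int
patentYear (Patent { .. }) = read (unpack _patent_year)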
@@ -70,3 +70,5 @@ onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
                        -> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
 onField k f m = m <> ( maybe [] f (lookup k m) )
@@ -68,5 +68,3 @@ fixFields ns = map (first fixFields'') ns
       | champs == "UR" = "url"
       | champs == "N2" = abstract
       | otherwise = champs
@@ -95,7 +95,9 @@ parsePage =
     revision <-
       parseRevision
     many_ $ ignoreAnyTreeContent
-    return $ Page Mediawiki title revision
+    return $ Page { _markupFormat = Mediawiki
+                  , _title = title
+                  , _text = revision }
 parseMediawiki :: MonadThrow m => ConduitT Event Page m (Maybe ())
 parseMediawiki =
@@ -108,7 +110,7 @@ mediawikiPageToPlain :: Page -> IO Page
 mediawikiPageToPlain page = do
   title <- mediaToPlain $ _title page
   revision <- mediaToPlain $ _text page
-  return $ Page Plaintext title revision
+  return $ Page { _markupFormat = Plaintext, _title = title, _text = revision }
   where mediaToPlain media =
           case media of
            (Nothing) -> return Nothing
@@ -86,17 +86,17 @@ buildNgramsLists user uCid mCid mfslw gp = do
 data MapListSize = MapListSize { unMapListSize :: !Int }
-buildNgramsOthersList ::( HasNodeError err
+buildNgramsOthersList :: ( HasNodeError err
                          , CmdM env err m
                          , HasNodeStory env err m
                          , HasTreeError err
                          )
                       => User
                       -> UserCorpusId
                       -> Maybe FlowSocialListWith
                       -> GroupParams
                       -> (NgramsType, MapListSize)
                       -> m (Map NgramsType [NgramsElement])
 buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize) = do
   allTerms :: HashMap NgramsTerm (Set NodeId) <- getNodesByNgramsUser uCid nt
@@ -106,7 +106,7 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize)
           $ HashMap.fromList
           $ List.zip (HashMap.keys allTerms)
                      (List.cycle [mempty])
           )
   let
     groupedWithList = toGroupedTree {- groupParams -} socialLists allTerms
@@ -148,13 +148,13 @@ buildNgramsTermsList :: ( HasNodeError err
                         , HasNodeStory env err m
                         , HasTreeError err
                         )
                      => User
                      -> UserCorpusId
                      -> MasterCorpusId
                      -> Maybe FlowSocialListWith
                      -> GroupParams
                      -> (NgramsType, MapListSize)
                      -> m (Map NgramsType [NgramsElement])
 buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
   -- Filter 0 With Double
@@ -170,7 +170,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
           $ HashMap.fromList
           $ List.zip (HashMap.keys allTerms)
                      (List.cycle [mempty])
           )
   printDebug "[buldNgramsTermsList: Flow Social List / end]" nt
   let ngramsKeys = HashMap.keysSet allTerms
@@ -72,7 +72,7 @@ groupWith :: GroupParams
           -> NgramsTerm
           -> NgramsTerm
 groupWith GroupIdentity t = identity t
-groupWith (GroupParams l _m _n _) t =
+groupWith (GroupParams { unGroupParams_lang = l }) t =
   NgramsTerm
     $ Text.intercalate " "
     $ map (stem l)
@@ -86,7 +86,7 @@ groupWith (GroupParams l _m _n _) t =
     $ unNgramsTerm t
 -- | This lemmatization group done with CoreNLP algo (or others)
-groupWith (GroupWithPosTag _ _ m) t =
+groupWith (GroupWithPosTag { _gwl_map = m }) t =
   case HashMap.lookup (unNgramsTerm t) m of
     Nothing -> clean t
     Just t' -> clean $ NgramsTerm t'
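Both groupWith equations switch from positional patterns full of ignored arguments (_m, _n, _) to matching only the field each equation actually needs, by name. A small illustrative sketch of that style; the GroupParams stand-in below is not the project's full type, and this form needs no language extension:

module FieldMatchSketch where

-- Illustrative stand-in with just two fields.
data GroupParams = GroupParams
  { unGroupParams_lang :: String
  , unGroupParams_len  :: Int
  }

-- Bind only the field this function cares about; further fields can be
-- added to the record later without touching this pattern.
langOf :: GroupParams -> String
langOf (GroupParams { unGroupParams_lang = l }) = l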
@@ -82,11 +82,11 @@ makeLenses ''TermType
 --extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
 extractTerms :: TermType Lang -> [Text] -> IO [[Terms]]
-extractTerms (Unsupervised l n s m) xs = mapM (terms (Unsupervised l n s (Just m'))) xs
+extractTerms (Unsupervised {..}) xs = mapM (terms (Unsupervised { _tt_model = Just m', .. })) xs
   where
-    m' = case m of
+    m' = case _tt_model of
       Just m''-> m''
-      Nothing -> newTries n (Text.intercalate " " xs)
+      Nothing -> newTries _tt_windowSize (Text.intercalate " " xs)
 extractTerms termTypeLang xs = mapM (terms termTypeLang) xs
@@ -96,15 +96,16 @@ withLang :: (Foldable t, Functor t, HasText h)
          => TermType Lang
          -> t h
          -> TermType Lang
-withLang (Unsupervised l n s m) ns = Unsupervised l n s m'
+withLang (Unsupervised {..}) ns = Unsupervised { _tt_model = m', .. }
   where
-    m' = case m of
+    m' = case _tt_model of
       Nothing -> -- trace ("buildTries here" :: String)
-                 Just $ buildTries n $ fmap toToken
-                      $ uniText
-                      $ Text.intercalate " . "
-                      $ List.concat
-                      $ map hasText ns
+                 Just $ buildTries _tt_ngramsSize
+                      $ fmap toToken
+                      $ uniText
+                      $ Text.intercalate " . "
+                      $ List.concat
+                      $ map hasText ns
       just_m -> just_m
 withLang l _ = l
@@ -171,9 +172,9 @@ terms :: TermType Lang -> Text -> IO [Terms]
 terms (Mono lang) txt = pure $ monoTerms lang txt
 terms (Multi lang) txt = multiterms lang txt
 terms (MonoMulti lang) txt = terms (Multi lang) txt
-terms (Unsupervised lang n s m) txt = termsUnsupervised (Unsupervised lang n s (Just m')) txt
+terms (Unsupervised { .. }) txt = termsUnsupervised (Unsupervised { _tt_model = Just m', .. }) txt
   where
-    m' = maybe (newTries n txt) identity m
+    m' = maybe (newTries _tt_ngramsSize txt) identity _tt_model
 -- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
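extractTerms, withLang and terms all use the same RecordWildCards idiom twice over: {..} in the pattern brings every _tt_* field into scope, and { _tt_model = Just m', .. } on the right rebuilds the record with one field overridden and the rest filled in from those same names. A hedged, self-contained sketch of that round trip; the Settings type below is illustrative, not the project's TermType:

{-# LANGUAGE RecordWildCards #-}
module RebuildSketch where

-- Illustrative stand-in for a settings record with an optional cached model.
data Settings = Settings
  { _tt_ngramsSize :: Int
  , _tt_windowSize :: Int
  , _tt_model      :: Maybe String
  } deriving (Show)

-- `Settings {..}` binds all fields; `Settings { _tt_model = Just m', .. }`
-- sets one field explicitly and fills the others from the bindings the
-- pattern introduced.
withDefaultModel :: Settings -> Settings
withDefaultModel (Settings {..}) =
  Settings { _tt_model = Just m', .. }
  where
    m' = maybe ("tries-" ++ show _tt_ngramsSize) id _tt_model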