Commit 0a565696 authored by mzheng

Changed cabal.project, and changed the source field of _corpus_authors_affiliations from authOrganism_s to instStructName_s

parent b99b9e56
......@@ -5,7 +5,8 @@ module Main where
import Conduit ( sinkList, mapM_C, (.|), runConduit )
import Data.LanguageCodes (ISO639_1(..))
import Data.Text qualified as T
import HAL (getMetadataWithCursorOptsC, countResultsOpts', HalCrawlerOptions(..), defaultHalOptions)
import HAL --(getMetadataWithCursorOptsC, countResultsOpts', HalCrawlerOptions(..), defaultHalOptions)
import HAL.Types
import HAL.Doc
import HAL.Doc.Corpus (Corpus(..))
import Network.HTTP.Client (newManager)
......@@ -39,11 +40,11 @@ fetchParams = Fetch <$>
(FetchParams
<$> strArgument (metavar "query")
<*> option auto (long "limit")
<*> optional(option (maybeReader readLang) (long "lang")))
<*> optional (option (maybeReader readLang) (long "lang")))
-- | Parse the value of the @--lang@ option into an 'ISO639_1' code.
--
-- Delegates to 'readMaybe', so the result is 'Nothing' whenever the string
-- does not read as an 'ISO639_1' value.
readLang :: Prelude.String -> Maybe ISO639_1
readLang rawCode = readMaybe rawCode
params :: Parser Command
params = subparser
(command "count" (info countParams (progDesc "Count number of docs for a given query"))
......@@ -56,11 +57,19 @@ opts = info (params <**> helper)
<> header "crawlerHAL-exe")
-- | Program entry point: parse the command line with 'opts' and hand the
-- resulting command to 'run'.
main :: IO ()
main = execParser opts >>= run
-- res <- getMetadataWith (generateRequestByStructID "artificial intelligence" imt) (Just 0) (Just 55)
-- case res of
-- (Left err) -> print err
-- (Right val) -> print $ _docs val
-- Debug entry point: builds a request from the hard-coded "camera" query and
-- 'imt' via 'generateRequestByStructID', runs it with 'getMetadataWith', and
-- prints either the error or one 'cleanShow' line per returned document.
-- NOTE(review): (Just 0) (Just 55) (Just EN) look like start offset, row count
-- and language -- confirm against the signature of 'getMetadataWith'.
main = do
  res <- getMetadataWith [generateRequestByStructID "camera" imt] (Just 0) (Just 55) (Just EN)
  case res of
    Left err  -> print err
    -- 'print' goes through 'show', so each rendered Text is printed quoted
    -- and with its leading newline escaped.
    Right val -> mapM_ (print . cleanShow) $ _docs val
-- | Render a 'Corpus' for display: a newline followed by the text produced by
-- 'cleanAuthorsAffiliations'.
cleanShow :: Corpus -> Text
cleanShow corpus = "\n" <> cleanAuthorsAffiliations corpus
-- | Label and flatten the affiliations of a 'Corpus': the
-- '_corpus_authors_affiliations' entries joined with @" | "@ and prefixed
-- with @"Authors affiliations : "@.
cleanAuthorsAffiliations :: Corpus -> Text
cleanAuthorsAffiliations corpus = "Authors affiliations : " <> joined
  where
    joined = T.intercalate " | " (_corpus_authors_affiliations corpus)
run :: Command -> IO ()
run (Count (CountParams { cp_query, cp_lang })) = do
......@@ -115,8 +124,8 @@ run (Fetch (FetchParams { fp_query, fp_limit, fp_lang })) = do
-- OR structId_i:1048346
-- OR structId_i:352124)|]
-- imt :: [T.Text]
-- imt = [
imt :: [T.Text]
imt = [
-- "224096"
-- ,"144103"
-- ,"84538"
......@@ -132,6 +141,6 @@ run (Fetch (FetchParams { fp_query, fp_limit, fp_lang })) = do
-- ,"481355"
-- ,"469216"
-- ,"542824"
-- ,"6279"
"6279"
-- ,"29212"
-- ]
]
with-compiler: ghc-9.4.7
with-compiler: ghc
packages: .
tests: True
......@@ -230,7 +230,8 @@ baseFields = [ "docid"
, "submittedDate_s"
, "source_s"
, "authFullName_s"
, "authOrganism_s" ]
, "authOrganism_s"
, "instStructName_s" ]
-- | Comma-separated field names @docid@, @label_s@ and @parentDocid_i@.
-- NOTE(review): presumably the field list requested for structure lookups --
-- confirm at the use sites.
structFields :: Text
structFields = "docid,label_s,parentDocid_i"
......
......@@ -38,7 +38,7 @@ instance FromJSON Corpus where
_corpus_date <- o .:? "submittedDate_s"
_corpus_source <- o .:? "source_s"
_corpus_authors_names <- o .: "authFullName_s" <|> return []
_corpus_authors_affiliations <- o .: "authOrganism_s" <|> return []
_corpus_authors_affiliations <- o .: "instStructName_s" <|> return []
_corpus_struct_id <- o .: "structId_i" <|> return []
abstracts <-
......@@ -52,4 +52,4 @@ instance FromJSON Corpus where
pure $ Corpus { .. }
-- | The URL piece for a 'Corpus' is a fixed comma-separated list of field
-- names; the value itself is ignored.
instance ToHttpApiData Corpus where
  -- The list names @instStructName_s@ because that is the key the 'FromJSON'
  -- instance reads into '_corpus_authors_affiliations'. A second, earlier
  -- equation naming @authOrganism_s@ used to shadow this one, making it
  -- unreachable.
  toUrlPiece _ = "docid,title_s,en_abstract_s,fr_abstract_s,submittedDate_s,source_s,authFullName_s,instStructName_s,structId_i"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment