Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
hal
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
crawlers
hal
Commits
0a565696
Commit
0a565696
authored
Mar 26, 2024
by
mzheng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
changed cabal.project and _corpus_authors_affiliations to instStructName_s
parent
b99b9e56
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
26 additions
and
16 deletions
+26
-16
Main.hs
app/Main.hs
+21
-12
cabal.project
cabal.project
+1
-1
HAL.hs
src/HAL.hs
+2
-1
Corpus.hs
src/HAL/Doc/Corpus.hs
+2
-2
No files found.
app/Main.hs
View file @
0a565696
...
...
@@ -5,7 +5,8 @@ module Main where
import
Conduit
(
sinkList
,
mapM_C
,
(
.|
),
runConduit
)
import
Data.LanguageCodes
(
ISO639_1
(
..
))
import
Data.Text
qualified
as
T
import
HAL
(
getMetadataWithCursorOptsC
,
countResultsOpts'
,
HalCrawlerOptions
(
..
),
defaultHalOptions
)
import
HAL
--(getMetadataWithCursorOptsC, countResultsOpts', HalCrawlerOptions(..), defaultHalOptions)
import
HAL.Types
import
HAL.Doc
import
HAL.Doc.Corpus
(
Corpus
(
..
))
import
Network.HTTP.Client
(
newManager
)
...
...
@@ -39,11 +40,11 @@ fetchParams = Fetch <$>
(
FetchParams
<$>
strArgument
(
metavar
"query"
)
<*>
option
auto
(
long
"limit"
)
<*>
optional
(
option
(
maybeReader
readLang
)
(
long
"lang"
)))
<*>
optional
(
option
(
maybeReader
readLang
)
(
long
"lang"
)))
readLang
::
Prelude
.
String
->
Maybe
ISO639_1
readLang
=
readMaybe
params
::
Parser
Command
params
=
subparser
(
command
"count"
(
info
countParams
(
progDesc
"Count number of docs for a given query"
))
...
...
@@ -56,11 +57,19 @@ opts = info (params <**> helper)
<>
header
"crawlerHAL-exe"
)
main
::
IO
()
main
=
run
=<<
execParser
opts
-- res <- getMetadataWith (generateRequestByStructID "artificial intelligence" imt) (Just 0) (Just 55)
-- case res of
-- (Left err) -> print err
-- (Right val) -> print $ _docs val
main
=
do
let
a
=
T
.
pack
"ok"
<>
T
.
pack
"ok"
res
<-
getMetadataWith
[
generateRequestByStructID
"camera"
imt
]
(
Just
0
)
(
Just
55
)
(
Just
EN
)
case
res
of
(
Left
err
)
->
print
err
(
Right
val
)
->
mapM_
(
print
.
cleanShow
)
$
_docs
val
cleanShow
::
Corpus
->
Text
cleanShow
corp
=
T
.
append
"
\n
"
$
cleanAuthorsAffiliations
corp
cleanAuthorsAffiliations
::
Corpus
->
Text
cleanAuthorsAffiliations
corp
=
T
.
append
"Authors affiliations : "
$
T
.
intercalate
" | "
$
_corpus_authors_affiliations
corp
run
::
Command
->
IO
()
run
(
Count
(
CountParams
{
cp_query
,
cp_lang
}))
=
do
...
...
@@ -115,8 +124,8 @@ run (Fetch (FetchParams { fp_query, fp_limit, fp_lang })) = do
-- OR structId_i:1048346
-- OR structId_i:352124)|]
--
imt :: [T.Text]
--
imt = [
imt
::
[
T
.
Text
]
imt
=
[
-- "224096"
-- ,"144103"
-- ,"84538"
...
...
@@ -132,6 +141,6 @@ run (Fetch (FetchParams { fp_query, fp_limit, fp_lang })) = do
-- ,"481355"
-- ,"469216"
-- ,"542824"
-- ,
"6279"
"6279"
-- ,"29212"
--
]
]
cabal.project
View file @
0a565696
with-compiler: ghc
-9.4.7
with-compiler: ghc
packages: .
tests: True
src/HAL.hs
View file @
0a565696
...
...
@@ -230,7 +230,8 @@ baseFields = [ "docid"
,
"submittedDate_s"
,
"source_s"
,
"authFullName_s"
,
"authOrganism_s"
]
,
"authOrganism_s"
,
"instStructName_s"
]
structFields
::
Text
structFields
=
"docid,label_s,parentDocid_i"
...
...
src/HAL/Doc/Corpus.hs
View file @
0a565696
...
...
@@ -38,7 +38,7 @@ instance FromJSON Corpus where
_corpus_date
<-
o
.:?
"submittedDate_s"
_corpus_source
<-
o
.:?
"source_s"
_corpus_authors_names
<-
o
.:
"authFullName_s"
<|>
return
[]
_corpus_authors_affiliations
<-
o
.:
"
authOrganism
_s"
<|>
return
[]
_corpus_authors_affiliations
<-
o
.:
"
instStructName
_s"
<|>
return
[]
_corpus_struct_id
<-
o
.:
"structId_i"
<|>
return
[]
abstracts
<-
...
...
@@ -52,4 +52,4 @@ instance FromJSON Corpus where
pure
$
Corpus
{
..
}
instance
ToHttpApiData
Corpus
where
toUrlPiece
_
=
"docid,title_s,en_abstract_s,fr_abstract_s,submittedDate_s,source_s,authFullName_s,
authOrganism
_s,structId_i"
toUrlPiece
_
=
"docid,title_s,en_abstract_s,fr_abstract_s,submittedDate_s,source_s,authFullName_s,
instStructName
_s,structId_i"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment