Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
hal
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
crawlers
hal
Commits
8782d81e
Verified
Commit
8782d81e
authored
Jul 26, 2023
by
Przemyslaw Kaminski
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[lang] fixes to requestedFields, Main updated, query is an array now
parent
240a13e7
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
48 additions
and
35 deletions
+48
-35
Main.hs
app/Main.hs
+14
-7
cabal.project
cabal.project
+1
-1
crawlerHAL.cabal
crawlerHAL.cabal
+9
-9
HAL.hs
src/HAL.hs
+8
-7
Corpus.hs
src/HAL/Doc/Corpus.hs
+12
-9
Utils.hs
src/HAL/Utils.hs
+4
-2
No files found.
app/Main.hs
View file @
8782d81e
...
...
@@ -21,10 +21,10 @@ import Tree
data
CountParams
=
CountParams
{
cp_query
::
T
.
Text
}
{
cp_query
::
[
T
.
Text
]
}
data
FetchParams
=
FetchParams
{
fp_query
::
T
.
Text
}
{
fp_query
::
[
T
.
Text
]
}
data
Command
=
Count
CountParams
...
...
@@ -33,12 +33,12 @@ data Command =
countParams
::
Parser
Command
countParams
=
Count
<$>
(
CountParams
<$>
strArgument
(
metavar
"query"
))
<$>
many
(
strArgument
(
metavar
"query"
)
))
fetchParams
::
Parser
Command
fetchParams
=
Fetch
<$>
(
FetchParams
<$>
strArgument
(
metavar
"query"
))
<$>
many
(
strArgument
(
metavar
"query"
)
))
params
::
Parser
Command
params
=
subparser
...
...
@@ -60,20 +60,27 @@ main = run =<< execParser opts
run
::
Command
->
IO
()
run
(
Count
(
CountParams
{
cp_query
}))
=
do
res
<-
getMetadataWithC
(
cp_query
)
(
Just
0
)
Nothing
Nothing
res
<-
getMetadataWithC
cp_query
(
Just
0
)
Nothing
Nothing
case
res
of
Left
err
->
putText
$
show
err
Right
(
cnt
,
_docsC
)
->
putText
$
show
cnt
run
(
Fetch
(
FetchParams
{
fp_query
}))
=
do
res
<-
getMetadataWithC
(
fp_query
)
(
Just
0
)
Nothing
Nothing
res
<-
getMetadataWithC
fp_query
(
Just
0
)
Nothing
Nothing
case
res
of
Left
err
->
putText
$
show
err
Right
(
_cnt
,
docsC
)
->
do
_
<-
runConduit
$
docsC
.|
mapM_C
(
\
(
Corpus
{
..
})
->
putText
$
"docid: "
<>
show
_corpus_docid
)
.|
mapM_C
printCorpus
.|
sinkList
pure
()
where
printCorpus
Corpus
{
..
}
=
do
putText
$
"docid: "
<>
_corpus_docid
<>
" ["
<>
(
T
.
intercalate
" "
_corpus_title
)
<>
"]"
putText
$
" "
<>
(
T
.
intercalate
" "
_corpus_abstract
)
putText
$
" "
<>
show
_corpus_abstract_lang_map
putText
$
" "
<>
show
_corpus_original
putText
"------------"
-- data
...
...
cabal.project
View file @
8782d81e
-- Generated by stack2cabal
with-compiler: ghc-
9.2.8
with-compiler: ghc-
8.10.7
packages:
./
...
...
crawlerHAL.cabal
View file @
8782d81e
...
...
@@ -48,7 +48,7 @@ library
RecordWildCards
TypeOperators
build-depends:
aeson >=
2.1.0 && < 2.3
aeson >=
1.5.6.0 && < 1.6
, base >=4.7 && <5
, bytestring >= 0.11.0 && < 0.13
, conduit >= 1.3.5 && < 1.4
...
...
@@ -65,10 +65,10 @@ library
, servant >= 0.19 && < 0.21
, servant-client >= 0.19 && < 0.21
, split >= 0.2.3.5 && < 0.3
, text >=
2.0.2
&& < 2.1
, text >=
1.2.3.0
&& < 2.1
, text-format >= 0.3.2.1 && < 0.4
, utf8-string >= 1.0.2 && < 1.1
, vector >= 0.1
3
.0 && < 0.14
, vector >= 0.1
2
.0 && < 0.14
default-language: Haskell2010
executable crawlerHAL-exe
...
...
@@ -88,7 +88,7 @@ executable crawlerHAL-exe
TypeOperators
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
aeson >=
2.1.0 && < 2.3
aeson >=
1.5.6 && < 1.6
, base >=4.7 && <5
, bytestring >= 0.11.0 && < 0.13
, conduit >= 1.3.5 && < 1.4
...
...
@@ -106,9 +106,9 @@ executable crawlerHAL-exe
, servant >= 0.19 && < 0.21
, servant-client >= 0.19 && < 0.21
, split >= 0.2.3.5 && < 0.3
, text >=
2.0.2
&& < 2.1
, text >=
1.2.3.0
&& < 2.1
, utf8-string >= 1.0.2 && < 1.1
, vector >= 0.1
3
.0 && < 0.14
, vector >= 0.1
2
.0 && < 0.14
default-language: Haskell2010
test-suite halCrawler-test
...
...
@@ -129,7 +129,7 @@ test-suite halCrawler-test
TypeOperators
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
aeson >=
2.1.0 && < 2.3
aeson >=
1.5.6 && < 1.6
, base >=4.7 && <5
, bytestring >= 0.11.0 && < 0.13
, conduit >= 1.3.5 && < 1.4
...
...
@@ -147,7 +147,7 @@ test-suite halCrawler-test
, servant >= 0.19 && < 0.21
, servant-client >= 0.19 && < 0.21
, split >= 0.2.3.5 && < 0.3
, text >=
2.0.2
&& < 2.1
, text >=
1.2.3.0
&& < 2.1
, utf8-string >= 1.0.2 && < 1.1
, vector >= 0.1
3
.0 && < 0.14
, vector >= 0.1
2
.0 && < 0.14
default-language: Haskell2010
src/HAL.hs
View file @
8782d81e
...
...
@@ -30,14 +30,14 @@ getMetadataWith :: Query
getMetadataWith
q
start_
limit
lang
=
do
runHalAPIClient
$
search
(
Just
$
requestedFields
lang
)
[
q
]
Nothing
start_
limit
getMetadataWithC
::
Query
getMetadataWithC
::
[
Query
]
->
Maybe
Start
->
Maybe
Limit
->
Maybe
ISO639_1
->
IO
(
Either
ClientError
(
Maybe
Count
,
ConduitT
()
Corpus
IO
()
))
getMetadataWithC
q
start_
limit
lang
=
do
getMetadataWithC
q
s
start_
limit
lang
=
do
-- First, estimate the total number of documents
eCount
<-
countResults
q
eCount
<-
countResults
q
s
pure
$
get'
<$>
eCount
where
get'
::
Count
...
...
@@ -55,8 +55,9 @@ getMetadataWithC q start_ limit lang = do
getPage
::
Start
->
Int
->
IO
[
Corpus
]
getPage
start'
pageNum
=
do
-- putText $ "requestedFields: " <> (show $ requestedFields lang)
let
offset
=
start'
+
pageNum
*
batchSize
eRes
<-
runHalAPIClient
$
search
(
Just
$
requestedFields
lang
)
[
q
]
Nothing
(
Just
offset
)
(
Just
$
fromIntegral
batchSize
)
eRes
<-
runHalAPIClient
$
search
(
Just
$
requestedFields
lang
)
qs
Nothing
(
Just
offset
)
(
Just
$
fromIntegral
batchSize
)
pure
$
case
eRes
of
Left
_
->
[]
Right
(
Response
{
_docs
})
->
_docs
...
...
@@ -65,10 +66,10 @@ getMetadataWithC q start_ limit lang = do
-- putText $ show _corpus_title
-- pure c
countResults
::
Query
->
IO
(
Either
ClientError
Count
)
countResults
q
=
do
countResults
::
[
Query
]
->
IO
(
Either
ClientError
Count
)
countResults
q
s
=
do
-- First, estimate the total number of documents
eRes
<-
runHalAPIClient
$
search
(
Just
$
requestedFields
Nothing
)
[
q
]
Nothing
(
Just
0
)
(
Just
1
)
::
IO
(
Either
ClientError
(
Response
Corpus
))
eRes
<-
runHalAPIClient
$
search
(
Just
$
requestedFields
Nothing
)
qs
Nothing
(
Just
0
)
(
Just
1
)
::
IO
(
Either
ClientError
(
Response
Corpus
))
pure
$
_numFound
<$>
eRes
requestedFields
::
Maybe
ISO639_1
->
Text
...
...
src/HAL/Doc/Corpus.hs
View file @
8782d81e
...
...
@@ -23,22 +23,23 @@ data Corpus = Corpus
,
_corpus_authors_names
::
[
Text
]
,
_corpus_authors_affiliations
::
[
Text
]
,
_corpus_struct_id
::
[
Int
]
,
_corpus_original
::
Object
}
deriving
(
Show
,
Generic
)
L
.
makeLenses
''
C
orpus
instance
Default
Corpus
where
def
=
Corpus
"default Id"
def
def
def
def
def
def
def
def
def
=
Corpus
"default Id"
def
def
def
def
def
def
def
def
mempty
instance
FromJSON
Corpus
where
parseJSON
=
withObject
"Corpus"
$
\
o
->
do
_corpus_docid
<-
(
o
.:
"docid"
)
_corpus_title
<-
(
o
.:
"title_s"
<|>
return
[]
)
_corpus_abstract
<-
(
o
.:
"en_abstract_s"
<|>
return
[]
)
_corpus_date
<-
(
o
.:?
"submittedDate_s"
)
_corpus_source
<-
(
o
.:?
"source_s"
)
_corpus_authors_names
<-
(
o
.:
"authFullName_s"
<|>
return
[]
)
_corpus_authors_affiliations
<-
(
o
.:
"authOrganism_s"
<|>
return
[]
)
_corpus_struct_id
<-
(
o
.:
"structId_i"
<|>
return
[]
)
_corpus_docid
<-
o
.:
"docid"
_corpus_title
<-
o
.:
"title_s"
<|>
return
[]
_corpus_abstract
<-
o
.:
"en_abstract_s"
<|>
return
[]
_corpus_date
<-
o
.:?
"submittedDate_s"
_corpus_source
<-
o
.:?
"source_s"
_corpus_authors_names
<-
o
.:
"authFullName_s"
<|>
return
[]
_corpus_authors_affiliations
<-
o
.:
"authOrganism_s"
<|>
return
[]
_corpus_struct_id
<-
o
.:
"structId_i"
<|>
return
[]
abstracts
<-
mapM
(
\
lang
->
do
...
...
@@ -46,6 +47,8 @@ instance FromJSON Corpus where
pure
$
(
\
a
->
(
lang
,
a
))
<$>
ma
)
allLangs
let
_corpus_abstract_lang_map
=
Map
.
fromList
$
catMaybes
abstracts
let
_corpus_original
=
o
pure
$
Corpus
{
..
}
instance
ToHttpApiData
Corpus
where
...
...
src/HAL/Utils.hs
View file @
8782d81e
module
HAL.Utils
where
import
Data.LanguageCodes
(
ISO639_1
(
..
),
language
)
import
Data.LanguageCodes
(
ISO639_1
(
..
),
toChars
)
import
Data.Text
qualified
as
T
import
Protolude
...
...
@@ -8,4 +8,6 @@ allLangs :: [ISO639_1]
allLangs
=
enumFrom
(
toEnum
0
)
::
[
ISO639_1
]
langAbstractS
::
ISO639_1
->
Text
langAbstractS
lang
=
(
T
.
pack
$
language
lang
)
<>
"_abstract_s"
langAbstractS
lang
=
(
T
.
pack
[
l1
,
l2
])
<>
"_abstract_s"
where
(
l1
,
l2
)
=
toChars
lang
Przemyslaw Kaminski
@cgenie
mentioned in issue
gargantext/haskell-gargantext#256
·
Jul 26, 2023
mentioned in issue
gargantext/haskell-gargantext#256
mentioned in issue gargantext/haskell-gargantext#256
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment