Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
hal
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
hal
Commits
f7b928da
Verified
Commit
f7b928da
authored
Jul 26, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[refactoring] fixes to dependencies
parent
8782d81e
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
77 additions
and
68 deletions
+77
-68
Main.hs
app/Main.hs
+43
-44
cabal.project
cabal.project
+2
-2
crawlerHAL.cabal
crawlerHAL.cabal
+10
-13
HAL.hs
src/HAL.hs
+16
-5
Corpus.hs
src/HAL/Doc/Corpus.hs
+1
-2
Utils.hs
src/HAL/Utils.hs
+5
-2
No files found.
app/Main.hs
View file @
f7b928da
...
...
@@ -11,7 +11,6 @@ import HAL (getMetadataWith)
import
HAL.Client
import
HAL.Doc
import
HAL.Doc.Corpus
(
Corpus
(
..
))
import
NeatInterpolation
(
text
)
import
Network.HTTP.Client
(
newManager
)
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
import
Options.Applicative
...
...
@@ -84,47 +83,47 @@ run (Fetch (FetchParams { fp_query })) = do
-- data
yearReq
=
[
text
|
(language_t:en)
AND (producedDateY_i:2018
OR producedDateY_i:2019
OR producedDateY_i:2020
OR producedDateY_i:2021
OR producedDateY_i:2022)
AND (structId_i:302102
OR structId_i:469216
OR structId_i:6279
OR structId_i:224096
OR structId_i:144103
OR structId_i:497330
OR structId_i:1076752
OR structId_i:84538
OR structId_i:301262
OR structId_i:481355
OR structId_i:29212
OR structId_i:301442
OR structId_i:542824
OR structId_i:300362
OR structId_i:1048346
OR structId_i:352124)
|]
--
yearReq = [text|
--
(language_t:en)
--
AND (producedDateY_i:2018
--
OR producedDateY_i:2019
--
OR producedDateY_i:2020
--
OR producedDateY_i:2021
--
OR producedDateY_i:2022)
--
AND (structId_i:302102
--
OR structId_i:469216
--
OR structId_i:6279
--
OR structId_i:224096
--
OR structId_i:144103
--
OR structId_i:497330
--
OR structId_i:1076752
--
OR structId_i:84538
--
OR structId_i:301262
--
OR structId_i:481355
--
OR structId_i:29212
--
OR structId_i:301442
--
OR structId_i:542824
--
OR structId_i:300362
--
OR structId_i:1048346
--
OR structId_i:352124)|]
imt
::
[
T
.
Text
]
imt
=
[
"224096"
,
"144103"
,
"84538"
,
"300104"
,
"300362"
,
"301262"
,
"301442"
,
"301492"
,
"302102"
,
"421532"
,
"497330"
,
"352124"
,
"481355"
,
"469216"
,
"542824"
,
"6279"
,
"29212"
]
--
imt :: [T.Text]
--
imt = [
--
"224096"
--
,"144103"
--
,"84538"
--
,"300104"
--
,"300362"
--
,"301262"
--
,"301442"
--
,"301492"
--
,"302102"
--
,"421532"
--
,"497330"
--
,"352124"
--
,"481355"
--
,"469216"
--
,"542824"
--
,"6279"
--
,"29212"
--
]
cabal.project
View file @
f7b928da
...
...
@@ -6,5 +6,5 @@ packages:
./
allow-older: *
allow-newer: *
--
allow-older: *
allow-newer:
base:
*
crawlerHAL.cabal
View file @
f7b928da
...
...
@@ -48,18 +48,17 @@ library
RecordWildCards
TypeOperators
build-depends:
aeson >= 1.5.6
.0
&& < 1.6
aeson >= 1.5.6 && < 1.6
, base >=4.7 && <5
, bytestring >= 0.11.0 && < 0.13
, conduit >= 1.3.5 && < 1.4
, containers >= 0.6.7 && < 0.7
, data-default >= 0.7.1.1 && < 0.8
, http-client >= 0.7.13.1 && < 0.8
, http-client-tls >= 0.3.6.
2
&& < 0.4
, http-client-tls >= 0.3.6.
1
&& < 0.4
, iso639 >= 0.1.0.3 && < 0.2
, lens >= 5.2.2 && < 5.3
, neat-interpolation >= 0.5.1.3 && < 0.6
, optparse-applicative >= 0.18.1.0 && < 0.19
, lens >= 5.1.1 && < 5.3
, optparse-applicative >= 0.17 && < 0.19
, protolude >= 0.3.3 && < 0.4
, scientific >= 0.3.7.0 && < 0.4
, servant >= 0.19 && < 0.21
...
...
@@ -96,11 +95,10 @@ executable crawlerHAL-exe
, crawlerHAL
, data-default >= 0.7.1.1 && < 0.8
, http-client >= 0.7.13.1 && < 0.8
, http-client-tls >= 0.3.6.
2
&& < 0.4
, http-client-tls >= 0.3.6.
1
&& < 0.4
, iso639 >= 0.1.0.3 && < 0.2
, lens >= 5.2.2 && < 5.3
, neat-interpolation >= 0.5.1.3 && < 0.6
, optparse-applicative >= 0.18.1.0 && < 0.19
, lens >= 5.1.1 && < 5.3
, optparse-applicative >= 0.17 && < 0.19
, protolude >= 0.3.3 && < 0.4
, scientific >= 0.3.7.0 && < 0.4
, servant >= 0.19 && < 0.21
...
...
@@ -137,11 +135,10 @@ test-suite halCrawler-test
, data-default >= 0.7.1.1 && < 0.8
, halCrawler
, http-client >= 0.7.13.1 && < 0.8
, http-client-tls >= 0.3.6.
2
&& < 0.4
, http-client-tls >= 0.3.6.
1
&& < 0.4
, iso639 >= 0.1.0.3 && < 0.2
, lens >= 5.2.2 && < 5.3
, neat-interpolation >= 0.5.1.3 && < 0.6
, optparse-applicative >= 0.18.1.0 && < 0.19
, lens >= 5.1.1 && < 5.3
, optparse-applicative >= 0.17 && < 0.19
, protolude >= 0.3.3 && < 0.4
, scientific >= 0.3.7.0 && < 0.4
, servant >= 0.19 && < 0.21
...
...
src/HAL.hs
View file @
f7b928da
...
...
@@ -7,7 +7,7 @@ import Data.Text
import
HAL.Client
import
HAL.Doc.Corpus
import
HAL.Doc.Struct
import
HAL.Utils
(
langAbstractS
)
import
HAL.Utils
(
langAbstractS
,
toText
)
import
Network.HTTP.Client
(
newManager
)
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
import
Protolude
...
...
@@ -22,20 +22,31 @@ type Start = Int
type
Limit
=
Integer
type
Count
=
Integer
getMetadataWith
::
Query
-- | Extend a list of queries with a language filter.
--
-- With 'Nothing' the queries are returned untouched. With @Just lang@ one
-- extra query, @"language_s:"@ followed by @toText lang@, is appended after
-- the existing ones.
queryWithLang :: Maybe ISO639_1 -> [Query] -> [Query]
queryWithLang mLang qs = maybe qs withFilter mLang
  where
    -- Existing queries first, the language clause last.
    withFilter lang = qs <> ["language_s:" <> toText lang]
getMetadataWith
::
[
Query
]
->
Maybe
Start
->
Maybe
Limit
->
Maybe
ISO639_1
->
IO
(
Either
ClientError
(
Response
Corpus
))
getMetadataWith
q
start_
limit
lang
=
do
runHalAPIClient
$
search
(
Just
$
requestedFields
lang
)
[
q
]
Nothing
start_
limit
-- Run a single HAL @search@ request through 'runHalAPIClient'.
--
-- * @qs@     - queries to send; they are first passed through
--              'queryWithLang' so that a language filter is appended when
--              @lang@ is a 'Just'.
-- * @start_@ - forwarded unchanged to @search@.
-- * @limit@  - forwarded unchanged to @search@.
-- * @lang@   - also selects the requested fields via 'requestedFields'.
--
-- The query list must be bound as the single name @qs@: the body refers to
-- @qs@, and the signature takes exactly four arguments.
getMetadataWith qs start_ limit lang =
  runHalAPIClient $
    search
      (Just $ requestedFields lang)
      (queryWithLang lang qs)
      Nothing -- NOTE(review): meaning of this argument is defined by @search@ in HAL.Client; confirm there
      start_
      limit
getMetadataWithC
::
[
Query
]
->
Maybe
Start
->
Maybe
Limit
->
Maybe
ISO639_1
->
IO
(
Either
ClientError
(
Maybe
Count
,
ConduitT
()
Corpus
IO
()
))
getMetadataWithC
qs
start_
limit
lang
=
do
-- Delegates to 'getMetadataWithLangC' after extending the queries with the
-- language filter produced by 'queryWithLang'. The remaining arguments
-- (including @lang@ itself) are passed through unchanged.
getMetadataWithC qs start_ limit lang =
  getMetadataWithLangC langQueries start_ limit lang
  where
    langQueries = queryWithLang lang qs
getMetadataWithLangC
::
[
Query
]
->
Maybe
Start
->
Maybe
Limit
->
Maybe
ISO639_1
->
IO
(
Either
ClientError
(
Maybe
Count
,
ConduitT
()
Corpus
IO
()
))
getMetadataWithLangC
qs
start_
limit
lang
=
do
-- First, estimate the total number of documents
eCount
<-
countResults
qs
pure
$
get'
<$>
eCount
...
...
src/HAL/Doc/Corpus.hs
View file @
f7b928da
...
...
@@ -4,7 +4,6 @@ module HAL.Doc.Corpus where
import
Control.Lens
qualified
as
L
import
Data.Aeson
import
Data.Aeson.Key
(
fromText
)
import
Data.Default
import
Data.Map.Strict
qualified
as
Map
import
GHC.Generics
...
...
@@ -43,7 +42,7 @@ instance FromJSON Corpus where
abstracts
<-
mapM
(
\
lang
->
do
ma
<-
o
.:?
(
fromText
$
langAbstractS
lang
)
ma
<-
o
.:?
(
langAbstractS
lang
)
pure
$
(
\
a
->
(
lang
,
a
))
<$>
ma
)
allLangs
let
_corpus_abstract_lang_map
=
Map
.
fromList
$
catMaybes
abstracts
...
...
src/HAL/Utils.hs
View file @
f7b928da
...
...
@@ -7,7 +7,10 @@ import Protolude
-- | The 'ISO639_1' values obtained by enumerating upwards from @toEnum 0@.
--
-- NOTE(review): presumably this covers every language code; that depends on
-- the 'Enum' instance of 'ISO639_1', which is defined outside this module.
allLangs :: [ISO639_1]
allLangs = [toEnum 0 ..]
langAbstractS
::
ISO639_1
->
Text
langAbstractS
lang
=
(
T
.
pack
[
l1
,
l2
])
<>
"_abstract_s"
-- | Render a language as 'Text' built from the two characters returned by
-- 'toChars', in that order.
toText :: ISO639_1 -> Text
toText lang =
  let (c1, c2) = toChars lang
   in T.pack [c1, c2]
-- | Field name for a language's abstract: the 'toText' rendering of the
-- language followed by the suffix @_abstract_s@.
langAbstractS :: ISO639_1 -> Text
langAbstractS = (<> "_abstract_s") . toText
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment