Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
hal
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
hal
Commits
f7b928da
Verified
Commit
f7b928da
authored
1 year ago
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[refactoring] fixes to dependencies
parent
8782d81e
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
77 additions
and
68 deletions
+77
-68
Main.hs
app/Main.hs
+43
-44
cabal.project
cabal.project
+2
-2
crawlerHAL.cabal
crawlerHAL.cabal
+10
-13
HAL.hs
src/HAL.hs
+16
-5
Corpus.hs
src/HAL/Doc/Corpus.hs
+1
-2
Utils.hs
src/HAL/Utils.hs
+5
-2
No files found.
app/Main.hs
View file @
f7b928da
...
...
@@ -11,7 +11,6 @@ import HAL (getMetadataWith)
import
HAL.Client
import
HAL.Doc
import
HAL.Doc.Corpus
(
Corpus
(
..
))
import
NeatInterpolation
(
text
)
import
Network.HTTP.Client
(
newManager
)
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
import
Options.Applicative
...
...
@@ -84,47 +83,47 @@ run (Fetch (FetchParams { fp_query })) = do
-- data
yearReq
=
[
text
|
(language_t:en)
AND (producedDateY_i:2018
OR producedDateY_i:2019
OR producedDateY_i:2020
OR producedDateY_i:2021
OR producedDateY_i:2022)
AND (structId_i:302102
OR structId_i:469216
OR structId_i:6279
OR structId_i:224096
OR structId_i:144103
OR structId_i:497330
OR structId_i:1076752
OR structId_i:84538
OR structId_i:301262
OR structId_i:481355
OR structId_i:29212
OR structId_i:301442
OR structId_i:542824
OR structId_i:300362
OR structId_i:1048346
OR structId_i:352124)
|]
--
yearReq = [text|
--
(language_t:en)
--
AND (producedDateY_i:2018
--
OR producedDateY_i:2019
--
OR producedDateY_i:2020
--
OR producedDateY_i:2021
--
OR producedDateY_i:2022)
--
AND (structId_i:302102
--
OR structId_i:469216
--
OR structId_i:6279
--
OR structId_i:224096
--
OR structId_i:144103
--
OR structId_i:497330
--
OR structId_i:1076752
--
OR structId_i:84538
--
OR structId_i:301262
--
OR structId_i:481355
--
OR structId_i:29212
--
OR structId_i:301442
--
OR structId_i:542824
--
OR structId_i:300362
--
OR structId_i:1048346
--
OR structId_i:352124)|]
imt
::
[
T
.
Text
]
imt
=
[
"224096"
,
"144103"
,
"84538"
,
"300104"
,
"300362"
,
"301262"
,
"301442"
,
"301492"
,
"302102"
,
"421532"
,
"497330"
,
"352124"
,
"481355"
,
"469216"
,
"542824"
,
"6279"
,
"29212"
]
--
imt :: [T.Text]
--
imt = [
--
"224096"
--
,"144103"
--
,"84538"
--
,"300104"
--
,"300362"
--
,"301262"
--
,"301442"
--
,"301492"
--
,"302102"
--
,"421532"
--
,"497330"
--
,"352124"
--
,"481355"
--
,"469216"
--
,"542824"
--
,"6279"
--
,"29212"
--
]
This diff is collapsed.
Click to expand it.
cabal.project
View file @
f7b928da
...
...
@@ -6,5 +6,5 @@ packages:
./
allow-older: *
allow-newer: *
--
allow-older: *
allow-newer:
base:
*
This diff is collapsed.
Click to expand it.
crawlerHAL.cabal
View file @
f7b928da
...
...
@@ -48,18 +48,17 @@ library
RecordWildCards
TypeOperators
build-depends:
aeson >= 1.5.6
.0
&& < 1.6
aeson >= 1.5.6 && < 1.6
, base >=4.7 && <5
, bytestring >= 0.11.0 && < 0.13
, conduit >= 1.3.5 && < 1.4
, containers >= 0.6.7 && < 0.7
, data-default >= 0.7.1.1 && < 0.8
, http-client >= 0.7.13.1 && < 0.8
, http-client-tls >= 0.3.6.
2
&& < 0.4
, http-client-tls >= 0.3.6.
1
&& < 0.4
, iso639 >= 0.1.0.3 && < 0.2
, lens >= 5.2.2 && < 5.3
, neat-interpolation >= 0.5.1.3 && < 0.6
, optparse-applicative >= 0.18.1.0 && < 0.19
, lens >= 5.1.1 && < 5.3
, optparse-applicative >= 0.17 && < 0.19
, protolude >= 0.3.3 && < 0.4
, scientific >= 0.3.7.0 && < 0.4
, servant >= 0.19 && < 0.21
...
...
@@ -96,11 +95,10 @@ executable crawlerHAL-exe
, crawlerHAL
, data-default >= 0.7.1.1 && < 0.8
, http-client >= 0.7.13.1 && < 0.8
, http-client-tls >= 0.3.6.
2
&& < 0.4
, http-client-tls >= 0.3.6.
1
&& < 0.4
, iso639 >= 0.1.0.3 && < 0.2
, lens >= 5.2.2 && < 5.3
, neat-interpolation >= 0.5.1.3 && < 0.6
, optparse-applicative >= 0.18.1.0 && < 0.19
, lens >= 5.1.1 && < 5.3
, optparse-applicative >= 0.17 && < 0.19
, protolude >= 0.3.3 && < 0.4
, scientific >= 0.3.7.0 && < 0.4
, servant >= 0.19 && < 0.21
...
...
@@ -137,11 +135,10 @@ test-suite halCrawler-test
, data-default >= 0.7.1.1 && < 0.8
, halCrawler
, http-client >= 0.7.13.1 && < 0.8
, http-client-tls >= 0.3.6.
2
&& < 0.4
, http-client-tls >= 0.3.6.
1
&& < 0.4
, iso639 >= 0.1.0.3 && < 0.2
, lens >= 5.2.2 && < 5.3
, neat-interpolation >= 0.5.1.3 && < 0.6
, optparse-applicative >= 0.18.1.0 && < 0.19
, lens >= 5.1.1 && < 5.3
, optparse-applicative >= 0.17 && < 0.19
, protolude >= 0.3.3 && < 0.4
, scientific >= 0.3.7.0 && < 0.4
, servant >= 0.19 && < 0.21
...
...
This diff is collapsed.
Click to expand it.
src/HAL.hs
View file @
f7b928da
...
...
@@ -7,7 +7,7 @@ import Data.Text
import
HAL.Client
import
HAL.Doc.Corpus
import
HAL.Doc.Struct
import
HAL.Utils
(
langAbstractS
)
import
HAL.Utils
(
langAbstractS
,
toText
)
import
Network.HTTP.Client
(
newManager
)
import
Network.HTTP.Client.TLS
(
tlsManagerSettings
)
import
Protolude
...
...
@@ -22,20 +22,31 @@ type Start = Int
type
Limit
=
Integer
type
Count
=
Integer
getMetadataWith
::
Query
queryWithLang
::
Maybe
ISO639_1
->
[
Query
]
->
[
Query
]
queryWithLang
Nothing
qs
=
qs
queryWithLang
(
Just
lang
)
qs
=
qs
<>
[
"language_s:"
<>
toText
lang
]
getMetadataWith
::
[
Query
]
->
Maybe
Start
->
Maybe
Limit
->
Maybe
ISO639_1
->
IO
(
Either
ClientError
(
Response
Corpus
))
getMetadataWith
q
start_
limit
lang
=
do
runHalAPIClient
$
search
(
Just
$
requestedFields
lang
)
[
q
]
Nothing
start_
limit
getMetadataWith
q
s
start_
limit
lang
=
do
runHalAPIClient
$
search
(
Just
$
requestedFields
lang
)
(
queryWithLang
lang
qs
)
Nothing
start_
limit
getMetadataWithC
::
[
Query
]
->
Maybe
Start
->
Maybe
Limit
->
Maybe
ISO639_1
->
IO
(
Either
ClientError
(
Maybe
Count
,
ConduitT
()
Corpus
IO
()
))
getMetadataWithC
qs
start_
limit
lang
=
do
getMetadataWithC
qs
start_
limit
lang
=
getMetadataWithLangC
(
queryWithLang
lang
qs
)
start_
limit
lang
getMetadataWithLangC
::
[
Query
]
->
Maybe
Start
->
Maybe
Limit
->
Maybe
ISO639_1
->
IO
(
Either
ClientError
(
Maybe
Count
,
ConduitT
()
Corpus
IO
()
))
getMetadataWithLangC
qs
start_
limit
lang
=
do
-- First, estimate the total number of documents
eCount
<-
countResults
qs
pure
$
get'
<$>
eCount
...
...
This diff is collapsed.
Click to expand it.
src/HAL/Doc/Corpus.hs
View file @
f7b928da
...
...
@@ -4,7 +4,6 @@ module HAL.Doc.Corpus where
import
Control.Lens
qualified
as
L
import
Data.Aeson
import
Data.Aeson.Key
(
fromText
)
import
Data.Default
import
Data.Map.Strict
qualified
as
Map
import
GHC.Generics
...
...
@@ -43,7 +42,7 @@ instance FromJSON Corpus where
abstracts
<-
mapM
(
\
lang
->
do
ma
<-
o
.:?
(
fromText
$
langAbstractS
lang
)
ma
<-
o
.:?
(
langAbstractS
lang
)
pure
$
(
\
a
->
(
lang
,
a
))
<$>
ma
)
allLangs
let
_corpus_abstract_lang_map
=
Map
.
fromList
$
catMaybes
abstracts
...
...
This diff is collapsed.
Click to expand it.
src/HAL/Utils.hs
View file @
f7b928da
...
...
@@ -7,7 +7,10 @@ import Protolude
allLangs
::
[
ISO639_1
]
allLangs
=
enumFrom
(
toEnum
0
)
::
[
ISO639_1
]
langAbstractS
::
ISO639_1
->
Text
langAbstractS
lang
=
(
T
.
pack
[
l1
,
l2
])
<>
"_abstract_s"
toText
::
ISO639_1
->
Text
toText
lang
=
T
.
pack
[
l1
,
l2
]
where
(
l1
,
l2
)
=
toChars
lang
langAbstractS
::
ISO639_1
->
Text
langAbstractS
lang
=
(
toText
lang
)
<>
"_abstract_s"
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment