Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
159
Issues
159
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
3d9b4e21
Commit
3d9b4e21
authored
Dec 13, 2024
by
Grégoire Locqueville
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Removed dead code in `Core.Text...`
parent
91c73c45
Changes
15
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
17 additions
and
534 deletions
+17
-534
gargantext.cabal
gargantext.cabal
+0
-4
Text.hs
src/Gargantext/Core/Text.hs
+0
-13
Context.hs
src/Gargantext/Core/Text/Context.hs
+0
-29
Corpus.hs
src/Gargantext/Core/Text/Corpus.hs
+2
-19
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+0
-5
Isidore.hs
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
+3
-14
Pubmed.hs
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
+0
-2
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+0
-40
Date.hs
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
+1
-12
FrameWrite.hs
src/Gargantext/Core/Text/Corpus/Parsers/FrameWrite.hs
+0
-73
Iramuteq.hs
src/Gargantext/Core/Text/Corpus/Parsers/Iramuteq.hs
+5
-6
Isidore.hs
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
+0
-141
TSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/TSV.hs
+1
-172
Query.hs
src/Gargantext/Core/Text/Corpus/Query.hs
+0
-4
weeder.toml
weeder.toml
+5
-0
No files found.
gargantext.cabal
View file @
3d9b4e21
...
...
@@ -362,7 +362,6 @@ library
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
Gargantext.Core.Text.Corpus.Parsers.GrandDebat
Gargantext.Core.Text.Corpus.Parsers.Iramuteq
Gargantext.Core.Text.Corpus.Parsers.Isidore
Gargantext.Core.Text.Corpus.Parsers.JSON
Gargantext.Core.Text.Corpus.Parsers.JSON.Istex
Gargantext.Core.Text.Corpus.Parsers.RIS
...
...
@@ -530,7 +529,6 @@ library
, haskell-throttle
, hlcm ^>= 0.2.2
, hsinfomap ^>= 0.1
, hsparql ^>= 0.3.8
, hstatistics ^>= 0.3.1
, http-api-data >= 0.5 && < 0.6
, http-client ^>= 0.7.14
...
...
@@ -573,7 +571,6 @@ library
, quickcheck-instances ^>= 0.3.25.2
, rake ^>= 0.0.1
, random ^>= 1.2.1
, rdf4h ^>= 3.1.1
, regex
, replace-attoparsec ^>= 1.5.0.0
, resource-pool >= 0.4.0.0 && < 0.5
...
...
@@ -604,7 +601,6 @@ library
, stm-containers >= 1.2.0.3 && < 1.3
, stringsearch >= 0.3.6.6
, swagger2 ^>= 2.8.7
, tagsoup ^>= 0.14.8
, template-haskell ^>= 2.19.0.0
, temporary ^>= 1.3
, text ^>= 2.0.2
...
...
src/Gargantext/Core/Text.hs
View file @
3d9b4e21
...
...
@@ -15,7 +15,6 @@ Text gathers terms in unit of contexts.
module
Gargantext.Core.Text
where
import
Data.Text
(
split
)
import
Data.Text
qualified
as
DT
import
Gargantext.Prelude
hiding
(
filter
)
import
NLP.FullStop
(
segment
)
...
...
@@ -85,18 +84,6 @@ instance Collage MultiTerme Mot where
sentences
::
Text
->
[
Text
]
sentences
txt
=
map
DT
.
pack
$
segment
$
DT
.
unpack
txt
sentences'
::
Text
->
[
Text
]
sentences'
txt
=
split
isCharStop
txt
isCharStop
::
Char
->
Bool
isCharStop
c
=
c
`
elem
`
[
'.'
,
'?'
,
'!'
]
unsentences
::
[
Text
]
->
Text
unsentences
txts
=
DT
.
unwords
txts
-- | Ngrams size
size
::
Text
->
Int
size
t
=
1
+
DT
.
count
" "
t
src/Gargantext/Core/Text/Context.hs
View file @
3d9b4e21
...
...
@@ -23,10 +23,7 @@ How to split contexts is describes in this module.
module
Gargantext.Core.Text.Context
where
import
Data.Text
(
pack
,
unpack
)
import
Gargantext.Core.Text
import
Gargantext.Prelude
hiding
(
length
)
import
Text.HTML.TagSoup
(
parseTags
,
isTagText
,
Tag
(
..
))
------------------------------------------------------------------------
type
Term
=
Text
...
...
@@ -38,31 +35,5 @@ type TermList = [(Label, [MultiTerm])]
type
Sentence
a
=
[
a
]
-- or a nominal group
type
Corpus
a
=
[
Sentence
a
]
-- a list of sentences
-- type ConText a = [Sentence a]
-- type Corpus a = [ConText a]
------------------------------------------------------------------------
-- | Contexts definition to build/unbuild contexts.
data
SplitContext
=
Chars
Int
|
Sentences
Int
|
Paragraphs
Int
-- | splitBy contexts of Chars or Sentences or Paragraphs
-- To see some examples at a higher level (sentences and paragraph), see
-- 'Gargantext.Core.Text.Examples.ex_terms'
--
-- >>> splitBy (Chars 0) (pack "abcde")
-- ["a","b","c","d","e"]
--
-- >>> splitBy (Chars 1) (pack "abcde")
-- ["ab","bc","cd","de"]
--
-- >>> splitBy (Chars 2) (pack "abcde")
-- ["abc","bcd","cde"]
splitBy
::
SplitContext
->
Text
->
[
Text
]
splitBy
(
Chars
n
)
=
map
pack
.
chunkAlong
(
n
+
1
)
1
.
unpack
splitBy
(
Sentences
n
)
=
map
unsentences
.
chunkAlong
(
n
+
1
)
1
.
sentences
splitBy
(
Paragraphs
_
)
=
map
unTag
.
filter
isTagText
.
parseTags
where
unTag
::
IsString
p
=>
Tag
p
->
p
unTag
(
TagText
x
)
=
x
unTag
_
=
""
src/Gargantext/Core/Text/Corpus.hs
View file @
3d9b4e21
module
Gargantext.Core.Text.Corpus
(
makeSubcorpusFromQuery
,
subcorpusEasy
)
where
module
Gargantext.Core.Text.Corpus
(
makeSubcorpusFromQuery
)
where
import
Control.Lens
(
view
)
import
Data.Set.Internal
qualified
as
Set
(
singleton
)
import
Data.Text
qualified
as
T
import
Gargantext.API.Dev
(
runCmdReplEasy
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
(
InternalNodeError
))
import
Gargantext.Core
(
Lang
(
EN
))
import
Gargantext.Core.NodeStory.Types
(
HasNodeStoryEnv
)
...
...
@@ -18,7 +16,7 @@ import Gargantext.Database.Action.Metrics (updateContextScore, updateNgramsOccur
import
Gargantext.Database.Action.Search
(
searchInCorpus
)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
,
hc_lang
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
Node
Id
(
UnsafeMkNodeId
),
Node
Type
(
..
),
nodeId2ContextId
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
NodeType
(
..
),
nodeId2ContextId
)
import
Gargantext.Database.Prelude
(
DBCmdWithEnv
)
import
Gargantext.Database.Query.Facet.Types
(
facetDoc_id
)
import
Gargantext.Database.Query.Table.Node
(
insertDefaultNode
,
copyNodeStories
,
defaultList
,
getNodeWithType
)
...
...
@@ -28,21 +26,6 @@ import Gargantext.Database.Schema.Node (node_hyperdata)
import
Gargantext.Prelude
-- | A version of the below function for use in the REPL (so you don't need to
-- manually import tons of constructors etc.)
subcorpusEasy
::
Text
-- ^ Username
->
Int
-- ^ Original corpus ID
->
Text
-- ^ Search string
->
Bool
-- ^ Whether to reuse the parent term list (True) or recompute one from scratch (False)
->
IO
()
subcorpusEasy
username
cId
rawQuery
reuseParentList
=
do
let
eitherQuery
=
Q
.
parseQuery
$
Q
.
RawQuery
rawQuery
case
eitherQuery
of
Left
msg
->
print
$
"Error parsing query
\"
"
<>
rawQuery
<>
"
\"
: "
<>
T
.
pack
msg
Right
query
->
void
$
runCmdReplEasy
$
makeSubcorpusFromQuery
(
UserName
username
)
(
UnsafeMkNodeId
cId
)
query
reuseParentList
-- | Given a "parent" corpus and a query, search for all docs in the parent
-- that match the query, and create a corpus from those. The created corpus
-- is inserted in the tree as a child of the parent corpus.
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
3d9b4e21
...
...
@@ -23,13 +23,8 @@ import Gargantext.Defaults qualified as Defaults
import
Gargantext.Prelude
hiding
(
intercalate
)
import
HAL
qualified
import
HAL.Doc.Document
qualified
as
HAL
import
HAL.Types
qualified
as
HAL
import
Servant.Client
(
ClientError
)
get
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
[
HyperdataDocument
]
get
la
q
ml
=
do
eDocs
<-
HAL
.
getMetadataWith
[
q
]
(
Just
0
)
(
fromIntegral
<$>
ml
)
la
either
(
panicTrace
.
pack
.
show
)
(
mapM
(
toDoc'
la
)
.
HAL
.
_docs
)
eDocs
getC
::
Maybe
ISO639
.
ISO639_1
->
Text
->
Maybe
Int
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
getC
la
q
ml
=
do
...
...
src/Gargantext/Core/Text/Corpus/API/Isidore.hs
View file @
3d9b4e21
...
...
@@ -11,22 +11,18 @@ Portability : POSIX
{-# LANGUAGE ScopedTypeVariables #-}
module
Gargantext.Core.Text.Corpus.API.Isidore
(
get
-- * Internals (possibly unused?)
,
isidore2tsvFile
module
Gargantext.Core.Text.Corpus.API.Isidore
(
get
)
where
import
Data.Text
qualified
as
Text
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
cleanText
)
import
Gargantext.Core.Text.Corpus.Parsers.TSV
(
writeDocs2Tsv
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
qualified
as
Date
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
import
Gargantext.Defaults
qualified
as
Defaults
import
Gargantext.Prelude
hiding
(
get
)
import
Isidore
qualified
as
Isidore
import
Isidore
qualified
import
Isidore.Client
import
Servant.Client
(
ClientError
(
DecodeFailure
)
)
...
...
@@ -50,13 +46,6 @@ get lang l q a = do
hDocs
<-
mapM
(
isidoreToDoc
lang
)
(
toIsidoreDocs
iDocs
)
pure
hDocs
isidore2tsvFile
::
FilePath
->
Lang
->
Maybe
Isidore
.
Limit
->
Maybe
Isidore
.
TextQuery
->
Maybe
Isidore
.
AuthorQuery
->
IO
()
isidore2tsvFile
fp
lang
li
tq
aq
=
do
hdocs
<-
get
lang
li
tq
aq
writeDocs2Tsv
fp
hdocs
isidoreToDoc
::
Lang
->
IsidoreDoc
->
IO
HyperdataDocument
isidoreToDoc
lang
(
IsidoreDoc
t
a
d
u
s
as
)
=
do
let
...
...
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
View file @
3d9b4e21
...
...
@@ -10,12 +10,10 @@ Portability : POSIX
-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}
module
Gargantext.Core.Text.Corpus.API.Pubmed
(
get
-- * Internals for testing
,
ESearch
(
..
)
,
convertQuery
,
getESearch
)
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
3d9b4e21
...
...
@@ -29,7 +29,6 @@ module Gargantext.Core.Text.Corpus.Parsers (
,
cleanText
,
parseFormatC
,
splitOn
,
etale
)
where
-- import Gargantext.Core.Text.Learn (detectLangDefault)
...
...
@@ -49,7 +48,6 @@ import Gargantext.API.Node.Corpus.New.Types (FileFormat(..))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers.TSV
(
parseHal
,
parseTsv
,
parseTsvC
)
import
Gargantext.Core.Text.Corpus.Parsers.Date
qualified
as
Date
import
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
(
text2titleParagraphs
)
import
Gargantext.Core.Text.Corpus.Parsers.Iramuteq
qualified
as
Iramuteq
import
Gargantext.Core.Text.Corpus.Parsers.JSON
(
parseJSONC
,
parseIstex
)
import
Gargantext.Core.Text.Corpus.Parsers.RIS
qualified
as
RIS
...
...
@@ -178,44 +176,6 @@ filterZIPFileNameP Istex f = (takeExtension (unEntrySelector f) == ".json") &&
filterZIPFileNameP
_
_
=
True
etale
::
[
HyperdataDocument
]
->
[
HyperdataDocument
]
etale
=
concatMap
etale'
where
etale'
::
HyperdataDocument
->
[
HyperdataDocument
]
etale'
h
=
map
(
\
t
->
h
{
_hd_abstract
=
Just
t
})
$
map
snd
$
text2titleParagraphs
7
(
maybe
""
identity
$
_hd_abstract
h
)
-- parseFormat :: FileType -> DB.ByteString -> IO (Either Prelude.String [HyperdataDocument])
-- parseFormat TsvGargV3 bs = pure $ parseTsv' $ DBL.fromStrict bs
-- parseFormat TsvHal bs = pure $ parseHal' $ DBL.fromStrict bs
-- parseFormat RisPresse bs = do
-- docs <- mapM (toDoc RIS)
-- <$> snd
-- <$> enrichWith RisPresse
-- $ partitionEithers
-- $ [runParser' RisPresse bs]
-- pure $ Right docs
-- parseFormat WOS bs = do
-- docs <- mapM (toDoc WOS)
-- <$> snd
-- <$> enrichWith WOS
-- $ partitionEithers
-- $ [runParser' WOS bs]
-- pure $ Right docs
-- parseFormat ZIP bs = do
-- path <- emptySystemTempFile "parsed-zip"
-- DB.writeFile path bs
-- parsedZip <- withArchive path $ do
-- DM.keys <$> getEntries
-- pure $ Left $ "Not implemented for ZIP, parsedZip" <> show parsedZip
-- parseFormat _ _ = undefined
-- | Parse file into documents
-- TODO manage errors here
-- TODO: to debug maybe add the filepath in error message
parseFile
::
FileType
->
FileFormat
->
FilePath
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
View file @
3d9b4e21
...
...
@@ -21,16 +21,13 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
module
Gargantext.Core.Text.Corpus.Parsers.Date
(
dateSplit
,
mDateSplit
,
defaultDay
,
defaultUTCTime
,
split'
)
where
import
Data.List
qualified
as
List
import
Data.Text
(
unpack
,
splitOn
,
replace
)
import
Data.Time
(
defaultTimeLocale
,
iso8601DateFormat
,
parseTimeM
,
toGregorian
)
import
Data.Time.Calendar
qualified
as
DTC
import
Data.Time.Clock
(
UTCTime
(
..
),
secondsToDiffTime
)
import
Data.Time.Clock
(
UTCTime
(
..
))
import
Gargantext.Prelude
hiding
(
replace
)
------------------------------------------------------------------------
...
...
@@ -113,11 +110,3 @@ readDate txt = do
--let format = cs $ iso8601DateFormat (Just "%F %H:%M:%S")
let
format
=
cs
$
iso8601DateFormat
Nothing
parseTimeM
True
defaultTimeLocale
(
unpack
format
)
(
cs
txt
)
defaultDay
::
DTC
.
Day
defaultDay
=
DTC
.
fromGregorian
1
1
1
defaultUTCTime
::
UTCTime
defaultUTCTime
=
UTCTime
{
utctDay
=
defaultDay
,
utctDayTime
=
secondsToDiffTime
0
}
src/Gargantext/Core/Text/Corpus/Parsers/FrameWrite.hs
View file @
3d9b4e21
...
...
@@ -35,39 +35,6 @@ import Text.Parsec.String
-- par défaut: un doc == 1 NodeWrite
-- ## mean each ## section will be a new document with title the subsubsection title. Either it features options for author, date etc. or it will inherit the document's option.
sample
::
Text
sample
=
unlines
[
"title1"
-- , "title2"
-- , "=="
-- , "^@@authors: FirstName1, LastName1; FirstName2, LastName2"
,
"date: 2021-09-10"
,
"source: someSource"
,
"document contents 1"
,
"document contents 2"
]
sampleUnordered
::
Text
sampleUnordered
=
unlines
[
"title1"
,
"title2"
,
"=="
,
"document contents 1"
,
"date: 2021-09-10"
,
"authors: FirstName1, LastName1; FirstName2, LastName2"
,
"source: someSource"
,
"document contents 2"
]
-- parseSample = parse documentP "sample" (unpack sample)
-- parseSampleUnordered = parse documentP "sampleUnordered" (unpack sampleUnordered)
parseLinesSample
::
Either
ParseError
Parsed
parseLinesSample
=
parseLines
sample
parseLinesSampleUnordered
::
Either
ParseError
Parsed
parseLinesSampleUnordered
=
parseLines
sampleUnordered
data
Author
=
Author
{
firstName
::
Text
,
lastName
::
Text
}
...
...
@@ -114,14 +81,6 @@ parseLines text = foldl f emptyParsed <$> lst
f
(
Parsed
{
..
})
(
LSource
s
)
=
Parsed
{
source
=
Just
s
,
..
}
f
(
Parsed
{
..
})
(
LTitle
t
)
=
Parsed
{
title
=
t
,
..
}
-- Source should be the name of the node
-- First line of each Context should be the title.
documentLinesP
::
Parser
[
Line
]
documentLinesP
=
do
t
<-
titleP
ls
<-
lineP
`
sepBy
`
newline
pure
$
[
LTitle
$
pack
t
]
++
ls
documentLines
::
Parser
[
Line
]
documentLines
=
do
ls
<-
lineP
`
sepBy
`
newline
...
...
@@ -157,27 +116,6 @@ contentsLineP = do
--------------------
-- documentP = do
-- t <- titleP
-- a <- optionMaybe authorsP
-- d <- optionMaybe dateP
-- s <- optionMaybe sourceP
-- c <- contentsP
-- pure $ Parsed { title = pack t
-- , authors = fromMaybe [] a
-- , date = pack <$> d
-- , source = pack <$> s
-- , contents = pack c }
titleDelimiterP
::
Parser
()
titleDelimiterP
=
do
_
<-
newline
-- _ <- try (string "==")
pure
()
titleP
::
Parser
[
Char
]
titleP
=
manyTill
anyChar
(
try
titleDelimiterP
)
authorsPrefixP
::
Parser
[
Char
]
authorsPrefixP
=
do
_
<-
string
"authors:"
...
...
@@ -225,12 +163,6 @@ sourceP = try sourcePrefixP
_
<-
string
"source:"
many
(
char
' '
)
-- contentsP :: Parser String
-- contentsP = many anyChar
tokenEnd
::
Parser
()
tokenEnd
=
void
(
char
'
\n
'
)
<|>
eof
--- MISC Tools
-- Using ChunkAlong here enable redundancies in short corpora of texts
-- maybe use splitEvery or chunkAlong depending on the size of the whole text
...
...
@@ -249,8 +181,3 @@ text2titleParagraphs n = catMaybes
doTitle
::
[
Text
]
->
Maybe
(
Text
,
Text
)
doTitle
(
t
:
ts
)
=
Just
(
t
,
DT
.
unwords
ts
)
doTitle
[]
=
Nothing
clean
::
Text
->
Text
clean
=
DT
.
unwords
.
List
.
filter
(
\
w
->
DT
.
length
w
<
25
)
.
DT
.
words
src/Gargantext/Core/Text/Corpus/Parsers/Iramuteq.hs
View file @
3d9b4e21
...
...
@@ -12,19 +12,18 @@ commentary with @some markup@.
-}
module
Gargantext.Core.Text.Corpus.Parsers.Iramuteq
(
parseIramuteqFile
,
parser
,
keys
)
where
module
Gargantext.Core.Text.Corpus.Parsers.Iramuteq
(
parser
,
keys
)
where
import
Control.Applicative
import
Data.Attoparsec.ByteString
(
Parser
,
takeTill
,
parseOnly
)
import
Data.Attoparsec.ByteString.Char8
(
isEndOfLine
,
takeWhile
,
endOfLine
)
import
Data.ByteString
(
ByteString
)
import
Prelude
hiding
(
takeWhile
,
take
,
concat
,
readFile
,
lines
,
concat
)
import
qualified
Data.ByteString
as
DB
parseIramuteqFile
::
FilePath
->
IO
(
Either
String
[[(
ByteString
,
ByteString
)]])
parseIramuteqFile
fp
=
do
txts
<-
DB
.
readFile
fp
pure
$
parseOnly
parser
txts
-------------------------------------------------------------
parser
::
Parser
[[(
ByteString
,
ByteString
)]]
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
deleted
100644 → 0
View file @
91c73c45
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.Isidore
Description : To query French Humanities publication database
Copyright : (c) CNRS, 2019-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO:
- put endpoint in configuration file
- more flexible fields of research
- type database name
- use more ontologies to help building corpora
-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE ScopedTypeVariables #-}
module
Gargantext.Core.Text.Corpus.Parsers.Isidore
where
import
Data.ByteString.Lazy
(
ByteString
)
import
Data.RDF
(
Node
(
LNode
,
UNode
),
LValue
(
PlainLL
,
TypedL
,
PlainL
)
)
import
Data.Text
qualified
as
T
import
Database.HSparql.Connection
(
BindingValue
(
..
),
EndPoint
,
structureContent
)
import
Database.HSparql.QueryGenerator
import
Gargantext.Core
(
Lang
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
import
Gargantext.Prelude
hiding
(
ByteString
)
import
Network.Wreq
(
getWith
,
Response
,
defaults
,
header
,
param
,
responseStatus
,
responseBody
)
import
Prelude
qualified
route
::
EndPoint
route
=
"https://isidore.science/sparql/"
selectQueryRaw'
::
Prelude
.
String
->
Prelude
.
String
->
IO
(
Response
ByteString
)
selectQueryRaw'
uri
q
=
getWith
opts
uri
where
opts
=
defaults
&
header
"Accept"
.~
[
"application/sparql-results+xml"
]
&
header
"User-Agent"
.~
[
"gargantext-hsparql-client"
]
&
param
"query"
.~
[
T
.
pack
q
]
isidoreGet
::
Lang
->
Int
->
Text
->
IO
(
Maybe
[
HyperdataDocument
])
isidoreGet
la
li
q
=
do
bindingValues
<-
isidoreGet'
li
q
case
bindingValues
of
Nothing
->
pure
Nothing
Just
dv
->
pure
$
Just
$
map
(
bind2doc
la
)
dv
isidoreGet'
::
Int
->
Text
->
IO
(
Maybe
[[
BindingValue
]])
isidoreGet'
l
q
=
do
let
s
=
createSelectQuery
$
isidoreSelect
l
q
putStrLn
s
r
<-
selectQueryRaw'
route
s
putStrLn
(
show
$
r
^.
responseStatus
::
Text
)
pure
$
structureContent
$
r
^.
responseBody
-- res <- selectQuery route $ simpleSelect q
-- pure res
isidoreSelect
::
Int
->
Text
->
Query
SelectQuery
isidoreSelect
lim
q
=
do
-- See Predefined Namespace Prefixes:
-- https://isidore.science/sparql?nsdecl
isidore
<-
prefix
"isidore"
(
iriRef
"http://isidore.science/class/"
)
rdf
<-
prefix
"rdf"
(
iriRef
"http://www.w3.org/1999/02/22-rdf-syntax-ns#"
)
dcterms
<-
prefix
"dcterms"
(
iriRef
"http://purl.org/dc/terms/"
)
dc
<-
prefix
"dc"
(
iriRef
"http://purl.org/dc/elements/1.1/"
)
--iso <- prefix "fra" (iriRef "http://lexvo.org/id/iso639-3/")
--ore <- prefix "ore" (iriRef "http://www.openarchives.org/ore/terms/")
--bif <- prefix "bif" (iriRef "bif:")
link'
<-
var
title
<-
var
date
<-
var
abstract
<-
var
authors
<-
var
source
<-
var
langDoc
<-
var
publisher
<-
var
--agg <- var
triple_
link'
(
rdf
.:.
"type"
)
(
isidore
.:.
"Document"
)
triple_
link'
(
dcterms
.:.
"title"
)
title
triple_
link'
(
dcterms
.:.
"date"
)
date
triple_
link'
(
dcterms
.:.
"creator"
)
authors
--triple_ link (dcterms .:. "language") langDoc
triple_
link'
(
dc
.:.
"description"
)
abstract
--triple_ link (ore .:. "isAggregatedBy") agg
--triple_ agg (dcterms .:. "title") title
optional_
$
triple_
link'
(
dcterms
.:.
"source"
)
source
optional_
$
triple_
link'
(
dcterms
.:.
"publisher"
)
publisher
-- TODO FIX BUG with (.||.) operator
--filterExpr_ $ (.||.) (contains title q) (contains abstract q)
--filterExpr_ (containsWith authors q) -- (contains abstract q)
--filterExpr_ (containsWith title q) -- (contains abstract q)
--filterExpr_ $ (.||.) (containsWith title q) (contains abstract q)
filterExpr_
(
containsWith
title
q
)
-- TODO FIX filter with lang
--filterExpr_ $ langMatches title (str ("fra" :: Text))
--filterExpr_ $ (.==.) langDoc (str ("http://lexvo.org/id/iso639-3/fra" :: Text))
orderNextDesc
date
limit_
lim
distinct_
selectVars
[
link'
,
date
,
langDoc
,
authors
,
source
,
publisher
,
title
,
abstract
]
-- | TODO : check if all cases are taken into account
unbound
::
Lang
->
BindingValue
->
Maybe
Text
unbound
_
Unbound
=
Nothing
unbound
_
(
Bound
(
UNode
x
))
=
Just
x
unbound
_
(
Bound
(
LNode
(
TypedL
x
_
)))
=
Just
x
unbound
_
(
Bound
(
LNode
(
PlainL
x
)))
=
Just
x
unbound
l
(
Bound
(
LNode
(
PlainLL
x
l'
)))
=
if
l'
==
T
.
toLower
(
show
l
)
then
Just
x
else
Nothing
unbound
_
_
=
Nothing
bind2doc
::
Lang
->
[
BindingValue
]
->
HyperdataDocument
bind2doc
l
[
link'
,
date
,
langDoc
,
authors
,
_source
,
publisher
,
title
,
abstract
]
=
HyperdataDocument
{
_hd_bdd
=
Just
"Isidore"
,
_hd_doi
=
Nothing
,
_hd_url
=
unbound
l
link'
,
_hd_page
=
Nothing
,
_hd_title
=
unbound
l
title
,
_hd_authors
=
unbound
l
authors
,
_hd_institutes
=
Nothing
,
_hd_source
=
unbound
l
publisher
,
_hd_abstract
=
unbound
l
abstract
,
_hd_publication_date
=
unbound
l
date
,
_hd_publication_year
=
Nothing
,
_hd_publication_month
=
Nothing
,
_hd_publication_day
=
Nothing
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
unbound
l
langDoc
,
_hd_institutes_tree
=
Nothing
}
bind2doc
_
_
=
undefined
src/Gargantext/Core/Text/Corpus/Parsers/TSV.hs
View file @
3d9b4e21
This diff is collapsed.
Click to expand it.
src/Gargantext/Core/Text/Corpus/Query.hs
View file @
3d9b4e21
...
...
@@ -8,7 +8,6 @@ module Gargantext.Core.Text.Corpus.Query (
,
QueryTerm
(
..
)
,
getQuery
,
parseQuery
,
mapQuery
,
renderQuery
,
renderQueryTerm
,
interpretQuery
...
...
@@ -130,6 +129,3 @@ parseQuery (RawQuery txt) = bimap show (Query . BoolExpr.boolTreeToCNF) $
renderQuery
::
Query
->
RawQuery
renderQuery
(
Query
cnf
)
=
RawQuery
.
T
.
pack
$
BoolExpr
.
boolExprPrinter
(
showsPrec
0
)
(
BoolExpr
.
fromCNF
cnf
)
""
mapQuery
::
(
QueryTerm
->
QueryTerm
)
->
Query
->
Query
mapQuery
f
=
Query
.
fmap
(
map
f
)
.
getQuery
weeder.toml
View file @
3d9b4e21
...
...
@@ -22,6 +22,11 @@ roots = [ '^Main\.main$'
,
'^Gargantext\.API\.Ngrams\.List\.importTsvFile$'
# Used by the tests
,
'^Gargantext\.Core\.Text\.Corpus\.API\.Pubmed\.convertQuery$'
,
'^Gargantext\.Core\.Text\.Corpus\.API\.Pubmed\.getESearch$'
# Template Haskell
# Weeder is not smart enough to know what functions will be used by
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment