Verified Commit 6e38bade authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 224-dev-uniform-ngrams-creation

parents 03427377 135a1220
## Version 0.0.6.9.9.9.6.5 [RELEASE CANDIDATE 007]
* [BACK][WIP][Singulars and plurals not grouped anymore (#169)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/169)
* [BACK][FEAT][Coherent Stemming interface (#324)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/324)
* [BACK][FIX] Order 1 aligned with first implementation with tests
## Version 0.0.6.9.9.9.6.4 [RELEASE CANDIDATE 007]
* [BACK][FEAT][[Node Phylo] Change the default value of findAncestors to False (#314)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/314)
* [BACK][OPTIM][Export Data as zip for all exports (#312)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/312)
* [BACK][METRICS] SQL queries to follow the community (export to CSV)
## Version 0.0.6.9.9.9.6.3 [Release Candidate for 007]
* [BACK][OPTIM] Option to enable GHC build with O2 option
......
...@@ -25,7 +25,7 @@ phyloConfig = PhyloConfig { ...@@ -25,7 +25,7 @@ phyloConfig = PhyloConfig {
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2} , similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1} , seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True , defaultMode = True
, findAncestors = True , findAncestors = False
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups} , phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3} , phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5} , timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
......
module Main where
import Prelude
import Data.TreeDiff.Class
import Data.TreeDiff.Pretty
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import System.Environment (getArgs)
import System.Exit (exitFailure)
import Control.Monad (unless)
import qualified Data.List as L
-- | Renders in a pretty way the difference between two golden files.
-- The first argument is the path to the file with the expected
-- output, the second the path to the actual data generated by the
-- test suite.
--
-- If the files differ, the mismatching line pairs are pretty-printed
-- via "Data.TreeDiff" and the program exits with a non-zero status;
-- otherwise it exits successfully and prints nothing.
main :: IO ()
main = do
  args <- getArgs
  case args of
    (refPath:newPath:_) -> do
      ref <- T.lines <$> TIO.readFile refPath
      new <- T.lines <$> TIO.readFile newPath
      -- Pad the shorter file with 'Nothing' so that trailing extra
      -- (or missing) lines still count as differences; a plain
      -- 'zip ref new' would silently drop them.
      let pad xs ys = map Just xs ++ replicate (max 0 (length ys - length xs)) Nothing
          differences = filter (uncurry (/=)) $ zip (pad ref new) (pad new ref)
      unless (L.null differences) $ do
        print $ ansiWlEditExpr $ ediff' (map fst differences) (map snd differences)
        exitFailure
    -- Previously a partial pattern match: fewer than two arguments
    -- crashed with an opaque pattern-match failure. Report usage
    -- explicitly instead.
    _ -> do
      putStrLn "Usage: golden-file-diff <expected-file> <actual-file>"
      exitFailure
...@@ -44,7 +44,7 @@ phyloConfig outdir = PhyloConfig { ...@@ -44,7 +44,7 @@ phyloConfig outdir = PhyloConfig {
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2} , similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1} , seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True , defaultMode = True
, findAncestors = True , findAncestors = False
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups} , phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3} , phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5} , timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
......
-- Community-growth metrics for the Gargantext database (PostgreSQL).
-- Node typename mapping inferred from the \COPY targets below:
-- 30 = corpora, 9 = graphs, 90 = phylos, 210 = teams.
-- NOTE(review): confirm this mapping against the application's
-- NodeType encoding.

-- Total number of corpus nodes.
select count(*) from nodes n where n.typename = 30;

-- Corpora: total count plus the share created in the last 3 months,
-- formatted as a percentage. Both NULLIF calls turn a zero count into
-- NULL so a division by zero is avoided (the result is then NULL
-- rather than an error).
WITH total AS (SELECT * from nodes n where n.typename = 30)
, sum AS (SELECT count(*) AS "TOTAL" from total)
, increase AS (SELECT count(*) from total as t WHERE t.date >= date_trunc('month', current_date - interval '3' month))
SELECT *,
(SELECT TO_CHAR((ROUND(100 * NULLIF((SELECT * from increase),0) / NULLIF((SELECT * from sum), 0))), 'fm99%') AS "CREATED LAST 3 MONTHS")
FROM sum

-- Graphs: same total / 3-month-growth report for typename = 9.
WITH total AS (SELECT * from nodes n where n.typename = 9)
, sum AS (SELECT count(*) AS "TOTAL" from total)
, increase AS (SELECT count(*) from total as t WHERE t.date >= date_trunc('month', current_date - interval '3' month))
SELECT *,
(SELECT TO_CHAR((ROUND(100 * NULLIF((SELECT * from increase),0) / NULLIF((SELECT * from sum), 0))), 'fm99%') AS "CREATED LAST 3 MONTHS")
FROM sum

-- Phylos: same report for typename = 90.
WITH total AS (SELECT * from nodes n where n.typename = 90)
, sum AS (SELECT count(*) AS "TOTAL" from total)
, increase AS (SELECT count(*) from total as t WHERE t.date >= date_trunc('month', current_date - interval '3' month))
SELECT *,
(SELECT TO_CHAR((ROUND(100 * NULLIF((SELECT * from increase),0) / NULLIF((SELECT * from sum), 0))), 'fm99%') AS "CREATED LAST 3 MONTHS")
FROM sum

-- Teams: same report for typename = 210.
WITH total AS (SELECT * from nodes n where n.typename = 210)
, sum AS (SELECT count(*) AS "TOTAL" from total)
, increase AS (SELECT count(*) from total as t WHERE t.date >= date_trunc('month', current_date - interval '3' month))
SELECT *,
(SELECT TO_CHAR((ROUND(100 * NULLIF((SELECT * from increase),0) / NULLIF((SELECT * from sum), 0))), 'fm99%') AS "CREATED LAST 3 MONTHS")
FROM sum

-- Users: same report, but over auth_user (Django-style accounts
-- table) using date_joined as the creation timestamp.
WITH total AS (SELECT * from auth_user as A)
, sum AS (SELECT count(*) AS "TOTAL" from total)
, increase AS (SELECT count(*) from total as t WHERE t.date_joined >= date_trunc('month', current_date - interval '3' month))
SELECT *,
(SELECT TO_CHAR((ROUND(100 * NULLIF((SELECT * from increase),0) / NULLIF((SELECT * from sum), 0))), 'fm99%') AS "CREATED LAST 3 MONTHS")
FROM sum

-- Total number of graph nodes.
select count(*) from nodes n where n.typename = 9;

-- psql-only exports (\COPY runs client-side): monthly creation counts
-- per entity type, written as CSV for external plotting.
\COPY (SELECT count(*), date_trunc('month', n.date) FROM nodes n WHERE n.typename = 30 GROUP BY 2 ORDER BY 2) TO '/tmp/corpora.csv' (FORMAT csv);
\COPY (SELECT count(*), date_trunc('month', n.date) from nodes n where n.typename = 9 group by 2 ORDER BY 2) TO '/tmp/graphs.csv' (FORMAT csv);
\COPY (select count(*), date_trunc('month', n.date) from nodes n where n.typename = 90 group by 2 ORDER BY 2) TO '/tmp/phylos.csv' (FORMAT csv);
\COPY (select count(*), date_trunc('month', n.date) from nodes n where n.typename = 210 group by 2 ORDER BY 2) TO '/tmp/teams.csv' (FORMAT csv);
\COPY (select count(*), date_trunc('month', a.date_joined) from auth_user a group by 2) TO '/tmp/users.csv' (FORMAT csv);

-- Remaining totals: phylos, teams, registered users.
select count(*) from nodes n where n.typename = 90;
select count(*) from nodes n where n.typename = 210;
select count(*) from auth_user;
...@@ -19,7 +19,7 @@ fi ...@@ -19,7 +19,7 @@ fi
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI # `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in. # cache can kick in.
expected_cabal_project_hash="c9fe39301e8b60bfd183e60e7e25a14cd1c9c66d8739bf9041ca3f4db89db7c6" expected_cabal_project_hash="c9fe39301e8b60bfd183e60e7e25a14cd1c9c66d8739bf9041ca3f4db89db7c6"
expected_cabal_project_freeze_hash="db24c7d3006167102532e3101e2b49bae13d478003459c7d3f1d66590e57740a" expected_cabal_project_freeze_hash="2c8960ffcf1b94aa11a3543e3b5facd2db5af19569fecaec4bc0ab4c1edd22a5"
cabal --store-dir=$STORE_DIR v2-build --dry-run cabal --store-dir=$STORE_DIR v2-build --dry-run
cabal2stack --system-ghc --allow-newer --resolver lts-21.17 --resolver-file devops/stack/lts-21.17.yaml -o stack.yaml cabal2stack --system-ghc --allow-newer --resolver lts-21.17 --resolver-file devops/stack/lts-21.17.yaml -o stack.yaml
......
active-repositories: hackage.haskell.org:merge
constraints: any.Cabal ==3.8.1.0,
any.Cabal-syntax ==3.8.1.0,
any.Glob ==0.10.2,
any.HSvm ==0.1.1.3.22,
any.HTTP ==4000.4.1,
HTTP -conduit10 +network-uri -warn-as-error -warp-tests,
any.HUnit ==1.6.2.0,
any.JuicyPixels ==3.3.8,
JuicyPixels -mmap,
any.KMP ==0.2.0.0,
any.List ==0.6.2,
any.MissingH ==1.4.3.0,
MissingH +network--ge-3_0_0,
any.MonadRandom ==0.6,
any.OneTuple ==0.4.1.1,
any.Only ==0.1,
any.QuickCheck ==2.14.3,
QuickCheck -old-random +templatehaskell,
any.RSA ==2.4.1,
any.SHA ==1.6.4.4,
SHA -exe,
any.StateVar ==1.2.2,
any.Unique ==0.4.7.8,
any.abstract-deque ==0.3,
abstract-deque -usecas,
any.accelerate ==1.3.0.0,
accelerate +bounds-checks -debug -internal-checks -nofib -unsafe-checks,
any.accelerate-arithmetic ==1.0.0.1,
any.accelerate-llvm ==1.3.0.0,
any.accelerate-llvm-native ==1.3.0.0,
any.accelerate-utility ==1.0.0.1,
any.adjunctions ==4.4.2,
any.aeson ==2.1.2.1,
aeson -cffi +ordered-keymap,
any.aeson-pretty ==0.8.9,
aeson-pretty -lib-only,
any.aeson-qq ==0.8.4,
any.alex ==3.3.0.0,
any.ansi-terminal ==0.11.5,
ansi-terminal -example -win32-2-13-1,
any.ansi-terminal-types ==0.11.5,
any.ansi-wl-pprint ==0.6.9,
ansi-wl-pprint -example,
any.appar ==0.1.8,
any.array ==0.5.4.0,
any.asn1-encoding ==0.9.6,
any.asn1-parse ==0.9.5,
any.asn1-types ==0.3.4,
any.assoc ==1.1,
assoc +tagged,
any.async ==2.2.4,
async -bench,
any.atomic-primops ==0.8.4,
atomic-primops -debug,
any.attoparsec ==0.14.4,
attoparsec -developer,
any.attoparsec-iso8601 ==1.1.0.0,
any.authenticate-oauth ==1.7,
any.auto-update ==0.1.6,
any.barbies ==2.0.4.0,
any.base ==4.17.2.0,
any.base-compat ==0.12.3,
any.base-compat-batteries ==0.12.3,
any.base-orphans ==0.9.1,
any.base-unicode-symbols ==0.2.4.2,
base-unicode-symbols +base-4-8 -old-base,
any.base16-bytestring ==1.0.2.0,
any.base64 ==0.4.2.4,
any.base64-bytestring ==1.2.1.0,
any.basement ==0.0.16,
any.basic-prelude ==0.7.0,
any.bifunctors ==5.5.15,
bifunctors +semigroups +tagged,
any.bimap ==0.5.0,
any.binary ==0.8.9.1,
any.binary-orphans ==1.0.4.1,
any.bindings-DSL ==1.0.25,
any.bitvec ==1.1.5.0,
bitvec +simd,
any.blaze-builder ==0.4.2.3,
any.blaze-html ==0.9.1.2,
any.blaze-markup ==0.8.3.0,
any.blaze-svg ==0.3.7,
any.boolexpr ==0.3,
any.boring ==0.2.1,
boring +tagged,
any.bsb-http-chunked ==0.0.0.4,
any.byteable ==0.1.1,
any.byteorder ==1.0.4,
any.bytestring ==0.11.5.2,
any.bytestring-builder ==0.10.8.2.0,
bytestring-builder +bytestring_has_builder,
any.bzlib-conduit ==0.3.0.2,
any.c2hs ==0.28.8,
c2hs +base3 -regression,
any.cabal-doctest ==1.0.9,
any.call-stack ==0.4.0,
any.case-insensitive ==1.2.1.0,
any.cassava ==0.5.3.0,
cassava -bytestring--lt-0_10_4,
any.cassava-conduit ==0.6.5,
cassava-conduit +small_base,
any.cborg ==0.2.9.0,
cborg +optimize-gmp,
any.cereal ==0.5.8.3,
cereal -bytestring-builder,
any.charset ==0.3.10,
any.chunked-data ==0.3.1,
any.cipher-aes ==0.2.11,
cipher-aes +support_aesni,
any.citeproc ==0.8.1,
citeproc -executable -icu,
any.classy-prelude ==1.5.0.3,
any.clock ==0.8.4,
clock -llvm,
any.cmdargs ==0.10.22,
cmdargs +quotation -testprog,
any.code-page ==0.2.1,
any.colour ==2.3.6,
any.commonmark ==0.2.3,
any.commonmark-extensions ==0.2.3.6,
any.commonmark-pandoc ==0.2.1.3,
any.comonad ==5.0.8,
comonad +containers +distributive +indexed-traversable,
any.concise ==0.1.0.1,
any.concurrent-output ==1.10.20,
any.conduit ==1.3.5,
any.conduit-extra ==1.3.6,
any.conduit-zstd ==0.0.2.0,
any.connection ==0.3.1,
any.constraints ==0.13.4,
any.constraints-extras ==0.4.0.0,
constraints-extras +build-readme,
any.containers ==0.6.7,
any.contravariant ==1.5.5,
contravariant +semigroups +statevar +tagged,
any.cookie ==0.4.6,
any.cpphs ==1.20.9.1,
cpphs -old-locale,
any.cprng-aes ==0.6.1,
any.crawlerArxiv ==0.1.0.0,
any.crawlerHAL ==0.1.0.0,
any.crawlerISTEX ==0.1.0.0,
any.crawlerIsidore ==0.1.0.0,
any.crawlerPubMed ==0.1.0.0,
any.criterion ==1.6.1.0,
criterion -embed-data-files -fast,
any.criterion-measurement ==0.2.1.0,
criterion-measurement -fast,
any.cron ==0.7.0,
cron -lib-werror,
any.crypto-api ==0.13.3,
crypto-api -all_cpolys,
any.crypto-cipher-types ==0.0.9,
any.crypto-pubkey-types ==0.4.3,
any.crypto-random ==0.0.9,
any.cryptohash ==0.11.9,
any.cryptohash-md5 ==0.11.101.0,
any.cryptohash-sha1 ==0.11.101.0,
any.crypton ==0.32,
crypton -check_alignment +integer-gmp -old_toolchain_inliner +support_aesni +support_deepseq +support_pclmuldq +support_rdrand -support_sse +use_target_attributes,
any.cryptonite ==0.30,
cryptonite -check_alignment +integer-gmp -old_toolchain_inliner +support_aesni +support_deepseq -support_pclmuldq +support_rdrand -support_sse +use_target_attributes,
any.data-default ==0.7.1.1,
any.data-default-class ==0.1.2.0,
any.data-default-instances-containers ==0.0.1,
any.data-default-instances-dlist ==0.0.1,
any.data-default-instances-old-locale ==0.0.1,
any.data-fix ==0.3.2,
any.data-ordlist ==0.4.7.0,
any.data-time-segment ==0.1.0.0,
any.dec ==0.0.5,
any.deepseq ==1.4.8.0,
any.dense-linear-algebra ==0.1.0.0,
any.dependent-sum ==0.7.1.0,
any.deriving-aeson ==0.2.9,
any.digest ==0.0.1.7,
digest +pkg-config,
any.directory ==1.3.7.1,
any.discrimination ==0.5,
any.distributive ==0.6.2.1,
distributive +semigroups +tagged,
any.dlist ==1.0,
dlist -werror,
any.dlist-instances ==0.1.1.1,
any.doclayout ==0.4.0.1,
any.doctemplates ==0.11,
any.double-conversion ==2.0.4.2,
double-conversion -developer +embedded_double_conversion,
any.duckling ==0.2.0.0,
any.easy-file ==0.2.5,
any.eigen ==3.3.7.0,
any.either ==5.0.2,
any.ekg-core ==0.1.1.7,
any.ekg-json ==0.1.0.8,
any.emojis ==0.1.3,
any.enclosed-exceptions ==1.0.3,
any.entropy ==0.4.1.10,
entropy -donotgetentropy,
any.epo-api-client ==0.1.0.0,
any.erf ==2.0.0.0,
any.exceptions ==0.10.5,
any.extensible-exceptions ==0.1.1.4,
any.extra ==1.7.14,
any.fail ==4.9.0.0,
any.fast-logger ==3.2.2,
any.fast-tagsoup ==1.0.14,
any.fclabels ==2.0.5.1,
any.fgl ==5.8.0.0,
fgl +containers042,
any.file-embed ==0.0.15.0,
any.file-embed-lzma ==0.0.1,
any.filelock ==0.1.1.7,
any.filepath ==1.4.2.2,
any.filepattern ==0.1.3,
any.fmt ==0.6.3.0,
any.foldable1-classes-compat ==0.1,
foldable1-classes-compat +tagged,
any.formatting ==7.2.0,
formatting +no-double-conversion,
any.free ==5.1.10,
any.full-text-search ==0.2.1.4,
full-text-search -build-search-demo,
any.fullstop ==0.1.4,
any.gargantext-graph ==0.1.0.0,
any.gargantext-prelude ==0.1.0.0,
any.generic-deriving ==1.14.5,
generic-deriving +base-4-9,
any.generic-monoid ==0.1.0.1,
any.generically ==0.1.1,
any.generics-sop ==0.5.1.3,
any.ghc ==9.4.7,
any.ghc-bignum ==1.3,
any.ghc-boot ==9.4.7,
any.ghc-boot-th ==9.4.7,
any.ghc-heap ==9.4.7,
any.ghc-lib-parser ==9.4.7.20230826,
ghc-lib-parser +threaded-rts,
any.ghc-lib-parser-ex ==9.4.0.0,
ghc-lib-parser-ex -auto -no-ghc-lib,
any.ghc-parser ==0.2.6.0,
any.ghc-paths ==0.1.0.12,
any.ghc-prim ==0.9.1,
any.ghc-syntax-highlighter ==0.0.9.0,
ghc-syntax-highlighter -dev,
any.ghci ==9.4.7,
any.githash ==0.1.7.0,
any.graphviz ==2999.20.1.0,
graphviz -test-parsing,
any.gridtables ==0.1.0.0,
any.groups ==0.5.3,
any.haddock-library ==1.11.0,
any.half ==0.3.1,
any.happy ==1.20.1.1,
any.hashable ==1.4.3.0,
hashable +integer-gmp -random-initial-seed,
any.hashtables ==1.3.1,
hashtables -bounds-checking -debug -detailed-profiling -portable -sse42 +unsafe-tricks,
any.haskeline ==0.8.2,
any.haskell-igraph ==0.10.4,
any.haskell-lexer ==1.1.1,
any.haskell-src-exts ==1.23.1,
any.haskell-src-meta ==0.8.12,
any.hedgehog ==1.2,
any.hexpat ==0.20.13,
hexpat -bundle,
any.hgal ==2.0.0.2,
any.hlcm ==0.2.2,
any.hlint ==3.5,
hlint +ghc-lib +gpl -hsyaml +threaded,
any.hmatrix ==0.20.2,
hmatrix -disable-default-paths -no-random_r -openblas,
any.hmatrix-gsl-stats ==0.4.1.8,
hmatrix-gsl-stats -onlygsl,
any.hourglass ==0.2.12,
any.hpc ==0.6.1.0,
any.hsc2hs ==0.68.10,
hsc2hs -in-ghc-tree,
any.hscolour ==1.24.4,
any.hsinfomap ==0.1,
any.hslogger ==1.3.1.0,
hslogger +network--gt-3_0_0,
any.hsparql ==0.3.8,
any.hspec ==2.11.1,
any.hspec-core ==2.11.1,
any.hspec-discover ==2.11.1,
any.hspec-expectations ==0.8.3,
any.hspec-wai ==0.11.1,
any.hspec-wai-json ==0.11.0,
any.hstatistics ==0.3.1,
any.http-api-data ==0.5,
http-api-data -use-text-show,
any.http-client ==0.7.14,
http-client +network-uri,
any.http-client-tls ==0.3.6.1,
any.http-conduit ==2.3.8.1,
http-conduit +aeson,
any.http-date ==0.0.11,
any.http-media ==0.8.1.1,
any.http-types ==0.12.3,
any.http2 ==4.1.4,
http2 -devel -h2spec,
any.hxt ==9.3.1.22,
hxt +network-uri -profile,
any.hxt-charproperties ==9.5.0.0,
hxt-charproperties -profile,
any.hxt-regex-xmlschema ==9.2.0.7,
hxt-regex-xmlschema -profile,
any.hxt-unicode ==9.0.2.4,
any.ihaskell ==0.11.0.0,
ihaskell +use-hlint,
any.indexed-profunctors ==0.1.1.1,
any.indexed-traversable ==0.1.3,
any.indexed-traversable-instances ==0.1.1.2,
any.ini ==0.4.2,
any.insert-ordered-containers ==0.2.5.3,
any.integer-gmp ==1.1,
any.integer-logarithms ==1.0.3.1,
integer-logarithms -check-bounds +integer-gmp,
any.invariant ==0.6.2,
any.io-streams ==1.5.2.2,
io-streams +network -nointeractivetests +zlib,
any.io-streams-haproxy ==1.0.1.0,
any.iproute ==1.7.12,
any.ipynb ==0.2,
any.ipython-kernel ==0.11.0.0,
ipython-kernel -examples,
any.iso639 ==0.1.0.3,
any.jira-wiki-markup ==1.5.1,
any.jose ==0.10,
jose -demos,
any.js-chart ==2.9.4.1,
any.json-stream ==0.4.5.3,
json-stream -conduit,
any.kan-extensions ==5.2.5,
any.keys ==3.12.3,
any.language-c ==0.9.2,
language-c -allwarnings +iecfpextension +usebytestrings,
any.lens ==5.2.3,
lens -benchmark-uniplate -dump-splices +inlining -j +test-hunit +test-properties +test-templates +trustworthy,
any.lens-aeson ==1.2.3,
any.libffi ==0.2.1,
libffi +ghc-bundled-libffi,
any.libyaml ==0.1.2,
libyaml -no-unicode -system-libyaml,
any.lifted-async ==0.10.2.4,
any.lifted-base ==0.2.3.12,
any.listsafe ==0.1.0.1,
any.llvm-hs ==12.0.0,
llvm-hs -debug -llvm-with-rtti +shared-llvm,
any.llvm-hs-pure ==12.0.0,
any.located-base ==0.1.1.1,
any.lockfree-queue ==0.2.4,
any.logging-effect ==1.3.12,
any.logict ==0.8.0.0,
any.loop ==0.3.0,
any.lzma ==0.0.1.0,
lzma +pkgconfig,
any.math-functions ==0.3.4.3,
math-functions +system-erf +system-expm1,
any.matrix ==0.3.6.1,
any.megaparsec ==9.3.1,
megaparsec -dev,
any.memory ==0.18.0,
memory +support_bytestring +support_deepseq,
any.microlens ==0.4.13.1,
any.microstache ==1.0.2.3,
any.mime-mail ==0.5.1,
any.mime-types ==0.1.2.0,
any.mmorph ==1.2.0,
any.modern-uri ==0.3.6.1,
modern-uri -dev,
any.monad-control ==1.0.3.1,
any.monad-logger ==0.3.40,
monad-logger +template_haskell,
any.monad-loops ==0.4.3,
monad-loops +base4,
any.monad-time ==0.4.0.0,
any.mono-traversable ==1.0.15.3,
any.mono-traversable-instances ==0.1.1.0,
any.monoid-extras ==0.6.2,
any.morpheus-graphql ==0.24.3,
any.morpheus-graphql-app ==0.24.3,
any.morpheus-graphql-client ==0.24.3,
any.morpheus-graphql-code-gen ==0.24.3,
any.morpheus-graphql-code-gen-utils ==0.24.3,
any.morpheus-graphql-core ==0.24.3,
any.morpheus-graphql-server ==0.24.3,
any.morpheus-graphql-subscriptions ==0.24.3,
any.morpheus-graphql-tests ==0.24.3,
any.mtl ==2.2.2,
any.mtl-compat ==0.2.2,
mtl-compat -two-point-one -two-point-two,
any.mutable-containers ==0.3.4.1,
any.mwc-random ==0.15.0.2,
any.natural-transformation ==0.4,
any.network ==3.1.4.0,
network -devel,
any.network-bsd ==2.8.1.0,
any.network-byte-order ==0.1.7,
any.network-info ==0.2.1,
any.network-uri ==2.6.4.2,
any.old-locale ==1.0.0.7,
any.old-time ==1.1.0.3,
any.opaleye ==0.9.6.1,
any.openalex ==0.1.0.0,
any.optics-core ==0.4.1.1,
optics-core -explicit-generic-labels,
any.optics-extra ==0.4.2.1,
any.optics-th ==0.4.1,
any.optparse-applicative ==0.17.1.0,
optparse-applicative +process,
any.optparse-generic ==1.4.9,
any.optparse-simple ==0.1.1.4,
optparse-simple -build-example,
any.ordered-containers ==0.2.3,
any.pandoc ==3.0.1,
pandoc -embed_data_files,
any.pandoc-types ==1.23.1,
any.parallel ==3.2.2.0,
any.parsec ==3.1.16.1,
any.parser-combinators ==1.3.0,
parser-combinators -dev,
any.parsers ==0.12.11,
parsers +attoparsec +binary +parsec,
any.password ==3.0.2.1,
password +argon2 +bcrypt +pbkdf2 +scrypt,
any.password-types ==1.0.0.0,
any.patches-class ==0.1.0.1,
any.patches-map ==0.1.0.1,
any.path ==0.9.5,
path -dev,
any.path-io ==1.8.1,
path-io -dev,
any.pem ==0.2.4,
any.pointed ==5.0.4,
pointed +comonad +containers +kan-extensions +semigroupoids +semigroups +stm +tagged +transformers +unordered-containers,
any.polyparse ==1.13,
any.port-utils ==0.2.1.0,
any.postgres-options ==0.2.1.0,
any.postgresql-libpq ==0.9.5.0,
postgresql-libpq -use-pkg-config,
any.postgresql-simple ==0.6.5.1,
any.pretty ==1.1.3.6,
any.pretty-show ==1.10,
any.pretty-simple ==4.1.2.0,
pretty-simple -buildexample +buildexe,
any.prettyprinter ==1.7.1,
prettyprinter -buildreadme +text,
any.prettyprinter-ansi-terminal ==1.1.3,
any.primitive ==0.8.0.0,
any.probability ==0.2.8,
probability +splitbase,
any.process ==1.6.17.0,
any.product-profunctors ==0.11.1.1,
any.profunctors ==5.6.2,
any.promises ==0.3,
any.protolude ==0.3.3,
any.psqueues ==0.2.7.3,
any.pureMD5 ==2.1.4,
pureMD5 -test,
any.qrcode-core ==0.9.9,
any.qrcode-juicypixels ==0.8.5,
any.quickcheck-instances ==0.3.30,
quickcheck-instances -bytestring-builder,
any.quickcheck-io ==0.2.0,
any.rake ==0.0.1,
any.random ==1.2.1,
any.random-shuffle ==0.0.4,
any.raw-strings-qq ==1.1,
any.rdf4h ==3.1.1,
any.readable ==0.3.1,
any.recover-rtti ==0.4.3,
any.recv ==0.1.0,
any.refact ==0.3.0.2,
any.reflection ==2.1.7,
reflection -slow +template-haskell,
any.regex-base ==0.94.0.2,
any.regex-compat ==0.95.2.1,
any.regex-pcre ==0.95.0.0,
regex-pcre +pkg-config,
any.regex-posix ==0.96.0.1,
regex-posix -_regex-posix-clib,
any.regex-tdfa ==1.3.2.2,
regex-tdfa +doctest -force-o2,
any.relude ==1.2.1.0,
any.replace-attoparsec ==1.5.0.0,
any.req ==3.13.0,
req -dev,
any.resource-pool ==0.4.0.0,
any.resourcet ==1.2.6,
any.retry ==0.9.3.1,
retry -lib-werror,
any.rts ==1.0.2,
any.safe ==0.3.19,
any.safe-exceptions ==0.1.7.4,
any.say ==0.1.0.1,
any.scientific ==0.3.7.0,
scientific -bytestring-builder -integer-simple,
any.securemem ==0.1.10,
any.selective ==0.7,
any.semialign ==1.3,
semialign +semigroupoids,
any.semigroupoids ==5.3.7,
semigroupoids +comonad +containers +contravariant +distributive +tagged +unordered-containers,
any.semigroups ==0.20,
semigroups +binary +bytestring -bytestring-builder +containers +deepseq +hashable +tagged +template-haskell +text +transformers +unordered-containers,
any.serialise ==0.2.6.0,
serialise +newtime15,
any.servant ==0.20.1,
any.servant-auth ==0.4.1.0,
any.servant-auth-client ==0.4.1.1,
any.servant-auth-server ==0.4.8.0,
any.servant-auth-swagger ==0.2.10.2,
any.servant-blaze ==0.9.1,
any.servant-cassava ==0.10.2,
any.servant-client ==0.19,
any.servant-client-core ==0.20,
any.servant-docs ==0.12,
any.servant-ekg ==0.3.1,
any.servant-flatten ==0.2,
any.servant-foreign ==0.15.4,
any.servant-job ==0.2.0.0,
any.servant-multipart ==0.12.1,
any.servant-multipart-api ==0.12.1,
any.servant-server ==0.20,
any.servant-swagger ==1.1.11,
any.servant-swagger-ui ==0.3.5.5.0.0,
any.servant-swagger-ui-core ==0.3.5,
any.servant-xml-conduit ==0.1.0.4,
any.shelly ==1.12.1,
shelly -build-examples -lifted,
any.simple-reflect ==0.3.3,
any.simple-sendfile ==0.2.32,
simple-sendfile +allow-bsd -fallback,
any.singleton-bool ==0.1.6,
any.singletons ==3.0.2,
any.singletons-base ==3.1.1,
any.singletons-th ==3.1.1,
any.skylighting ==0.13.4.1,
skylighting -executable,
any.skylighting-core ==0.13.4.1,
skylighting-core -executable,
any.skylighting-format-ansi ==0.1,
any.skylighting-format-blaze-html ==0.1.1.1,
any.skylighting-format-context ==0.1.0.2,
any.skylighting-format-latex ==0.1,
any.smallcheck ==1.2.1.1,
any.smtp-mail ==0.3.0.0,
any.snap-core ==1.0.5.1,
snap-core -debug +network-uri -portable,
any.snap-server ==1.1.2.1,
snap-server -build-pong -build-testserver -debug -openssl -portable,
any.socks ==0.6.1,
any.some ==1.0.4.1,
some +newtype-unsafe,
any.sop-core ==0.5.0.2,
any.sparse-linear ==0.1.0.0,
any.split ==0.2.3.5,
any.splitmix ==0.1.0.5,
splitmix -optimised-mixer,
any.statistics ==0.16.2.1,
any.stemmer ==0.5.2,
any.stm ==2.5.1.0,
any.stm-chans ==3.0.0.9,
any.stm-delay ==0.1.1.1,
any.storable-complex ==0.2.3.0,
any.streaming-commons ==0.2.2.6,
streaming-commons -use-bytestring-builder,
any.strict ==0.5,
any.string-conversions ==0.4.0.1,
any.stringsearch ==0.3.6.6,
stringsearch -base3 +base4,
any.swagger2 ==2.8.7,
any.syb ==0.7.2.4,
any.system-cxx-std-lib ==1.0,
any.system-filepath ==0.4.14,
any.tagged ==0.8.7,
tagged +deepseq +transformers,
any.taggy ==0.2.1,
any.taggy-lens ==0.1.2,
any.tagsoup ==0.14.8,
any.tasty ==1.4.3,
tasty +unix,
any.tasty-bench ==0.3.5,
tasty-bench -debug +tasty,
any.tasty-golden ==2.3.5,
tasty-golden -build-example,
any.tasty-hspec ==1.2.0.3,
any.tasty-hunit ==0.10.1,
any.tasty-quickcheck ==0.10.2,
any.tasty-smallcheck ==0.8.2,
any.template-haskell ==2.19.0.0,
any.temporary ==1.3,
any.terminal-size ==0.3.4,
any.terminfo ==0.4.1.5,
any.texmath ==0.12.8.3,
texmath -executable -server,
any.text ==2.0.2,
any.text-conversions ==0.3.1.1,
any.text-format ==0.3.2.1,
text-format -developer,
any.text-icu ==0.8.0.4,
any.text-metrics ==0.3.2,
text-metrics -dev,
any.text-regex-replace ==0.1.1.5,
any.text-short ==0.1.5,
text-short -asserts,
any.text-show ==3.10.4,
text-show +base-4-9 +integer-gmp +new-functor-classes +template-haskell-2-11,
any.text16-compat ==0.1.0.0,
text16-compat -enable-golden-test-generation,
any.tf-random ==0.5,
any.th-abstraction ==0.4.5.0,
any.th-compat ==0.1.4,
any.th-desugar ==1.14,
any.th-expand-syns ==0.4.11.0,
any.th-lift ==0.8.4,
any.th-lift-instances ==0.1.20,
any.th-orphans ==0.13.14,
any.th-reify-many ==0.1.10,
any.these ==1.2,
any.time ==1.12.2,
any.time-compat ==1.9.6.1,
time-compat -old-locale,
any.time-locale-compat ==0.1.1.5,
time-locale-compat -old-locale,
any.time-manager ==0.0.1,
any.timezone-olson ==0.2.1,
any.timezone-series ==0.1.13,
any.tls ==1.6.0,
tls +compat -hans +network,
any.tmp-postgres ==1.34.1.0,
any.tomland ==1.3.3.2,
tomland -build-play-tomland -build-readme,
any.transformers ==0.5.6.2,
any.transformers-base ==0.4.6,
transformers-base +orphaninstances,
any.transformers-compat ==0.7.2,
transformers-compat -five +five-three -four +generic-deriving +mtl -three -two,
any.tree-diff ==0.3.0.1,
any.tuple ==0.3.0.2,
any.type-equality ==1,
any.typed-process ==0.2.11.1,
any.typst-symbols ==0.1.4,
any.unicode-collation ==0.1.3.5,
unicode-collation -doctests -executable,
any.unicode-data ==0.4.0.1,
unicode-data -ucd2haskell,
any.unicode-show ==0.1.1.1,
any.unicode-transforms ==0.4.0.1,
unicode-transforms -bench-show -dev -has-icu -has-llvm -use-gauge,
any.uniplate ==1.6.13,
any.unique ==0.0.1,
any.universe-base ==1.1.3.1,
any.unix ==2.7.3,
any.unix-compat ==0.7,
unix-compat -old-time,
any.unix-time ==0.4.11,
any.unliftio ==0.2.25.0,
any.unliftio-core ==0.2.1.0,
any.unordered-containers ==0.2.19.1,
unordered-containers -debug,
any.uri-bytestring ==0.3.3.1,
uri-bytestring -lib-werror,
any.uri-encode ==1.5.0.7,
uri-encode +network-uri -tools,
any.utf8-string ==1.0.2,
any.utility-ht ==0.0.17,
any.uuid ==1.3.15,
any.uuid-types ==1.0.5.1,
any.validation-selective ==0.2.0.0,
any.validity ==0.12.0.2,
any.vault ==0.3.1.5,
vault +useghc,
any.vector ==0.12.3.0,
vector +boundschecks -internalchecks -unsafechecks -wall,
any.vector-algorithms ==0.9.0.1,
vector-algorithms +bench +boundschecks -internalchecks -llvm +properties -unsafechecks,
any.vector-binary-instances ==0.2.5.2,
any.vector-instances ==3.4.2,
vector-instances +hashable,
any.vector-th-unbox ==0.2.2,
any.void ==0.7.3,
void -safe,
any.wai ==3.2.4,
any.wai-app-static ==3.1.8,
wai-app-static +crypton -print,
any.wai-cors ==0.2.7,
any.wai-extra ==3.1.13.0,
wai-extra -build-example,
any.wai-logger ==2.4.0,
any.wai-websockets ==3.0.1.2,
wai-websockets +example,
any.warp ==3.3.25,
warp +allow-sendfilefd -network-bytestring -warp-debug +x509,
any.websockets ==0.12.7.3,
websockets -example,
any.wikiparsec ==2.1.0,
wikiparsec +library-only,
any.witherable ==0.4.2,
any.wl-pprint-annotated ==0.1.0.1,
any.wl-pprint-text ==1.2.0.2,
any.word8 ==0.1.3,
any.wreq ==0.5.4.2,
wreq -aws -developer +doctest -httpbin,
any.wuss ==2.0.1.3,
wuss -pedantic,
any.x509 ==1.7.7,
any.x509-store ==1.6.9,
any.x509-system ==1.6.7,
any.x509-validation ==1.6.12,
any.xml ==1.3.14,
any.xml-conduit ==1.9.1.3,
any.xml-types ==0.3.8,
any.yaml ==0.11.11.2,
yaml +no-examples +no-exe,
any.zeromq4-haskell ==0.8.0,
any.zip ==2.0.0,
zip -dev -disable-bzip2 -disable-zstd,
any.zip-archive ==0.4.3,
zip-archive -executable,
any.zlib ==0.6.3.0,
zlib -bundled-c-zlib -non-blocking-ffi -pkg-config,
any.zlib-bindings ==0.1.1.5,
any.zstd ==0.1.3.0,
zstd +standalone
index-state: hackage.haskell.org 2023-12-10T10:34:46Z
...@@ -5,7 +5,7 @@ cabal-version: 3.4 ...@@ -5,7 +5,7 @@ cabal-version: 3.4
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.6.9.9.9.6.3 version: 0.0.6.9.9.9.6.5
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -38,6 +38,7 @@ data-files: ...@@ -38,6 +38,7 @@ data-files:
test-data/phylo/bpa_phylo_test.json test-data/phylo/bpa_phylo_test.json
test-data/phylo/open_science.json test-data/phylo/open_science.json
test-data/phylo/issue-290-small.golden.json test-data/phylo/issue-290-small.golden.json
test-data/stemming/lancaster.txt
test-data/test_config.ini test-data/test_config.ini
gargantext-cors-settings.toml gargantext-cors-settings.toml
.clippy.dhall .clippy.dhall
...@@ -87,6 +88,7 @@ common tests ...@@ -87,6 +88,7 @@ common tests
, hspec-wai , hspec-wai
, hspec-wai-json , hspec-wai-json
, tasty ^>= 1.5 , tasty ^>= 1.5
, tasty-golden
, tasty-hspec , tasty-hspec
, tasty-hunit , tasty-hunit
, tasty-quickcheck , tasty-quickcheck
...@@ -151,6 +153,7 @@ library ...@@ -151,6 +153,7 @@ library
Gargantext.Core.Methods.Similarities Gargantext.Core.Methods.Similarities
Gargantext.Core.Ngrams.Tools Gargantext.Core.Ngrams.Tools
Gargantext.Core.Ngrams.Types Gargantext.Core.Ngrams.Types
Gargantext.Core.Methods.Similarities.Conditional
Gargantext.Core.NLP Gargantext.Core.NLP
Gargantext.Core.NodeStory Gargantext.Core.NodeStory
Gargantext.Core.NodeStory.DB Gargantext.Core.NodeStory.DB
...@@ -181,6 +184,7 @@ library ...@@ -181,6 +184,7 @@ library
Gargantext.Core.Text.Terms.Eleve Gargantext.Core.Text.Terms.Eleve
Gargantext.Core.Text.Terms.Mono Gargantext.Core.Text.Terms.Mono
Gargantext.Core.Text.Terms.Mono.Stem.En Gargantext.Core.Text.Terms.Mono.Stem.En
Gargantext.Core.Text.Terms.Mono.Stem.Lancaster
Gargantext.Core.Text.Terms.Multi Gargantext.Core.Text.Terms.Multi
Gargantext.Core.Text.Terms.Multi.Lang.En Gargantext.Core.Text.Terms.Multi.Lang.En
Gargantext.Core.Text.Terms.Multi.Lang.Fr Gargantext.Core.Text.Terms.Multi.Lang.Fr
...@@ -280,8 +284,8 @@ library ...@@ -280,8 +284,8 @@ library
Gargantext.API.Node.Corpus.Searx Gargantext.API.Node.Corpus.Searx
Gargantext.API.Node.Document.Export Gargantext.API.Node.Document.Export
Gargantext.API.Node.Document.Export.Types Gargantext.API.Node.Document.Export.Types
Gargantext.API.Node.DocumentsFromWriteNodes
Gargantext.API.Node.DocumentUpload Gargantext.API.Node.DocumentUpload
Gargantext.API.Node.DocumentsFromWriteNodes
Gargantext.API.Node.FrameCalcUpload Gargantext.API.Node.FrameCalcUpload
Gargantext.API.Node.Get Gargantext.API.Node.Get
Gargantext.API.Node.New Gargantext.API.Node.New
...@@ -305,7 +309,6 @@ library ...@@ -305,7 +309,6 @@ library
Gargantext.Core.Methods.Similarities.Accelerate.Conditional Gargantext.Core.Methods.Similarities.Accelerate.Conditional
Gargantext.Core.Methods.Similarities.Accelerate.Distributional Gargantext.Core.Methods.Similarities.Accelerate.Distributional
Gargantext.Core.Methods.Similarities.Accelerate.SpeGen Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
Gargantext.Core.Methods.Similarities.Conditional
Gargantext.Core.Methods.Similarities.Distributional Gargantext.Core.Methods.Similarities.Distributional
Gargantext.Core.Ngrams.NgramsTree Gargantext.Core.Ngrams.NgramsTree
Gargantext.Core.Statistics Gargantext.Core.Statistics
...@@ -326,10 +329,10 @@ library ...@@ -326,10 +329,10 @@ library
Gargantext.Core.Text.Corpus.Parsers.RIS Gargantext.Core.Text.Corpus.Parsers.RIS
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
Gargantext.Core.Text.Corpus.Parsers.Telegram Gargantext.Core.Text.Corpus.Parsers.Telegram
Gargantext.Core.Text.Corpus.Parsers.WOS
Gargantext.Core.Text.Corpus.Parsers.Wikidata Gargantext.Core.Text.Corpus.Parsers.Wikidata
Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
Gargantext.Core.Text.Corpus.Parsers.Wikimedia Gargantext.Core.Text.Corpus.Parsers.Wikimedia
Gargantext.Core.Text.Corpus.Parsers.WOS
Gargantext.Core.Text.Learn Gargantext.Core.Text.Learn
Gargantext.Core.Text.List Gargantext.Core.Text.List
Gargantext.Core.Text.List.Group Gargantext.Core.Text.List.Group
...@@ -427,12 +430,12 @@ library ...@@ -427,12 +430,12 @@ library
Gargantext.Database.Query.Table.Node.Select Gargantext.Database.Query.Table.Node.Select
Gargantext.Database.Query.Table.Node.Update Gargantext.Database.Query.Table.Node.Update
Gargantext.Database.Query.Table.Node.User Gargantext.Database.Query.Table.Node.User
Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
Gargantext.Database.Query.Table.NodeContext Gargantext.Database.Query.Table.NodeContext
Gargantext.Database.Query.Table.NodeContext_NodeContext Gargantext.Database.Query.Table.NodeContext_NodeContext
Gargantext.Database.Query.Table.NodeNgrams Gargantext.Database.Query.Table.NodeNgrams
Gargantext.Database.Query.Table.NodeNode Gargantext.Database.Query.Table.NodeNode
Gargantext.Database.Query.Table.NodeNodeNgrams Gargantext.Database.Query.Table.NodeNodeNgrams
Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
Gargantext.Database.Query.Table.NodesNgramsRepo Gargantext.Database.Query.Table.NodesNgramsRepo
Gargantext.Database.Query.Tree Gargantext.Database.Query.Tree
Gargantext.Database.Query.Tree.Error Gargantext.Database.Query.Tree.Error
...@@ -440,13 +443,13 @@ library ...@@ -440,13 +443,13 @@ library
Gargantext.Database.Schema.ContextNodeNgrams Gargantext.Database.Schema.ContextNodeNgrams
Gargantext.Database.Schema.ContextNodeNgrams2 Gargantext.Database.Schema.ContextNodeNgrams2
Gargantext.Database.Schema.NgramsPostag Gargantext.Database.Schema.NgramsPostag
Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
Gargantext.Database.Schema.NodeContext Gargantext.Database.Schema.NodeContext
Gargantext.Database.Schema.NodeContext_NodeContext Gargantext.Database.Schema.NodeContext_NodeContext
Gargantext.Database.Schema.NodeNgrams Gargantext.Database.Schema.NodeNgrams
Gargantext.Database.Schema.NodeNode Gargantext.Database.Schema.NodeNode
Gargantext.Database.Schema.NodeNodeNgrams Gargantext.Database.Schema.NodeNodeNgrams
Gargantext.Database.Schema.NodeNodeNgrams2 Gargantext.Database.Schema.NodeNodeNgrams2
Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
Gargantext.Database.Schema.NodesNgramsRepo Gargantext.Database.Schema.NodesNgramsRepo
Gargantext.Database.Schema.Prelude Gargantext.Database.Schema.Prelude
Gargantext.Database.Types Gargantext.Database.Types
...@@ -651,7 +654,8 @@ library ...@@ -651,7 +654,8 @@ library
, xml-conduit ^>= 1.9.1.3 , xml-conduit ^>= 1.9.1.3
, xml-types ^>= 0.3.8 , xml-types ^>= 0.3.8
, yaml ^>= 0.11.8.0 , yaml ^>= 0.11.8.0
, zip ^>= 2.0.0 , zip ^>= 1.7.2
, zip-archive ^>= 0.4.3
, zlib ^>= 0.6.2.3 , zlib ^>= 0.6.2.3
executable gargantext-admin executable gargantext-admin
...@@ -865,6 +869,7 @@ test-suite garg-test-tasty ...@@ -865,6 +869,7 @@ test-suite garg-test-tasty
Test.Offline.Errors Test.Offline.Errors
Test.Offline.JSON Test.Offline.JSON
Test.Offline.Phylo Test.Offline.Phylo
Test.Offline.Stemming.Lancaster
Test.Parsers.Date Test.Parsers.Date
Test.Parsers.Types Test.Parsers.Types
Test.Parsers.WOS Test.Parsers.WOS
...@@ -902,6 +907,7 @@ test-suite garg-test-tasty ...@@ -902,6 +907,7 @@ test-suite garg-test-tasty
, patches-map ^>= 0.1.0.1 , patches-map ^>= 0.1.0.1
, postgres-options >= 0.2 && < 0.3 , postgres-options >= 0.2 && < 0.3
, postgresql-simple >= 0.6.4 && < 0.7 , postgresql-simple >= 0.6.4 && < 0.7
, pretty
, process ^>= 1.6.13.2 , process ^>= 1.6.13.2
, quickcheck-instances ^>= 0.3.25.2 , quickcheck-instances ^>= 0.3.25.2
, raw-strings-qq , raw-strings-qq
...@@ -915,8 +921,11 @@ test-suite garg-test-tasty ...@@ -915,8 +921,11 @@ test-suite garg-test-tasty
, servant-server ^>= 0.20 , servant-server ^>= 0.20
, shelly >= 1.9 && < 2 , shelly >= 1.9 && < 2
, stm ^>= 2.5.0.1 , stm ^>= 2.5.0.1
, template-haskell ^>= 2.19.0.0 , template-haskell
, text ^>= 1.2.4.1
, time ^>= 1.9.3
, tmp-postgres >= 1.34.1 && < 1.35 , tmp-postgres >= 1.34.1 && < 1.35
, tree-diff
, unordered-containers ^>= 0.2.16.0 , unordered-containers ^>= 0.2.16.0
, wai , wai
, wai-extra , wai-extra
...@@ -1030,3 +1039,16 @@ executable gargantext-phylo-profile ...@@ -1030,3 +1039,16 @@ executable gargantext-phylo-profile
, shelly , shelly
, split , split
default-language: Haskell2010 default-language: Haskell2010
executable garg-golden-file-diff
import:
defaults
, optimized
main-is: Main.hs
hs-source-dirs:
bin/gargantext-golden-file-diff
build-depends:
base
, text
, tree-diff
default-language: Haskell2010
...@@ -25,10 +25,10 @@ import Data.Morpheus.Types ...@@ -25,10 +25,10 @@ import Data.Morpheus.Types
import Data.Text (pack) import Data.Text (pack)
import Data.Time.Format.ISO8601 (iso8601Show) import Data.Time.Format.ISO8601 (iso8601Show)
import Gargantext.API.Admin.Types (HasSettings) import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Prelude (GargM) import Gargantext.API.Prelude (GargM)
import Gargantext.Core.Types.Search (HyperdataRow(..), toHyperdataRow) import Gargantext.Core.Types.Search (HyperdataRow(..), toHyperdataRow)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
import Gargantext.Database.Admin.Types.Node (ContextTitle, NodeId(..), NodeTypeId, UserId, unNodeId, ContextId (..)) import Gargantext.Database.Admin.Types.Node (ContextTitle, NodeId(..), NodeTypeId, UserId, unNodeId, ContextId (..))
import Gargantext.Database.Prelude (CmdCommon) import Gargantext.Database.Prelude (CmdCommon)
import Gargantext.Database.Query.Table.NodeContext (getNodeContext, getContextsForNgramsTerms, ContextForNgramsTerms(..), {- getContextNgrams, -} getContextNgramsMatchingFTS) import Gargantext.Database.Query.Table.NodeContext (getNodeContext, getContextsForNgramsTerms, ContextForNgramsTerms(..), {- getContextNgrams, -} getContextNgramsMatchingFTS)
...@@ -71,8 +71,6 @@ data HyperdataRowDocumentGQL = ...@@ -71,8 +71,6 @@ data HyperdataRowDocumentGQL =
, hrd_source :: Text , hrd_source :: Text
, hrd_title :: Text , hrd_title :: Text
, hrd_url :: Text , hrd_url :: Text
, hrd_uniqId :: Text
, hrd_uniqIdBdd :: Text
} deriving (Generic, GQLType, Show) } deriving (Generic, GQLType, Show)
data NodeContextGQL = NodeContextGQL data NodeContextGQL = NodeContextGQL
...@@ -216,8 +214,6 @@ toHyperdataRowDocumentGQL hyperdata = ...@@ -216,8 +214,6 @@ toHyperdataRowDocumentGQL hyperdata =
, hrd_source = _hr_source , hrd_source = _hr_source
, hrd_title = _hr_title , hrd_title = _hr_title
, hrd_url = _hr_url , hrd_url = _hr_url
, hrd_uniqId = _hr_uniqId
, hrd_uniqIdBdd = _hr_uniqIdBdd
} }
HyperdataRowContact { } -> Nothing HyperdataRowContact { } -> Nothing
......
...@@ -54,16 +54,18 @@ import Servant ...@@ -54,16 +54,18 @@ import Servant
------------------------------------------------------------------------ ------------------------------------------------------------------------
type GETAPI = Summary "Get List" type GETAPI = Summary "Get List"
:> "lists" :> "lists"
:> Capture "listId" ListId
:> "json"
:> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
:<|> "lists"
:> Capture "listId" ListId :> Capture "listId" ListId
:> "csv" :> ( "json"
:> Get '[GUS.CSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap) :> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
:<|> "json.zip"
:> Get '[GUS.ZIP] (Headers '[Header "Content-Disposition" Text] NgramsListZIP)
:<|> "csv"
:> Get '[GUS.CSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap) )
getApi :: GargServer GETAPI getApi :: GargServer GETAPI
getApi = getJson :<|> getCsv getApi listId = getJson listId
:<|> getJsonZip listId
:<|> getCsv listId
-- --
-- JSON API -- JSON API
...@@ -93,6 +95,18 @@ getJson lId = do ...@@ -93,6 +95,18 @@ getJson lId = do
] ]
) lst ) lst
getJsonZip :: HasNodeStory env err m
=> ListId
-> m (Headers '[Header "Content-Disposition" Text] NgramsListZIP)
getJsonZip lId = do
lst <- getNgramsList lId
let nlz = NgramsListZIP { _nlz_nl = lst, _nlz_list_id = lId}
pure $ addHeader (concat [ "attachment; filename="
, nlzFileName nlz
, ".zip"
]
) nlz
getCsv :: HasNodeStory env err m getCsv :: HasNodeStory env err m
=> ListId => ListId
-> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap) -> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
......
...@@ -21,12 +21,12 @@ import Data.Map.Strict (fromList) ...@@ -21,12 +21,12 @@ import Data.Map.Strict (fromList)
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Text qualified as Text import Data.Text qualified as Text
import Data.Validity import Data.Validity
import Gargantext.Core.Ngrams.Tools (getNgramsTableMap) import Gargantext.API.Ngrams (getNgramsTableMap)
import Gargantext.Core.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory (HasNodeStory) import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.List.Social.Prelude import Gargantext.Core.Text.List.Social.Prelude ( unPatchMapToHashMap )
import Gargantext.Core.Types (ListType) import Gargantext.Core.Types.Main ( ListType )
import Gargantext.Database.Admin.Types.Node (ListId) import Gargantext.Database.Admin.Types.Node (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypes) import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypes)
import Gargantext.Prelude import Gargantext.Prelude
......
...@@ -21,22 +21,22 @@ import Data.List qualified as List ...@@ -21,22 +21,22 @@ import Data.List qualified as List
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Set qualified as Set import Data.Set qualified as Set
import Data.Text (pack) import Data.Text (pack)
import Gargantext.API.Node.Corpus.Export.Types import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm) )
import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..) )
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude (GargNoServer) import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo) import Gargantext.Core.NodeStory.Types ( NodeListStory )
import Gargantext.Core.Ngrams.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser) import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
import Gargantext.Database.Admin.Config (userMaster) import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Prelude (Cmd) import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node import Gargantext.Database.Query.Table.Node ( defaultList )
import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername) import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata) import Gargantext.Database.Schema.Context (_context_id)
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash) import Gargantext.Prelude hiding (hash)
import Gargantext.Prelude.Crypto.Hash (hash) import Gargantext.Prelude.Crypto.Hash (hash)
...@@ -51,9 +51,7 @@ getCorpus :: CorpusId ...@@ -51,9 +51,7 @@ getCorpus :: CorpusId
getCorpus cId lId nt' = do getCorpus cId lId nt' = do
let let
nt = case nt' of nt = fromMaybe NgramsTerms nt'
Nothing -> NgramsTerms
Just t -> t
listId <- case lId of listId <- case lId of
Nothing -> defaultList cId Nothing -> defaultList cId
...@@ -75,10 +73,10 @@ getCorpus cId lId nt' = do ...@@ -75,10 +73,10 @@ getCorpus cId lId nt' = do
) ns (Map.map (Set.map unNgramsTerm) ngs) ) ns (Map.map (Set.map unNgramsTerm) ngs)
where where
d_hash :: Context HyperdataDocument -> Set Text -> Text d_hash :: Context HyperdataDocument -> Set Text -> Text
d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata a) d_hash _a b = hash [ -- fromMaybe "" (_hd_uniqId $ _context_hyperdata a),
, hash b hash b
] ]
pure $ addHeader ("attachment; filename=GarganText_corpus-" <> (pack $ show cId) <> ".json") pure $ addHeader ("attachment; filename=GarganText_corpus-" <> pack (show cId) <> ".json")
$ Corpus { _c_corpus = Map.elems r $ Corpus { _c_corpus = Map.elems r
, _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r } , _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
......
...@@ -23,7 +23,7 @@ import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM) ...@@ -23,7 +23,7 @@ import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM)
import Data.Tuple.Select (sel1, sel2, sel3) import Data.Tuple.Select (sel1, sel2, sel3)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet) import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NodeStory (HasNodeStory) import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Corpus.API qualified as API import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.List (buildNgramsLists) import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..)) import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
...@@ -39,8 +39,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus) ...@@ -39,8 +39,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId, ListId, NodeType(NodeTexts)) import Gargantext.Database.Admin.Types.Node (CorpusId, ListId, NodeType(NodeTexts))
import Gargantext.Database.Prelude (hasConfig) import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (getOrMkList) import Gargantext.Database.Query.Table.Node (getOrMkList, insertDefaultNodeIfNotExists)
import Gargantext.Database.Query.Table.Node (insertDefaultNodeIfNotExists)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Tree.Error (HasTreeError) import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus) import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
...@@ -53,7 +52,7 @@ import Prelude qualified ...@@ -53,7 +52,7 @@ import Prelude qualified
langToSearx :: Lang -> Text langToSearx :: Lang -> Text
langToSearx All = "en-US" langToSearx All = "en-US"
langToSearx x = (Text.toLower acronym) <> "-" <> acronym langToSearx x = Text.toLower acronym <> "-" <> acronym
where where
acronym = show x acronym = show x
...@@ -136,7 +135,7 @@ insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) = ...@@ -136,7 +135,7 @@ insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) =
-- docs :: [Either Text HyperdataDocument] -- docs :: [Either Text HyperdataDocument]
let docs = hyperdataDocumentFromSearxResult l <$> _srs_results let docs = hyperdataDocumentFromSearxResult l <$> _srs_results
--printDebug "[triggerSearxSearch] docs" docs --printDebug "[triggerSearxSearch] docs" docs
let docs' = catMaybes $ rightToMaybe <$> docs let docs' = mapMaybe rightToMaybe docs
{- {-
Prelude.mapM_ (\(HyperdataDocument { _hd_title, _hd_publication_year, _hd_publication_date }) -> do Prelude.mapM_ (\(HyperdataDocument { _hd_title, _hd_publication_year, _hd_publication_date }) -> do
printDebug "[triggerSearxSearch] doc time" $ printDebug "[triggerSearxSearch] doc time" $
...@@ -214,16 +213,14 @@ hyperdataDocumentFromSearxResult l (SearxResult { _sr_content, _sr_engine, _sr_p ...@@ -214,16 +213,14 @@ hyperdataDocumentFromSearxResult l (SearxResult { _sr_content, _sr_engine, _sr_p
Right HyperdataDocument { _hd_bdd = Just "Searx" Right HyperdataDocument { _hd_bdd = Just "Searx"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just _sr_title , _hd_title = Just _sr_title
, _hd_authors = Nothing , _hd_authors = Nothing
, _hd_institutes = Nothing , _hd_institutes = Nothing
, _hd_source = Just _sr_engine , _hd_source = Just _sr_engine
, _hd_abstract = _sr_content , _hd_abstract = _sr_content
, _hd_publication_date = T.pack <$> formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%S" <$> mDate , _hd_publication_date = T.pack Prelude.. formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%S" Prelude.<$> mDate
, _hd_publication_year = fromIntegral <$> sel1 <$> mGregorian , _hd_publication_year = fromIntegral Prelude.. sel1 Prelude.<$> mGregorian
, _hd_publication_month = sel2 <$> mGregorian , _hd_publication_month = sel2 <$> mGregorian
, _hd_publication_day = sel3 <$> mGregorian , _hd_publication_day = sel3 <$> mGregorian
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
......
...@@ -12,28 +12,29 @@ module Gargantext.API.Node.Document.Export ...@@ -12,28 +12,29 @@ module Gargantext.API.Node.Document.Export
where where
import Control.Lens (view) import Control.Lens (view)
import Data.ByteString.Lazy.Char8 qualified as BSC
import Data.Csv (encodeDefaultOrderedByName) import Data.Csv (encodeDefaultOrderedByName)
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Data.Version (showVersion) import Data.Version (showVersion)
import Gargantext.API.Node.Document.Export.Types import Gargantext.API.Node.Document.Export.Types
import Gargantext.API.Prelude (GargNoServer, GargServer) import Gargantext.API.Prelude (GargNoServer, GargServer)
import Gargantext.Core (toDBid) import Gargantext.Core (toDBid)
import Gargantext.Core.Types import Gargantext.Database.Admin.Types.Node (DocId, NodeId, NodeType(..))
import Gargantext.Database.Query.Facet (runViewDocuments, Facet(..)) import Gargantext.Database.Query.Facet (runViewDocuments, Facet(..))
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType) import Gargantext.Database.Query.Table.Node (getClosestParentIdByType)
import Gargantext.Database.Query.Table.Node.User import Gargantext.Database.Query.Table.Node.User ( getNodeUser )
import Gargantext.Database.Schema.Node (NodePoly(..), node_user_id) import Gargantext.Database.Schema.Node (NodePoly(..), node_user_id)
import Gargantext.Prelude import Gargantext.Prelude
import Servant import Paths_gargantext qualified as PG -- cabal magic build module
import qualified Data.ByteString.Lazy.Char8 as BSC import Servant ( addHeader, (:<|>)((:<|>)), Header, Headers(getResponse) )
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Paths_gargantext as PG -- cabal magic build module
api :: NodeId api :: NodeId
-- ^ The ID of the target user -- ^ The ID of the target user
-> DocId -> DocId
-> GargServer API -> GargServer API
api userNodeId dId = getDocumentsJSON userNodeId dId api userNodeId dId = getDocumentsJSON userNodeId dId
:<|> getDocumentsJSONZip userNodeId dId
:<|> getDocumentsCSV userNodeId dId :<|> getDocumentsCSV userNodeId dId
-------------------------------------------------- --------------------------------------------------
...@@ -47,11 +48,12 @@ getDocumentsJSON nodeUserId pId = do ...@@ -47,11 +48,12 @@ getDocumentsJSON nodeUserId pId = do
mcId <- getClosestParentIdByType pId NodeCorpus mcId <- getClosestParentIdByType pId NodeCorpus
let cId = maybe (panicTrace "[G.A.N.D.Export] Node has no parent") identity mcId let cId = maybe (panicTrace "[G.A.N.D.Export] Node has no parent") identity mcId
docs <- runViewDocuments cId False Nothing Nothing Nothing Nothing Nothing docs <- runViewDocuments cId False Nothing Nothing Nothing Nothing Nothing
pure $ addHeader (T.concat [ "attachment; filename=GarganText_DocsList-" let dexp = DocumentExport { _de_documents = mapFacetDoc uId <$> docs
, T.pack $ show pId , _de_garg_version = T.pack $ showVersion PG.version }
, ".json"]) pure $ addHeader (T.concat [ "attachment; filename="
DocumentExport { _de_documents = mapFacetDoc uId <$> docs , "GarganText_DocsList-"
, _de_garg_version = T.pack $ showVersion PG.version } , T.pack (show pId)
, ".json" ]) dexp
where where
mapFacetDoc uId (FacetDoc { .. }) = mapFacetDoc uId (FacetDoc { .. }) =
Document { _d_document = Document { _d_document =
...@@ -71,6 +73,18 @@ getDocumentsJSON nodeUserId pId = do ...@@ -71,6 +73,18 @@ getDocumentsJSON nodeUserId pId = do
, _ng_hash = "" } , _ng_hash = "" }
, _d_hash = ""} , _d_hash = ""}
getDocumentsJSONZip :: NodeId
-- ^ The Node ID of the target user
-> DocId
-> GargNoServer (Headers '[Header "Content-Disposition" T.Text] DocumentExportZIP) -- [Document]
getDocumentsJSONZip userNodeId pId = do
dJSON <- getDocumentsJSON userNodeId pId
let dexp = getResponse dJSON
let dexpz = DocumentExportZIP { _dez_dexp = dexp, _dez_doc_id = pId }
pure $ addHeader (T.concat [ "attachment; filename="
, dezFileName dexpz
, ".zip" ]) dexpz
getDocumentsCSV :: NodeId getDocumentsCSV :: NodeId
-- ^ The Node ID of the target user -- ^ The Node ID of the target user
-> DocId -> DocId
......
...@@ -13,14 +13,21 @@ Portability : POSIX ...@@ -13,14 +13,21 @@ Portability : POSIX
module Gargantext.API.Node.Document.Export.Types where module Gargantext.API.Node.Document.Export.Types where
import Data.Aeson (encode)
import Data.Aeson.TH (deriveJSON)
import Data.Csv (DefaultOrdered(..), ToNamedRecord(..), (.=), header, namedRecord) import Data.Csv (DefaultOrdered(..), ToNamedRecord(..), (.=), header, namedRecord)
import Data.Swagger ( genericDeclareNamedSchema, ToParamSchema(..), ToSchema(..) ) import Data.Swagger ( genericDeclareNamedSchema, ToParamSchema(..), ToSchema(..) )
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Gargantext.Core.Types ( Node, TODO ) import Gargantext.Core.Types ( Node, TODO )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Node (DocId)
import Gargantext.Database.Schema.Node (NodePoly(..)) import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude import Gargantext.Utils.Servant (ZIP)
import Servant import Gargantext.Utils.Zip (zipContentsPure)
import Protolude
import Servant ((:>), (:<|>), Get, Header, Headers(..), JSON, MimeRender(..), PlainText, Summary)
-- | Document Export -- | Document Export
...@@ -29,6 +36,12 @@ data DocumentExport = ...@@ -29,6 +36,12 @@ data DocumentExport =
, _de_garg_version :: Text , _de_garg_version :: Text
} deriving (Generic) } deriving (Generic)
-- | This is to represent a zipped document export. We want to have doc_id in zipped file name.
data DocumentExportZIP =
DocumentExportZIP { _dez_dexp :: DocumentExport
, _dez_doc_id :: DocId } deriving (Generic)
data Document = data Document =
Document { _d_document :: Node HyperdataDocument Document { _d_document :: Node HyperdataDocument
, _d_ngrams :: Ngrams , _d_ngrams :: Ngrams
...@@ -66,6 +79,9 @@ type Hash = Text ...@@ -66,6 +79,9 @@ type Hash = Text
instance ToSchema DocumentExport where instance ToSchema DocumentExport where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_de_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_de_")
instance ToSchema DocumentExportZIP where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_dez_")
instance ToSchema Document where instance ToSchema Document where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
...@@ -76,6 +92,9 @@ instance ToSchema Ngrams where ...@@ -76,6 +92,9 @@ instance ToSchema Ngrams where
instance ToParamSchema DocumentExport where instance ToParamSchema DocumentExport where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO) toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema DocumentExportZIP where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Document where instance ToParamSchema Document where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO) toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
...@@ -85,10 +104,25 @@ instance ToParamSchema Ngrams where ...@@ -85,10 +104,25 @@ instance ToParamSchema Ngrams where
type API = Summary "Document Export" type API = Summary "Document Export"
:> "export" :> "export"
:> ( "json" :> ( "json"
:> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExport) :> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExport)
:<|> "json.zip"
:> Get '[ZIP] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExportZIP)
:<|> "csv" :<|> "csv"
:> Get '[PlainText] (Headers '[Servant.Header "Content-Disposition" Text] Text)) -- [Document]) :> Get '[PlainText] (Headers '[Servant.Header "Content-Disposition" Text] Text) )
$(deriveJSON (unPrefix "_ng_") ''Ngrams) $(deriveJSON (unPrefix "_ng_") ''Ngrams)
$(deriveJSON (unPrefix "_d_") ''Document) $(deriveJSON (unPrefix "_d_") ''Document)
$(deriveJSON (unPrefix "_de_") ''DocumentExport) $(deriveJSON (unPrefix "_de_") ''DocumentExport)
------
-- Needs to be here because of deriveJSON TH above
dezFileName :: DocumentExportZIP -> Text
dezFileName (DocumentExportZIP { .. }) = "GarganText_DocsList-" <> show _dez_doc_id <> ".json"
instance MimeRender ZIP DocumentExportZIP where
mimeRender _ dexpz@(DocumentExportZIP { .. }) =
zipContentsPure (T.unpack $ dezFileName dexpz) (encode _dez_dexp)
...@@ -22,22 +22,22 @@ import Data.Swagger (ToSchema) ...@@ -22,22 +22,22 @@ import Data.Swagger (ToSchema)
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env) import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Errors.Types import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Prelude import Gargantext.API.Prelude ( GargM )
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (nlpServerGet) import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit) import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Terms (TermType(..)) import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix) import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus) import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus )
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node ( DocId, NodeId, NodeType(NodeCorpus) )
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType') import Gargantext.Database.Query.Table.Node (getClosestParentIdByType')
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..)) import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant import Servant ( JSON, Summary, type (:>), HasServer(ServerT) )
data DocumentUpload = DocumentUpload data DocumentUpload = DocumentUpload
...@@ -108,8 +108,6 @@ documentUpload nId doc = do ...@@ -108,8 +108,6 @@ documentUpload nId doc = do
let hd = HyperdataDocument { _hd_bdd = Nothing let hd = HyperdataDocument { _hd_bdd = Nothing
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just $ if view du_title doc == "" then T.take 50 (view du_abstract doc) else view du_title doc , _hd_title = Just $ if view du_title doc == "" then T.take 50 (view du_abstract doc) else view du_title doc
, _hd_authors = Just $ view du_authors doc , _hd_authors = Just $ view du_authors doc
......
...@@ -10,23 +10,23 @@ Portability : POSIX ...@@ -10,23 +10,23 @@ Portability : POSIX
-} -}
{-# LANGUAGE MonoLocalBinds #-} {-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.DocumentsFromWriteNodes module Gargantext.API.Node.DocumentsFromWriteNodes
where where
import Conduit import Conduit ( yieldMany )
import Data.Aeson (defaultOptions, genericParseJSON, genericToJSON) import Control.Lens ((^.))
import Data.Aeson ( genericParseJSON, defaultOptions, genericToJSON, FromJSON(parseJSON), ToJSON(toJSON) )
import Data.List qualified as List import Data.List qualified as List
import Data.Swagger import Data.Swagger ( ToSchema )
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.API.Admin.Auth.Types import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser, auth_node_id, auth_user_id )
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..)) import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings) import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Ngrams (commitStatePatch) import Gargantext.API.Ngrams (commitStatePatch, Versioned(..))
import Gargantext.API.Prelude (GargM) import Gargantext.API.Prelude (GargM)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Ngrams.Types (Versioned(..)) import Gargantext.Core.Ngrams.Types (Versioned(..))
...@@ -39,13 +39,13 @@ import Gargantext.Core.Types.Individu (User(..)) ...@@ -39,13 +39,13 @@ import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText) import Gargantext.Database.Action.Flow (flowDataText)
import Gargantext.Database.Action.Flow.Types (DataText(..), FlowCmdM) import Gargantext.Database.Action.Flow.Types (DataText(..), FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..), getHyperdataFrameContents )
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node ( NodeId, Node, NodeType(..) )
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith, getOrMkList) import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith, getOrMkList)
import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date) import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..)) import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant import Servant ( JSON, Summary, type (:>), HasServer(ServerT) )
------------------------------------------------------------------------ ------------------------------------------------------------------------
type API = Summary " Documents from Write nodes." type API = Summary " Documents from Write nodes."
...@@ -106,7 +106,7 @@ documentsFromWriteNodes authenticatedUser nId Params { selection, lang, paragrap ...@@ -106,7 +106,7 @@ documentsFromWriteNodes authenticatedUser nId Params { selection, lang, paragrap
pure (node, contents) pure (node, contents)
) frameWrites ) frameWrites
let paragraphs' = fromMaybe (7 :: Int) $ (readMaybe $ T.unpack paragraphs) let paragraphs' = fromMaybe (7 :: Int) $ readMaybe (T.unpack paragraphs)
let parsedE = (\(node, contents) let parsedE = (\(node, contents)
-> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents -> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
let parsed = List.concat $ rights parsedE let parsed = List.concat $ rights parsedE
...@@ -159,8 +159,6 @@ hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) = ...@@ -159,8 +159,6 @@ hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just $ show Notes Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just $ show Notes
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just t , _hd_title = Just t
, _hd_authors = Just authors' , _hd_authors = Just authors'
......
...@@ -16,13 +16,13 @@ Here is writtent a common interface. ...@@ -16,13 +16,13 @@ Here is writtent a common interface.
module Gargantext.Core.Ext.IMTUser -- (deserialiseImtUsersFromFile) module Gargantext.Core.Ext.IMTUser -- (deserialiseImtUsersFromFile)
where where
import Codec.Serialise import Codec.Serialise ( Serialise, deserialise )
import Data.ByteString.Lazy qualified as BL import Data.ByteString.Lazy qualified as BL
import Data.Csv import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header )
import Data.Text qualified as T import Data.Text qualified as T
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Vector qualified as Vector import Data.Vector qualified as Vector
import Gargantext.Core.Text.Corpus.Parsers.CSV import Gargantext.Core.Text.Corpus.Parsers.CSV ( csvDecodeOptions, Delimiter(Tab) )
import Gargantext.Database.Admin.Types.Hyperdata.Contact import Gargantext.Database.Admin.Types.Hyperdata.Contact
import Gargantext.Prelude import Gargantext.Prelude
import System.FilePath.Posix (takeExtension) import System.FilePath.Posix (takeExtension)
...@@ -156,11 +156,9 @@ imtUser2gargContact (IMTUser { id ...@@ -156,11 +156,9 @@ imtUser2gargContact (IMTUser { id
, _hc_where = [ou] , _hc_where = [ou]
, _hc_title = title , _hc_title = title
, _hc_source = entite , _hc_source = entite
, _hc_lastValidation = date_modification , _hc_lastValidation = date_modification }
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
where where
title = (<>) <$> (fmap (\p -> p <> " ") prenom) <*> nom title = (<>) <$> fmap (\p -> p <> " ") prenom <*> nom
qui = ContactWho { _cw_id = id qui = ContactWho { _cw_id = id
, _cw_firstName = prenom , _cw_firstName = prenom
, _cw_lastName = nom , _cw_lastName = nom
...@@ -182,7 +180,7 @@ imtUser2gargContact (IMTUser { id ...@@ -182,7 +180,7 @@ imtUser2gargContact (IMTUser { id
-- meta = ContactMetaData (Just "IMT annuaire") date_modification' -- meta = ContactMetaData (Just "IMT annuaire") date_modification'
toList' Nothing = [] toList' Nothing = []
toList' (Just x) = [x] toList' (Just x) = [x]
...@@ -15,7 +15,6 @@ Portability : POSIX ...@@ -15,7 +15,6 @@ Portability : POSIX
module Gargantext.Core.Flow.Types where module Gargantext.Core.Flow.Types where
import Control.Lens import Control.Lens
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Schema.Node (node_hash_id) import Gargantext.Database.Schema.Node (node_hash_id)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -25,14 +24,6 @@ class UniqId a ...@@ -25,14 +24,6 @@ class UniqId a
where where
uniqId :: Lens' a (Maybe Hash) uniqId :: Lens' a (Maybe Hash)
instance UniqId HyperdataDocument
where
uniqId = hd_uniqId
instance UniqId HyperdataContact
where
uniqId = hc_uniqId
instance UniqId (Node a) instance UniqId (Node a)
where where
uniqId = node_hash_id uniqId = node_hash_id
......
...@@ -125,7 +125,7 @@ matrixEye n' = ...@@ -125,7 +125,7 @@ matrixEye n' =
diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a) diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
diagNull n m = trace ("diagNull") $ zipWith (*) m (matrixEye n) diagNull n m = zipWith (*) m (matrixEye n)
-- Returns an N-dimensional array with the values of x for the indices where -- Returns an N-dimensional array with the values of x for the indices where
......
...@@ -19,7 +19,7 @@ import Data.Array.Accelerate (Matrix) ...@@ -19,7 +19,7 @@ import Data.Array.Accelerate (Matrix)
import Data.Swagger import Data.Swagger
import Data.Text (Text) import Data.Text (Text)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.Core.Methods.Similarities.Accelerate.Conditional (measureConditional) import Gargantext.Core.Methods.Similarities.Accelerate.Conditional (measureConditional')
import Gargantext.Core.Methods.Similarities.Accelerate.Distributional (logDistributional2) import Gargantext.Core.Methods.Similarities.Accelerate.Distributional (logDistributional2)
-- import Gargantext.Core.Text.Metrics.Count (coocOn) -- import Gargantext.Core.Text.Metrics.Count (coocOn)
-- import Gargantext.Core.Viz.Graph.Index -- import Gargantext.Core.Viz.Graph.Index
...@@ -35,13 +35,13 @@ data Similarity = Conditional | Distributional ...@@ -35,13 +35,13 @@ data Similarity = Conditional | Distributional
deriving (Show, Eq) deriving (Show, Eq)
measure :: Similarity -> Matrix Int -> Matrix Double measure :: Similarity -> Matrix Int -> Matrix Double
measure Conditional x = measureConditional x measure Conditional x = measureConditional' x
measure Distributional x = logDistributional2 x measure Distributional x = logDistributional2 x
------------------------------------------------------------------------ ------------------------------------------------------------------------
withMetric :: GraphMetric -> Similarity withMetric :: GraphMetric -> Similarity
withMetric Order1 = Conditional withMetric Order1 = Conditional
withMetric _ = Distributional withMetric _ = Distributional
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Order2 type is for keeping Database json compatibility -- Order2 type is for keeping Database json compatibility
......
...@@ -48,8 +48,30 @@ import qualified Gargantext.Prelude as P ...@@ -48,8 +48,30 @@ import qualified Gargantext.Prelude as P
-- Filtered with MiniMax. -- Filtered with MiniMax.
measureConditional :: Matrix Int -> Matrix Double measureConditional :: Matrix Int -> Matrix Double
measureConditional m = run $ x $ map fromIntegral $ use m measureConditional m = run $ x $ map fromIntegral $ use m
where
x :: Acc (Matrix Double) -> Acc (Matrix Double)
x mat = maxOnly $ diagNull r $ divByDiag r mat
r :: Dim
r = dim m
-- Maybe we should use backpermute to accelerate it (no need to access to cells then
maxOnly :: Acc (SymetricMatrix Double) -> Acc (Matrix Double)
maxOnly m' = generate (shape m')
((\coord
-> let (Z :. (i :: Exp Int) :. (j :: Exp Int)) = unlift coord
ij = m' ! (lift $ (Z :. i :. j))
ji = m' ! (lift $ (Z :. j :. i))
in
ifThenElse (ij > ji) ij (constant 0)
)
)
measureConditional' :: Matrix Int -> Matrix Double
measureConditional' m = run $ x $ map fromIntegral $ use m
where where
x :: Acc (Matrix Double) -> Acc (Matrix Double) x :: Acc (Matrix Double) -> Acc (Matrix Double)
x mat = matMiniMax $ matProba r mat x mat = matMiniMax $ matProba r mat
...@@ -58,6 +80,7 @@ measureConditional m = run $ x $ map fromIntegral $ use m ...@@ -58,6 +80,7 @@ measureConditional m = run $ x $ map fromIntegral $ use m
r = dim m r = dim m
-- | To filter the nodes -- | To filter the nodes
-- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called -- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
-- "confidence" , is the maximum probability between @i@ and @j@ to see -- "confidence" , is the maximum probability between @i@ and @j@ to see
......
...@@ -21,7 +21,9 @@ import Data.HashMap.Strict qualified as Map ...@@ -21,7 +21,9 @@ import Data.HashMap.Strict qualified as Map
import Data.Set qualified as Set import Data.Set qualified as Set
import Gargantext.Core.Viz.Graph.Utils (getMax) import Gargantext.Core.Viz.Graph.Utils (getMax)
import Gargantext.Prelude import Gargantext.Prelude
import Data.Map.Strict qualified as M
import Gargantext.Core.Methods.Similarities.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Viz.Graph.Index (score, MatrixShape(..))
type HashMap = Map.HashMap type HashMap = Map.HashMap
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -35,7 +37,11 @@ conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq) ...@@ -35,7 +37,11 @@ conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq)
where where
results' = [ let results' = [ let
ij = (/) <$> Map.lookup (i,j) m <*> Map.lookup (j,j) m ij = (/) <$> Map.lookup (i,j) m <*> Map.lookup (j,j) m
ji = (/) <$> Map.lookup (j,i) m <*> Map.lookup (i,i) m -- proba of i|j, high values means i is more generic than j
ji = (/) <$> Map.lookup (i,j) m <*> Map.lookup (i,i) m
-- proba of j|i, high values means j is more generic than i
in getMax (i,j) ij ji in getMax (i,j) ij ji
| i <- keys | i <- keys
...@@ -49,4 +55,45 @@ conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq) ...@@ -49,4 +55,45 @@ conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq)
keys = Set.toList $ Set.fromList (x <> y) keys = Set.toList $ Set.fromList (x <> y)
(x,y) = unzip $ Map.keys m (x,y) = unzip $ Map.keys m
{-
Only for TESTs
-}
conditional_test :: Bool
conditional_test = conditional_test1 == conditional_test2
conditional_test1 :: HashMap (Text,Text) Double
conditional_test1 = conditional $ Map.fromList example_matrix
conditional_test2 :: HashMap (Text,Text) Double
conditional_test2 = Map.fromList
$ M.toList
$ M.filter (>0)
$ score Square measureConditional
$ M.fromList example_matrix
example_matrix :: [((Text,Text), Int)]
example_matrix = concat [
compte "polygon" "polygon" 19
, compte "polygon" "square" 6
, compte "polygon" "triangle" 10
, compte "polygon" "losange" 3
, compte "triangle" "triangle" 11
, compte "square" "square" 7
, compte "losange" "losange" 15
, compte "shape" "shape" 10
, compte "circle" "circle" 6
, compte "shape" "circle" 3
, compte "shape" "square" 2
, compte "polygon" "shape" 10
]
where
compte a b c = if a /= b
then [((a,b),c), ((b,a), c)]
else [((a,b),c)]
...@@ -116,21 +116,20 @@ where ...@@ -116,21 +116,20 @@ where
import Codec.Serialise (Serialise()) import Codec.Serialise (Serialise())
import Control.Category ((>>>)) import Control.Category ((>>>))
import Control.Lens (makePrisms, Iso', iso, from, (.=), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^?), (%~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~), over) import Control.Lens (makeLenses, makePrisms, Iso', iso, from, (.~), (.=), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^?), (%~), (.~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~), over)
import Control.Monad.State import Data.Aeson hiding ((.=))
import Data.Aeson qualified as Aeson import Data.Aeson.TH (deriveJSON)
import Data.Aeson (FromJSONKey(..), FromJSONKeyFunction(..), ToJSONKey(..), decode, encode, genericFromJSONKey, defaultJSONKeyOptions, genericToJSONKey, defaultJSONKeyOptions, genericParseJSON, genericToEncoding, genericToJSON, (.:), (.:?), withObject, object) import Data.Csv (defaultEncodeOptions, encodeByNameWith, header, namedRecord, EncodeOptions(..), NamedRecord, Quoting(QuoteNone))
import Data.Foldable import Data.Csv qualified as Csv
import Data.HashMap.Strict.InsOrd qualified as InsOrdHashMap import Data.HashMap.Strict.InsOrd qualified as InsOrdHashMap
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM import Data.Map.Strict.Patch qualified as PM
import Data.Monoid
import Data.Patch.Class (Replace, replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, MaybePatch(Mod), unMod, old, new) import Data.Patch.Class (Replace, replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, MaybePatch(Mod), unMod, old, new)
import Data.Set qualified as Set import Data.Set qualified as Set
import Data.String (IsString(..)) import Data.String (IsString(..))
import Data.Swagger hiding (version, patch) import Data.Swagger ( NamedSchema(NamedSchema), declareSchemaRef, genericDeclareNamedSchema, SwaggerType(SwaggerObject), ToParamSchema, ToSchema(..), HasProperties(properties), HasRequired(required), HasType(type_) )
import Data.Text (pack, strip) import Data.Text qualified as T
import Data.Validity import Data.Validity ( Validity(..) )
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField) import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField) import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField)
import Gargantext.Core.Text (size) import Gargantext.Core.Text (size)
...@@ -139,10 +138,12 @@ import Gargantext.Core.Types.Query (Limit, Offset, MaxSize, MinSize) ...@@ -139,10 +138,12 @@ import Gargantext.Core.Types.Query (Limit, Offset, MaxSize, MinSize)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node (ContextId) import Gargantext.Database.Admin.Types.Node (ContextId)
import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM') import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams import Gargantext.Database.Schema.Ngrams qualified as TableNgrams
import Gargantext.Prelude hiding (IsString, hash, from, rem, replace, to) import Gargantext.Prelude hiding (IsString, hash, from, replace, to)
import Gargantext.Prelude.Crypto.Hash (IsHashable(..)) import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
import Servant hiding (Patch) import Gargantext.Utils.Servant (CSV, ZIP)
import Gargantext.Utils.Zip (zipContentsPure)
import Servant ( FromHttpApiData(parseUrlPiece), ToHttpApiData(toUrlPiece), Required, Strict, QueryParam', MimeRender(.. ))
import Servant.Job.Utils (jsonOptions) import Servant.Job.Utils (jsonOptions)
import Test.QuickCheck (elements, frequency) import Test.QuickCheck (elements, frequency)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
...@@ -183,7 +184,7 @@ instance FromHttpApiData TabType where ...@@ -183,7 +184,7 @@ instance FromHttpApiData TabType where
parseUrlPiece _ = Left "Unexpected value of TabType" parseUrlPiece _ = Left "Unexpected value of TabType"
instance ToHttpApiData TabType where instance ToHttpApiData TabType where
toUrlPiece = pack . show toUrlPiece = T.pack . show
instance ToParamSchema TabType instance ToParamSchema TabType
instance ToJSON TabType instance ToJSON TabType
instance FromJSON TabType instance FromJSON TabType
...@@ -232,9 +233,9 @@ instance IsHashable NgramsTerm where ...@@ -232,9 +233,9 @@ instance IsHashable NgramsTerm where
instance Monoid NgramsTerm where instance Monoid NgramsTerm where
mempty = NgramsTerm "" mempty = NgramsTerm ""
instance FromJSONKey NgramsTerm where instance FromJSONKey NgramsTerm where
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ T.strip t
instance IsString NgramsTerm where instance IsString NgramsTerm where
fromString s = NgramsTerm $ pack s fromString s = NgramsTerm $ T.pack s
data RootParent = RootParent data RootParent = RootParent
...@@ -398,7 +399,7 @@ instance FromHttpApiData OrderBy ...@@ -398,7 +399,7 @@ instance FromHttpApiData OrderBy
parseUrlPiece _ = Left "Unexpected value of OrderBy" parseUrlPiece _ = Left "Unexpected value of OrderBy"
instance ToHttpApiData OrderBy where instance ToHttpApiData OrderBy where
toUrlPiece = pack . show toUrlPiece = T.pack . show
instance ToParamSchema OrderBy instance ToParamSchema OrderBy
instance FromJSON OrderBy instance FromJSON OrderBy
...@@ -418,6 +419,27 @@ data NgramsSearchQuery = NgramsSearchQuery ...@@ -418,6 +419,27 @@ data NgramsSearchQuery = NgramsSearchQuery
------------------------------------------------------------------------ ------------------------------------------------------------------------
type NgramsTableMap = Map NgramsTerm NgramsRepoElement type NgramsTableMap = Map NgramsTerm NgramsRepoElement
-- CSV:
-- header: status\tlabel\tforms
-- item: map\taccountability\taccounting|&|accoutns|&|account
instance MimeRender CSV NgramsTableMap where
-- mimeRender _ _val = encode ([] :: [(Text, Text)])
mimeRender _ val = encodeByNameWith encOptions (header ["status", "label", "forms"]) $ fn <$> Map.toList val
where
encOptions = defaultEncodeOptions { encDelimiter = fromIntegral (ord '\t')
, encQuoting = QuoteNone }
fn :: (NgramsTerm, NgramsRepoElement) -> NamedRecord
fn (NgramsTerm term, NgramsRepoElement { _nre_list, _nre_children }) =
namedRecord [ "status" Csv..= toText _nre_list
, "label" Csv..= term
, "forms" Csv..= T.intercalate "|&|" (unNgramsTerm <$> mSetToList _nre_children)]
toText :: ListType -> Text
toText CandidateTerm = "candidate"
toText MapTerm = "map"
toText StopTerm = "stop"
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- On the Client side: -- On the Client side:
--data Action = InGroup NgramsId NgramsId --data Action = InGroup NgramsId NgramsId
...@@ -870,6 +892,22 @@ instance ToSchema UpdateTableNgramsCharts where ...@@ -870,6 +892,22 @@ instance ToSchema UpdateTableNgramsCharts where
------------------------------------------------------------------------ ------------------------------------------------------------------------
type NgramsList = (Map TableNgrams.NgramsType (Versioned NgramsTableMap)) type NgramsList = (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
-- | Same as NgramsList, but wraps node_id so that the inner .json file can have proper name
data NgramsListZIP =
NgramsListZIP { _nlz_nl :: NgramsList
, _nlz_list_id :: ListId } deriving (Generic)
instance ToSchema NgramsListZIP where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nlz_")
nlzFileName :: NgramsListZIP -> Text
nlzFileName (NgramsListZIP { .. }) = "GarganText_NgramsList-" <> show _nlz_list_id <> ".json"
instance MimeRender ZIP NgramsListZIP where
mimeRender _ nlz@(NgramsListZIP { .. }) =
zipContentsPure (T.unpack $ nlzFileName nlz) (encode _nlz_nl)
-- --
-- Serialise instances -- Serialise instances
-- --
......
...@@ -19,13 +19,13 @@ module Gargantext.Core.Text.Corpus.API.Arxiv ...@@ -19,13 +19,13 @@ module Gargantext.Core.Text.Corpus.API.Arxiv
) where ) where
import Arxiv qualified as Arxiv import Arxiv qualified as Arxiv
import Conduit import Conduit ( ConduitT, (.|), mapC, takeC )
import Data.Text (unpack) import Data.Text (unpack)
import Data.Text qualified as Text import Data.Text qualified as Text
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..)) import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (get) import Gargantext.Prelude hiding (get)
import Network.Api.Arxiv qualified as Ax import Network.Api.Arxiv qualified as Ax
...@@ -46,7 +46,7 @@ convertQuery q = mkQuery (interpretQuery q transformAST) ...@@ -46,7 +46,7 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
transformAST ast = case ast of transformAST ast = case ast of
BAnd sub (BConst (Negative term)) BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated. -- The second term become positive, so that it can be translated.
-> Ax.AndNot <$> (transformAST sub) <*> transformAST (BConst (Positive term)) -> Ax.AndNot <$> transformAST sub <*> transformAST (BConst (Positive term))
BAnd term1 (BNot term2) BAnd term1 (BNot term2)
-> Ax.AndNot <$> transformAST term1 <*> transformAST term2 -> Ax.AndNot <$> transformAST term1 <*> transformAST term2
BAnd sub1 sub2 BAnd sub1 sub2
...@@ -88,7 +88,7 @@ toDoc l (Arxiv.Result { abstract ...@@ -88,7 +88,7 @@ toDoc l (Arxiv.Result { abstract
, authors = aus , authors = aus
--, categories --, categories
, doi , doi
, id -- , id
, journal , journal
--, primaryCategory --, primaryCategory
, publication_date , publication_date
...@@ -99,8 +99,6 @@ toDoc l (Arxiv.Result { abstract ...@@ -99,8 +99,6 @@ toDoc l (Arxiv.Result { abstract
) = HyperdataDocument { _hd_bdd = Just "Arxiv" ) = HyperdataDocument { _hd_bdd = Just "Arxiv"
, _hd_doi = Just $ Text.pack doi , _hd_doi = Just $ Text.pack doi
, _hd_url = Just $ Text.pack url , _hd_url = Just $ Text.pack url
, _hd_uniqId = Just $ Text.pack id
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just $ Text.pack title , _hd_title = Just $ Text.pack title
, _hd_authors = authors aus , _hd_authors = authors aus
...@@ -118,13 +116,10 @@ toDoc l (Arxiv.Result { abstract ...@@ -118,13 +116,10 @@ toDoc l (Arxiv.Result { abstract
where where
authors :: [Ax.Author] -> Maybe Text authors :: [Ax.Author] -> Maybe Text
authors [] = Nothing authors [] = Nothing
authors aus' = Just $ (Text.intercalate ", ") authors aus' = Just $ Text.intercalate ", "
$ map Text.pack $ map (Text.pack . Ax.auName) aus'
$ map Ax.auName aus'
institutes :: [Ax.Author] -> Maybe Text institutes :: [Ax.Author] -> Maybe Text
institutes [] = Nothing institutes [] = Nothing
institutes aus' = Just $ (Text.intercalate ", ") institutes aus' = Just $ Text.intercalate ", "
$ (map (Text.replace ", " " - ")) $ map ((Text.replace ", " " - " . Text.pack) . Ax.auFil) aus'
$ map Text.pack
$ map Ax.auFil aus'
...@@ -9,7 +9,7 @@ Portability : POSIX ...@@ -9,7 +9,7 @@ Portability : POSIX
-} -}
module Gargantext.Core.Text.Corpus.API.EPO where module Gargantext.Core.Text.Corpus.API.EPO where
import Conduit import Conduit ( ConduitT, (.|), mapC )
import Data.LanguageCodes (ISO639_1) import Data.LanguageCodes (ISO639_1)
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Text qualified as T import Data.Text qualified as T
...@@ -17,7 +17,7 @@ import EPO.API.Client.Types qualified as EPO ...@@ -17,7 +17,7 @@ import EPO.API.Client.Types qualified as EPO
import EPO.API.Client.Implementation qualified as EPO import EPO.API.Client.Implementation qualified as EPO
import Gargantext.Core (iso639ToText) import Gargantext.Core (iso639ToText)
import Gargantext.Core.Text.Corpus.Query qualified as Corpus import Gargantext.Core.Text.Corpus.Query qualified as Corpus
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Network.URI (parseURI) import Network.URI (parseURI)
import Protolude import Protolude
import Servant.Client.Core (ClientError(ConnectionError)) import Servant.Client.Core (ClientError(ConnectionError))
...@@ -39,7 +39,7 @@ get (Just authKey) epoAPIUrl q lang mLimit = do ...@@ -39,7 +39,7 @@ get (Just authKey) epoAPIUrl q lang mLimit = do
Just apiUrl -> do Just apiUrl -> do
eRes <- EPO.searchEPOAPIC apiUrl authKey Nothing limit (Corpus.getRawQuery q) eRes <- EPO.searchEPOAPIC apiUrl authKey Nothing limit (Corpus.getRawQuery q)
pure $ (\(total, itemsC) -> (Just total, itemsC .| mapC (toDoc lang))) <$> eRes pure $ (\(total, itemsC) -> (Just total, itemsC .| mapC (toDoc lang))) <$> eRes
-- EPO.Paginated { .. } <- EPO.searchEPOAPI apiUrl authKey 1 20 (Corpus.getRawQuery q) -- EPO.Paginated { .. } <- EPO.searchEPOAPI apiUrl authKey 1 20 (Corpus.getRawQuery q)
-- pure $ Right ( Just $ fromIntegral total, yieldMany items .| mapC (toDoc lang) ) -- pure $ Right ( Just $ fromIntegral total, yieldMany items .| mapC (toDoc lang) )
...@@ -48,8 +48,6 @@ toDoc lang (EPO.HyperdataDocument { .. }) = ...@@ -48,8 +48,6 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
HyperdataDocument { _hd_bdd = Just "EPO" HyperdataDocument { _hd_bdd = Just "EPO"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = id
, _hd_uniqIdBdd = id
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Map.lookup lang titles , _hd_title = Map.lookup lang titles
, _hd_authors = authors_ , _hd_authors = authors_
...@@ -66,10 +64,10 @@ toDoc lang (EPO.HyperdataDocument { .. }) = ...@@ -66,10 +64,10 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
, _hd_language_iso2 = Just $ iso639ToText lang } , _hd_language_iso2 = Just $ iso639ToText lang }
where where
authors_ = if authors == [] authors_ = if null authors
then Nothing then Nothing
else Just (T.intercalate ", " authors) else Just (T.intercalate ", " authors)
-- EPO.withAuthKey authKey $ \token -> do -- EPO.withAuthKey authKey $ \token -> do
-- let range = EPO.Range { rBegin = 1, rEnd = limit } -- let range = EPO.Range { rBegin = 1, rEnd = limit }
-- (len, docsC) <- EPO.searchPublishedDataWithFetchC token (Just $ Corpus.getRawQuery q) (Just range) -- (len, docsC) <- EPO.searchPublishedDataWithFetchC token (Just $ Corpus.getRawQuery q) (Just range)
......
...@@ -12,14 +12,12 @@ Portability : POSIX ...@@ -12,14 +12,12 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Hal module Gargantext.Core.Text.Corpus.API.Hal
where where
import Conduit import Conduit ( ConduitT, (.|), mapMC )
import Data.Either
import Data.LanguageCodes qualified as ISO639 import Data.LanguageCodes qualified as ISO639
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Maybe import Data.Text (pack)
import Data.Text (pack, intercalate)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Defaults qualified as Defaults import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (intercalate) import Gargantext.Prelude hiding (intercalate)
import HAL qualified as HAL import HAL qualified as HAL
...@@ -30,7 +28,7 @@ import Servant.Client (ClientError) ...@@ -30,7 +28,7 @@ import Servant.Client (ClientError)
get :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO [HyperdataDocument] get :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO [HyperdataDocument]
get la q ml = do get la q ml = do
eDocs <- HAL.getMetadataWith [q] (Just 0) (fromIntegral <$> ml) la eDocs <- HAL.getMetadataWith [q] (Just 0) (fromIntegral <$> ml) la
either (panicTrace . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs either (panicTrace . pack . show) (mapM (toDoc' la) . HAL._docs) eDocs
getC :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ())) getC :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = do getC la q ml = do
...@@ -45,18 +43,16 @@ toDoc' la (HAL.Corpus { .. }) = do ...@@ -45,18 +43,16 @@ toDoc' la (HAL.Corpus { .. }) = do
-- printDebug "[toDoc corpus] h" h -- printDebug "[toDoc corpus] h" h
let mDateS = maybe (Just $ pack $ show Defaults.year) Just _corpus_date let mDateS = maybe (Just $ pack $ show Defaults.year) Just _corpus_date
let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
let abstractDefault = intercalate " " _corpus_abstract let abstractDefault = unwords _corpus_abstract
let abstract = case la of let abstract = case la of
Nothing -> abstractDefault Nothing -> abstractDefault
Just l -> fromMaybe abstractDefault (intercalate " " <$> Map.lookup l _corpus_abstract_lang_map) Just l -> maybe abstractDefault unwords (Map.lookup l _corpus_abstract_lang_map)
pure HyperdataDocument { _hd_bdd = Just "Hal" pure HyperdataDocument { _hd_bdd = Just "Hal"
, _hd_doi = Just $ pack $ show _corpus_docid , _hd_doi = Just $ pack $ show _corpus_docid
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just $ intercalate " " _corpus_title , _hd_title = Just $ unwords _corpus_title
, _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" _corpus_authors_names 1 , _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" _corpus_authors_names
, _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" $ _corpus_authors_affiliations <> map show _corpus_struct_id , _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" $ _corpus_authors_affiliations <> map show _corpus_struct_id
, _hd_source = Just $ maybe "Nothing" identity _corpus_source , _hd_source = Just $ maybe "Nothing" identity _corpus_source
, _hd_abstract = Just abstract , _hd_abstract = Just abstract
......
...@@ -18,12 +18,12 @@ import Gargantext.Core (Lang(..)) ...@@ -18,12 +18,12 @@ import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers (cleanText) import Gargantext.Core.Text.Corpus.Parsers (cleanText)
import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv) import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Defaults qualified as Defaults import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (get) import Gargantext.Prelude hiding (get)
import Isidore qualified as Isidore import Isidore qualified as Isidore
import Isidore.Client import Isidore.Client
import Servant.Client import Servant.Client ( ClientError(DecodeFailure) )
-- | TODO work with the ServantErr -- | TODO work with the ServantErr
get :: Lang -> Maybe Isidore.Limit get :: Lang -> Maybe Isidore.Limit
...@@ -40,7 +40,7 @@ get la l q a = do ...@@ -40,7 +40,7 @@ get la l q a = do
iDocs <- either printErr _content <$> Isidore.get l q a iDocs <- either printErr _content <$> Isidore.get l q a
hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs) hDocs <- mapM (isidoreToDoc la) (toIsidoreDocs iDocs)
pure hDocs pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
...@@ -54,7 +54,7 @@ isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument ...@@ -54,7 +54,7 @@ isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc l (IsidoreDoc t a d u s as) = do isidoreToDoc l (IsidoreDoc t a d u s as) = do
let let
author :: Author -> Text author :: Author -> Text
author (Author fn ln) = (_name fn) <> ", " <> (_name ln) author (Author fn ln) = _name fn <> ", " <> _name ln
author (Authors aus) = Text.intercalate ". " $ map author aus author (Authors aus) = Text.intercalate ". " $ map author aus
creator2text :: Creator -> Text creator2text :: Creator -> Text
...@@ -66,21 +66,19 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do ...@@ -66,21 +66,19 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText (OnlyText t2 ) = t2 langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
let mDateS = maybe (Just $ Text.pack $ show Defaults.year) (Just) d let mDateS = maybe (Just $ Text.pack $ show Defaults.year) Just d
let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
pure HyperdataDocument pure HyperdataDocument
{ _hd_bdd = Just "Isidore" { _hd_bdd = Just "Isidore"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = u , _hd_url = u
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just $ cleanText $ langText t , _hd_title = Just $ cleanText $ langText t
, _hd_authors = creator2text <$> as , _hd_authors = creator2text <$> as
, _hd_institutes = Nothing , _hd_institutes = Nothing
, _hd_source = Just $ maybe "Nothing" identity $ _sourceName <$> s , _hd_source = Just $ maybe "Nothing" (identity . _sourceName) s
, _hd_abstract = cleanText <$> langText <$> a , _hd_abstract = cleanText . langText <$> a
, _hd_publication_date = fmap (Text.pack . show) utcTime , _hd_publication_date = fmap (Text.pack . show) utcTime
, _hd_publication_year = pub_year , _hd_publication_year = pub_year
, _hd_publication_month = pub_month , _hd_publication_month = pub_month
......
...@@ -10,15 +10,15 @@ Portability : POSIX ...@@ -10,15 +10,15 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.OpenAlex where module Gargantext.Core.Text.Corpus.API.OpenAlex where
import Conduit import Conduit ( ConduitT, (.|), mapC, takeC )
import Data.LanguageCodes qualified as ISO639 import Data.LanguageCodes qualified as ISO639
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.Core (iso639ToText) import Gargantext.Core (iso639ToText)
import Gargantext.Core.Text.Corpus.Query as Corpus import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Protolude
import OpenAlex qualified as OA import OpenAlex qualified as OA
import OpenAlex.Types qualified as OA import OpenAlex.Types qualified as OA
import Protolude
import Servant.Client (ClientError) import Servant.Client (ClientError)
...@@ -38,8 +38,6 @@ toDoc (OA.Work { .. } ) = ...@@ -38,8 +38,6 @@ toDoc (OA.Work { .. } ) =
HyperdataDocument { _hd_bdd = Just "OpenAlex" HyperdataDocument { _hd_bdd = Just "OpenAlex"
, _hd_doi = doi , _hd_doi = doi
, _hd_url = url , _hd_url = url
, _hd_uniqId = Just id
, _hd_uniqIdBdd = Just id
, _hd_page = firstPage biblio , _hd_page = firstPage biblio
, _hd_title = title , _hd_title = title
, _hd_authors = authors authorships , _hd_authors = authors authorships
...@@ -56,25 +54,25 @@ toDoc (OA.Work { .. } ) = ...@@ -56,25 +54,25 @@ toDoc (OA.Work { .. } ) =
, _hd_language_iso2 = language } , _hd_language_iso2 = language }
where where
firstPage :: OA.Biblio -> Maybe Int firstPage :: OA.Biblio -> Maybe Int
firstPage OA.Biblio { first_page } = maybe Nothing readMaybe $ T.unpack <$> first_page firstPage OA.Biblio { first_page } = (readMaybe . T.unpack) =<< first_page
authors :: [OA.Authorship] -> Maybe Text authors :: [OA.Authorship] -> Maybe Text
authors [] = Nothing authors [] = Nothing
authors aus = Just $ T.intercalate ", " $ catMaybes (getDisplayName <$> aus) authors aus = Just $ T.intercalate ", " $ mapMaybe getDisplayName aus
where where
getDisplayName :: OA.Authorship -> Maybe Text getDisplayName :: OA.Authorship -> Maybe Text
getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn
institutes :: [OA.Authorship] -> Maybe Text institutes :: [OA.Authorship] -> Maybe Text
institutes [] = Nothing institutes [] = Nothing
institutes aus = Just $ T.intercalate ", " ((T.replace ", " " - ") . getInstitutesNames <$> aus) institutes aus = Just $ T.intercalate ", " (T.replace ", " " - " . getInstitutesNames <$> aus)
where where
getInstitutesNames OA.Authorship { institutions } = T.intercalate ", " $ getDisplayName <$> institutions getInstitutesNames OA.Authorship { institutions } = T.intercalate ", " $ getDisplayName <$> institutions
getDisplayName :: OA.DehydratedInstitution -> Text getDisplayName :: OA.DehydratedInstitution -> Text
getDisplayName OA.DehydratedInstitution { display_name = dn } = dn getDisplayName OA.DehydratedInstitution { display_name = dn } = dn
source :: Maybe Text source :: Maybe Text
source = maybe Nothing getSource primary_location source = getSource =<< primary_location
where where
getSource OA.Location { source = s } = getSourceDisplayName <$> s getSource OA.Location { source = s } = getSourceDisplayName <$> s
getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn
...@@ -20,13 +20,13 @@ module Gargantext.Core.Text.Corpus.API.Pubmed ...@@ -20,13 +20,13 @@ module Gargantext.Core.Text.Corpus.API.Pubmed
) )
where where
import Conduit import Conduit ( ConduitT, (.|), mapC, takeC )
import Data.Text qualified as Text import Data.Text qualified as Text
import Data.Text.Encoding qualified as TE import Data.Text.Encoding qualified as TE
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..)) import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (get) import Gargantext.Prelude hiding (get)
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape) import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
import PUBMED qualified as PubMed import PUBMED qualified as PubMed
...@@ -64,7 +64,7 @@ convertQuery q = ESearch (interpretQuery q transformAST) ...@@ -64,7 +64,7 @@ convertQuery q = ESearch (interpretQuery q transformAST)
transformAST ast = case ast of transformAST ast = case ast of
BAnd sub (BConst (Negative term)) BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated. -- The second term become positive, so that it can be translated.
-> (transformAST sub) <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term)) -> transformAST sub <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
BAnd term1 (BNot term2) BAnd term1 (BNot term2)
-> transformAST term1 <> [QN "+AND+NOT+"] <> transformAST term2 -> transformAST term1 <> [QN "+AND+NOT+"] <> transformAST term2
BAnd sub1 sub2 BAnd sub1 sub2
...@@ -108,14 +108,11 @@ get apiKey q l = do ...@@ -108,14 +108,11 @@ get apiKey q l = do
-- <$> PubMed.getMetadataWithC q l -- <$> PubMed.getMetadataWithC q l
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed { pubmed_id toDoc l (PubMedDoc.PubMed { pubmed_article = PubMedDoc.PubMedArticle t j as aus
, pubmed_article = PubMedDoc.PubMedArticle t j as aus
, pubmed_date = PubMedDoc.PubMedDate a y m d } , pubmed_date = PubMedDoc.PubMedDate a y m d }
) = HyperdataDocument { _hd_bdd = Just "PubMed" ) = HyperdataDocument { _hd_bdd = Just "PubMed"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Just $ Text.pack $ show pubmed_id
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = t , _hd_title = t
, _hd_authors = authors aus , _hd_authors = authors aus
...@@ -133,16 +130,14 @@ toDoc l (PubMedDoc.PubMed { pubmed_id ...@@ -133,16 +130,14 @@ toDoc l (PubMedDoc.PubMed { pubmed_id
where where
authors :: [PubMedDoc.Author] -> Maybe Text authors :: [PubMedDoc.Author] -> Maybe Text
authors [] = Nothing authors [] = Nothing
authors au = Just $ (Text.intercalate ", ") authors au = Just $ Text.intercalate ", "
$ catMaybes $ mapMaybe (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
$ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
institutes :: [PubMedDoc.Author] -> Maybe Text institutes :: [PubMedDoc.Author] -> Maybe Text
institutes [] = Nothing institutes [] = Nothing
institutes au = Just $ (Text.intercalate ", ") institutes au = Just $ Text.intercalate ", "
$ (map (Text.replace ", " " - ")) $ map (Text.replace ", " " - ")
$ catMaybes $ mapMaybe PubMedDoc.affiliation au
$ map PubMedDoc.affiliation au
abstract :: [Text] -> Maybe Text abstract :: [Text] -> Maybe Text
......
...@@ -51,8 +51,7 @@ import Gargantext.Core.Text.Corpus.Parsers.JSON (parseJSONC, parseIstex) ...@@ -51,8 +51,7 @@ import Gargantext.Core.Text.Corpus.Parsers.JSON (parseJSONC, parseIstex)
import Gargantext.Core.Text.Corpus.Parsers.RIS qualified as RIS import Gargantext.Core.Text.Corpus.Parsers.RIS qualified as RIS
import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich) import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
import Gargantext.Core.Text.Corpus.Parsers.WOS qualified as WOS import Gargantext.Core.Text.Corpus.Parsers.WOS qualified as WOS
import Gargantext.Core.Text.Corpus.Parsers.Types import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Query.Table.Ngrams (NgramsType(..)) import Gargantext.Database.Query.Table.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (show, undefined) import Gargantext.Prelude hiding (show, undefined)
import Gargantext.Utils.Zip qualified as UZip import Gargantext.Utils.Zip qualified as UZip
...@@ -82,10 +81,10 @@ parseFormatC :: MonadBaseControl IO m ...@@ -82,10 +81,10 @@ parseFormatC :: MonadBaseControl IO m
-> m (Either Text (Integer, ConduitT () HyperdataDocument IO ())) -> m (Either Text (Integer, ConduitT () HyperdataDocument IO ()))
parseFormatC CsvGargV3 Plain bs = do parseFormatC CsvGargV3 Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs let eParsedC = parseCsvC $ DBL.fromStrict bs
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC) pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
parseFormatC CsvHal Plain bs = do parseFormatC CsvHal Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs let eParsedC = parseCsvC $ DBL.fromStrict bs
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC) pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
parseFormatC Istex Plain bs = do parseFormatC Istex Plain bs = do
ep <- liftBase $ parseIstex EN $ DBL.fromStrict bs ep <- liftBase $ parseIstex EN $ DBL.fromStrict bs
pure $ (\p -> (1, yieldMany [p])) <$> ep pure $ (\p -> (1, yieldMany [p])) <$> ep
...@@ -114,15 +113,15 @@ parseFormatC Iramuteq Plain bs = do ...@@ -114,15 +113,15 @@ parseFormatC Iramuteq Plain bs = do
, yieldMany docs , yieldMany docs
.| mapC (map $ first Iramuteq.keys) .| mapC (map $ first Iramuteq.keys)
.| mapC (map $ both decodeUtf8) .| mapC (map $ both decodeUtf8)
.| mapMC ((toDoc Iramuteq) . (map (second (DT.replace "_" " ")))) .| mapMC (toDoc Iramuteq . map (second (DT.replace "_" " ")))
) )
) )
<$> eDocs <$> eDocs
parseFormatC JSON Plain bs = do parseFormatC JSON Plain bs = do
let eParsedC = parseJSONC $ DBL.fromStrict bs let eParsedC = parseJSONC $ DBL.fromStrict bs
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC) pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
parseFormatC ft ZIP bs = liftBase $ UZip.withZipFileBS bs $ do parseFormatC ft ZIP bs = liftBase $ UZip.withZipFileBS bs $ do
fileNames <- filter (filterZIPFileNameP ft) <$> DM.keys <$> getEntries fileNames <- filter (filterZIPFileNameP ft) . DM.keys <$> getEntries
printDebug "[parseFormatC] fileNames" fileNames printDebug "[parseFormatC] fileNames" fileNames
fileContents <- mapM getEntry fileNames fileContents <- mapM getEntry fileNames
--printDebug "[parseFormatC] fileContents" fileContents --printDebug "[parseFormatC] fileContents" fileContents
...@@ -139,19 +138,19 @@ parseFormatC ft ZIP bs = liftBase $ UZip.withZipFileBS bs $ do ...@@ -139,19 +138,19 @@ parseFormatC ft ZIP bs = liftBase $ UZip.withZipFileBS bs $ do
let contents' = snd <$> contents let contents' = snd <$> contents
let totalLength = sum lenghts let totalLength = sum lenghts
pure $ Right ( totalLength pure $ Right ( totalLength
, sequenceConduits contents' >> pure () ) -- .| mapM_C (printDebug "[parseFormatC] doc") , void (sequenceConduits contents') ) -- .| mapM_C (printDebug "[parseFormatC] doc")
_ -> pure $ Left $ DT.intercalate "\n" errs _ -> pure $ Left $ DT.intercalate "\n" errs
parseFormatC _ _ _ = pure $ Left "Not implemented" parseFormatC _ _ _ = pure $ Left "Not implemented"
filterZIPFileNameP :: FileType -> EntrySelector -> Bool filterZIPFileNameP :: FileType -> EntrySelector -> Bool
filterZIPFileNameP Istex f = (takeExtension (unEntrySelector f) == ".json") && filterZIPFileNameP Istex f = (takeExtension (unEntrySelector f) == ".json") &&
((unEntrySelector f) /= "manifest.json") (unEntrySelector f /= "manifest.json")
filterZIPFileNameP _ _ = True filterZIPFileNameP _ _ = True
etale :: [HyperdataDocument] -> [HyperdataDocument] etale :: [HyperdataDocument] -> [HyperdataDocument]
etale = concat . (map etale') etale = concatMap etale'
where where
etale' :: HyperdataDocument -> [HyperdataDocument] etale' :: HyperdataDocument -> [HyperdataDocument]
etale' h = map (\t -> h { _hd_abstract = Just t }) etale' h = map (\t -> h { _hd_abstract = Just t })
...@@ -226,8 +225,6 @@ toDoc ff d = do ...@@ -226,8 +225,6 @@ toDoc ff d = do
let hd = HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff let hd = HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
, _hd_doi = lookup "doi" d , _hd_doi = lookup "doi" d
, _hd_url = lookup "URL" d , _hd_url = lookup "URL" d
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = lookup "title" d , _hd_title = lookup "title" d
, _hd_authors = lookup "authors" d , _hd_authors = lookup "authors" d
...@@ -287,7 +284,7 @@ runParser format text = pure $ runParser' format text ...@@ -287,7 +284,7 @@ runParser format text = pure $ runParser' format text
runParser' :: FileType runParser' :: FileType
-> DB.ByteString -> DB.ByteString
-> (Either Text [[(DB.ByteString, DB.ByteString)]]) -> Either Text [[(DB.ByteString, DB.ByteString)]]
runParser' format text = first DT.pack $ parseOnly (withParser format) text runParser' format text = first DT.pack $ parseOnly (withParser format) text
openZip :: FilePath -> IO [DB.ByteString] openZip :: FilePath -> IO [DB.ByteString]
...@@ -311,5 +308,5 @@ clean txt = DBC.map clean' txt ...@@ -311,5 +308,5 @@ clean txt = DBC.map clean' txt
-- --
splitOn :: NgramsType -> Maybe Text -> Text -> [Text] splitOn :: NgramsType -> Maybe Text -> Text -> [Text]
splitOn Authors (Just "WOS") = (DT.splitOn "; ") splitOn Authors (Just "WOS") = DT.splitOn "; "
splitOn _ _ = (DT.splitOn ", ") splitOn _ _ = DT.splitOn ", "
...@@ -33,8 +33,8 @@ book2csv :: Int -> FileDir -> FileOut -> IO () ...@@ -33,8 +33,8 @@ book2csv :: Int -> FileDir -> FileOut -> IO ()
book2csv n f_in f_out = do book2csv n f_in f_out = do
files <- filesOf f_in files <- filesOf f_in
texts <- readPublis f_in files texts <- readPublis f_in files
let publis = List.concat $ map (file2publi n) texts let publis = concatMap (file2publi n) texts
let docs = map (\(y,p) -> publiToHyperdata y p) $ List.zip [1..] publis let docs = zipWith publiToHyperdata [1..] publis
DBL.writeFile f_out (hyperdataDocument2csv docs) DBL.writeFile f_out (hyperdataDocument2csv docs)
filesOf :: FileDir -> IO [FilePath] filesOf :: FileDir -> IO [FilePath]
...@@ -43,7 +43,7 @@ filesOf fd = List.sort -- sort by filenam ...@@ -43,7 +43,7 @@ filesOf fd = List.sort -- sort by filenam
<$> getDirectoryContents fd <$> getDirectoryContents fd
readPublis :: FileDir -> [FilePath] -> IO [(FilePath, Text)] readPublis :: FileDir -> [FilePath] -> IO [(FilePath, Text)]
readPublis fd fps = mapM (\fp -> DBL.readFile (fd <> fp) >>= \txt -> pure (fp, cs txt)) fps readPublis fd = mapM (\fp -> DBL.readFile (fd <> fp) >>= \txt -> pure (fp, cs txt))
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Main Types -- Main Types
...@@ -63,7 +63,7 @@ type FileDir = FilePath ...@@ -63,7 +63,7 @@ type FileDir = FilePath
--------------------------------------------------------------------- ---------------------------------------------------------------------
file2publi :: Int -> (FilePath, Text) -> [Publi] file2publi :: Int -> (FilePath, Text) -> [Publi]
file2publi n (fp,theText) = map (\(t,txt) -> Publi authors source t txt) theTexts file2publi n (fp,theText) = map (uncurry (Publi authors source)) theTexts
where where
theTexts = text2titleParagraphs n theText theTexts = text2titleParagraphs n theText
FileInfo authors source = fileNameInfo fp FileInfo authors source = fileNameInfo fp
...@@ -81,8 +81,6 @@ publiToHyperdata y (Publi a s t txt) = ...@@ -81,8 +81,6 @@ publiToHyperdata y (Publi a s t txt) =
HyperdataDocument { _hd_bdd = Just "Book File" HyperdataDocument { _hd_bdd = Just "Book File"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just t , _hd_title = Just t
, _hd_authors = Just (DT.concat a) , _hd_authors = Just (DT.concat a)
......
...@@ -15,7 +15,7 @@ CSV parser for Gargantext corpus files. ...@@ -15,7 +15,7 @@ CSV parser for Gargantext corpus files.
module Gargantext.Core.Text.Corpus.Parsers.CSV module Gargantext.Core.Text.Corpus.Parsers.CSV
where where
import Conduit import Conduit ( ConduitT, (.|), yieldMany, mapC )
import Data.ByteString qualified as BS import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as BL import Data.ByteString.Lazy qualified as BL
import Data.Csv (DecodeOptions(..), EncodeOptions(..), FromField, FromNamedRecord(..), Header, Parser, ToField(..), ToNamedRecord(..), (.:), (.=), decodeByNameWith, defaultDecodeOptions, defaultEncodeOptions, encodeByNameWith, header, namedRecord, parseField, parseNamedRecord, runParser) import Data.Csv (DecodeOptions(..), EncodeOptions(..), FromField, FromNamedRecord(..), Header, Parser, ToField(..), ToNamedRecord(..), (.:), (.=), decodeByNameWith, defaultDecodeOptions, defaultEncodeOptions, encodeByNameWith, header, namedRecord, parseField, parseNamedRecord, runParser)
...@@ -23,10 +23,11 @@ import Data.Text qualified as T ...@@ -23,10 +23,11 @@ import Data.Text qualified as T
import Data.Time.Segment (jour) import Data.Time.Segment (jour)
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Vector qualified as V import Data.Vector qualified as V
import Gargantext.Core.Text (sentences, unsentences) import Gargantext.Core.Text ( sentences, unsentences )
import Gargantext.Core.Text.Context (SplitContext(..), splitBy) import Gargantext.Core.Text.Context ( splitBy, SplitContext(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude import Gargantext.Prelude hiding (length, show)
import Protolude
--------------------------------------------------------------- ---------------------------------------------------------------
headerCsvGargV3 :: Header headerCsvGargV3 :: Header
...@@ -58,8 +59,6 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) = ...@@ -58,8 +59,6 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
HyperdataDocument { _hd_bdd = Just "CSV" HyperdataDocument { _hd_bdd = Just "CSV"
, _hd_doi = Just . T.pack . show $ did , _hd_doi = Just . T.pack . show $ did
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just dt , _hd_title = Just dt
, _hd_authors = Nothing , _hd_authors = Nothing
...@@ -91,11 +90,11 @@ toDocs v = V.toList ...@@ -91,11 +90,11 @@ toDocs v = V.toList
(V.enumFromN 1 (V.length v'')) v'' (V.enumFromN 1 (V.length v'')) v''
where where
v'' = V.foldl' (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps v'' = V.foldl' (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
seps= (V.fromList [Paragraphs 1, Sentences 3, Chars 3]) seps= V.fromList [Paragraphs 1, Sentences 3, Chars 3]
--------------------------------------------------------------- ---------------------------------------------------------------
fromDocs :: Vector CsvGargV3 -> Vector CsvDoc fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
fromDocs docs = V.map fromDocs' docs fromDocs = V.map fromDocs'
where where
fromDocs' (CsvGargV3 { .. }) = CsvDoc { csv_title = d_title fromDocs' (CsvGargV3 { .. }) = CsvDoc { csv_title = d_title
, csv_source = d_source , csv_source = d_source
...@@ -109,16 +108,11 @@ fromDocs docs = V.map fromDocs' docs ...@@ -109,16 +108,11 @@ fromDocs docs = V.map fromDocs' docs
-- | Split a document in its context -- | Split a document in its context
-- TODO adapt the size of the paragraph according to the corpus average -- TODO adapt the size of the paragraph according to the corpus average
splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc m splt doc = let docSize = (T.length $ csv_abstract doc) in splitDoc m splt doc =
if docSize > 1000 let docSize = (T.length $ csv_abstract doc) in
then if (docSize > 1000) && (mod (round m) docSize >= 10)
if (mod (round m) docSize) >= 10 then splitDoc' splt doc
then else V.fromList [doc]
splitDoc' splt doc
else
V.fromList [doc]
else
V.fromList [doc]
where where
splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc' contextSize (CsvDoc { .. }) = V.fromList $ [firstDoc] <> nextDocs splitDoc' contextSize (CsvDoc { .. }) = V.fromList $ [firstDoc] <> nextDocs
...@@ -150,7 +144,7 @@ unIntOrDec :: IntOrDec -> Int ...@@ -150,7 +144,7 @@ unIntOrDec :: IntOrDec -> Int
unIntOrDec (IntOrDec i) = i unIntOrDec (IntOrDec i) = i
instance FromField IntOrDec where instance FromField IntOrDec where
parseField s = case runParser (parseField s :: Parser Int) of parseField s = case runParser (parseField s :: Parser Int) of
Left _err -> IntOrDec <$> floor <$> (parseField s :: Parser Double) Left _err -> IntOrDec . floor <$> (parseField s :: Parser Double)
Right n -> pure $ IntOrDec n Right n -> pure $ IntOrDec n
instance ToField IntOrDec where instance ToField IntOrDec where
toField (IntOrDec i) = toField i toField (IntOrDec i) = toField i
...@@ -251,15 +245,15 @@ readByteStringStrict :: (FromNamedRecord a) ...@@ -251,15 +245,15 @@ readByteStringStrict :: (FromNamedRecord a)
-> Delimiter -> Delimiter
-> BS.ByteString -> BS.ByteString
-> Either Text (Header, Vector a) -> Either Text (Header, Vector a)
readByteStringStrict d ff = (readByteStringLazy d ff) . BL.fromStrict readByteStringStrict d ff = readByteStringLazy d ff . BL.fromStrict
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO use readFileLazy -- | TODO use readFileLazy
readCSVFile :: FilePath -> IO (Either Text (Header, Vector CsvDoc)) readCSVFile :: FilePath -> IO (Either Text (Header, Vector CsvDoc))
readCSVFile fp = do readCSVFile fp = do
result <- fmap (readCsvLazyBS Comma) $ BL.readFile fp result <- readCsvLazyBS Comma <$> BL.readFile fp
case result of case result of
Left _err -> fmap (readCsvLazyBS Tab) $ BL.readFile fp Left _err -> readCsvLazyBS Tab <$> BL.readFile fp
Right res -> pure $ Right res Right res -> pure $ Right res
...@@ -380,8 +374,6 @@ csvHal2doc (CsvHal { .. }) = ...@@ -380,8 +374,6 @@ csvHal2doc (CsvHal { .. }) =
HyperdataDocument { _hd_bdd = Just "CsvHal" HyperdataDocument { _hd_bdd = Just "CsvHal"
, _hd_doi = Just csvHal_doiId_s , _hd_doi = Just csvHal_doiId_s
, _hd_url = Just csvHal_url , _hd_url = Just csvHal_url
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just csvHal_title , _hd_title = Just csvHal_title
, _hd_authors = Just csvHal_authors , _hd_authors = Just csvHal_authors
...@@ -405,8 +397,6 @@ csv2doc (CsvDoc { .. }) ...@@ -405,8 +397,6 @@ csv2doc (CsvDoc { .. })
= HyperdataDocument { _hd_bdd = Just "CsvHal" = HyperdataDocument { _hd_bdd = Just "CsvHal"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just csv_title , _hd_title = Just csv_title
, _hd_authors = Just csv_authors , _hd_authors = Just csv_authors
...@@ -432,10 +422,10 @@ csv2doc (CsvDoc { .. }) ...@@ -432,10 +422,10 @@ csv2doc (CsvDoc { .. })
parseHal :: FilePath -> IO (Either Text [HyperdataDocument]) parseHal :: FilePath -> IO (Either Text [HyperdataDocument])
parseHal fp = do parseHal fp = do
r <- readCsvHal fp r <- readCsvHal fp
pure $ (V.toList . V.map csvHal2doc . snd) <$> r pure $ V.toList . V.map csvHal2doc . snd <$> r
parseHal' :: BL.ByteString -> Either Text [HyperdataDocument] parseHal' :: BL.ByteString -> Either Text [HyperdataDocument]
parseHal' bs = (V.toList . V.map csvHal2doc . snd) <$> readCsvHalLazyBS bs parseHal' bs = V.toList . V.map csvHal2doc . snd <$> readCsvHalLazyBS bs
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -453,7 +443,7 @@ parseCsv' bs = do ...@@ -453,7 +443,7 @@ parseCsv' bs = do
result = case readCsvLazyBS Comma bs of result = case readCsvLazyBS Comma bs of
Left _err -> readCsvLazyBS Tab bs Left _err -> readCsvLazyBS Tab bs
Right res -> Right res Right res -> Right res
(V.toList . V.map csv2doc . snd) <$> result V.toList . V.map csv2doc . snd <$> result
parseCsvC :: BL.ByteString parseCsvC :: BL.ByteString
-> Either Text (Integer, ConduitT () HyperdataDocument Identity ()) -> Either Text (Integer, ConduitT () HyperdataDocument Identity ())
......
...@@ -13,12 +13,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Gitlab ( ...@@ -13,12 +13,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Gitlab (
Issue(..), gitlabIssue2hyperdataDocument, readFile_Issues, readFile_IssuesAsDocs Issue(..), gitlabIssue2hyperdataDocument, readFile_Issues, readFile_IssuesAsDocs
) where ) where
import Data.Aeson import Data.Aeson ( FromJSON(parseJSON), decode, (.:), (.:?), withObject )
import Data.ByteString.Lazy qualified as DBL import Data.ByteString.Lazy qualified as DBL
import Data.Text qualified as DT import Data.Text qualified as DT
import Data.Time import Data.Time
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude import Gargantext.Prelude
data Issue = Issue { _issue_id :: !Int data Issue = Issue { _issue_id :: !Int
...@@ -42,8 +42,6 @@ gitlabIssue2hyperdataDocument issue = HyperdataDocument ...@@ -42,8 +42,6 @@ gitlabIssue2hyperdataDocument issue = HyperdataDocument
{ _hd_bdd = Nothing { _hd_bdd = Nothing
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just (_issue_title issue) , _hd_title = Just (_issue_title issue)
, _hd_authors = Nothing , _hd_authors = Nothing
......
...@@ -30,8 +30,8 @@ import Data.ByteString.Lazy qualified as DBL ...@@ -30,8 +30,8 @@ import Data.ByteString.Lazy qualified as DBL
import Data.JsonStream.Parser qualified as P import Data.JsonStream.Parser qualified as P
import Data.Text qualified as Text import Data.Text qualified as Text
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument)
import Gargantext.Database.GargDB import Gargantext.Database.GargDB ( ReadFile(..) )
import Gargantext.Prelude import Gargantext.Prelude
...@@ -43,14 +43,14 @@ data GrandDebatReference = GrandDebatReference ...@@ -43,14 +43,14 @@ data GrandDebatReference = GrandDebatReference
, createdAt :: !(Maybe Text) , createdAt :: !(Maybe Text)
, publishedAt :: !(Maybe Text) , publishedAt :: !(Maybe Text)
, updatedAt :: !(Maybe Text) , updatedAt :: !(Maybe Text)
, trashed :: !(Maybe Bool) , trashed :: !(Maybe Bool)
, trashedStatus :: !(Maybe Text) , trashedStatus :: !(Maybe Text)
, authorId :: !(Maybe Text) , authorId :: !(Maybe Text)
, authorType :: !(Maybe Text) , authorType :: !(Maybe Text)
, authorZipCode :: !(Maybe Text) , authorZipCode :: !(Maybe Text)
, responses :: !(Maybe [GrandDebatResponse]) , responses :: !(Maybe [GrandDebatResponse])
} }
deriving (Show, Generic) deriving (Show, Generic)
...@@ -77,8 +77,6 @@ instance ToHyperdataDocument GrandDebatReference ...@@ -77,8 +77,6 @@ instance ToHyperdataDocument GrandDebatReference
HyperdataDocument { _hd_bdd = Just "GrandDebat" HyperdataDocument { _hd_bdd = Just "GrandDebat"
, _hd_doi = id , _hd_doi = id
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = title , _hd_title = title
, _hd_authors = authorType , _hd_authors = authorType
...@@ -94,12 +92,10 @@ instance ToHyperdataDocument GrandDebatReference ...@@ -94,12 +92,10 @@ instance ToHyperdataDocument GrandDebatReference
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ Text.pack $ show FR } , _hd_language_iso2 = Just $ Text.pack $ show FR }
where where
toAbstract = (Text.intercalate " . ") . ((filter (/= "")) . (map toSentence)) toAbstract = Text.intercalate " . " . (filter (/= "") . map toSentence)
toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of
Nothing -> "" Nothing -> ""
Just r' -> case Text.length r' > 10 of Just r' -> if Text.length r' > 10 then r' else ""
True -> r'
False -> ""
instance ReadFile [GrandDebatReference] instance ReadFile [GrandDebatReference]
where where
......
...@@ -20,14 +20,14 @@ TODO: ...@@ -20,14 +20,14 @@ TODO:
module Gargantext.Core.Text.Corpus.Parsers.Isidore where module Gargantext.Core.Text.Corpus.Parsers.Isidore where
import Control.Lens hiding (contains) import Control.Lens ( (^.), (.~) )
import Data.ByteString.Lazy (ByteString) import Data.ByteString.Lazy (ByteString)
import Data.RDF hiding (triple, Query) import Data.RDF ( Node(LNode, UNode), LValue(PlainLL, TypedL, PlainL) )
import Data.Text qualified as T import Data.Text qualified as T
import Database.HSparql.Connection import Database.HSparql.Connection ( BindingValue(..), EndPoint, structureContent )
import Database.HSparql.QueryGenerator import Database.HSparql.QueryGenerator
import Gargantext.Core (Lang) import Gargantext.Core (Lang)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (ByteString) import Gargantext.Prelude hiding (ByteString)
import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody) import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
import Prelude qualified import Prelude qualified
...@@ -115,7 +115,7 @@ unbound _ Unbound = Nothing ...@@ -115,7 +115,7 @@ unbound _ Unbound = Nothing
unbound _ (Bound (UNode x)) = Just x unbound _ (Bound (UNode x)) = Just x
unbound _ (Bound (LNode (TypedL x _))) = Just x unbound _ (Bound (LNode (TypedL x _))) = Just x
unbound _ (Bound (LNode (PlainL x))) = Just x unbound _ (Bound (LNode (PlainL x))) = Just x
unbound l (Bound (LNode (PlainLL x l'))) = if l' == (T.toLower $ show l) then Just x else Nothing unbound l (Bound (LNode (PlainLL x l'))) = if l' == T.toLower (show l) then Just x else Nothing
unbound _ _ = Nothing unbound _ _ = Nothing
bind2doc :: Lang -> [BindingValue] -> HyperdataDocument bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
...@@ -123,8 +123,6 @@ bind2doc l [ link', date, langDoc, authors, _source, publisher, title, abstract ...@@ -123,8 +123,6 @@ bind2doc l [ link', date, langDoc, authors, _source, publisher, title, abstract
HyperdataDocument { _hd_bdd = Just "Isidore" HyperdataDocument { _hd_bdd = Just "Isidore"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = unbound l link' , _hd_url = unbound l link'
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = unbound l title , _hd_title = unbound l title
, _hd_authors = unbound l authors , _hd_authors = unbound l authors
......
...@@ -19,11 +19,10 @@ module Gargantext.Core.Text.Corpus.Parsers.JSON.Istex where ...@@ -19,11 +19,10 @@ module Gargantext.Core.Text.Corpus.Parsers.JSON.Istex where
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Defaults qualified as Defaults import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (length, show) import Gargantext.Prelude hiding (length)
import ISTEX.Client qualified as ISTEX import ISTEX.Client qualified as ISTEX
import Protolude
-- | TODO remove dateSplit here -- | TODO remove dateSplit here
...@@ -37,12 +36,10 @@ toDoc la (ISTEX.Document i t a ab d s) = do ...@@ -37,12 +36,10 @@ toDoc la (ISTEX.Document i t a ab d s) = do
pure $ HyperdataDocument { _hd_bdd = Just "Istex" pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i , _hd_doi = Just i
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = t , _hd_title = t
, _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (map ISTEX._author_name a) , _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (map ISTEX._author_name a)
, _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a) , _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (concatMap ISTEX._author_affiliations a)
, _hd_source = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (ISTEX._source_title s) , _hd_source = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (ISTEX._source_title s)
, _hd_abstract = ab , _hd_abstract = ab
, _hd_publication_date = fmap (T.pack . show) utctime , _hd_publication_date = fmap (T.pack . show) utctime
......
...@@ -20,11 +20,11 @@ module Gargantext.Core.Text.Corpus.Parsers.Wikidata where ...@@ -20,11 +20,11 @@ module Gargantext.Core.Text.Corpus.Parsers.Wikidata where
import Data.List qualified as List import Data.List qualified as List
import Data.Text (concat) import Data.Text (concat)
import Database.HSparql.Connection import Database.HSparql.Connection ( BindingValue, EndPoint, selectQueryRaw )
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit) import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Isidore (unbound) import Gargantext.Core.Text.Corpus.Parsers.Isidore (unbound)
import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler ( crawlPage )
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Prelude hiding (concat) import Gargantext.Prelude hiding (concat)
import Prelude qualified import Prelude qualified
...@@ -56,11 +56,9 @@ wikiPageToDocument m wr = do ...@@ -56,11 +56,9 @@ wikiPageToDocument m wr = do
let bdd = Just "wikidata" let bdd = Just "wikidata"
doi = Nothing doi = Nothing
url = (wr ^. wr_url) url = wr ^. wr_url
uniqId = Nothing
uniqIdBdd = Nothing
page = Nothing page = Nothing
title = (wr ^. wr_title) title = wr ^. wr_title
authors = Nothing authors = Nothing
institutes = Nothing institutes = Nothing
source = Nothing source = Nothing
...@@ -82,8 +80,6 @@ wikiPageToDocument m wr = do ...@@ -82,8 +80,6 @@ wikiPageToDocument m wr = do
pure $ HyperdataDocument { _hd_bdd = bdd pure $ HyperdataDocument { _hd_bdd = bdd
, _hd_doi = doi , _hd_doi = doi
, _hd_url = url , _hd_url = url
, _hd_uniqId = uniqId
, _hd_uniqIdBdd = uniqIdBdd
, _hd_page = page , _hd_page = page
, _hd_title = title , _hd_title = title
, _hd_authors = authors , _hd_authors = authors
......
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Core.Text.Terms.Mono.Stem.Lancaster
( stemIt
) where
import Prelude
import Data.Text (Text)
import qualified Data.Text as T
-- | One rewrite rule of the Lancaster (Paice\/Husk) stemmer: when a
-- word ends in '_match', that suffix is replaced by '_replacement',
-- and '_ruleType' controls how stemming proceeds afterwards.
data Rule = Rule
  { _match :: Text          -- ^ suffix the word must end with
  , _replacement :: Text    -- ^ text substituted for the matched suffix
  , _ruleType :: RuleType   -- ^ what to do after the substitution
  } deriving (Show, Eq)
-- | Control flag attached to each 'Rule'; gates whether a rule may
-- fire and whether stemming continues on the rewritten word.
data RuleType
  = Intact    -- ^ applies only while the word has not been altered yet
  | Continue  -- ^ apply the rule, then keep stemming the result
  | Contint   -- ^ 'Intact' precondition combined with 'Continue' behaviour
  | Stop      -- ^ apply the rule and stop stemming
  | Protect   -- ^ NOTE(review): in 'ruleApplication' this is handled the
              -- same as 'Stop' (the suffix is still replaced) — confirm
              -- this matches the intended Lancaster \"protect\" semantics
  deriving (Show, Eq)

-- | Rule lists indexed by the final letter of the word.
type RuleCollection = [(Char, [Rule])]
-- | Lower-case synonyms for the 'RuleType' constructors, used to keep
-- the 'rulesPaper' table visually close to the notation of the
-- original Paice\/Husk paper.
stop, intact, cont, protect, contint :: RuleType
stop = Stop
intact = Intact
cont = Continue
protect = Protect
contint = Contint
-- | The Lancaster (Paice\/Husk) rule table, indexed by the last letter
-- of the word.  Within each letter the rules are tried in list order,
-- so ordering is significant: the first rule whose suffix matches and
-- whose result is 'acceptable' wins.
rulesPaper :: RuleCollection
rulesPaper =
  [ ('a', [ Rule "ia" "" intact, Rule "a" "" intact ])
  , ('b', [ Rule "bb" "b" stop ])
  , ('c', [ Rule "ytic" "ys" stop, Rule "ic" "" cont, Rule "nc" "nt" cont ])
  , ('d', [ Rule "dd" "d" stop, Rule "ied" "i" stop, Rule "ceed" "cess" stop, Rule "eed" "ee" stop
          , Rule "ed" "" cont, Rule "hood" "" cont ])
  , ('e', [ Rule "e" "" cont ])
  , ('f', [ Rule "lief" "liev" stop, Rule "if" "" cont ])
  , ('g', [ Rule "ing" "" cont, Rule "iag" "y" stop, Rule "ag" "" cont, Rule "gg" "g" stop ])
  , ('h', [ Rule "th" "" intact, Rule "guish" "ct" stop, Rule "ish" "" cont ])
  , ('i', [ Rule "i" "" intact, Rule "i" "y" cont ])
  , ('j', [ Rule "ij" "id" stop, Rule "fuj" "fus" stop, Rule "uj" "ud" stop, Rule "oj" "od" stop
          , Rule "hej" "her" stop, Rule "verj" "vert" stop, Rule "misj" "mit" stop, Rule "nj" "nd" stop
          , Rule "j" "s" stop ])
  , ('l', [ Rule "ifiabl" "" stop, Rule "iabl" "y" stop, Rule "abl" "" cont, Rule "ibl" "" stop
          , Rule "bil" "bl" cont, Rule "cl" "c" stop, Rule "iful" "y" stop, Rule "ful" "" cont
          , Rule "ul" "" stop, Rule "ial" "" cont, Rule "ual" "" cont, Rule "al" "" cont
          , Rule "ll" "l" stop ])
  , ('m', [ Rule "ium" "" stop, Rule "um" "" intact, Rule "ism" "" cont, Rule "mm" "m" stop ])
  , ('n', [ Rule "sion" "j" cont, Rule "xion" "ct" stop, Rule "ion" "" cont, Rule "ian" "" cont
          , Rule "an" "" cont, Rule "een" "" protect, Rule "en" "" cont, Rule "nn" "n" stop ])
  , ('p', [ Rule "ship" "" cont, Rule "pp" "p" stop ])
  , ('r', [ Rule "er" "" cont, Rule "ear" "" protect, Rule "ar" "" stop, Rule "or" "" cont
          , Rule "ur" "" cont, Rule "rr" "r" stop, Rule "tr" "t" cont, Rule "ier" "y" cont ])
  , ('s', [ Rule "ies" "y" cont, Rule "sis" "s" stop, Rule "is" "" cont, Rule "ness" "" cont
          , Rule "ss" "" protect, Rule "ous" "" cont, Rule "us" "" intact, Rule "s" "" contint
          , Rule "s" "" protect ])
  , ('t', [ Rule "plicat" "ply" stop, Rule "at" "" cont, Rule "ment" "" cont, Rule "ent" "" cont
          , Rule "ant" "" cont, Rule "ript" "rib" stop, Rule "orpt" "orb" stop, Rule "duct" "duc" stop
          , Rule "sumpt" "sum" stop, Rule "cept" "ceiv" stop, Rule "olut" "olv" stop
          , Rule "sist" "" protect, Rule "ist" "" cont, Rule "tt" "t" stop ])
  , ('u', [ Rule "iqu" "" stop, Rule "ogu" "og" stop ])
  , ('v', [ Rule "siv" "j" cont, Rule "eiv" "" protect, Rule "iv" "" cont ])
  , ('y', [ Rule "bly" "bl" cont, Rule "ily" "y" cont, Rule "ply" "" protect, Rule "ly" "" cont
          , Rule "ogy" "og" stop, Rule "phy" "ph" stop, Rule "omy" "om" stop, Rule "opy" "op" stop
          , Rule "ity" "" cont, Rule "ety" "" cont, Rule "lty" "l" stop, Rule "istry" "" stop
          , Rule "ary" "" cont, Rule "ory" "" cont, Rule "ify" "" stop, Rule "ncy" "nt" cont
          , Rule "acy" "" cont ])
  , ('z', [ Rule "iz" "" cont, Rule "yz" "ys" stop ])
  ]
-- | Test whether a character is one of the stemmer's vowels.
-- Note that @\'y\'@ counts as a vowel for this algorithm.
isVowel :: Char -> Bool
isVowel = (`elem` vowelsSet)
{-# INLINE isVowel #-}

-- | The characters treated as vowels by 'isVowel'.
vowelsSet :: String
vowelsSet = "aeiouy"
{-# INLINE vowelsSet #-}
-- | Stem an English word with the Lancaster (Paice\/Husk) algorithm,
-- using the rule table taken from the original paper.
stemIt :: Text -> Text
stemIt = (`lancasterStemmer` rulesPaper)
-- | Run the Lancaster stemmer over a word with a caller-supplied rule
-- collection.  The input is lower-cased first, and the word starts in
-- the \"intact\" (not-yet-stemmed) state.
lancasterStemmer :: Text -> RuleCollection -> Text
lancasterStemmer word = applyRules (T.toLower word) True
-- | Core of the stemmer.  The word's last character selects a rule
-- list from the collection; those rules are tried in order and the
-- first applicable one determines the result.  @isIntact@ is 'True'
-- only while the word has not yet been altered, which gates the
-- 'Intact' \/ 'Contint' rules.  A word with no matching ruleset (or an
-- empty word) is returned unchanged.
applyRules :: Text -> Bool -> RuleCollection -> Text
applyRules value isIntact rules =
  case T.unsnoc value of
    Nothing -> value                    -- empty word: nothing to stem
    Just (_, lastChar) ->
      case lookup lastChar rules of
        Nothing -> value                -- no rules for this final letter
        Just ruleset -> applyRuleSet value isIntact ruleset
  where
    -- Try each rule in turn; the first rule that yields a result wins,
    -- otherwise the word is returned untouched.
    -- NOTE(review): the recursion hands the outer 'value' (not 'val')
    -- to 'ruleApplication'.  Both are always the same Text here, since
    -- 'applyRuleSet' is only ever started with 'value', but the
    -- shadowing is fragile — confirm before refactoring.
    applyRuleSet :: Text -> Bool -> [Rule] -> Text
    applyRuleSet val _ [] = val
    applyRuleSet val isIntact' (rule:rest) =
      case ruleApplication value isIntact' rule of
        Just res -> res
        Nothing -> applyRuleSet val isIntact' rest
    -- Apply a single rule: strip the matched suffix, append the
    -- replacement, and for Continue/Contint rules keep stemming the
    -- rewritten word (now flagged as no longer intact).
    ruleApplication :: Text -> Bool -> Rule -> Maybe Text
    ruleApplication val isIntact' (Rule m r t) =
      if not isIntact' && (t == intact || t == contint)
      then Nothing                      -- intact-only rule, word already altered
      else case T.stripSuffix m val of
        Nothing -> Nothing              -- suffix does not match
        Just stem ->
          let next = stem `T.append` r
          in if not (acceptable next)
             then Nothing               -- rewritten stem rejected as too short/vowel-less
             else if t == cont || t == contint
                  then Just $ applyRules next False rules
                  else Just next
-- | Returns 'True' if a candidate stem may replace the word: a stem
-- starting with a vowel must be at least two characters long, while
-- one starting with a consonant must be at least three characters
-- long and contain a vowel somewhere.  The empty stem is rejected.
--
-- Uses 'T.uncons' instead of a guarded 'T.head' so the function is
-- total by construction.
acceptable :: Text -> Bool
acceptable val = case T.uncons val of
  Nothing -> False
  Just (c, _)
    | isVowel c -> T.length val > 1
    | otherwise -> T.length val > 2 && T.any isVowel val
...@@ -183,13 +183,10 @@ combineTokenTags (TokenTag w1 l1 p1 n1 s1 e1) (TokenTag w2 l2 p2 _ s2 e2) = Toke ...@@ -183,13 +183,10 @@ combineTokenTags (TokenTag w1 l1 p1 n1 s1 e1) (TokenTag w2 l2 p2 _ s2 e2) = Toke
_ -> p1 _ -> p1
emptyTokenTag :: TokenTag instance Monoid TokenTag where
emptyTokenTag = TokenTag [] empty Nothing Nothing 0 0 mempty = TokenTag [] empty Nothing Nothing
mconcat = foldl' mappend mempty
-- instance Monoid TokenTag where -- mappend t1 t2 = (<>) t1 t2
-- mempty = TokenTag [] empty Nothing Nothing 0 0
-- mconcat = foldl mappend mempty
-- -- mappend t1 t2 = (<>) t1 t2
class HasValidationError e where class HasValidationError e where
......
...@@ -9,22 +9,21 @@ Portability : POSIX ...@@ -9,22 +9,21 @@ Portability : POSIX
-} -}
{-# LANGUAGE DeriveAnyClass #-}
module Gargantext.Core.Types.Search where module Gargantext.Core.Types.Search where
import Data.Aeson hiding (defaultTaggedObject) import Data.Aeson hiding (defaultTaggedObject)
import Data.Swagger hiding (fieldLabelModifier, Contact) import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema )
import Data.Text qualified as Text import Data.Text qualified as Text
import Data.Time (UTCTime) import Data.Time (UTCTime)
import Gargantext.Core.Utils.Prefix (dropPrefix, unCapitalize, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (dropPrefix, unCapitalize, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata (ContactWhere(..), HyperdataContact(..), HyperdataDocument(..), ContactWho(..)) import Gargantext.Database.Admin.Types.Hyperdata.Contact ( ContactWhere(..), HyperdataContact(..), ContactWho(..) )
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId )
import Gargantext.Database.Query.Facet.Types (Facet(..), FacetDoc, FacetPaired(..)) import Gargantext.Database.Query.Facet.Types (Facet(..), FacetDoc, FacetPaired(..))
import Gargantext.Defaults qualified as Defaults import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Aeson (defaultTaggedObject) import Gargantext.Utils.Aeson (defaultTaggedObject)
import Test.QuickCheck.Arbitrary import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
data Row = data Row =
...@@ -93,8 +92,6 @@ data HyperdataRow = ...@@ -93,8 +92,6 @@ data HyperdataRow =
, _hr_source :: !Text , _hr_source :: !Text
, _hr_title :: !Text , _hr_title :: !Text
, _hr_url :: !Text , _hr_url :: !Text
, _hr_uniqId :: !Text
, _hr_uniqIdBdd :: !Text
} }
| HyperdataRowContact { _hr_firstname :: !Text | HyperdataRowContact { _hr_firstname :: !Text
, _hr_lastname :: !Text , _hr_lastname :: !Text
...@@ -148,9 +145,7 @@ instance ToHyperdataRow HyperdataDocument where ...@@ -148,9 +145,7 @@ instance ToHyperdataRow HyperdataDocument where
, _hr_publication_second = fromMaybe 0 _hd_publication_second , _hr_publication_second = fromMaybe 0 _hd_publication_second
, _hr_source = fromMaybe "" _hd_source , _hr_source = fromMaybe "" _hd_source
, _hr_title = fromMaybe "Title" _hd_title , _hr_title = fromMaybe "Title" _hd_title
, _hr_url = fromMaybe "" _hd_url , _hr_url = fromMaybe "" _hd_url }
, _hr_uniqId = fromMaybe "" _hd_uniqId
, _hr_uniqIdBdd = fromMaybe "" _hd_uniqIdBdd }
instance ToHyperdataRow HyperdataContact where instance ToHyperdataRow HyperdataContact where
toHyperdataRow (HyperdataContact { _hc_who = Just (ContactWho _ fn ln _ _ _), _hc_where = ou} ) = toHyperdataRow (HyperdataContact { _hc_who = Just (ContactWho _ fn ln _ _ _), _hc_where = ou} ) =
......
...@@ -59,8 +59,8 @@ cooc2graph' distance threshold myCooc ...@@ -59,8 +59,8 @@ cooc2graph' distance threshold myCooc
$ mat2map $ mat2map
$ measure distance $ measure distance
$ case distance of $ case distance of
Conditional -> map2mat Triangle 0 tiSize Conditional -> map2mat Square 1 tiSize
_ -> map2mat Square 0 tiSize _ -> map2mat Square 0 tiSize
$ Map.filter (> 1) myCooc' $ Map.filter (> 1) myCooc'
where where
......
...@@ -231,7 +231,7 @@ defaultConfig = ...@@ -231,7 +231,7 @@ defaultConfig =
, similarity = WeightedLogJaccard 0.5 2 , similarity = WeightedLogJaccard 0.5 2
, seaElevation = Constante 0.1 0.1 , seaElevation = Constante 0.1 0.1
, defaultMode = False , defaultMode = False
, findAncestors = True , findAncestors = False
, phyloSynchrony = ByProximityThreshold 0.6 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold 0.6 0 AllBranches MergeAllGroups
, phyloQuality = Quality 0.5 3 , phyloQuality = Quality 0.5 3
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
......
...@@ -50,9 +50,7 @@ type FlowCmdM env err m = ...@@ -50,9 +50,7 @@ type FlowCmdM env err m =
, MonadLogger m , MonadLogger m
) )
type FlowCorpus a = ( AddUniqId a type FlowCorpus a = ( UniqParameters a
, UniqId a
, UniqParameters a
, InsertDb a , InsertDb a
, ExtractNgramsT a , ExtractNgramsT a
, HasText a , HasText a
......
...@@ -16,7 +16,6 @@ Portability : POSIX ...@@ -16,7 +16,6 @@ Portability : POSIX
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
...@@ -32,7 +31,7 @@ import Gargantext.API.GraphQL.Utils qualified as GAGU ...@@ -32,7 +31,7 @@ import Gargantext.API.GraphQL.Utils qualified as GAGU
import Gargantext.Core.Text (HasText(..)) import Gargantext.Core.Text (HasText(..))
import Gargantext.Database.Admin.Types.Hyperdata.Prelude import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.UTCTime import Gargantext.Utils.UTCTime ( NUTCTime(..) )
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
data HyperdataContact = data HyperdataContact =
...@@ -42,8 +41,6 @@ data HyperdataContact = ...@@ -42,8 +41,6 @@ data HyperdataContact =
, _hc_title :: Maybe Text -- TODO remove (only demo) , _hc_title :: Maybe Text -- TODO remove (only demo)
, _hc_source :: Maybe Text -- TODO remove (only demo) , _hc_source :: Maybe Text -- TODO remove (only demo)
, _hc_lastValidation :: Maybe Text -- TODO UTCTime , _hc_lastValidation :: Maybe Text -- TODO UTCTime
, _hc_uniqIdBdd :: Maybe Text
, _hc_uniqId :: Maybe Text
} deriving (Eq, Show, Generic) } deriving (Eq, Show, Generic)
instance GQLType HyperdataContact where instance GQLType HyperdataContact where
...@@ -61,9 +58,7 @@ defaultHyperdataContact = ...@@ -61,9 +58,7 @@ defaultHyperdataContact =
, _hc_where = [defaultContactWhere] , _hc_where = [defaultContactWhere]
, _hc_title =Just "Title" , _hc_title =Just "Title"
, _hc_source = Just "Source" , _hc_source = Just "Source"
, _hc_lastValidation = Just "TODO lastValidation date" , _hc_lastValidation = Just "TODO lastValidation date" }
, _hc_uniqIdBdd = Just "DO NOT expose this"
, _hc_uniqId = Just "DO NOT expose this" }
hyperdataContact :: FirstName -> LastName -> HyperdataContact hyperdataContact :: FirstName -> LastName -> HyperdataContact
hyperdataContact fn ln = hyperdataContact fn ln =
...@@ -73,9 +68,7 @@ hyperdataContact fn ln = ...@@ -73,9 +68,7 @@ hyperdataContact fn ln =
, _hc_where = [] , _hc_where = []
, _hc_title = Nothing , _hc_title = Nothing
, _hc_source = Nothing , _hc_source = Nothing
, _hc_lastValidation = Nothing , _hc_lastValidation = Nothing }
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
-- TOD0 contact metadata (Type is too flat) -- TOD0 contact metadata (Type is too flat)
data ContactMetaData = data ContactMetaData =
...@@ -94,9 +87,7 @@ arbitraryHyperdataContact = ...@@ -94,9 +87,7 @@ arbitraryHyperdataContact =
, _hc_where = [] , _hc_where = []
, _hc_title = Nothing , _hc_title = Nothing
, _hc_source = Nothing , _hc_source = Nothing
, _hc_lastValidation = Nothing , _hc_lastValidation = Nothing }
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
data ContactWho = data ContactWho =
...@@ -188,7 +179,7 @@ instance ToSchema ContactMetaData where ...@@ -188,7 +179,7 @@ instance ToSchema ContactMetaData where
-- | Arbitrary instances -- | Arbitrary instances
instance Arbitrary HyperdataContact where instance Arbitrary HyperdataContact where
arbitrary = elements [HyperdataContact Nothing Nothing [] Nothing Nothing Nothing Nothing Nothing] arbitrary = elements [ HyperdataContact Nothing Nothing [] Nothing Nothing Nothing ]
-- | Specific Gargantext instance -- | Specific Gargantext instance
instance Hyperdata HyperdataContact instance Hyperdata HyperdataContact
......
...@@ -30,8 +30,6 @@ import Gargantext.Prelude hiding (ByteString) ...@@ -30,8 +30,6 @@ import Gargantext.Prelude hiding (ByteString)
data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text) data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text)
, _hd_doi :: !(Maybe Text) , _hd_doi :: !(Maybe Text)
, _hd_url :: !(Maybe Text) , _hd_url :: !(Maybe Text)
, _hd_uniqId :: !(Maybe Text)
, _hd_uniqIdBdd :: !(Maybe Text)
, _hd_page :: !(Maybe Int) , _hd_page :: !(Maybe Int)
, _hd_title :: !(Maybe Text) , _hd_title :: !(Maybe Text)
, _hd_authors :: !(Maybe Text) , _hd_authors :: !(Maybe Text)
...@@ -59,7 +57,7 @@ instance HasText HyperdataDocument ...@@ -59,7 +57,7 @@ instance HasText HyperdataDocument
defaultHyperdataDocument :: HyperdataDocument defaultHyperdataDocument :: HyperdataDocument
defaultHyperdataDocument = case decode docExample of defaultHyperdataDocument = case decode docExample of
Just hp -> hp Just hp -> hp
Nothing -> HyperdataDocument Nothing Nothing Nothing Nothing Nothing -> HyperdataDocument Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
...@@ -108,7 +106,8 @@ instance ToHyperdataDocument HyperdataDocument ...@@ -108,7 +106,8 @@ instance ToHyperdataDocument HyperdataDocument
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance Eq HyperdataDocument where instance Eq HyperdataDocument where
(==) h1 h2 = (==) (_hd_uniqId h1) (_hd_uniqId h2) (==) h1 h2 = _hd_title h1 == _hd_title h2
&& _hd_abstract h1 == _hd_abstract h2
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance Ord HyperdataDocument where instance Ord HyperdataDocument where
...@@ -127,7 +126,7 @@ arbitraryHyperdataDocuments = ...@@ -127,7 +126,7 @@ arbitraryHyperdataDocuments =
] :: [(Text, Text)]) ] :: [(Text, Text)])
where where
toHyperdataDocument' (t1,t2) = toHyperdataDocument' (t1,t2) =
HyperdataDocument Nothing Nothing Nothing Nothing Nothing Nothing (Just t1) HyperdataDocument Nothing Nothing Nothing Nothing (Just t1)
Nothing Nothing (Just t2) Nothing Nothing Nothing Nothing Nothing Nothing Nothing (Just t2) Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
......
...@@ -57,14 +57,12 @@ the concatenation of the parameters defined by @shaParameters@. ...@@ -57,14 +57,12 @@ the concatenation of the parameters defined by @shaParameters@.
module Gargantext.Database.Query.Table.Node.Document.Insert module Gargantext.Database.Query.Table.Node.Document.Insert
where where
import Control.Lens (set, view) import Data.Aeson (toJSON, ToJSON)
import Control.Lens.Cons
import Control.Lens.Prism
import Data.Text qualified as DT (pack, concat, take, filter, toLower) import Data.Text qualified as DT (pack, concat, take, filter, toLower)
import Data.Time.Segment (jour) import Data.Time.Segment (jour)
import Database.PostgreSQL.Simple (FromRow, Query, Only(..)) import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
import Database.PostgreSQL.Simple.FromRow (fromRow, field) import Database.PostgreSQL.Simple.FromRow (fromRow, field)
import Database.PostgreSQL.Simple.SqlQQ import Database.PostgreSQL.Simple.SqlQQ ( sql )
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-}) import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core (HasDBid(toDBid)) import Gargantext.Core (HasDBid(toDBid))
...@@ -92,7 +90,7 @@ import Database.PostgreSQL.Simple (formatQuery) ...@@ -92,7 +90,7 @@ import Database.PostgreSQL.Simple (formatQuery)
insertDb :: (InsertDb a, HasDBid NodeType) => UserId -> Maybe ParentId -> [a] -> DBCmd err [ReturnId] insertDb :: (InsertDb a, HasDBid NodeType) => UserId -> Maybe ParentId -> [a] -> DBCmd err [ReturnId]
insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p) insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
where where
fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes fields = map (QualifiedIdentifier Nothing) inputSqlTypes
class InsertDb a class InsertDb a
where where
...@@ -107,18 +105,18 @@ instance InsertDb HyperdataDocument ...@@ -107,18 +105,18 @@ instance InsertDb HyperdataDocument
, toField p , toField p
, toField $ maybe "No Title" (DT.take 255) (_hd_title h) , toField $ maybe "No Title" (DT.take 255) (_hd_title h)
, toField $ _hd_publication_date h -- TODO USE UTCTime , toField $ _hd_publication_date h -- TODO USE UTCTime
, (toField . toJSON) (addUniqId h) -- , (toField . toJSON) (addUniqId h)
] ]
instance InsertDb HyperdataContact instance InsertDb HyperdataContact
where where
insertDb' u p h = [ toField ("" :: Text) insertDb' u p _h = [ toField ("" :: Text)
, toField $ toDBid NodeContact , toField $ toDBid NodeContact
, toField u , toField u
, toField p , toField p
, toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h) , toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
, toField $ jour 0 1 1 -- TODO put default date , toField $ jour 0 1 1 -- TODO put default date
, (toField . toJSON) (addUniqId h) -- , (toField . toJSON) (addUniqId h)
] ]
instance ToJSON a => InsertDb (Node a) instance ToJSON a => InsertDb (Node a)
...@@ -193,73 +191,73 @@ class AddUniqId a ...@@ -193,73 +191,73 @@ class AddUniqId a
where where
addUniqId :: a -> a addUniqId :: a -> a
-- instance AddUniqId HyperdataDocument
-- where
-- addUniqId = addUniqIdsDoc
-- where
-- addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
-- addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
-- $ set hd_uniqId (Just shaUni) doc
-- where
-- shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
-- shaBdd = hash $ DT.concat $ map ($ doc) ([maybeText . _hd_bdd] <> shaParametersDoc)
-- shaParametersDoc :: [HyperdataDocument -> Text]
-- shaParametersDoc = [ filterText . maybeText . _hd_title
-- , filterText . maybeText . _hd_abstract
-- , filterText . maybeText . _hd_source
-- -- , \d -> maybeText (_hd_publication_date d)
-- ]
class UniqParameters a class UniqParameters a
where where
uniqParameters :: ParentId -> a -> Text uniqParameters :: a -> Text
instance AddUniqId HyperdataDocument
where
addUniqId = addUniqIdsDoc
where
addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
$ set hd_uniqId (Just shaUni) doc
where
shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
shaParametersDoc :: [(HyperdataDocument -> Text)]
shaParametersDoc = [ \d -> filterText $ maybeText (_hd_title d)
, \d -> filterText $ maybeText (_hd_abstract d)
, \d -> filterText $ maybeText (_hd_source d)
-- , \d -> maybeText (_hd_publication_date d)
]
instance UniqParameters HyperdataDocument instance UniqParameters HyperdataDocument
where where
uniqParameters _ h = filterText $ DT.concat $ map maybeText $ [_hd_title h, _hd_abstract h, _hd_source h] uniqParameters h = filterText $ DT.concat $ map maybeText $ [_hd_title h, _hd_abstract h, _hd_source h]
instance UniqParameters HyperdataContact instance UniqParameters HyperdataContact
where where
uniqParameters _ _ = "" uniqParameters _ = ""
instance UniqParameters (Node a) instance UniqParameters (Node a)
where where
uniqParameters _ _ = undefined uniqParameters _ = undefined
filterText :: Text -> Text filterText :: Text -> Text
filterText = DT.toLower . (DT.filter isAlphaNum) filterText = DT.toLower . DT.filter isAlphaNum
instance (UniqParameters a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a) instance (UniqParameters a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a)
where where
addUniqId (Node nid _ t u p n d h) = Node nid (Just newHash) t u p n d h addUniqId (Node nid _ t u p n d h) = Node nid (Just newHash) t u p n d h
where where
newHash = "\\x" <> (hash $ uniqParameters (fromMaybe 0 p) h) newHash = "\\x" <> hash (uniqParameters h)
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
-- * Uniqueness of document definition -- * Uniqueness of document definition
-- TODO factorize with above (use the function below for tests) -- TODO factorize with above (use the function below for tests)
instance AddUniqId HyperdataContact -- instance AddUniqId HyperdataContact
where -- where
addUniqId = addUniqIdsContact -- addUniqId = addUniqIdsContact
addUniqIdsContact :: HyperdataContact -> HyperdataContact -- addUniqIdsContact :: HyperdataContact -> HyperdataContact
addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd) -- addUniqIdsContact hc = set hc_uniqIdBdd (Just shaBdd)
$ set (hc_uniqId ) (Just shaUni) hc -- $ set hc_uniqId (Just shaUni) hc
where -- where
shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact -- shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact
shaBdd = hash $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact) -- shaBdd = hash $ DT.concat $ map ($ hc) ([maybeText . view hc_bdd] <> shaParametersContact)
-- | TODO add more shaparameters -- -- | TODO add more shaparameters
shaParametersContact :: [(HyperdataContact -> Text)] -- shaParametersContact :: [HyperdataContact -> Text]
shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName ) d -- shaParametersContact = [ maybeText . view (hc_who . _Just . cw_firstName )
, \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d -- , maybeText . view (hc_who . _Just . cw_lastName )
, \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d -- , maybeText . view (hc_where . _head . cw_touch . _Just . ct_mail)
] -- ]
maybeText :: Maybe Text -> Text maybeText :: Maybe Text -> Text
...@@ -285,7 +283,7 @@ instance ToNode HyperdataDocument where ...@@ -285,7 +283,7 @@ instance ToNode HyperdataDocument where
-- TODO better Node -- TODO better Node
instance ToNode HyperdataContact where instance ToNode HyperdataContact where
toNode u p h = Node 0 Nothing (toDBid NodeContact) u p "Contact" date h toNode u p = Node 0 Nothing (toDBid NodeContact) u p "Contact" date
where where
date = jour 2020 01 01 date = jour 2020 01 01
......
...@@ -10,18 +10,17 @@ Portability : POSIX ...@@ -10,18 +10,17 @@ Portability : POSIX
module Gargantext.Utils.Servant where module Gargantext.Utils.Servant where
import qualified Data.ByteString.Lazy.Char8 as BSC import Data.ByteString.Lazy.Char8 qualified as BSC
import Data.Csv (defaultEncodeOptions, encodeByNameWith, encodeDefaultOrderedByName, header, namedRecord, (.=), DefaultOrdered, EncodeOptions(..), NamedRecord, Quoting(QuoteNone), ToNamedRecord) import Data.Csv (encodeDefaultOrderedByName, DefaultOrdered, ToNamedRecord)
import qualified Data.Map.Strict as Map import Data.Text qualified as T
import qualified Data.Text as T import Data.Text.Encoding qualified as TE
import Gargantext.Core.Ngrams.Types (mSetToList, NgramsRepoElement(..), NgramsTableMap, NgramsTerm(..), unNgramsTerm)
import Gargantext.Core.Types.Main (ListType(..))
import Network.HTTP.Media ((//), (/:)) import Network.HTTP.Media ((//), (/:))
import qualified Prelude import Prelude qualified
import Protolude import Protolude
import Protolude.Partial (read) import Protolude.Partial (read)
import Servant ( Accept(contentType), MimeRender(..), MimeUnrender(mimeUnrender) ) import Servant ( Accept(contentType), MimeRender(..), MimeUnrender(mimeUnrender) )
data CSV = CSV data CSV = CSV
instance Accept CSV where instance Accept CSV where
...@@ -33,25 +32,6 @@ instance (DefaultOrdered a, ToNamedRecord a) => MimeRender CSV [a] where ...@@ -33,25 +32,6 @@ instance (DefaultOrdered a, ToNamedRecord a) => MimeRender CSV [a] where
instance MimeRender CSV T.Text where instance MimeRender CSV T.Text where
mimeRender _ = toUtf8Lazy mimeRender _ = toUtf8Lazy
-- CSV:
-- header: status\tlabel\tforms
-- item: map\taccountability\taccounting|&|accoutns|&|account
instance MimeRender CSV NgramsTableMap where
-- mimeRender _ _val = encode ([] :: [(Text, Text)])
mimeRender _ val = encodeByNameWith encOptions (header ["status", "label", "forms"]) $ fn <$> Map.toList val
where
encOptions = defaultEncodeOptions { encDelimiter = fromIntegral (ord '\t')
, encQuoting = QuoteNone }
fn :: (NgramsTerm, NgramsRepoElement) -> NamedRecord
fn (NgramsTerm term, NgramsRepoElement { _nre_list, _nre_children }) =
namedRecord [ "status" .= toText _nre_list
, "label" .= term
, "forms" .= (T.intercalate "|&|" $ unNgramsTerm <$> mSetToList _nre_children)]
toText :: ListType -> Text
toText CandidateTerm = "candidate"
toText MapTerm = "map"
toText StopTerm = "stop"
instance Read a => MimeUnrender CSV a where instance Read a => MimeUnrender CSV a where
mimeUnrender _ bs = case BSC.take len bs of mimeUnrender _ bs = case BSC.take len bs of
"text/csv" -> pure . read . BSC.unpack $ BSC.drop len bs "text/csv" -> pure . read . BSC.unpack $ BSC.drop len bs
...@@ -74,4 +54,19 @@ instance MimeRender Markdown T.Text where ...@@ -74,4 +54,19 @@ instance MimeRender Markdown T.Text where
mimeRender _ = toUtf8Lazy mimeRender _ = toUtf8Lazy
instance MimeUnrender Markdown T.Text where instance MimeUnrender Markdown T.Text where
mimeUnrender _ = Right . decodeUtf8 . BSC.toStrict mimeUnrender _ = Right . TE.decodeUtf8 . BSC.toStrict
---------------------------
data ZIP = ZIP
instance Accept ZIP where
contentType _ = "application" // "zip"
instance MimeRender ZIP BSC.ByteString where
mimeRender _ = identity
instance MimeUnrender ZIP BSC.ByteString where
mimeUnrender _ = Right . identity
...@@ -15,18 +15,38 @@ Utilities for handling zip files ...@@ -15,18 +15,38 @@ Utilities for handling zip files
module Gargantext.Utils.Zip where module Gargantext.Utils.Zip where
import "zip" Codec.Archive.Zip (withArchive, ZipArchive) import "zip" Codec.Archive.Zip (addEntry, createArchive, mkEntrySelector, withArchive, CompressionMethod(BZip2), ZipArchive)
-- import Control.Monad.Base (liftBase) import "zip-archive" Codec.Archive.Zip qualified as ZArch
import Control.Monad.Base (MonadBase, liftBase)
import Data.ByteString qualified as BS import Data.ByteString qualified as BS
import Data.ByteString.Lazy.Char8 qualified as BSC
import Protolude import Protolude
import System.Directory (removeFile) import System.Directory (removeFile)
import System.IO.Temp (emptySystemTempFile) import System.IO.Temp (emptySystemTempFile)
-- | Take a zip file (in the form of a ByteString) and work on its contents (using the ZipArchive monad)
withZipFileBS :: MonadIO m => BS.ByteString -> ZipArchive a -> m a withZipFileBS :: MonadIO m => BS.ByteString -> ZipArchive a -> m a
withZipFileBS bs actions = withZipFileBS bs actions = liftIO $
liftIO $ bracket (emptySystemTempFile "parsed-zip") bracket (emptySystemTempFile "parsed-zip")
(\path -> removeFile path) $ removeFile
\path -> do (\path -> do
BS.writeFile path bs BS.writeFile path bs
withArchive path actions withArchive path actions)
-- | Zip ByteString contents and return the ZIP file as ByteString
zipContents :: MonadBase IO m => FilePath -> BS.ByteString -> m BS.ByteString
zipContents fpath bsContents = liftBase $
bracket (emptySystemTempFile "zip-contents")
removeFile
(\path -> do
s <- mkEntrySelector fpath
createArchive path (addEntry BZip2 bsContents s)
BS.readFile path)
-- | Same as zipContents above, but pure (in-memory)
zipContentsPure :: FilePath -> BSC.ByteString -> BSC.ByteString
zipContentsPure fpath bscContents = ZArch.fromArchive (ZArch.addEntryToArchive e ZArch.emptyArchive)
where
e = ZArch.toEntry fpath 0 bscContents
...@@ -322,7 +322,7 @@ flags: ...@@ -322,7 +322,7 @@ flags:
"full-text-search": "full-text-search":
"build-search-demo": false "build-search-demo": false
gargantext: gargantext:
"disable-db-obfuscation-executable": false "disable-db-obfuscation-executable": true
"no-phylo-debug-logs": false "no-phylo-debug-logs": false
"test-crypto": false "test-crypto": false
"generic-deriving": "generic-deriving":
...@@ -561,6 +561,8 @@ flags: ...@@ -561,6 +561,8 @@ flags:
"tasty-bench": "tasty-bench":
debug: false debug: false
tasty: true tasty: true
"tasty-golden":
"build-example": false
texmath: texmath:
executable: false executable: false
server: false server: false
......
1,collab
2,postpart
3,cat
4,cat
5,dog
6,dog
7,run
8,run
9,run
10,jump
11,jump
12,jump
13,swim
14,swim
15,swim
16,fish
17,fish
18,fish
19,eat
20,eat
21,eat
22,talk
23,talk
24,talk
25,walk
26,walk
27,walk
28,dant
29,dant
30,dant
31,sing
32,sing
33,sing
34,play
35,play
36,play
37,work
38,work
39,work
40,teach
41,teach
42,teach
43,learn
44,learn
45,learn
46,read
47,read
48,read
49,writ
50,writ
51,writ
52,paint
53,paint
54,paint
55,draw
56,draw
57,draw
58,speak
59,speak
60,speak
61,think
62,think
63,think
64,see
65,see
66,seen
67,hear
68,hear
69,heard
70,touch
71,touch
72,touch
73,smel
74,smel
75,smel
76,tast
77,tast
78,tast
79,laugh
80,laugh
81,laugh
82,cry
83,cry
84,cri
85,smil
86,smil
87,smil
88,frown
89,frown
90,frown
91,happy
92,happy
93,happiest
94,sad
95,sad
96,saddest
97,angry
98,angry
99,angriest
100,calm
101,calm
102,calmest
103,corrob
{-|
Module : Core.Similarity
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Test.Core.Similarity where
import Gargantext.Core.Methods.Similarities.Conditional
import Gargantext.Prelude
import Test.Hspec
-- | Hspec entry point: sanity-check that the similarity
-- optimisations agree with the reference implementation.
test :: Spec
test =
  describe "check if similarities optimizations are well implemented" $
    it "Conditional" $
      conditional_test `shouldBe` True
...@@ -64,7 +64,6 @@ exampleDocument_01 = either errorTrace identity $ parseEither parseJSON $ [aeson ...@@ -64,7 +64,6 @@ exampleDocument_01 = either errorTrace identity $ parseEither parseJSON $ [aeson
exampleDocument_02 :: HyperdataDocument exampleDocument_02 :: HyperdataDocument
exampleDocument_02 = either errorTrace identity $ parseEither parseJSON $ [aesonQQ| exampleDocument_02 = either errorTrace identity $ parseEither parseJSON $ [aesonQQ|
{ "doi":"" { "doi":""
, "uniqId": "1405.3072v3"
, "bdd": "Arxiv" , "bdd": "Arxiv"
, "publication_day":6 , "publication_day":6
, "language_iso2":"EN" , "language_iso2":"EN"
...@@ -89,7 +88,6 @@ exampleDocument_03 = either errorTrace identity $ parseEither parseJSON $ [aeson ...@@ -89,7 +88,6 @@ exampleDocument_03 = either errorTrace identity $ parseEither parseJSON $ [aeson
, "url": "http://arxiv.org/pdf/1405.3072v2" , "url": "http://arxiv.org/pdf/1405.3072v2"
, "title": "Haskell for OCaml programmers" , "title": "Haskell for OCaml programmers"
, "source": "" , "source": ""
, "uniqId": "1405.3072v2"
, "authors": "Raphael Poss, Herbert Ballerina" , "authors": "Raphael Poss, Herbert Ballerina"
, "abstract": " This introduction to Haskell is written to optimize learning by programmers who already know OCaml. " , "abstract": " This introduction to Haskell is written to optimize learning by programmers who already know OCaml. "
, "institutes": "" , "institutes": ""
...@@ -107,7 +105,6 @@ exampleDocument_04 = either errorTrace identity $ parseEither parseJSON $ [aeson ...@@ -107,7 +105,6 @@ exampleDocument_04 = either errorTrace identity $ parseEither parseJSON $ [aeson
, "url": "http://arxiv.org/pdf/1407.5670v1" , "url": "http://arxiv.org/pdf/1407.5670v1"
, "title": "Rust for functional programmers" , "title": "Rust for functional programmers"
, "source": "" , "source": ""
, "uniqId": "1407.5670v1"
, "authors": "Raphael Poss" , "authors": "Raphael Poss"
, "abstract": " This article provides an introduction to Rust , a systems language by Mozilla , to programmers already familiar with Haskell , OCaml or other functional languages. " , "institutes": "" , "abstract": " This article provides an introduction to Rust , a systems language by Mozilla , to programmers already familiar with Haskell , OCaml or other functional languages. " , "institutes": ""
, "language_iso2": "EN" , "language_iso2": "EN"
......
...@@ -28,7 +28,7 @@ phyloConfig = PhyloConfig { ...@@ -28,7 +28,7 @@ phyloConfig = PhyloConfig {
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2} , similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1} , seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True , defaultMode = True
, findAncestors = True , findAncestors = False
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups} , phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3} , phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5} , timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
......
module Test.Offline.Stemming.Lancaster where
import Prelude
import Data.ByteString.Char8 qualified as C8
import Data.Text qualified as T
import Gargantext.Core.Text.Terms.Mono.Stem.Lancaster (stemIt)
import Gargantext.Prelude (toS)
import Test.Tasty
import Test.Tasty.Golden (goldenVsStringDiff)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text.Encoding as TE
-- | Golden-file test suite for the Lancaster stemmer: regenerates the
-- test vector and compares it against the checked-in golden output.
tests :: TestTree
tests = testGroup "Lancaster"
  [ goldenVsStringDiff "test vector works" diffCmd goldenFile mkTestVector
  ]
  where
    -- External diff invoked on mismatch, given the reference and new files.
    diffCmd ref new = ["cabal", "v2-run", "-v0", "garg-golden-file-diff", "--", ref, new]
    -- Expected output, checked into the repository.
    goldenFile = "test-data/stemming/lancaster.txt"
-- | List of /unstemmed/ test words, each paired with the row index used
-- in the golden file (@test-data/stemming/lancaster.txt@).
testWords :: [(Int, T.Text)]
testWords = [
  (1, "collaboration")
  , (2, "postpartum")
  , (3, "cat")
  , (4, "cats")
  , (5, "dog")
  , (6, "dogs")
  , (7, "run")
  , (8, "running")
  , (9, "runner")
  , (10, "jump")
  , (11, "jumped")
  , (12, "jumping")
  , (13, "swim")
  , (14, "swimming")
  , (15, "swimmer")
  , (16, "fish")
  , (17, "fishing")
  , (18, "fisher")
  , (19, "eat")
  , (20, "eating")
  , (21, "eater")
  , (22, "talk")
  , (23, "talking")
  , (24, "talks")
  , (25, "walk")
  , (26, "walking")
  , (27, "walker")
  , (28, "dance")
  , (29, "dancing")
  , (30, "dancer")
  , (31, "sing")
  , (32, "singing")
  , (33, "singer")
  , (34, "play")
  , (35, "playing")
  , (36, "player")
  , (37, "work")
  , (38, "working")
  , (39, "worker")
  , (40, "teach")
  , (41, "teaching")
  , (42, "teacher")
  , (43, "learn")
  , (44, "learning")
  , (45, "learner")
  , (46, "read")
  , (47, "reading")
  , (48, "reader")
  , (49, "write")
  , (50, "writing")
  , (51, "writer")
  , (52, "paint")
  , (53, "painting")
  , (54, "painter")
  , (55, "draw")
  , (56, "drawing")
  , (57, "drawer")
  , (58, "speak")
  , (59, "speaking")
  , (60, "speaker")
  , (61, "think")
  , (62, "thinking")
  , (63, "thinker")
  , (64, "see")
  , (65, "seeing")
  , (66, "seen")
  , (67, "hear")
  , (68, "hearing")
  , (69, "heard")
  , (70, "touch")
  , (71, "touching")
  , (72, "touched")
  , (73, "smell")
  , (74, "smelling")
  , (75, "smelled")
  , (76, "taste")
  , (77, "tasting")
  , (78, "tasted")
  , (79, "laugh")
  , (80, "laughing")
  , (81, "laughed")
  , (82, "cry")
  , (83, "crying")
  , (84, "cried")
  , (85, "smile")
  , (86, "smiling")
  , (87, "smiled")
  , (88, "frown")
  , (89, "frowning")
  , (90, "frowned")
  , (91, "happy")
  , (92, "happier")
  , (93, "happiest")
  , (94, "sad")
  , (95, "sadder")
  , (96, "saddest")
  , (97, "angry")
  , (98, "angrier")
  , (99, "angriest")
  , (100, "calm")
  , (101, "calmer")
  , (102, "calmest")
  , (103, "corroborate")
  ]
-- | Render each test word as an \"index,stem\" line and join them,
-- producing the lazy bytes compared against the golden file.
mkTestVector :: IO BL.ByteString
mkTestVector = pure . toS . C8.unlines $ map render testWords
  where
    -- One golden-file row: the index, a comma, then the stemmed word.
    render (ix, w) = C8.pack (show ix) <> "," <> TE.encodeUtf8 (stemIt w)
...@@ -12,18 +12,20 @@ module Main where ...@@ -12,18 +12,20 @@ module Main where
import Gargantext.Prelude import Gargantext.Prelude
import qualified Test.Core.Text.Corpus.Query as CorpusQuery import qualified Test.Core.Text.Corpus.Query as CorpusQuery
import qualified Test.Core.Utils as Utils import qualified Test.Core.Utils as Utils
import qualified Test.Core.Text.Tokenize as Tokenize import qualified Test.Graph.Clustering as Graph
import qualified Test.Graph.Clustering as Graph import qualified Test.Ngrams.NLP as NLP
import qualified Test.Ngrams.NLP as NLP import qualified Test.Ngrams.Query as NgramsQuery
import qualified Test.Ngrams.Query as NgramsQuery import qualified Test.Offline.JSON as JSON
import qualified Test.Offline.JSON as JSON import qualified Test.Offline.Errors as Errors
import qualified Test.Offline.Errors as Errors import qualified Test.Offline.Phylo as Phylo
import qualified Test.Offline.Phylo as Phylo import qualified Test.Offline.Stemming.Lancaster as Lancaster
import qualified Test.Parsers.Date as PD import qualified Test.Parsers.Date as PD
import qualified Test.Utils.Crypto as Crypto import qualified Test.Utils.Crypto as Crypto
import qualified Test.Utils.Jobs as Jobs import qualified Test.Utils.Jobs as Jobs
import qualified Test.Core.Similarity as Similarity
import Test.Tasty import Test.Tasty
import Test.Tasty.Hspec import Test.Tasty.Hspec
...@@ -38,6 +40,7 @@ main = do ...@@ -38,6 +40,7 @@ main = do
nlpSpec <- testSpec "NLP" NLP.test nlpSpec <- testSpec "NLP" NLP.test
jobsSpec <- testSpec "Jobs" Jobs.test jobsSpec <- testSpec "Jobs" Jobs.test
tokenizeSpec <- testSpec "Tokenize" Tokenize.test tokenizeSpec <- testSpec "Tokenize" Tokenize.test
similaritySpec <- testSpec "Similarity" Similarity.test
defaultMain $ testGroup "Gargantext" defaultMain $ testGroup "Gargantext"
[ utilSpec [ utilSpec
...@@ -52,5 +55,7 @@ main = do ...@@ -52,5 +55,7 @@ main = do
, CorpusQuery.tests , CorpusQuery.tests
, JSON.tests , JSON.tests
, Errors.tests , Errors.tests
, similaritySpec
, Phylo.tests , Phylo.tests
, testGroup "Stemming" [ Lancaster.tests ]
] ]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment