Verified Commit 6e38bade authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 224-dev-uniform-ngrams-creation

parents 03427377 135a1220
## Version 0.0.6.9.9.9.6.5 [RELEASE CANDIDATE 007]
* [BACK][WIP][Singulars and plurals not grouped anymore (#169)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/169)
* [BACK][FEAT][Coherent Stemming interface (#324)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/324)
* [BACK][FIX] Order 1 aligned with first implementation with tests
## Version 0.0.6.9.9.9.6.4 [RELEASE CANDIDATE 007]
* [BACK][FEAT][[Node Phylo] Change the default value of findAncestors to False (#314)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/314)
* [BACK][OPTIM][Export Data as zip for all exports (#312)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/312)
* [BACK][METRICS] SQL queries to follow the community (export to CSV)
## Version 0.0.6.9.9.9.6.3 [Release Candidate for 007]
* [BACK][OPTIM] Option to enable GHC build with O2 option
......
......@@ -25,7 +25,7 @@ phyloConfig = PhyloConfig {
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = True
, findAncestors = False
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
......
module Main where
import Prelude
import Data.TreeDiff.Class
import Data.TreeDiff.Pretty
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import System.Environment (getArgs)
import System.Exit (exitFailure)
import Control.Monad (unless)
import qualified Data.List as L
-- | Renders in a pretty way the content of two golden files. The
-- first file should contain the expected output, the second the
-- actual data generated by the test suite. Exits with a non-zero
-- status when the two files differ in any way.
main :: IO ()
main = do
  args <- getArgs
  case args of
    (refPath:newPath:_) -> do
      ref <- T.lines <$> TIO.readFile refPath
      new <- T.lines <$> TIO.readFile newPath
      -- NB: 'zip' alone truncates to the shorter file, which would
      -- wrongly report success when one file is a strict prefix of the
      -- other; compare the line counts explicitly to catch that case.
      let differences = filter (uncurry (/=)) $ zip ref new
          sameLength  = L.length ref == L.length new
      unless (L.null differences && sameLength) $ do
        print $ ansiWlEditExpr $ ediff' (map fst differences) (map snd differences)
        exitFailure
    _ -> do
      -- Partial pattern on getArgs would crash with an unhelpful
      -- irrefutable-pattern error; print a usage line instead.
      putStrLn "Usage: garg-golden-file-diff <expected-file> <actual-file>"
      exitFailure
......@@ -44,7 +44,7 @@ phyloConfig outdir = PhyloConfig {
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = True
, findAncestors = False
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
......
-- Community-growth metrics queries.
-- Node typename mapping (inferred from the \COPY target file names
-- below — confirm against the application's NodeType table):
--   30 = corpora, 9 = graphs, 90 = phylos, 210 = teams.

-- Total number of corpus nodes.
select count(*) from nodes n where n.typename = 30;

-- Corpus nodes: total count plus the percentage created during the
-- last 3 calendar months (NULLIF guards against division by zero;
-- TO_CHAR 'fm99%' renders the ratio as e.g. "12%").
WITH total AS (SELECT * from nodes n where n.typename = 30)
, sum AS (SELECT count(*) AS "TOTAL" from total)
, increase AS (SELECT count(*) from total as t WHERE t.date >= date_trunc('month', current_date - interval '3' month))
SELECT *,
(SELECT TO_CHAR((ROUND(100 * NULLIF((SELECT * from increase),0) / NULLIF((SELECT * from sum), 0))), 'fm99%') AS "CREATED LAST 3 MONTHS")
FROM sum

-- Graph nodes: total plus percentage created in the last 3 months.
WITH total AS (SELECT * from nodes n where n.typename = 9)
, sum AS (SELECT count(*) AS "TOTAL" from total)
, increase AS (SELECT count(*) from total as t WHERE t.date >= date_trunc('month', current_date - interval '3' month))
SELECT *,
(SELECT TO_CHAR((ROUND(100 * NULLIF((SELECT * from increase),0) / NULLIF((SELECT * from sum), 0))), 'fm99%') AS "CREATED LAST 3 MONTHS")
FROM sum

-- Phylo nodes: total plus percentage created in the last 3 months.
WITH total AS (SELECT * from nodes n where n.typename = 90)
, sum AS (SELECT count(*) AS "TOTAL" from total)
, increase AS (SELECT count(*) from total as t WHERE t.date >= date_trunc('month', current_date - interval '3' month))
SELECT *,
(SELECT TO_CHAR((ROUND(100 * NULLIF((SELECT * from increase),0) / NULLIF((SELECT * from sum), 0))), 'fm99%') AS "CREATED LAST 3 MONTHS")
FROM sum

-- Team nodes: total plus percentage created in the last 3 months.
WITH total AS (SELECT * from nodes n where n.typename = 210)
, sum AS (SELECT count(*) AS "TOTAL" from total)
, increase AS (SELECT count(*) from total as t WHERE t.date >= date_trunc('month', current_date - interval '3' month))
SELECT *,
(SELECT TO_CHAR((ROUND(100 * NULLIF((SELECT * from increase),0) / NULLIF((SELECT * from sum), 0))), 'fm99%') AS "CREATED LAST 3 MONTHS")
FROM sum

-- Registered users: total plus percentage joined in the last 3 months
-- (auth_user uses date_joined rather than date).
WITH total AS (SELECT * from auth_user as A)
, sum AS (SELECT count(*) AS "TOTAL" from total)
, increase AS (SELECT count(*) from total as t WHERE t.date_joined >= date_trunc('month', current_date - interval '3' month))
SELECT *,
(SELECT TO_CHAR((ROUND(100 * NULLIF((SELECT * from increase),0) / NULLIF((SELECT * from sum), 0))), 'fm99%') AS "CREATED LAST 3 MONTHS")
FROM sum

-- Total number of graph nodes.
select count(*) from nodes n where n.typename = 9;

-- psql meta-commands: export monthly creation counts per node type to
-- CSV files for external plotting (count, month buckets via date_trunc).
\COPY (SELECT count(*), date_trunc('month', n.date) FROM nodes n WHERE n.typename = 30 GROUP BY 2 ORDER BY 2) TO '/tmp/corpora.csv' (FORMAT csv);
\COPY (SELECT count(*), date_trunc('month', n.date) from nodes n where n.typename = 9 group by 2 ORDER BY 2) TO '/tmp/graphs.csv' (FORMAT csv);
\COPY (select count(*), date_trunc('month', n.date) from nodes n where n.typename = 90 group by 2 ORDER BY 2) TO '/tmp/phylos.csv' (FORMAT csv);
\COPY (select count(*), date_trunc('month', n.date) from nodes n where n.typename = 210 group by 2 ORDER BY 2) TO '/tmp/teams.csv' (FORMAT csv);
\COPY (select count(*), date_trunc('month', a.date_joined) from auth_user a group by 2) TO '/tmp/users.csv' (FORMAT csv);

-- Total number of phylo nodes.
select count(*) from nodes n where n.typename = 90;

-- Total number of team nodes.
select count(*) from nodes n where n.typename = 210;

-- Total number of registered users.
select count(*) from auth_user;
......@@ -19,7 +19,7 @@ fi
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
expected_cabal_project_hash="c9fe39301e8b60bfd183e60e7e25a14cd1c9c66d8739bf9041ca3f4db89db7c6"
expected_cabal_project_freeze_hash="db24c7d3006167102532e3101e2b49bae13d478003459c7d3f1d66590e57740a"
expected_cabal_project_freeze_hash="2c8960ffcf1b94aa11a3543e3b5facd2db5af19569fecaec4bc0ab4c1edd22a5"
cabal --store-dir=$STORE_DIR v2-build --dry-run
cabal2stack --system-ghc --allow-newer --resolver lts-21.17 --resolver-file devops/stack/lts-21.17.yaml -o stack.yaml
......
This diff is collapsed.
......@@ -5,7 +5,7 @@ cabal-version: 3.4
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.9.9.9.6.3
version: 0.0.6.9.9.9.6.5
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -38,6 +38,7 @@ data-files:
test-data/phylo/bpa_phylo_test.json
test-data/phylo/open_science.json
test-data/phylo/issue-290-small.golden.json
test-data/stemming/lancaster.txt
test-data/test_config.ini
gargantext-cors-settings.toml
.clippy.dhall
......@@ -87,6 +88,7 @@ common tests
, hspec-wai
, hspec-wai-json
, tasty ^>= 1.5
, tasty-golden
, tasty-hspec
, tasty-hunit
, tasty-quickcheck
......@@ -151,6 +153,7 @@ library
Gargantext.Core.Methods.Similarities
Gargantext.Core.Ngrams.Tools
Gargantext.Core.Ngrams.Types
Gargantext.Core.Methods.Similarities.Conditional
Gargantext.Core.NLP
Gargantext.Core.NodeStory
Gargantext.Core.NodeStory.DB
......@@ -181,6 +184,7 @@ library
Gargantext.Core.Text.Terms.Eleve
Gargantext.Core.Text.Terms.Mono
Gargantext.Core.Text.Terms.Mono.Stem.En
Gargantext.Core.Text.Terms.Mono.Stem.Lancaster
Gargantext.Core.Text.Terms.Multi
Gargantext.Core.Text.Terms.Multi.Lang.En
Gargantext.Core.Text.Terms.Multi.Lang.Fr
......@@ -280,8 +284,8 @@ library
Gargantext.API.Node.Corpus.Searx
Gargantext.API.Node.Document.Export
Gargantext.API.Node.Document.Export.Types
Gargantext.API.Node.DocumentsFromWriteNodes
Gargantext.API.Node.DocumentUpload
Gargantext.API.Node.DocumentsFromWriteNodes
Gargantext.API.Node.FrameCalcUpload
Gargantext.API.Node.Get
Gargantext.API.Node.New
......@@ -305,7 +309,6 @@ library
Gargantext.Core.Methods.Similarities.Accelerate.Conditional
Gargantext.Core.Methods.Similarities.Accelerate.Distributional
Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
Gargantext.Core.Methods.Similarities.Conditional
Gargantext.Core.Methods.Similarities.Distributional
Gargantext.Core.Ngrams.NgramsTree
Gargantext.Core.Statistics
......@@ -326,10 +329,10 @@ library
Gargantext.Core.Text.Corpus.Parsers.RIS
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
Gargantext.Core.Text.Corpus.Parsers.Telegram
Gargantext.Core.Text.Corpus.Parsers.WOS
Gargantext.Core.Text.Corpus.Parsers.Wikidata
Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
Gargantext.Core.Text.Corpus.Parsers.Wikimedia
Gargantext.Core.Text.Corpus.Parsers.WOS
Gargantext.Core.Text.Learn
Gargantext.Core.Text.List
Gargantext.Core.Text.List.Group
......@@ -427,12 +430,12 @@ library
Gargantext.Database.Query.Table.Node.Select
Gargantext.Database.Query.Table.Node.Update
Gargantext.Database.Query.Table.Node.User
Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
Gargantext.Database.Query.Table.NodeContext
Gargantext.Database.Query.Table.NodeContext_NodeContext
Gargantext.Database.Query.Table.NodeNgrams
Gargantext.Database.Query.Table.NodeNode
Gargantext.Database.Query.Table.NodeNodeNgrams
Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
Gargantext.Database.Query.Table.NodesNgramsRepo
Gargantext.Database.Query.Tree
Gargantext.Database.Query.Tree.Error
......@@ -440,13 +443,13 @@ library
Gargantext.Database.Schema.ContextNodeNgrams
Gargantext.Database.Schema.ContextNodeNgrams2
Gargantext.Database.Schema.NgramsPostag
Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
Gargantext.Database.Schema.NodeContext
Gargantext.Database.Schema.NodeContext_NodeContext
Gargantext.Database.Schema.NodeNgrams
Gargantext.Database.Schema.NodeNode
Gargantext.Database.Schema.NodeNodeNgrams
Gargantext.Database.Schema.NodeNodeNgrams2
Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
Gargantext.Database.Schema.NodesNgramsRepo
Gargantext.Database.Schema.Prelude
Gargantext.Database.Types
......@@ -651,7 +654,8 @@ library
, xml-conduit ^>= 1.9.1.3
, xml-types ^>= 0.3.8
, yaml ^>= 0.11.8.0
, zip ^>= 2.0.0
, zip ^>= 1.7.2
, zip-archive ^>= 0.4.3
, zlib ^>= 0.6.2.3
executable gargantext-admin
......@@ -865,6 +869,7 @@ test-suite garg-test-tasty
Test.Offline.Errors
Test.Offline.JSON
Test.Offline.Phylo
Test.Offline.Stemming.Lancaster
Test.Parsers.Date
Test.Parsers.Types
Test.Parsers.WOS
......@@ -902,6 +907,7 @@ test-suite garg-test-tasty
, patches-map ^>= 0.1.0.1
, postgres-options >= 0.2 && < 0.3
, postgresql-simple >= 0.6.4 && < 0.7
, pretty
, process ^>= 1.6.13.2
, quickcheck-instances ^>= 0.3.25.2
, raw-strings-qq
......@@ -915,8 +921,11 @@ test-suite garg-test-tasty
, servant-server ^>= 0.20
, shelly >= 1.9 && < 2
, stm ^>= 2.5.0.1
, template-haskell ^>= 2.19.0.0
, template-haskell
, text ^>= 1.2.4.1
, time ^>= 1.9.3
, tmp-postgres >= 1.34.1 && < 1.35
, tree-diff
, unordered-containers ^>= 0.2.16.0
, wai
, wai-extra
......@@ -1030,3 +1039,16 @@ executable gargantext-phylo-profile
, shelly
, split
default-language: Haskell2010
executable garg-golden-file-diff
import:
defaults
, optimized
main-is: Main.hs
hs-source-dirs:
bin/gargantext-golden-file-diff
build-depends:
base
, text
, tree-diff
default-language: Haskell2010
......@@ -25,10 +25,10 @@ import Data.Morpheus.Types
import Data.Text (pack)
import Data.Time.Format.ISO8601 (iso8601Show)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Prelude (GargM)
import Gargantext.Core.Types.Search (HyperdataRow(..), toHyperdataRow)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
import Gargantext.Database.Admin.Types.Node (ContextTitle, NodeId(..), NodeTypeId, UserId, unNodeId, ContextId (..))
import Gargantext.Database.Prelude (CmdCommon)
import Gargantext.Database.Query.Table.NodeContext (getNodeContext, getContextsForNgramsTerms, ContextForNgramsTerms(..), {- getContextNgrams, -} getContextNgramsMatchingFTS)
......@@ -71,8 +71,6 @@ data HyperdataRowDocumentGQL =
, hrd_source :: Text
, hrd_title :: Text
, hrd_url :: Text
, hrd_uniqId :: Text
, hrd_uniqIdBdd :: Text
} deriving (Generic, GQLType, Show)
data NodeContextGQL = NodeContextGQL
......@@ -216,8 +214,6 @@ toHyperdataRowDocumentGQL hyperdata =
, hrd_source = _hr_source
, hrd_title = _hr_title
, hrd_url = _hr_url
, hrd_uniqId = _hr_uniqId
, hrd_uniqIdBdd = _hr_uniqIdBdd
}
HyperdataRowContact { } -> Nothing
......
......@@ -54,16 +54,18 @@ import Servant
------------------------------------------------------------------------
type GETAPI = Summary "Get List"
:> "lists"
:> Capture "listId" ListId
:> "json"
:> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
:<|> "lists"
:> "lists"
:> Capture "listId" ListId
:> "csv"
:> Get '[GUS.CSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
:> ( "json"
:> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
:<|> "json.zip"
:> Get '[GUS.ZIP] (Headers '[Header "Content-Disposition" Text] NgramsListZIP)
:<|> "csv"
:> Get '[GUS.CSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap) )
getApi :: GargServer GETAPI
getApi = getJson :<|> getCsv
getApi listId = getJson listId
:<|> getJsonZip listId
:<|> getCsv listId
--
-- JSON API
......@@ -93,6 +95,18 @@ getJson lId = do
]
) lst
getJsonZip :: HasNodeStory env err m
=> ListId
-> m (Headers '[Header "Content-Disposition" Text] NgramsListZIP)
getJsonZip lId = do
lst <- getNgramsList lId
let nlz = NgramsListZIP { _nlz_nl = lst, _nlz_list_id = lId}
pure $ addHeader (concat [ "attachment; filename="
, nlzFileName nlz
, ".zip"
]
) nlz
getCsv :: HasNodeStory env err m
=> ListId
-> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
......
......@@ -21,12 +21,12 @@ import Data.Map.Strict (fromList)
import Data.Map.Strict qualified as Map
import Data.Text qualified as Text
import Data.Validity
import Gargantext.Core.Ngrams.Tools (getNgramsTableMap)
import Gargantext.Core.Ngrams.Types
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.API.Ngrams (getNgramsTableMap)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Types (ListType)
import Gargantext.Core.Text.List.Social.Prelude ( unPatchMapToHashMap )
import Gargantext.Core.Types.Main ( ListType )
import Gargantext.Database.Admin.Types.Node (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypes)
import Gargantext.Prelude
......
......@@ -21,22 +21,22 @@ import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text (pack)
import Gargantext.API.Node.Corpus.Export.Types
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm) )
import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..) )
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.Core.Ngrams.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.NodeStory.Types ( NodeListStory )
import Gargantext.Core.Types
import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node ( defaultList )
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata)
import Gargantext.Database.Schema.Context (_context_id)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash)
import Gargantext.Prelude.Crypto.Hash (hash)
......@@ -51,9 +51,7 @@ getCorpus :: CorpusId
getCorpus cId lId nt' = do
let
nt = case nt' of
Nothing -> NgramsTerms
Just t -> t
nt = fromMaybe NgramsTerms nt'
listId <- case lId of
Nothing -> defaultList cId
......@@ -75,10 +73,10 @@ getCorpus cId lId nt' = do
) ns (Map.map (Set.map unNgramsTerm) ngs)
where
d_hash :: Context HyperdataDocument -> Set Text -> Text
d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata a)
, hash b
d_hash _a b = hash [ -- fromMaybe "" (_hd_uniqId $ _context_hyperdata a),
hash b
]
pure $ addHeader ("attachment; filename=GarganText_corpus-" <> (pack $ show cId) <> ".json")
pure $ addHeader ("attachment; filename=GarganText_corpus-" <> pack (show cId) <> ".json")
$ Corpus { _c_corpus = Map.elems r
, _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
......
......@@ -23,7 +23,7 @@ import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM)
import Data.Tuple.Select (sel1, sel2, sel3)
import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
......@@ -39,8 +39,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId, ListId, NodeType(NodeTexts))
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (getOrMkList)
import Gargantext.Database.Query.Table.Node (insertDefaultNodeIfNotExists)
import Gargantext.Database.Query.Table.Node (getOrMkList, insertDefaultNodeIfNotExists)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
......@@ -53,7 +52,7 @@ import Prelude qualified
langToSearx :: Lang -> Text
langToSearx All = "en-US"
langToSearx x = (Text.toLower acronym) <> "-" <> acronym
langToSearx x = Text.toLower acronym <> "-" <> acronym
where
acronym = show x
......@@ -136,7 +135,7 @@ insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) =
-- docs :: [Either Text HyperdataDocument]
let docs = hyperdataDocumentFromSearxResult l <$> _srs_results
--printDebug "[triggerSearxSearch] docs" docs
let docs' = catMaybes $ rightToMaybe <$> docs
let docs' = mapMaybe rightToMaybe docs
{-
Prelude.mapM_ (\(HyperdataDocument { _hd_title, _hd_publication_year, _hd_publication_date }) -> do
printDebug "[triggerSearxSearch] doc time" $
......@@ -214,16 +213,14 @@ hyperdataDocumentFromSearxResult l (SearxResult { _sr_content, _sr_engine, _sr_p
Right HyperdataDocument { _hd_bdd = Just "Searx"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just _sr_title
, _hd_authors = Nothing
, _hd_institutes = Nothing
, _hd_source = Just _sr_engine
, _hd_abstract = _sr_content
, _hd_publication_date = T.pack <$> formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%S" <$> mDate
, _hd_publication_year = fromIntegral <$> sel1 <$> mGregorian
, _hd_publication_date = T.pack Prelude.. formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%S" Prelude.<$> mDate
, _hd_publication_year = fromIntegral Prelude.. sel1 Prelude.<$> mGregorian
, _hd_publication_month = sel2 <$> mGregorian
, _hd_publication_day = sel3 <$> mGregorian
, _hd_publication_hour = Nothing
......
......@@ -12,28 +12,29 @@ module Gargantext.API.Node.Document.Export
where
import Control.Lens (view)
import Data.ByteString.Lazy.Char8 qualified as BSC
import Data.Csv (encodeDefaultOrderedByName)
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Data.Version (showVersion)
import Gargantext.API.Node.Document.Export.Types
import Gargantext.API.Prelude (GargNoServer, GargServer)
import Gargantext.Core (toDBid)
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Node (DocId, NodeId, NodeType(..))
import Gargantext.Database.Query.Facet (runViewDocuments, Facet(..))
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType)
import Gargantext.Database.Query.Table.Node.User
import Gargantext.Database.Query.Table.Node.User ( getNodeUser )
import Gargantext.Database.Schema.Node (NodePoly(..), node_user_id)
import Gargantext.Prelude
import Servant
import qualified Data.ByteString.Lazy.Char8 as BSC
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Paths_gargantext as PG -- cabal magic build module
import Paths_gargantext qualified as PG -- cabal magic build module
import Servant ( addHeader, (:<|>)((:<|>)), Header, Headers(getResponse) )
api :: NodeId
-- ^ The ID of the target user
-> DocId
-> GargServer API
api userNodeId dId = getDocumentsJSON userNodeId dId
:<|> getDocumentsJSONZip userNodeId dId
:<|> getDocumentsCSV userNodeId dId
--------------------------------------------------
......@@ -47,11 +48,12 @@ getDocumentsJSON nodeUserId pId = do
mcId <- getClosestParentIdByType pId NodeCorpus
let cId = maybe (panicTrace "[G.A.N.D.Export] Node has no parent") identity mcId
docs <- runViewDocuments cId False Nothing Nothing Nothing Nothing Nothing
pure $ addHeader (T.concat [ "attachment; filename=GarganText_DocsList-"
, T.pack $ show pId
, ".json"])
DocumentExport { _de_documents = mapFacetDoc uId <$> docs
, _de_garg_version = T.pack $ showVersion PG.version }
let dexp = DocumentExport { _de_documents = mapFacetDoc uId <$> docs
, _de_garg_version = T.pack $ showVersion PG.version }
pure $ addHeader (T.concat [ "attachment; filename="
, "GarganText_DocsList-"
, T.pack (show pId)
, ".json" ]) dexp
where
mapFacetDoc uId (FacetDoc { .. }) =
Document { _d_document =
......@@ -71,6 +73,18 @@ getDocumentsJSON nodeUserId pId = do
, _ng_hash = "" }
, _d_hash = ""}
getDocumentsJSONZip :: NodeId
-- ^ The Node ID of the target user
-> DocId
-> GargNoServer (Headers '[Header "Content-Disposition" T.Text] DocumentExportZIP) -- [Document]
getDocumentsJSONZip userNodeId pId = do
dJSON <- getDocumentsJSON userNodeId pId
let dexp = getResponse dJSON
let dexpz = DocumentExportZIP { _dez_dexp = dexp, _dez_doc_id = pId }
pure $ addHeader (T.concat [ "attachment; filename="
, dezFileName dexpz
, ".zip" ]) dexpz
getDocumentsCSV :: NodeId
-- ^ The Node ID of the target user
-> DocId
......
......@@ -13,14 +13,21 @@ Portability : POSIX
module Gargantext.API.Node.Document.Export.Types where
import Data.Aeson (encode)
import Data.Aeson.TH (deriveJSON)
import Data.Csv (DefaultOrdered(..), ToNamedRecord(..), (.=), header, namedRecord)
import Data.Swagger ( genericDeclareNamedSchema, ToParamSchema(..), ToSchema(..) )
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Gargantext.Core.Types ( Node, TODO )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Node (DocId)
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude
import Servant
import Gargantext.Utils.Servant (ZIP)
import Gargantext.Utils.Zip (zipContentsPure)
import Protolude
import Servant ((:>), (:<|>), Get, Header, Headers(..), JSON, MimeRender(..), PlainText, Summary)
-- | Document Export
......@@ -29,6 +36,12 @@ data DocumentExport =
, _de_garg_version :: Text
} deriving (Generic)
-- | This is to represent a zipped document export. We want to have doc_id in zipped file name.
data DocumentExportZIP =
DocumentExportZIP { _dez_dexp :: DocumentExport
, _dez_doc_id :: DocId } deriving (Generic)
data Document =
Document { _d_document :: Node HyperdataDocument
, _d_ngrams :: Ngrams
......@@ -66,6 +79,9 @@ type Hash = Text
instance ToSchema DocumentExport where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_de_")
instance ToSchema DocumentExportZIP where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_dez_")
instance ToSchema Document where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
......@@ -76,6 +92,9 @@ instance ToSchema Ngrams where
instance ToParamSchema DocumentExport where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema DocumentExportZIP where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Document where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
......@@ -85,10 +104,25 @@ instance ToParamSchema Ngrams where
type API = Summary "Document Export"
:> "export"
:> ( "json"
:> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExport)
:> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExport)
:<|> "json.zip"
:> Get '[ZIP] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExportZIP)
:<|> "csv"
:> Get '[PlainText] (Headers '[Servant.Header "Content-Disposition" Text] Text)) -- [Document])
:> Get '[PlainText] (Headers '[Servant.Header "Content-Disposition" Text] Text) )
$(deriveJSON (unPrefix "_ng_") ''Ngrams)
$(deriveJSON (unPrefix "_d_") ''Document)
$(deriveJSON (unPrefix "_de_") ''DocumentExport)
------
-- Needs to be here because of deriveJSON TH above
dezFileName :: DocumentExportZIP -> Text
dezFileName (DocumentExportZIP { .. }) = "GarganText_DocsList-" <> show _dez_doc_id <> ".json"
instance MimeRender ZIP DocumentExportZIP where
mimeRender _ dexpz@(DocumentExportZIP { .. }) =
zipContentsPure (T.unpack $ dezFileName dexpz) (encode _dez_dexp)
......@@ -22,22 +22,22 @@ import Data.Swagger (ToSchema)
import Data.Text qualified as T
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Errors.Types
import Gargantext.API.Prelude
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Prelude ( GargM )
import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus )
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Node ( DocId, NodeId, NodeType(NodeCorpus) )
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType')
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant
import Servant ( JSON, Summary, type (:>), HasServer(ServerT) )
data DocumentUpload = DocumentUpload
......@@ -108,8 +108,6 @@ documentUpload nId doc = do
let hd = HyperdataDocument { _hd_bdd = Nothing
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ if view du_title doc == "" then T.take 50 (view du_abstract doc) else view du_title doc
, _hd_authors = Just $ view du_authors doc
......
......@@ -10,23 +10,23 @@ Portability : POSIX
-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.DocumentsFromWriteNodes
where
import Conduit
import Data.Aeson (defaultOptions, genericParseJSON, genericToJSON)
import Conduit ( yieldMany )
import Control.Lens ((^.))
import Data.Aeson ( genericParseJSON, defaultOptions, genericToJSON, FromJSON(parseJSON), ToJSON(toJSON) )
import Data.List qualified as List
import Data.Swagger
import Data.Swagger ( ToSchema )
import Data.Text qualified as T
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser, auth_node_id, auth_user_id )
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types
import Gargantext.API.Ngrams (commitStatePatch)
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Ngrams (commitStatePatch, Versioned(..))
import Gargantext.API.Prelude (GargM)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Ngrams.Types (Versioned(..))
......@@ -39,13 +39,13 @@ import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText)
import Gargantext.Database.Action.Flow.Types (DataText(..), FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..), getHyperdataFrameContents )
import Gargantext.Database.Admin.Types.Node ( NodeId, Node, NodeType(..) )
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith, getOrMkList)
import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant
import Servant ( JSON, Summary, type (:>), HasServer(ServerT) )
------------------------------------------------------------------------
type API = Summary " Documents from Write nodes."
......@@ -106,7 +106,7 @@ documentsFromWriteNodes authenticatedUser nId Params { selection, lang, paragrap
pure (node, contents)
) frameWrites
let paragraphs' = fromMaybe (7 :: Int) $ (readMaybe $ T.unpack paragraphs)
let paragraphs' = fromMaybe (7 :: Int) $ readMaybe (T.unpack paragraphs)
let parsedE = (\(node, contents)
-> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
let parsed = List.concat $ rights parsedE
......@@ -159,8 +159,6 @@ hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just $ show Notes
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just t
, _hd_authors = Just authors'
......
......@@ -16,13 +16,13 @@ Here is writtent a common interface.
module Gargantext.Core.Ext.IMTUser -- (deserialiseImtUsersFromFile)
where
import Codec.Serialise
import Codec.Serialise ( Serialise, deserialise )
import Data.ByteString.Lazy qualified as BL
import Data.Csv
import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header )
import Data.Text qualified as T
import Data.Vector (Vector)
import Data.Vector qualified as Vector
import Gargantext.Core.Text.Corpus.Parsers.CSV
import Gargantext.Core.Text.Corpus.Parsers.CSV ( csvDecodeOptions, Delimiter(Tab) )
import Gargantext.Database.Admin.Types.Hyperdata.Contact
import Gargantext.Prelude
import System.FilePath.Posix (takeExtension)
......@@ -156,11 +156,9 @@ imtUser2gargContact (IMTUser { id
, _hc_where = [ou]
, _hc_title = title
, _hc_source = entite
, _hc_lastValidation = date_modification
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
, _hc_lastValidation = date_modification }
where
title = (<>) <$> (fmap (\p -> p <> " ") prenom) <*> nom
title = (<>) <$> fmap (\p -> p <> " ") prenom <*> nom
qui = ContactWho { _cw_id = id
, _cw_firstName = prenom
, _cw_lastName = nom
......@@ -182,7 +180,7 @@ imtUser2gargContact (IMTUser { id
-- meta = ContactMetaData (Just "IMT annuaire") date_modification'
toList' Nothing = []
toList' (Just x) = [x]
......@@ -15,7 +15,6 @@ Portability : POSIX
module Gargantext.Core.Flow.Types where
import Control.Lens
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Schema.Node (node_hash_id)
import Gargantext.Prelude
......@@ -25,14 +24,6 @@ class UniqId a
where
uniqId :: Lens' a (Maybe Hash)
instance UniqId HyperdataDocument
where
uniqId = hd_uniqId
instance UniqId HyperdataContact
where
uniqId = hc_uniqId
instance UniqId (Node a)
where
uniqId = node_hash_id
......
......@@ -125,7 +125,7 @@ matrixEye n' =
-- | Zero out the diagonal of a matrix by element-wise multiplication
-- with @matrixEye n@ (presumably a mask that is 0 on the diagonal and
-- 1 elsewhere — TODO confirm against 'matrixEye', defined above this view).
diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
-- NOTE(review): two equations with identical patterns — only the first
-- (traced) one is reachable; the second is dead code left by a merge/diff
-- and should be removed together with the 'trace' call.
diagNull n m = trace ("diagNull") $ zipWith (*) m (matrixEye n)
diagNull n m = zipWith (*) m (matrixEye n)
-- Returns an N-dimensional array with the values of x for the indices where
......
......@@ -19,7 +19,7 @@ import Data.Array.Accelerate (Matrix)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Core.Methods.Similarities.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Methods.Similarities.Accelerate.Conditional (measureConditional')
import Gargantext.Core.Methods.Similarities.Accelerate.Distributional (logDistributional2)
-- import Gargantext.Core.Text.Metrics.Count (coocOn)
-- import Gargantext.Core.Viz.Graph.Index
......@@ -35,13 +35,13 @@ data Similarity = Conditional | Distributional
deriving (Show, Eq)
-- | Dispatch a 'Similarity' over a co-occurrence count matrix.
measure :: Similarity -> Matrix Int -> Matrix Double
-- NOTE(review): the two 'Conditional' equations have identical patterns,
-- so only the first ('measureConditional') is reachable; the
-- 'measureConditional'' variant below it is dead code (merge artifact) —
-- confirm which implementation is intended and delete the other.
measure Conditional x = measureConditional x
measure Conditional x = measureConditional' x
measure Distributional x = logDistributional2 x
------------------------------------------------------------------------
-- | Similarity used for each graph metric: 'Order1' is conditional,
-- everything else is distributional.
withMetric :: GraphMetric -> Similarity
withMetric Order1 = Conditional
-- NOTE(review): duplicated catch-all equation — the second one is
-- unreachable (merge artifact; harmless but should be removed).
withMetric _ = Distributional
withMetric _ = Distributional
------------------------------------------------------------------------
-- Order2 type is for keeping Database json compatibility
......
......@@ -48,8 +48,30 @@ import qualified Gargantext.Prelude as P
-- Filtered with MiniMax.
-- | Conditional similarity over a raw co-occurrence count matrix.
-- Pipeline: convert counts to 'Double', normalise each cell by the
-- diagonal ('divByDiag' — presumably P(i|j)-style; TODO confirm its
-- definition, which is outside this view), zero the diagonal
-- ('diagNull'), then for every unordered pair keep only the dominant
-- direction ('maxOnly').
measureConditional :: Matrix Int -> Matrix Double
measureConditional m = run $ x $ map fromIntegral $ use m
  where
    x :: Acc (Matrix Double) -> Acc (Matrix Double)
    x mat = maxOnly $ diagNull r $ divByDiag r mat

    -- Square dimension of the input matrix.
    r :: Dim
    r = dim m

    -- Maybe we should use backpermute to accelerate it (no need to access to cells then)
    -- Keep cell (i,j) only when it is strictly greater than its
    -- transposed cell (j,i); the losing direction becomes 0, so at most
    -- one of the two directed values survives per pair.
    maxOnly :: Acc (SymetricMatrix Double) -> Acc (Matrix Double)
    maxOnly m' = generate (shape m')
      ((\coord
         -> let (Z :. (i :: Exp Int) :. (j :: Exp Int)) = unlift coord
                ij = m' ! (lift $ (Z :. i :. j))
                ji = m' ! (lift $ (Z :. j :. i))
            in
              ifThenElse (ij > ji) ij (constant 0)
       )
      )
measureConditional' :: Matrix Int -> Matrix Double
measureConditional' m = run $ x $ map fromIntegral $ use m
where
x :: Acc (Matrix Double) -> Acc (Matrix Double)
x mat = matMiniMax $ matProba r mat
......@@ -58,6 +80,7 @@ measureConditional m = run $ x $ map fromIntegral $ use m
r = dim m
-- | To filter the nodes
-- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
-- "confidence" , is the maximum probability between @i@ and @j@ to see
......
......@@ -21,7 +21,9 @@ import Data.HashMap.Strict qualified as Map
import Data.Set qualified as Set
import Gargantext.Core.Viz.Graph.Utils (getMax)
import Gargantext.Prelude
import Data.Map.Strict qualified as M
import Gargantext.Core.Methods.Similarities.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Viz.Graph.Index (score, MatrixShape(..))
type HashMap = Map.HashMap
------------------------------------------------------------------------
......@@ -35,7 +37,11 @@ conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq)
where
results' = [ let
ij = (/) <$> Map.lookup (i,j) m <*> Map.lookup (j,j) m
ji = (/) <$> Map.lookup (j,i) m <*> Map.lookup (i,i) m
-- proba of i|j, high values means i is more generic than j
ji = (/) <$> Map.lookup (i,j) m <*> Map.lookup (i,i) m
-- proba of j|i, high values means j is more generic than i
in getMax (i,j) ij ji
| i <- keys
......@@ -49,4 +55,45 @@ conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq)
keys = Set.toList $ Set.fromList (x <> y)
(x,y) = unzip $ Map.keys m
{-
Only for tests: fixtures comparing the two conditional implementations.
-}
-- | Sanity check: the hashmap-based 'conditional' and the
-- Accelerate-based 'measureConditional' agree on 'example_matrix'.
conditional_test :: Bool
conditional_test = conditional_test1 == conditional_test2
-- | Result of the pure hashmap-based 'conditional' on the example matrix.
conditional_test1 :: HashMap (Text,Text) Double
conditional_test1 = conditional $ Map.fromList example_matrix
-- | Reference result computed with the Accelerate implementation:
-- score the example matrix with 'measureConditional', keep strictly
-- positive entries, and convert to a 'HashMap' for comparison with
-- 'conditional_test1'.
conditional_test2 :: HashMap (Text,Text) Double
conditional_test2 =
    Map.fromList
  . M.toList
  . M.filter (>0)
  . score Square measureConditional
  . M.fromList
  $ example_matrix
-- | Small symmetric co-occurrence matrix used by the conditional tests.
-- Off-diagonal counts are mirrored in both directions so the matrix is
-- symmetric; diagonal entries appear once.
example_matrix :: [((Text,Text), Int)]
example_matrix = concatMap (\(a, b, c) -> entries a b c) cells
  where
    cells = [ ("polygon", "polygon", 19)
            , ("polygon", "square", 6)
            , ("polygon", "triangle", 10)
            , ("polygon", "losange", 3)
            , ("triangle", "triangle", 11)
            , ("square", "square", 7)
            , ("losange", "losange", 15)
            , ("shape", "shape", 10)
            , ("circle", "circle", 6)
            , ("shape", "circle", 3)
            , ("shape", "square", 2)
            , ("polygon", "shape", 10)
            ]
    -- Diagonal cells yield one entry; off-diagonal cells are mirrored.
    entries a b c
      | a == b    = [((a, b), c)]
      | otherwise = [((a, b), c), ((b, a), c)]
......@@ -116,21 +116,20 @@ where
import Codec.Serialise (Serialise())
import Control.Category ((>>>))
import Control.Lens (makePrisms, Iso', iso, from, (.=), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^?), (%~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~), over)
import Control.Monad.State
import Data.Aeson qualified as Aeson
import Data.Aeson (FromJSONKey(..), FromJSONKeyFunction(..), ToJSONKey(..), decode, encode, genericFromJSONKey, defaultJSONKeyOptions, genericToJSONKey, defaultJSONKeyOptions, genericParseJSON, genericToEncoding, genericToJSON, (.:), (.:?), withObject, object)
import Data.Foldable
import Control.Lens (makeLenses, makePrisms, Iso', iso, from, (.~), (.=), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^?), (%~), (.~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~), over)
import Data.Aeson hiding ((.=))
import Data.Aeson.TH (deriveJSON)
import Data.Csv (defaultEncodeOptions, encodeByNameWith, header, namedRecord, EncodeOptions(..), NamedRecord, Quoting(QuoteNone))
import Data.Csv qualified as Csv
import Data.HashMap.Strict.InsOrd qualified as InsOrdHashMap
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Data.Monoid
import Data.Patch.Class (Replace, replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, MaybePatch(Mod), unMod, old, new)
import Data.Set qualified as Set
import Data.String (IsString(..))
import Data.Swagger hiding (version, patch)
import Data.Text (pack, strip)
import Data.Validity
import Data.Swagger ( NamedSchema(NamedSchema), declareSchemaRef, genericDeclareNamedSchema, SwaggerType(SwaggerObject), ToParamSchema, ToSchema(..), HasProperties(properties), HasRequired(required), HasType(type_) )
import Data.Text qualified as T
import Data.Validity ( Validity(..) )
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField)
import Gargantext.Core.Text (size)
......@@ -139,10 +138,12 @@ import Gargantext.Core.Types.Query (Limit, Offset, MaxSize, MinSize)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node (ContextId)
import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Prelude hiding (IsString, hash, from, rem, replace, to)
import Gargantext.Database.Schema.Ngrams qualified as TableNgrams
import Gargantext.Prelude hiding (IsString, hash, from, replace, to)
import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
import Servant hiding (Patch)
import Gargantext.Utils.Servant (CSV, ZIP)
import Gargantext.Utils.Zip (zipContentsPure)
import Servant ( FromHttpApiData(parseUrlPiece), ToHttpApiData(toUrlPiece), Required, Strict, QueryParam', MimeRender(.. ))
import Servant.Job.Utils (jsonOptions)
import Test.QuickCheck (elements, frequency)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
......@@ -183,7 +184,7 @@ instance FromHttpApiData TabType where
parseUrlPiece _ = Left "Unexpected value of TabType"
instance ToHttpApiData TabType where
toUrlPiece = pack . show
toUrlPiece = T.pack . show
instance ToParamSchema TabType
instance ToJSON TabType
instance FromJSON TabType
......@@ -232,9 +233,9 @@ instance IsHashable NgramsTerm where
-- | The empty ngram term; '<>' comes from the 'Semigroup' instance
-- (defined elsewhere in this file).
instance Monoid NgramsTerm where
  mempty = NgramsTerm ""
instance FromJSONKey NgramsTerm where
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ T.strip t
instance IsString NgramsTerm where
fromString s = NgramsTerm $ pack s
fromString s = NgramsTerm $ T.pack s
data RootParent = RootParent
......@@ -398,7 +399,7 @@ instance FromHttpApiData OrderBy
parseUrlPiece _ = Left "Unexpected value of OrderBy"
instance ToHttpApiData OrderBy where
toUrlPiece = pack . show
toUrlPiece = T.pack . show
instance ToParamSchema OrderBy
instance FromJSON OrderBy
......@@ -418,6 +419,27 @@ data NgramsSearchQuery = NgramsSearchQuery
------------------------------------------------------------------------
type NgramsTableMap = Map NgramsTerm NgramsRepoElement
-- CSV:
-- header: status\tlabel\tforms
-- item: map\taccountability\taccounting|&|accoutns|&|account
-- | Tab-separated CSV rendering of an ngrams table.
-- Header: status\tlabel\tforms
-- Item:   map\taccountability\taccounting|&|accoutns|&|account
instance MimeRender CSV NgramsTableMap where
  mimeRender _ table =
      encodeByNameWith opts
                       (header ["status", "label", "forms"])
                       (map toRecord (Map.toList table))
    where
      -- Tab delimiter, no quoting.
      opts = defaultEncodeOptions { encDelimiter = fromIntegral (ord '\t')
                                  , encQuoting = QuoteNone }
      -- One CSV row per ngram: its list status, its label, and its
      -- grouped child forms joined with the "|&|" separator.
      toRecord :: (NgramsTerm, NgramsRepoElement) -> NamedRecord
      toRecord (NgramsTerm term, NgramsRepoElement { _nre_list, _nre_children }) =
        namedRecord [ "status" Csv..= listTypeText _nre_list
                    , "label" Csv..= term
                    , "forms" Csv..= T.intercalate "|&|" (unNgramsTerm <$> mSetToList _nre_children)
                    ]
      listTypeText :: ListType -> Text
      listTypeText CandidateTerm = "candidate"
      listTypeText MapTerm = "map"
      listTypeText StopTerm = "stop"
------------------------------------------------------------------------
-- On the Client side:
--data Action = InGroup NgramsId NgramsId
......@@ -870,6 +892,22 @@ instance ToSchema UpdateTableNgramsCharts where
------------------------------------------------------------------------
type NgramsList = (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
-- | Same as 'NgramsList', but additionally wraps the list node id so
-- that the inner .json file of the produced ZIP can have a proper name
-- (see 'nlzFileName').
data NgramsListZIP =
  NgramsListZIP { _nlz_nl :: NgramsList     -- ^ the ngrams payload
                , _nlz_list_id :: ListId    -- ^ list node id, used in the file name
                } deriving (Generic)
-- | Swagger schema: strip the @_nlz_@ prefix from field names.
instance ToSchema NgramsListZIP where
  declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nlz_")
-- | Name of the .json entry inside the exported ZIP, derived from the
-- wrapped list id.
nlzFileName :: NgramsListZIP -> Text
nlzFileName nlz = "GarganText_NgramsList-" <> show (_nlz_list_id nlz) <> ".json"
-- | Render as a ZIP archive containing a single JSON file, named via
-- 'nlzFileName' after the wrapped list id.
instance MimeRender ZIP NgramsListZIP where
  mimeRender _ nlz@(NgramsListZIP { .. }) =
    zipContentsPure (T.unpack $ nlzFileName nlz) (encode _nlz_nl)
--
-- Serialise instances
--
......
......@@ -19,13 +19,13 @@ module Gargantext.Core.Text.Corpus.API.Arxiv
) where
import Arxiv qualified as Arxiv
import Conduit
import Conduit ( ConduitT, (.|), mapC, takeC )
import Data.Text (unpack)
import Data.Text qualified as Text
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (get)
import Network.Api.Arxiv qualified as Ax
......@@ -46,7 +46,7 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> Ax.AndNot <$> (transformAST sub) <*> transformAST (BConst (Positive term))
-> Ax.AndNot <$> transformAST sub <*> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> Ax.AndNot <$> transformAST term1 <*> transformAST term2
BAnd sub1 sub2
......@@ -88,7 +88,7 @@ toDoc l (Arxiv.Result { abstract
, authors = aus
--, categories
, doi
, id
-- , id
, journal
--, primaryCategory
, publication_date
......@@ -99,8 +99,6 @@ toDoc l (Arxiv.Result { abstract
) = HyperdataDocument { _hd_bdd = Just "Arxiv"
, _hd_doi = Just $ Text.pack doi
, _hd_url = Just $ Text.pack url
, _hd_uniqId = Just $ Text.pack id
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ Text.pack title
, _hd_authors = authors aus
......@@ -118,13 +116,10 @@ toDoc l (Arxiv.Result { abstract
where
authors :: [Ax.Author] -> Maybe Text
authors [] = Nothing
authors aus' = Just $ (Text.intercalate ", ")
$ map Text.pack
$ map Ax.auName aus'
authors aus' = Just $ Text.intercalate ", "
$ map (Text.pack . Ax.auName) aus'
institutes :: [Ax.Author] -> Maybe Text
institutes [] = Nothing
institutes aus' = Just $ (Text.intercalate ", ")
$ (map (Text.replace ", " " - "))
$ map Text.pack
$ map Ax.auFil aus'
institutes aus' = Just $ Text.intercalate ", "
$ map ((Text.replace ", " " - " . Text.pack) . Ax.auFil) aus'
......@@ -9,7 +9,7 @@ Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.API.EPO where
import Conduit
import Conduit ( ConduitT, (.|), mapC )
import Data.LanguageCodes (ISO639_1)
import Data.Map.Strict qualified as Map
import Data.Text qualified as T
......@@ -17,7 +17,7 @@ import EPO.API.Client.Types qualified as EPO
import EPO.API.Client.Implementation qualified as EPO
import Gargantext.Core (iso639ToText)
import Gargantext.Core.Text.Corpus.Query qualified as Corpus
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Network.URI (parseURI)
import Protolude
import Servant.Client.Core (ClientError(ConnectionError))
......@@ -39,7 +39,7 @@ get (Just authKey) epoAPIUrl q lang mLimit = do
Just apiUrl -> do
eRes <- EPO.searchEPOAPIC apiUrl authKey Nothing limit (Corpus.getRawQuery q)
pure $ (\(total, itemsC) -> (Just total, itemsC .| mapC (toDoc lang))) <$> eRes
-- EPO.Paginated { .. } <- EPO.searchEPOAPI apiUrl authKey 1 20 (Corpus.getRawQuery q)
-- pure $ Right ( Just $ fromIntegral total, yieldMany items .| mapC (toDoc lang) )
......@@ -48,8 +48,6 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
HyperdataDocument { _hd_bdd = Just "EPO"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = id
, _hd_uniqIdBdd = id
, _hd_page = Nothing
, _hd_title = Map.lookup lang titles
, _hd_authors = authors_
......@@ -66,10 +64,10 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
, _hd_language_iso2 = Just $ iso639ToText lang }
where
authors_ = if authors == []
authors_ = if null authors
then Nothing
else Just (T.intercalate ", " authors)
-- EPO.withAuthKey authKey $ \token -> do
-- let range = EPO.Range { rBegin = 1, rEnd = limit }
-- (len, docsC) <- EPO.searchPublishedDataWithFetchC token (Just $ Corpus.getRawQuery q) (Just range)
......
......@@ -12,14 +12,12 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Hal
where
import Conduit
import Data.Either
import Conduit ( ConduitT, (.|), mapMC )
import Data.LanguageCodes qualified as ISO639
import Data.Map.Strict qualified as Map
import Data.Maybe
import Data.Text (pack, intercalate)
import Data.Text (pack)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (intercalate)
import HAL qualified as HAL
......@@ -30,7 +28,7 @@ import Servant.Client (ClientError)
get :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO [HyperdataDocument]
get la q ml = do
eDocs <- HAL.getMetadataWith [q] (Just 0) (fromIntegral <$> ml) la
either (panicTrace . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs
either (panicTrace . pack . show) (mapM (toDoc' la) . HAL._docs) eDocs
getC :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = do
......@@ -45,18 +43,16 @@ toDoc' la (HAL.Corpus { .. }) = do
-- printDebug "[toDoc corpus] h" h
let mDateS = maybe (Just $ pack $ show Defaults.year) Just _corpus_date
let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
let abstractDefault = intercalate " " _corpus_abstract
let abstractDefault = unwords _corpus_abstract
let abstract = case la of
Nothing -> abstractDefault
Just l -> fromMaybe abstractDefault (intercalate " " <$> Map.lookup l _corpus_abstract_lang_map)
Just l -> maybe abstractDefault unwords (Map.lookup l _corpus_abstract_lang_map)
pure HyperdataDocument { _hd_bdd = Just "Hal"
, _hd_doi = Just $ pack $ show _corpus_docid
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ intercalate " " _corpus_title
, _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" _corpus_authors_names
, _hd_title = Just $ unwords _corpus_title
1 , _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" _corpus_authors_names
, _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" $ _corpus_authors_affiliations <> map show _corpus_struct_id
, _hd_source = Just $ maybe "Nothing" identity _corpus_source
, _hd_abstract = Just abstract
......
......@@ -18,12 +18,12 @@ import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers (cleanText)
import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (get)
import Isidore qualified as Isidore
import Isidore.Client
import Servant.Client
import Servant.Client ( ClientError(DecodeFailure) )
-- | TODO work with the ServantErr
get :: Lang -> Maybe Isidore.Limit
......@@ -40,7 +40,7 @@ get la l q a = do
iDocs <- either printErr _content <$> Isidore.get l q a
hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
hDocs <- mapM (isidoreToDoc la) (toIsidoreDocs iDocs)
pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
......@@ -54,7 +54,7 @@ isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc l (IsidoreDoc t a d u s as) = do
let
author :: Author -> Text
author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
author (Author fn ln) = _name fn <> ", " <> _name ln
author (Authors aus) = Text.intercalate ". " $ map author aus
creator2text :: Creator -> Text
......@@ -66,21 +66,19 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
let mDateS = maybe (Just $ Text.pack $ show Defaults.year) (Just) d
let mDateS = maybe (Just $ Text.pack $ show Defaults.year) Just d
let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
pure HyperdataDocument
{ _hd_bdd = Just "Isidore"
, _hd_doi = Nothing
, _hd_url = u
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ cleanText $ langText t
, _hd_authors = creator2text <$> as
, _hd_institutes = Nothing
, _hd_source = Just $ maybe "Nothing" identity $ _sourceName <$> s
, _hd_abstract = cleanText <$> langText <$> a
, _hd_source = Just $ maybe "Nothing" (identity . _sourceName) s
, _hd_abstract = cleanText . langText <$> a
, _hd_publication_date = fmap (Text.pack . show) utcTime
, _hd_publication_year = pub_year
, _hd_publication_month = pub_month
......
......@@ -10,15 +10,15 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.OpenAlex where
import Conduit
import Conduit ( ConduitT, (.|), mapC, takeC )
import Data.LanguageCodes qualified as ISO639
import Data.Text qualified as T
import Gargantext.Core (iso639ToText)
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Protolude
import OpenAlex qualified as OA
import OpenAlex.Types qualified as OA
import Protolude
import Servant.Client (ClientError)
......@@ -38,8 +38,6 @@ toDoc (OA.Work { .. } ) =
HyperdataDocument { _hd_bdd = Just "OpenAlex"
, _hd_doi = doi
, _hd_url = url
, _hd_uniqId = Just id
, _hd_uniqIdBdd = Just id
, _hd_page = firstPage biblio
, _hd_title = title
, _hd_authors = authors authorships
......@@ -56,25 +54,25 @@ toDoc (OA.Work { .. } ) =
, _hd_language_iso2 = language }
where
firstPage :: OA.Biblio -> Maybe Int
firstPage OA.Biblio { first_page } = maybe Nothing readMaybe $ T.unpack <$> first_page
firstPage OA.Biblio { first_page } = (readMaybe . T.unpack) =<< first_page
authors :: [OA.Authorship] -> Maybe Text
authors [] = Nothing
authors aus = Just $ T.intercalate ", " $ catMaybes (getDisplayName <$> aus)
authors aus = Just $ T.intercalate ", " $ mapMaybe getDisplayName aus
where
getDisplayName :: OA.Authorship -> Maybe Text
getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn
institutes :: [OA.Authorship] -> Maybe Text
institutes [] = Nothing
institutes aus = Just $ T.intercalate ", " ((T.replace ", " " - ") . getInstitutesNames <$> aus)
institutes aus = Just $ T.intercalate ", " (T.replace ", " " - " . getInstitutesNames <$> aus)
where
getInstitutesNames OA.Authorship { institutions } = T.intercalate ", " $ getDisplayName <$> institutions
getDisplayName :: OA.DehydratedInstitution -> Text
getDisplayName OA.DehydratedInstitution { display_name = dn } = dn
source :: Maybe Text
source = maybe Nothing getSource primary_location
source = getSource =<< primary_location
where
getSource OA.Location { source = s } = getSourceDisplayName <$> s
getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn
......@@ -20,13 +20,13 @@ module Gargantext.Core.Text.Corpus.API.Pubmed
)
where
import Conduit
import Conduit ( ConduitT, (.|), mapC, takeC )
import Data.Text qualified as Text
import Data.Text.Encoding qualified as TE
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (get)
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
import PUBMED qualified as PubMed
......@@ -64,7 +64,7 @@ convertQuery q = ESearch (interpretQuery q transformAST)
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> (transformAST sub) <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
-> transformAST sub <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> transformAST term1 <> [QN "+AND+NOT+"] <> transformAST term2
BAnd sub1 sub2
......@@ -108,14 +108,11 @@ get apiKey q l = do
-- <$> PubMed.getMetadataWithC q l
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed { pubmed_id
, pubmed_article = PubMedDoc.PubMedArticle t j as aus
toDoc l (PubMedDoc.PubMed { pubmed_article = PubMedDoc.PubMedArticle t j as aus
, pubmed_date = PubMedDoc.PubMedDate a y m d }
) = HyperdataDocument { _hd_bdd = Just "PubMed"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Just $ Text.pack $ show pubmed_id
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
, _hd_authors = authors aus
......@@ -133,16 +130,14 @@ toDoc l (PubMedDoc.PubMed { pubmed_id
where
authors :: [PubMedDoc.Author] -> Maybe Text
authors [] = Nothing
authors au = Just $ (Text.intercalate ", ")
$ catMaybes
$ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
authors au = Just $ Text.intercalate ", "
$ mapMaybe (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
institutes :: [PubMedDoc.Author] -> Maybe Text
institutes [] = Nothing
institutes au = Just $ (Text.intercalate ", ")
$ (map (Text.replace ", " " - "))
$ catMaybes
$ map PubMedDoc.affiliation au
institutes au = Just $ Text.intercalate ", "
$ map (Text.replace ", " " - ")
$ mapMaybe PubMedDoc.affiliation au
abstract :: [Text] -> Maybe Text
......
......@@ -51,8 +51,7 @@ import Gargantext.Core.Text.Corpus.Parsers.JSON (parseJSONC, parseIstex)
import Gargantext.Core.Text.Corpus.Parsers.RIS qualified as RIS
import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
import Gargantext.Core.Text.Corpus.Parsers.WOS qualified as WOS
import Gargantext.Core.Text.Corpus.Parsers.Types
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Query.Table.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (show, undefined)
import Gargantext.Utils.Zip qualified as UZip
......@@ -82,10 +81,10 @@ parseFormatC :: MonadBaseControl IO m
-> m (Either Text (Integer, ConduitT () HyperdataDocument IO ()))
parseFormatC CsvGargV3 Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
parseFormatC CsvHal Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
parseFormatC Istex Plain bs = do
ep <- liftBase $ parseIstex EN $ DBL.fromStrict bs
pure $ (\p -> (1, yieldMany [p])) <$> ep
......@@ -114,15 +113,15 @@ parseFormatC Iramuteq Plain bs = do
, yieldMany docs
.| mapC (map $ first Iramuteq.keys)
.| mapC (map $ both decodeUtf8)
.| mapMC ((toDoc Iramuteq) . (map (second (DT.replace "_" " "))))
.| mapMC (toDoc Iramuteq . map (second (DT.replace "_" " ")))
)
)
<$> eDocs
parseFormatC JSON Plain bs = do
let eParsedC = parseJSONC $ DBL.fromStrict bs
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
parseFormatC ft ZIP bs = liftBase $ UZip.withZipFileBS bs $ do
fileNames <- filter (filterZIPFileNameP ft) <$> DM.keys <$> getEntries
fileNames <- filter (filterZIPFileNameP ft) . DM.keys <$> getEntries
printDebug "[parseFormatC] fileNames" fileNames
fileContents <- mapM getEntry fileNames
--printDebug "[parseFormatC] fileContents" fileContents
......@@ -139,19 +138,19 @@ parseFormatC ft ZIP bs = liftBase $ UZip.withZipFileBS bs $ do
let contents' = snd <$> contents
let totalLength = sum lenghts
pure $ Right ( totalLength
, sequenceConduits contents' >> pure () ) -- .| mapM_C (printDebug "[parseFormatC] doc")
, void (sequenceConduits contents') ) -- .| mapM_C (printDebug "[parseFormatC] doc")
_ -> pure $ Left $ DT.intercalate "\n" errs
parseFormatC _ _ _ = pure $ Left "Not implemented"
filterZIPFileNameP :: FileType -> EntrySelector -> Bool
filterZIPFileNameP Istex f = (takeExtension (unEntrySelector f) == ".json") &&
((unEntrySelector f) /= "manifest.json")
(unEntrySelector f /= "manifest.json")
filterZIPFileNameP _ _ = True
etale :: [HyperdataDocument] -> [HyperdataDocument]
etale = concat . (map etale')
etale = concatMap etale'
where
etale' :: HyperdataDocument -> [HyperdataDocument]
etale' h = map (\t -> h { _hd_abstract = Just t })
......@@ -226,8 +225,6 @@ toDoc ff d = do
let hd = HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
, _hd_doi = lookup "doi" d
, _hd_url = lookup "URL" d
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = lookup "title" d
, _hd_authors = lookup "authors" d
......@@ -287,7 +284,7 @@ runParser format text = pure $ runParser' format text
runParser' :: FileType
-> DB.ByteString
-> (Either Text [[(DB.ByteString, DB.ByteString)]])
-> Either Text [[(DB.ByteString, DB.ByteString)]]
runParser' format text = first DT.pack $ parseOnly (withParser format) text
openZip :: FilePath -> IO [DB.ByteString]
......@@ -311,5 +308,5 @@ clean txt = DBC.map clean' txt
--
splitOn :: NgramsType -> Maybe Text -> Text -> [Text]
splitOn Authors (Just "WOS") = (DT.splitOn "; ")
splitOn _ _ = (DT.splitOn ", ")
splitOn Authors (Just "WOS") = DT.splitOn "; "
splitOn _ _ = DT.splitOn ", "
......@@ -33,8 +33,8 @@ book2csv :: Int -> FileDir -> FileOut -> IO ()
book2csv n f_in f_out = do
files <- filesOf f_in
texts <- readPublis f_in files
let publis = List.concat $ map (file2publi n) texts
let docs = map (\(y,p) -> publiToHyperdata y p) $ List.zip [1..] publis
let publis = concatMap (file2publi n) texts
let docs = zipWith publiToHyperdata [1..] publis
DBL.writeFile f_out (hyperdataDocument2csv docs)
filesOf :: FileDir -> IO [FilePath]
......@@ -43,7 +43,7 @@ filesOf fd = List.sort -- sort by filenam
<$> getDirectoryContents fd
readPublis :: FileDir -> [FilePath] -> IO [(FilePath, Text)]
readPublis fd fps = mapM (\fp -> DBL.readFile (fd <> fp) >>= \txt -> pure (fp, cs txt)) fps
readPublis fd = mapM (\fp -> DBL.readFile (fd <> fp) >>= \txt -> pure (fp, cs txt))
------------------------------------------------------------------------
-- Main Types
......@@ -63,7 +63,7 @@ type FileDir = FilePath
---------------------------------------------------------------------
file2publi :: Int -> (FilePath, Text) -> [Publi]
file2publi n (fp,theText) = map (\(t,txt) -> Publi authors source t txt) theTexts
file2publi n (fp,theText) = map (uncurry (Publi authors source)) theTexts
where
theTexts = text2titleParagraphs n theText
FileInfo authors source = fileNameInfo fp
......@@ -81,8 +81,6 @@ publiToHyperdata y (Publi a s t txt) =
HyperdataDocument { _hd_bdd = Just "Book File"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just t
, _hd_authors = Just (DT.concat a)
......
......@@ -15,7 +15,7 @@ CSV parser for Gargantext corpus files.
module Gargantext.Core.Text.Corpus.Parsers.CSV
where
import Conduit
import Conduit ( ConduitT, (.|), yieldMany, mapC )
import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as BL
import Data.Csv (DecodeOptions(..), EncodeOptions(..), FromField, FromNamedRecord(..), Header, Parser, ToField(..), ToNamedRecord(..), (.:), (.=), decodeByNameWith, defaultDecodeOptions, defaultEncodeOptions, encodeByNameWith, header, namedRecord, parseField, parseNamedRecord, runParser)
......@@ -23,10 +23,11 @@ import Data.Text qualified as T
import Data.Time.Segment (jour)
import Data.Vector (Vector)
import Data.Vector qualified as V
import Gargantext.Core.Text (sentences, unsentences)
import Gargantext.Core.Text.Context (SplitContext(..), splitBy)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import Gargantext.Core.Text ( sentences, unsentences )
import Gargantext.Core.Text.Context ( splitBy, SplitContext(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (length, show)
import Protolude
---------------------------------------------------------------
headerCsvGargV3 :: Header
......@@ -58,8 +59,6 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
HyperdataDocument { _hd_bdd = Just "CSV"
, _hd_doi = Just . T.pack . show $ did
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just dt
, _hd_authors = Nothing
......@@ -91,11 +90,11 @@ toDocs v = V.toList
(V.enumFromN 1 (V.length v'')) v''
where
v'' = V.foldl' (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
seps= (V.fromList [Paragraphs 1, Sentences 3, Chars 3])
seps= V.fromList [Paragraphs 1, Sentences 3, Chars 3]
---------------------------------------------------------------
fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
fromDocs docs = V.map fromDocs' docs
fromDocs = V.map fromDocs'
where
fromDocs' (CsvGargV3 { .. }) = CsvDoc { csv_title = d_title
, csv_source = d_source
......@@ -109,16 +108,11 @@ fromDocs docs = V.map fromDocs' docs
-- | Split a document into its contexts
-- TODO adapt the size of the paragraph according to the corpus average
splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc m splt doc = let docSize = (T.length $ csv_abstract doc) in
if docSize > 1000
then
if (mod (round m) docSize) >= 10
then
splitDoc' splt doc
else
V.fromList [doc]
else
V.fromList [doc]
splitDoc m splt doc =
let docSize = (T.length $ csv_abstract doc) in
if (docSize > 1000) && (mod (round m) docSize >= 10)
then splitDoc' splt doc
else V.fromList [doc]
where
splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc' contextSize (CsvDoc { .. }) = V.fromList $ [firstDoc] <> nextDocs
......@@ -150,7 +144,7 @@ unIntOrDec :: IntOrDec -> Int
unIntOrDec (IntOrDec i) = i
instance FromField IntOrDec where
parseField s = case runParser (parseField s :: Parser Int) of
Left _err -> IntOrDec <$> floor <$> (parseField s :: Parser Double)
Left _err -> IntOrDec . floor <$> (parseField s :: Parser Double)
Right n -> pure $ IntOrDec n
instance ToField IntOrDec where
toField (IntOrDec i) = toField i
......@@ -251,15 +245,15 @@ readByteStringStrict :: (FromNamedRecord a)
-> Delimiter
-> BS.ByteString
-> Either Text (Header, Vector a)
readByteStringStrict d ff = (readByteStringLazy d ff) . BL.fromStrict
readByteStringStrict d ff = readByteStringLazy d ff . BL.fromStrict
------------------------------------------------------------------------
-- | TODO use readFileLazy
readCSVFile :: FilePath -> IO (Either Text (Header, Vector CsvDoc))
readCSVFile fp = do
result <- fmap (readCsvLazyBS Comma) $ BL.readFile fp
result <- readCsvLazyBS Comma <$> BL.readFile fp
case result of
Left _err -> fmap (readCsvLazyBS Tab) $ BL.readFile fp
Left _err -> readCsvLazyBS Tab <$> BL.readFile fp
Right res -> pure $ Right res
......@@ -380,8 +374,6 @@ csvHal2doc (CsvHal { .. }) =
HyperdataDocument { _hd_bdd = Just "CsvHal"
, _hd_doi = Just csvHal_doiId_s
, _hd_url = Just csvHal_url
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just csvHal_title
, _hd_authors = Just csvHal_authors
......@@ -405,8 +397,6 @@ csv2doc (CsvDoc { .. })
= HyperdataDocument { _hd_bdd = Just "CsvHal"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just csv_title
, _hd_authors = Just csv_authors
......@@ -432,10 +422,10 @@ csv2doc (CsvDoc { .. })
parseHal :: FilePath -> IO (Either Text [HyperdataDocument])
parseHal fp = do
r <- readCsvHal fp
pure $ (V.toList . V.map csvHal2doc . snd) <$> r
pure $ V.toList . V.map csvHal2doc . snd <$> r
parseHal' :: BL.ByteString -> Either Text [HyperdataDocument]
parseHal' bs = (V.toList . V.map csvHal2doc . snd) <$> readCsvHalLazyBS bs
parseHal' bs = V.toList . V.map csvHal2doc . snd <$> readCsvHalLazyBS bs
------------------------------------------------------------------------
......@@ -453,7 +443,7 @@ parseCsv' bs = do
result = case readCsvLazyBS Comma bs of
Left _err -> readCsvLazyBS Tab bs
Right res -> Right res
(V.toList . V.map csv2doc . snd) <$> result
V.toList . V.map csv2doc . snd <$> result
parseCsvC :: BL.ByteString
-> Either Text (Integer, ConduitT () HyperdataDocument Identity ())
......
......@@ -13,12 +13,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Gitlab (
Issue(..), gitlabIssue2hyperdataDocument, readFile_Issues, readFile_IssuesAsDocs
) where
import Data.Aeson
import Data.Aeson ( FromJSON(parseJSON), decode, (.:), (.:?), withObject )
import Data.ByteString.Lazy qualified as DBL
import Data.Text qualified as DT
import Data.Time
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude
data Issue = Issue { _issue_id :: !Int
......@@ -42,8 +42,6 @@ gitlabIssue2hyperdataDocument issue = HyperdataDocument
{ _hd_bdd = Nothing
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just (_issue_title issue)
, _hd_authors = Nothing
......
......@@ -30,8 +30,8 @@ import Data.ByteString.Lazy qualified as DBL
import Data.JsonStream.Parser qualified as P
import Data.Text qualified as Text
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument)
import Gargantext.Database.GargDB
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument)
import Gargantext.Database.GargDB ( ReadFile(..) )
import Gargantext.Prelude
......@@ -43,14 +43,14 @@ data GrandDebatReference = GrandDebatReference
, createdAt :: !(Maybe Text)
, publishedAt :: !(Maybe Text)
, updatedAt :: !(Maybe Text)
, trashed :: !(Maybe Bool)
, trashedStatus :: !(Maybe Text)
, authorId :: !(Maybe Text)
, authorType :: !(Maybe Text)
, authorZipCode :: !(Maybe Text)
, responses :: !(Maybe [GrandDebatResponse])
}
deriving (Show, Generic)
......@@ -77,8 +77,6 @@ instance ToHyperdataDocument GrandDebatReference
HyperdataDocument { _hd_bdd = Just "GrandDebat"
, _hd_doi = id
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = title
, _hd_authors = authorType
......@@ -94,12 +92,10 @@ instance ToHyperdataDocument GrandDebatReference
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ Text.pack $ show FR }
where
toAbstract = (Text.intercalate " . ") . ((filter (/= "")) . (map toSentence))
toAbstract = Text.intercalate " . " . (filter (/= "") . map toSentence)
toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of
Nothing -> ""
Just r' -> case Text.length r' > 10 of
True -> r'
False -> ""
Just r' -> if Text.length r' > 10 then r' else ""
instance ReadFile [GrandDebatReference]
where
......
......@@ -20,14 +20,14 @@ TODO:
module Gargantext.Core.Text.Corpus.Parsers.Isidore where
import Control.Lens hiding (contains)
import Control.Lens ( (^.), (.~) )
import Data.ByteString.Lazy (ByteString)
import Data.RDF hiding (triple, Query)
import Data.RDF ( Node(LNode, UNode), LValue(PlainLL, TypedL, PlainL) )
import Data.Text qualified as T
import Database.HSparql.Connection
import Database.HSparql.Connection ( BindingValue(..), EndPoint, structureContent )
import Database.HSparql.QueryGenerator
import Gargantext.Core (Lang)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (ByteString)
import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
import Prelude qualified
......@@ -115,7 +115,7 @@ unbound _ Unbound = Nothing
unbound _ (Bound (UNode x)) = Just x
unbound _ (Bound (LNode (TypedL x _))) = Just x
unbound _ (Bound (LNode (PlainL x))) = Just x
unbound l (Bound (LNode (PlainLL x l'))) = if l' == (T.toLower $ show l) then Just x else Nothing
unbound l (Bound (LNode (PlainLL x l'))) = if l' == T.toLower (show l) then Just x else Nothing
unbound _ _ = Nothing
bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
......@@ -123,8 +123,6 @@ bind2doc l [ link', date, langDoc, authors, _source, publisher, title, abstract
HyperdataDocument { _hd_bdd = Just "Isidore"
, _hd_doi = Nothing
, _hd_url = unbound l link'
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = unbound l title
, _hd_authors = unbound l authors
......
......@@ -19,11 +19,10 @@ module Gargantext.Core.Text.Corpus.Parsers.JSON.Istex where
import Data.Text qualified as T
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (length, show)
import Gargantext.Prelude hiding (length)
import ISTEX.Client qualified as ISTEX
import Protolude
-- | TODO remove dateSplit here
......@@ -37,12 +36,10 @@ toDoc la (ISTEX.Document i t a ab d s) = do
pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
, _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (map ISTEX._author_name a)
, _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a)
, _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (concatMap ISTEX._author_affiliations a)
, _hd_source = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (ISTEX._source_title s)
, _hd_abstract = ab
, _hd_publication_date = fmap (T.pack . show) utctime
......
......@@ -20,11 +20,11 @@ module Gargantext.Core.Text.Corpus.Parsers.Wikidata where
import Data.List qualified as List
import Data.Text (concat)
import Database.HSparql.Connection
import Database.HSparql.Connection ( BindingValue, EndPoint, selectQueryRaw )
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Isidore (unbound)
import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler ( crawlPage )
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Prelude hiding (concat)
import Prelude qualified
......@@ -56,11 +56,9 @@ wikiPageToDocument m wr = do
let bdd = Just "wikidata"
doi = Nothing
url = (wr ^. wr_url)
uniqId = Nothing
uniqIdBdd = Nothing
url = wr ^. wr_url
page = Nothing
title = (wr ^. wr_title)
title = wr ^. wr_title
authors = Nothing
institutes = Nothing
source = Nothing
......@@ -82,8 +80,6 @@ wikiPageToDocument m wr = do
pure $ HyperdataDocument { _hd_bdd = bdd
, _hd_doi = doi
, _hd_url = url
, _hd_uniqId = uniqId
, _hd_uniqIdBdd = uniqIdBdd
, _hd_page = page
, _hd_title = title
, _hd_authors = authors
......
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Core.Text.Terms.Mono.Stem.Lancaster
( stemIt
) where
import Prelude
import Data.Text (Text)
import qualified Data.Text as T
-- | One suffix-rewriting rule of the Lancaster stemmer.
data Rule = Rule
  { _match :: Text          -- ^ suffix that must end the current stem
  , _replacement :: Text    -- ^ text substituted for the matched suffix
  , _ruleType :: RuleType   -- ^ how stemming proceeds once the rule fires
  } deriving (Show, Eq)

-- | Behaviour flag attached to each 'Rule'.
data RuleType
  = Intact    -- ^ rule only applies while the word has not been rewritten yet
  | Continue  -- ^ apply the rule, then keep stemming the resulting stem
  | Contint   -- ^ 'Intact' precondition combined with 'Continue' recursion
  | Stop      -- ^ apply the rule and stop
  | Protect   -- ^ NOTE(review): the rule engine below currently treats this
              --   exactly like 'Stop' (the suffix is still stripped) —
              --   confirm that is the intended "protect" semantics
  deriving (Show, Eq)

-- | Rules grouped by the final character of the words they can apply to.
type RuleCollection = [(Char, [Rule])]

-- Lower-case aliases so the rule table below reads compactly.
stop, intact, cont, protect, contint :: RuleType
stop = Stop
intact = Intact
cont = Continue
protect = Protect
contint = Contint
-- Define rules
-- | The Lancaster suffix-rule table, keyed by the last letter of the word.
-- Within each bucket the rules are tried in list order and the first one
-- whose suffix matches (and whose stripped result is 'acceptable') wins.
rulesPaper :: RuleCollection
rulesPaper =
  [ ('a', [ Rule "ia" "" intact, Rule "a" "" intact ])
  , ('b', [ Rule "bb" "b" stop ])
  , ('c', [ Rule "ytic" "ys" stop, Rule "ic" "" cont, Rule "nc" "nt" cont ])
  , ('d', [ Rule "dd" "d" stop, Rule "ied" "i" stop, Rule "ceed" "cess" stop, Rule "eed" "ee" stop
          , Rule "ed" "" cont, Rule "hood" "" cont ])
  , ('e', [ Rule "e" "" cont ])
  , ('f', [ Rule "lief" "liev" stop, Rule "if" "" cont ])
  , ('g', [ Rule "ing" "" cont, Rule "iag" "y" stop, Rule "ag" "" cont, Rule "gg" "g" stop ])
  , ('h', [ Rule "th" "" intact, Rule "guish" "ct" stop, Rule "ish" "" cont ])
  , ('i', [ Rule "i" "" intact, Rule "i" "y" cont ])
  , ('j', [ Rule "ij" "id" stop, Rule "fuj" "fus" stop, Rule "uj" "ud" stop, Rule "oj" "od" stop
          , Rule "hej" "her" stop, Rule "verj" "vert" stop, Rule "misj" "mit" stop, Rule "nj" "nd" stop
          , Rule "j" "s" stop ])
  , ('l', [ Rule "ifiabl" "" stop, Rule "iabl" "y" stop, Rule "abl" "" cont, Rule "ibl" "" stop
          , Rule "bil" "bl" cont, Rule "cl" "c" stop, Rule "iful" "y" stop, Rule "ful" "" cont
          , Rule "ul" "" stop, Rule "ial" "" cont, Rule "ual" "" cont, Rule "al" "" cont
          , Rule "ll" "l" stop ])
  , ('m', [ Rule "ium" "" stop, Rule "um" "" intact, Rule "ism" "" cont, Rule "mm" "m" stop ])
  , ('n', [ Rule "sion" "j" cont, Rule "xion" "ct" stop, Rule "ion" "" cont, Rule "ian" "" cont
          , Rule "an" "" cont, Rule "een" "" protect, Rule "en" "" cont, Rule "nn" "n" stop ])
  , ('p', [ Rule "ship" "" cont, Rule "pp" "p" stop ])
  , ('r', [ Rule "er" "" cont, Rule "ear" "" protect, Rule "ar" "" stop, Rule "or" "" cont
          , Rule "ur" "" cont, Rule "rr" "r" stop, Rule "tr" "t" cont, Rule "ier" "y" cont ])
  , ('s', [ Rule "ies" "y" cont, Rule "sis" "s" stop, Rule "is" "" cont, Rule "ness" "" cont
          , Rule "ss" "" protect, Rule "ous" "" cont, Rule "us" "" intact, Rule "s" "" contint
          , Rule "s" "" protect ])
  , ('t', [ Rule "plicat" "ply" stop, Rule "at" "" cont, Rule "ment" "" cont, Rule "ent" "" cont
          , Rule "ant" "" cont, Rule "ript" "rib" stop, Rule "orpt" "orb" stop, Rule "duct" "duc" stop
          , Rule "sumpt" "sum" stop, Rule "cept" "ceiv" stop, Rule "olut" "olv" stop
          , Rule "sist" "" protect, Rule "ist" "" cont, Rule "tt" "t" stop ])
  , ('u', [ Rule "iqu" "" stop, Rule "ogu" "og" stop ])
  , ('v', [ Rule "siv" "j" cont, Rule "eiv" "" protect, Rule "iv" "" cont ])
  , ('y', [ Rule "bly" "bl" cont, Rule "ily" "y" cont, Rule "ply" "" protect, Rule "ly" "" cont
          , Rule "ogy" "og" stop, Rule "phy" "ph" stop, Rule "omy" "om" stop, Rule "opy" "op" stop
          , Rule "ity" "" cont, Rule "ety" "" cont, Rule "lty" "l" stop, Rule "istry" "" stop
          , Rule "ary" "" cont, Rule "ory" "" cont, Rule "ify" "" stop, Rule "ncy" "nt" cont
          , Rule "acy" "" cont ])
  , ('z', [ Rule "iz" "" cont, Rule "yz" "ys" stop ])
  ]
-- | True when the character is one of the stemmer's vowels (includes @y@).
isVowel :: Char -> Bool
isVowel = (`elem` vowelsSet)
{-# INLINE isVowel #-}

-- | The vowel alphabet consulted by 'isVowel'.
vowelsSet :: String
vowelsSet = "aeiouy"
{-# INLINE vowelsSet #-}
-- | Stem an English word with the Lancaster (Paice\/Husk) algorithm,
-- using the built-in 'rulesPaper' table.
stemIt :: Text -> Text
stemIt = flip lancasterStemmer rulesPaper
-- Lancaster Stemmer
-- | Run the rule engine on a lower-cased word that is still "intact"
-- (i.e. has not been rewritten yet).
lancasterStemmer :: Text -> RuleCollection -> Text
lancasterStemmer rawWord ruleTable =
  applyRules (T.toLower rawWord) True ruleTable
-- | Core rule-application loop of the Lancaster stemmer.
--
-- The candidate rules are looked up by the word's final character and the
-- first applicable one wins.  @Continue@\/@Contint@ rules recurse on the
-- shortened stem (no longer intact); @Intact@\/@Contint@ rules only fire
-- while the word is unmodified.
--
-- NOTE(review): @Protect@ rules receive no special handling here, so they
-- behave exactly like @Stop@ (the suffix is still stripped) — confirm this
-- matches the intended Paice\/Husk "protect" semantics.
applyRules :: Text -> Bool -> RuleCollection -> Text
applyRules value isIntact rules =
  case T.unsnoc value of
    Nothing -> value                       -- empty input: nothing to stem
    Just (_, lastChar) ->
      case lookup lastChar rules of
        Nothing -> value                   -- no rules for this ending
        Just ruleset -> applyRuleSet value isIntact ruleset
  where
    -- Try each rule in order; the first rule that applies wins.
    applyRuleSet :: Text -> Bool -> [Rule] -> Text
    applyRuleSet val _ [] = val
    applyRuleSet val isIntact' (rule:rest) =
      -- FIX: was @ruleApplication value …@, silently ignoring the 'val'
      -- parameter.  Harmless today (every call site passes 'value'), but a
      -- latent bug if applyRuleSet is ever invoked with a different stem.
      case ruleApplication val isIntact' rule of
        Just res -> res
        Nothing -> applyRuleSet val isIntact' rest

    -- Attempt a single rule: strip the suffix, append the replacement and,
    -- for continuation rules, recurse on the new (no longer intact) stem.
    ruleApplication :: Text -> Bool -> Rule -> Maybe Text
    ruleApplication val isIntact' (Rule m r t) =
      if not isIntact' && (t == intact || t == contint)
        then Nothing                       -- intact-only rule on a modified stem
        else case T.stripSuffix m val of
          Nothing -> Nothing               -- suffix does not match
          Just stem ->
            let next = stem `T.append` r
            in if not (acceptable next)
              then Nothing                 -- result too short / vowel-less
              else if t == cont || t == contint
                then Just $ applyRules next False rules
                else Just next
-- | Returns 'True' if a stem is acceptable: at least two characters when
-- it starts with a vowel, otherwise at least three characters of which at
-- least one must be a vowel.
acceptable :: Text -> Bool
acceptable stem = case T.uncons stem of
  Nothing -> False
  Just (c, _)
    | vowel c   -> T.length stem > 1
    | otherwise -> T.length stem > 2 && T.any vowel stem
  where
    vowel = (`elem` ("aeiouy" :: String))
......@@ -183,13 +183,10 @@ combineTokenTags (TokenTag w1 l1 p1 n1 s1 e1) (TokenTag w2 l2 p2 _ s2 e2) = Toke
_ -> p1
emptyTokenTag :: TokenTag
emptyTokenTag = TokenTag [] empty Nothing Nothing 0 0
-- instance Monoid TokenTag where
-- mempty = TokenTag [] empty Nothing Nothing 0 0
-- mconcat = foldl mappend mempty
-- -- mappend t1 t2 = (<>) t1 t2
instance Monoid TokenTag where
mempty = TokenTag [] empty Nothing Nothing
mconcat = foldl' mappend mempty
-- mappend t1 t2 = (<>) t1 t2
class HasValidationError e where
......
......@@ -9,22 +9,21 @@ Portability : POSIX
-}
{-# LANGUAGE DeriveAnyClass #-}
module Gargantext.Core.Types.Search where
import Data.Aeson hiding (defaultTaggedObject)
import Data.Swagger hiding (fieldLabelModifier, Contact)
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema )
import Data.Text qualified as Text
import Data.Time (UTCTime)
import Gargantext.Core.Utils.Prefix (dropPrefix, unCapitalize, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata (ContactWhere(..), HyperdataContact(..), HyperdataDocument(..), ContactWho(..))
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( ContactWhere(..), HyperdataContact(..), ContactWho(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId )
import Gargantext.Database.Query.Facet.Types (Facet(..), FacetDoc, FacetPaired(..))
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude
import Gargantext.Utils.Aeson (defaultTaggedObject)
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
data Row =
......@@ -93,8 +92,6 @@ data HyperdataRow =
, _hr_source :: !Text
, _hr_title :: !Text
, _hr_url :: !Text
, _hr_uniqId :: !Text
, _hr_uniqIdBdd :: !Text
}
| HyperdataRowContact { _hr_firstname :: !Text
, _hr_lastname :: !Text
......@@ -148,9 +145,7 @@ instance ToHyperdataRow HyperdataDocument where
, _hr_publication_second = fromMaybe 0 _hd_publication_second
, _hr_source = fromMaybe "" _hd_source
, _hr_title = fromMaybe "Title" _hd_title
, _hr_url = fromMaybe "" _hd_url
, _hr_uniqId = fromMaybe "" _hd_uniqId
, _hr_uniqIdBdd = fromMaybe "" _hd_uniqIdBdd }
, _hr_url = fromMaybe "" _hd_url }
instance ToHyperdataRow HyperdataContact where
toHyperdataRow (HyperdataContact { _hc_who = Just (ContactWho _ fn ln _ _ _), _hc_where = ou} ) =
......
......@@ -59,8 +59,8 @@ cooc2graph' distance threshold myCooc
$ mat2map
$ measure distance
$ case distance of
Conditional -> map2mat Triangle 0 tiSize
_ -> map2mat Square 0 tiSize
Conditional -> map2mat Square 1 tiSize
_ -> map2mat Square 0 tiSize
$ Map.filter (> 1) myCooc'
where
......
......@@ -231,7 +231,7 @@ defaultConfig =
, similarity = WeightedLogJaccard 0.5 2
, seaElevation = Constante 0.1 0.1
, defaultMode = False
, findAncestors = True
, findAncestors = False
, phyloSynchrony = ByProximityThreshold 0.6 0 AllBranches MergeAllGroups
, phyloQuality = Quality 0.5 3
, timeUnit = Year 3 1 5
......
......@@ -50,9 +50,7 @@ type FlowCmdM env err m =
, MonadLogger m
)
type FlowCorpus a = ( AddUniqId a
, UniqId a
, UniqParameters a
type FlowCorpus a = ( UniqParameters a
, InsertDb a
, ExtractNgramsT a
, HasText a
......
......@@ -16,7 +16,6 @@ Portability : POSIX
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
......@@ -32,7 +31,7 @@ import Gargantext.API.GraphQL.Utils qualified as GAGU
import Gargantext.Core.Text (HasText(..))
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Prelude
import Gargantext.Utils.UTCTime
import Gargantext.Utils.UTCTime ( NUTCTime(..) )
--------------------------------------------------------------------------------
data HyperdataContact =
......@@ -42,8 +41,6 @@ data HyperdataContact =
, _hc_title :: Maybe Text -- TODO remove (only demo)
, _hc_source :: Maybe Text -- TODO remove (only demo)
, _hc_lastValidation :: Maybe Text -- TODO UTCTime
, _hc_uniqIdBdd :: Maybe Text
, _hc_uniqId :: Maybe Text
} deriving (Eq, Show, Generic)
instance GQLType HyperdataContact where
......@@ -61,9 +58,7 @@ defaultHyperdataContact =
, _hc_where = [defaultContactWhere]
, _hc_title =Just "Title"
, _hc_source = Just "Source"
, _hc_lastValidation = Just "TODO lastValidation date"
, _hc_uniqIdBdd = Just "DO NOT expose this"
, _hc_uniqId = Just "DO NOT expose this" }
, _hc_lastValidation = Just "TODO lastValidation date" }
hyperdataContact :: FirstName -> LastName -> HyperdataContact
hyperdataContact fn ln =
......@@ -73,9 +68,7 @@ hyperdataContact fn ln =
, _hc_where = []
, _hc_title = Nothing
, _hc_source = Nothing
, _hc_lastValidation = Nothing
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
, _hc_lastValidation = Nothing }
-- TOD0 contact metadata (Type is too flat)
data ContactMetaData =
......@@ -94,9 +87,7 @@ arbitraryHyperdataContact =
, _hc_where = []
, _hc_title = Nothing
, _hc_source = Nothing
, _hc_lastValidation = Nothing
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
, _hc_lastValidation = Nothing }
data ContactWho =
......@@ -188,7 +179,7 @@ instance ToSchema ContactMetaData where
-- | Arbitrary instances
instance Arbitrary HyperdataContact where
arbitrary = elements [HyperdataContact Nothing Nothing [] Nothing Nothing Nothing Nothing Nothing]
arbitrary = elements [ HyperdataContact Nothing Nothing [] Nothing Nothing Nothing ]
-- | Specific Gargantext instance
instance Hyperdata HyperdataContact
......
......@@ -30,8 +30,6 @@ import Gargantext.Prelude hiding (ByteString)
data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text)
, _hd_doi :: !(Maybe Text)
, _hd_url :: !(Maybe Text)
, _hd_uniqId :: !(Maybe Text)
, _hd_uniqIdBdd :: !(Maybe Text)
, _hd_page :: !(Maybe Int)
, _hd_title :: !(Maybe Text)
, _hd_authors :: !(Maybe Text)
......@@ -59,7 +57,7 @@ instance HasText HyperdataDocument
defaultHyperdataDocument :: HyperdataDocument
defaultHyperdataDocument = case decode docExample of
Just hp -> hp
Nothing -> HyperdataDocument Nothing Nothing Nothing Nothing
Nothing -> HyperdataDocument Nothing Nothing
Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
......@@ -108,7 +106,8 @@ instance ToHyperdataDocument HyperdataDocument
------------------------------------------------------------------------
instance Eq HyperdataDocument where
(==) h1 h2 = (==) (_hd_uniqId h1) (_hd_uniqId h2)
(==) h1 h2 = _hd_title h1 == _hd_title h2
&& _hd_abstract h1 == _hd_abstract h2
------------------------------------------------------------------------
instance Ord HyperdataDocument where
......@@ -127,7 +126,7 @@ arbitraryHyperdataDocuments =
] :: [(Text, Text)])
where
toHyperdataDocument' (t1,t2) =
HyperdataDocument Nothing Nothing Nothing Nothing Nothing Nothing (Just t1)
HyperdataDocument Nothing Nothing Nothing Nothing (Just t1)
Nothing Nothing (Just t2) Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
......
......@@ -57,14 +57,12 @@ the concatenation of the parameters defined by @shaParameters@.
module Gargantext.Database.Query.Table.Node.Document.Insert
where
import Control.Lens (set, view)
import Control.Lens.Cons
import Control.Lens.Prism
import Data.Aeson (toJSON, ToJSON)
import Data.Text qualified as DT (pack, concat, take, filter, toLower)
import Data.Time.Segment (jour)
import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
import Database.PostgreSQL.Simple.FromRow (fromRow, field)
import Database.PostgreSQL.Simple.SqlQQ
import Database.PostgreSQL.Simple.SqlQQ ( sql )
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core (HasDBid(toDBid))
......@@ -92,7 +90,7 @@ import Database.PostgreSQL.Simple (formatQuery)
insertDb :: (InsertDb a, HasDBid NodeType) => UserId -> Maybe ParentId -> [a] -> DBCmd err [ReturnId]
insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
where
fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
fields = map (QualifiedIdentifier Nothing) inputSqlTypes
class InsertDb a
where
......@@ -107,18 +105,18 @@ instance InsertDb HyperdataDocument
, toField p
, toField $ maybe "No Title" (DT.take 255) (_hd_title h)
, toField $ _hd_publication_date h -- TODO USE UTCTime
, (toField . toJSON) (addUniqId h)
-- , (toField . toJSON) (addUniqId h)
]
instance InsertDb HyperdataContact
where
insertDb' u p h = [ toField ("" :: Text)
insertDb' u p _h = [ toField ("" :: Text)
, toField $ toDBid NodeContact
, toField u
, toField p
, toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
, toField $ jour 0 1 1 -- TODO put default date
, (toField . toJSON) (addUniqId h)
-- , (toField . toJSON) (addUniqId h)
]
instance ToJSON a => InsertDb (Node a)
......@@ -193,73 +191,73 @@ class AddUniqId a
where
addUniqId :: a -> a
-- instance AddUniqId HyperdataDocument
-- where
-- addUniqId = addUniqIdsDoc
-- where
-- addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
-- addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
-- $ set hd_uniqId (Just shaUni) doc
-- where
-- shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
-- shaBdd = hash $ DT.concat $ map ($ doc) ([maybeText . _hd_bdd] <> shaParametersDoc)
-- shaParametersDoc :: [HyperdataDocument -> Text]
-- shaParametersDoc = [ filterText . maybeText . _hd_title
-- , filterText . maybeText . _hd_abstract
-- , filterText . maybeText . _hd_source
-- -- , \d -> maybeText (_hd_publication_date d)
-- ]
class UniqParameters a
where
uniqParameters :: ParentId -> a -> Text
instance AddUniqId HyperdataDocument
where
addUniqId = addUniqIdsDoc
where
addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
$ set hd_uniqId (Just shaUni) doc
where
shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
shaParametersDoc :: [(HyperdataDocument -> Text)]
shaParametersDoc = [ \d -> filterText $ maybeText (_hd_title d)
, \d -> filterText $ maybeText (_hd_abstract d)
, \d -> filterText $ maybeText (_hd_source d)
-- , \d -> maybeText (_hd_publication_date d)
]
uniqParameters :: a -> Text
instance UniqParameters HyperdataDocument
where
uniqParameters _ h = filterText $ DT.concat $ map maybeText $ [_hd_title h, _hd_abstract h, _hd_source h]
uniqParameters h = filterText $ DT.concat $ map maybeText $ [_hd_title h, _hd_abstract h, _hd_source h]
instance UniqParameters HyperdataContact
where
uniqParameters _ _ = ""
uniqParameters _ = ""
instance UniqParameters (Node a)
where
uniqParameters _ _ = undefined
uniqParameters _ = undefined
filterText :: Text -> Text
filterText = DT.toLower . (DT.filter isAlphaNum)
filterText = DT.toLower . DT.filter isAlphaNum
instance (UniqParameters a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a)
where
addUniqId (Node nid _ t u p n d h) = Node nid (Just newHash) t u p n d h
where
newHash = "\\x" <> (hash $ uniqParameters (fromMaybe 0 p) h)
newHash = "\\x" <> hash (uniqParameters h)
---------------------------------------------------------------------------
-- * Uniqueness of document definition
-- TODO factorize with above (use the function below for tests)
instance AddUniqId HyperdataContact
where
addUniqId = addUniqIdsContact
-- instance AddUniqId HyperdataContact
-- where
-- addUniqId = addUniqIdsContact
addUniqIdsContact :: HyperdataContact -> HyperdataContact
addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
$ set (hc_uniqId ) (Just shaUni) hc
where
shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact
shaBdd = hash $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
-- addUniqIdsContact :: HyperdataContact -> HyperdataContact
-- addUniqIdsContact hc = set hc_uniqIdBdd (Just shaBdd)
-- $ set hc_uniqId (Just shaUni) hc
-- where
-- shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact
-- shaBdd = hash $ DT.concat $ map ($ hc) ([maybeText . view hc_bdd] <> shaParametersContact)
-- | TODO add more shaparameters
shaParametersContact :: [(HyperdataContact -> Text)]
shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName ) d
, \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
, \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
]
-- -- | TODO add more shaparameters
-- shaParametersContact :: [HyperdataContact -> Text]
-- shaParametersContact = [ maybeText . view (hc_who . _Just . cw_firstName )
-- , maybeText . view (hc_who . _Just . cw_lastName )
-- , maybeText . view (hc_where . _head . cw_touch . _Just . ct_mail)
-- ]
maybeText :: Maybe Text -> Text
......@@ -285,7 +283,7 @@ instance ToNode HyperdataDocument where
-- TODO better Node
instance ToNode HyperdataContact where
toNode u p h = Node 0 Nothing (toDBid NodeContact) u p "Contact" date h
toNode u p = Node 0 Nothing (toDBid NodeContact) u p "Contact" date
where
date = jour 2020 01 01
......
......@@ -10,18 +10,17 @@ Portability : POSIX
module Gargantext.Utils.Servant where
import qualified Data.ByteString.Lazy.Char8 as BSC
import Data.Csv (defaultEncodeOptions, encodeByNameWith, encodeDefaultOrderedByName, header, namedRecord, (.=), DefaultOrdered, EncodeOptions(..), NamedRecord, Quoting(QuoteNone), ToNamedRecord)
import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import Gargantext.Core.Ngrams.Types (mSetToList, NgramsRepoElement(..), NgramsTableMap, NgramsTerm(..), unNgramsTerm)
import Gargantext.Core.Types.Main (ListType(..))
import Data.ByteString.Lazy.Char8 qualified as BSC
import Data.Csv (encodeDefaultOrderedByName, DefaultOrdered, ToNamedRecord)
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Network.HTTP.Media ((//), (/:))
import qualified Prelude
import Prelude qualified
import Protolude
import Protolude.Partial (read)
import Servant ( Accept(contentType), MimeRender(..), MimeUnrender(mimeUnrender) )
data CSV = CSV
instance Accept CSV where
......@@ -33,25 +32,6 @@ instance (DefaultOrdered a, ToNamedRecord a) => MimeRender CSV [a] where
instance MimeRender CSV T.Text where
mimeRender _ = toUtf8Lazy
-- CSV:
-- header: status\tlabel\tforms
-- item: map\taccountability\taccounting|&|accoutns|&|account
instance MimeRender CSV NgramsTableMap where
-- mimeRender _ _val = encode ([] :: [(Text, Text)])
mimeRender _ val = encodeByNameWith encOptions (header ["status", "label", "forms"]) $ fn <$> Map.toList val
where
encOptions = defaultEncodeOptions { encDelimiter = fromIntegral (ord '\t')
, encQuoting = QuoteNone }
fn :: (NgramsTerm, NgramsRepoElement) -> NamedRecord
fn (NgramsTerm term, NgramsRepoElement { _nre_list, _nre_children }) =
namedRecord [ "status" .= toText _nre_list
, "label" .= term
, "forms" .= (T.intercalate "|&|" $ unNgramsTerm <$> mSetToList _nre_children)]
toText :: ListType -> Text
toText CandidateTerm = "candidate"
toText MapTerm = "map"
toText StopTerm = "stop"
instance Read a => MimeUnrender CSV a where
mimeUnrender _ bs = case BSC.take len bs of
"text/csv" -> pure . read . BSC.unpack $ BSC.drop len bs
......@@ -74,4 +54,19 @@ instance MimeRender Markdown T.Text where
mimeRender _ = toUtf8Lazy
instance MimeUnrender Markdown T.Text where
mimeUnrender _ = Right . decodeUtf8 . BSC.toStrict
mimeUnrender _ = Right . TE.decodeUtf8 . BSC.toStrict
---------------------------
data ZIP = ZIP
instance Accept ZIP where
contentType _ = "application" // "zip"
instance MimeRender ZIP BSC.ByteString where
mimeRender _ = identity
instance MimeUnrender ZIP BSC.ByteString where
mimeUnrender _ = Right . identity
......@@ -15,18 +15,38 @@ Utilities for handling zip files
module Gargantext.Utils.Zip where
import "zip" Codec.Archive.Zip (withArchive, ZipArchive)
-- import Control.Monad.Base (liftBase)
import "zip" Codec.Archive.Zip (addEntry, createArchive, mkEntrySelector, withArchive, CompressionMethod(BZip2), ZipArchive)
import "zip-archive" Codec.Archive.Zip qualified as ZArch
import Control.Monad.Base (MonadBase, liftBase)
import Data.ByteString qualified as BS
import Data.ByteString.Lazy.Char8 qualified as BSC
import Protolude
import System.Directory (removeFile)
import System.IO.Temp (emptySystemTempFile)
-- | Take a zip file (in form of a ByteString) and work on its contents
-- (using the ZipArchive monad).  The bytes are first written to a
-- temporary file because the "zip" package operates on file paths;
-- 'bracket' guarantees the temporary file is removed even if the
-- archive actions throw.
withZipFileBS :: MonadIO m => BS.ByteString -> ZipArchive a -> m a
withZipFileBS bs actions =
liftIO $ bracket (emptySystemTempFile "parsed-zip")
(\path -> removeFile path) $
\path -> do
BS.writeFile path bs
withArchive path actions
-- NOTE(review): the equation below duplicates the one above (both the
-- removed and the added hunk of a merge diff are present in this dump);
-- only one definition should remain in the real source.
withZipFileBS bs actions = liftIO $
bracket (emptySystemTempFile "parsed-zip")
removeFile
(\path -> do
BS.writeFile path bs
withArchive path actions)
-- | Compress the given bytes into a single-entry ZIP archive (entry named
-- after @fpath@, BZip2 method) and return the archive as a ByteString.
-- A temporary file is used because the "zip" package works on file paths;
-- 'bracket' ensures it is deleted afterwards.
zipContents :: MonadBase IO m => FilePath -> BS.ByteString -> m BS.ByteString
zipContents fpath bsContents = liftBase $
  bracket (emptySystemTempFile "zip-contents")
          removeFile
          (\archivePath -> do
             entrySel <- mkEntrySelector fpath
             createArchive archivePath (addEntry BZip2 bsContents entrySel)
             BS.readFile archivePath)
-- | Same as 'zipContents' above, but pure (in-memory), built with the
-- "zip-archive" package instead of temporary files.
zipContentsPure :: FilePath -> BSC.ByteString -> BSC.ByteString
zipContentsPure fpath bscContents =
  ZArch.fromArchive (ZArch.addEntryToArchive entry ZArch.emptyArchive)
  where
    -- modification time fixed at 0 keeps the output deterministic
    entry = ZArch.toEntry fpath 0 bscContents
......@@ -322,7 +322,7 @@ flags:
"full-text-search":
"build-search-demo": false
gargantext:
"disable-db-obfuscation-executable": false
"disable-db-obfuscation-executable": true
"no-phylo-debug-logs": false
"test-crypto": false
"generic-deriving":
......@@ -561,6 +561,8 @@ flags:
"tasty-bench":
debug: false
tasty: true
"tasty-golden":
"build-example": false
texmath:
executable: false
server: false
......
1,collab
2,postpart
3,cat
4,cat
5,dog
6,dog
7,run
8,run
9,run
10,jump
11,jump
12,jump
13,swim
14,swim
15,swim
16,fish
17,fish
18,fish
19,eat
20,eat
21,eat
22,talk
23,talk
24,talk
25,walk
26,walk
27,walk
28,dant
29,dant
30,dant
31,sing
32,sing
33,sing
34,play
35,play
36,play
37,work
38,work
39,work
40,teach
41,teach
42,teach
43,learn
44,learn
45,learn
46,read
47,read
48,read
49,writ
50,writ
51,writ
52,paint
53,paint
54,paint
55,draw
56,draw
57,draw
58,speak
59,speak
60,speak
61,think
62,think
63,think
64,see
65,see
66,seen
67,hear
68,hear
69,heard
70,touch
71,touch
72,touch
73,smel
74,smel
75,smel
76,tast
77,tast
78,tast
79,laugh
80,laugh
81,laugh
82,cry
83,cry
84,cri
85,smil
86,smil
87,smil
88,frown
89,frown
90,frown
91,happy
92,happy
93,happiest
94,sad
95,sad
96,saddest
97,angry
98,angry
99,angriest
100,calm
101,calm
102,calmest
103,corrob
{-|
Module : Core.Similarity
Description : Tests checking that the optimised similarity implementations agree with their reference versions
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Test.Core.Similarity where
import Gargantext.Core.Methods.Similarities.Conditional
import Gargantext.Prelude
import Test.Hspec
-- | Hspec entry point: the optimised Conditional similarity must agree
-- with its reference implementation (i.e. 'conditional_test' holds).
test :: Spec
test =
  describe "check if similarities optimizations are well implemented" $
    it "Conditional" $
      conditional_test `shouldBe` True
......@@ -64,7 +64,6 @@ exampleDocument_01 = either errorTrace identity $ parseEither parseJSON $ [aeson
exampleDocument_02 :: HyperdataDocument
exampleDocument_02 = either errorTrace identity $ parseEither parseJSON $ [aesonQQ|
{ "doi":""
, "uniqId": "1405.3072v3"
, "bdd": "Arxiv"
, "publication_day":6
, "language_iso2":"EN"
......@@ -89,7 +88,6 @@ exampleDocument_03 = either errorTrace identity $ parseEither parseJSON $ [aeson
, "url": "http://arxiv.org/pdf/1405.3072v2"
, "title": "Haskell for OCaml programmers"
, "source": ""
, "uniqId": "1405.3072v2"
, "authors": "Raphael Poss, Herbert Ballerina"
, "abstract": " This introduction to Haskell is written to optimize learning by programmers who already know OCaml. "
, "institutes": ""
......@@ -107,7 +105,6 @@ exampleDocument_04 = either errorTrace identity $ parseEither parseJSON $ [aeson
, "url": "http://arxiv.org/pdf/1407.5670v1"
, "title": "Rust for functional programmers"
, "source": ""
, "uniqId": "1407.5670v1"
, "authors": "Raphael Poss"
, "abstract": " This article provides an introduction to Rust , a systems language by Mozilla , to programmers already familiar with Haskell , OCaml or other functional languages. " , "institutes": ""
, "language_iso2": "EN"
......
......@@ -28,7 +28,7 @@ phyloConfig = PhyloConfig {
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = True
, findAncestors = False
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
......
module Test.Offline.Stemming.Lancaster where
import Prelude
import Data.ByteString.Char8 qualified as C8
import Data.Text qualified as T
import Gargantext.Core.Text.Terms.Mono.Stem.Lancaster (stemIt)
import Gargantext.Prelude (toS)
import Test.Tasty
import Test.Tasty.Golden (goldenVsStringDiff)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text.Encoding as TE
-- | Golden test: stemming the test vector must reproduce the checked-in
-- golden file; mismatches are rendered with the project's diff helper.
tests :: TestTree
tests = testGroup "Lancaster"
  [ goldenVsStringDiff "test vector works"
                       diffCmd
                       "test-data/stemming/lancaster.txt"
                       mkTestVector
  ]
  where
    -- external command used to show the diff between reference and output
    diffCmd ref new = ["cabal", "v2-run", "-v0", "garg-golden-file-diff", "--", ref, new]
-- | List of /unstemmed/ test words, paired with the 1-based indices used
-- in the golden file (test-data/stemming/lancaster.txt).  Keep indices
-- sequential and in sync with that file.
testWords :: [(Int, T.Text)]
testWords = [
(1, "collaboration")
, (2, "postpartum")
, (3, "cat")
, (4, "cats")
, (5, "dog")
, (6, "dogs")
, (7, "run")
, (8, "running")
, (9, "runner")
, (10, "jump")
, (11, "jumped")
, (12, "jumping")
, (13, "swim")
, (14, "swimming")
, (15, "swimmer")
, (16, "fish")
, (17, "fishing")
, (18, "fisher")
, (19, "eat")
, (20, "eating")
, (21, "eater")
, (22, "talk")
, (23, "talking")
, (24, "talks")
, (25, "walk")
, (26, "walking")
, (27, "walker")
, (28, "dance")
, (29, "dancing")
, (30, "dancer")
, (31, "sing")
, (32, "singing")
, (33, "singer")
, (34, "play")
, (35, "playing")
, (36, "player")
, (37, "work")
, (38, "working")
, (39, "worker")
, (40, "teach")
, (41, "teaching")
, (42, "teacher")
, (43, "learn")
, (44, "learning")
, (45, "learner")
, (46, "read")
, (47, "reading")
, (48, "reader")
, (49, "write")
, (50, "writing")
, (51, "writer")
, (52, "paint")
, (53, "painting")
, (54, "painter")
, (55, "draw")
, (56, "drawing")
, (57, "drawer")
, (58, "speak")
, (59, "speaking")
, (60, "speaker")
, (61, "think")
, (62, "thinking")
, (63, "thinker")
, (64, "see")
, (65, "seeing")
, (66, "seen")
, (67, "hear")
, (68, "hearing")
, (69, "heard")
, (70, "touch")
, (71, "touching")
, (72, "touched")
, (73, "smell")
, (74, "smelling")
, (75, "smelled")
, (76, "taste")
, (77, "tasting")
, (78, "tasted")
, (79, "laugh")
, (80, "laughing")
, (81, "laughed")
, (82, "cry")
, (83, "crying")
, (84, "cried")
, (85, "smile")
, (86, "smiling")
, (87, "smiled")
, (88, "frown")
, (89, "frowning")
, (90, "frowned")
, (91, "happy")
, (92, "happier")
, (93, "happiest")
, (94, "sad")
, (95, "sadder")
, (96, "saddest")
, (97, "angry")
, (98, "angrier")
, (99, "angriest")
, (100, "calm")
, (101, "calmer")
, (102, "calmest")
, (103, "corroborate")
]
-- | Render the golden-file payload: one "index,stemmedWord" line per
-- entry of 'testWords', UTF-8 encoded, as a lazy ByteString.
mkTestVector :: IO BL.ByteString
mkTestVector = pure . toS . C8.unlines $ rows
  where
    rows = [ C8.pack (show i) <> "," <> TE.encodeUtf8 (stemIt w)
           | (i, w) <- testWords ]
......@@ -12,18 +12,20 @@ module Main where
import Gargantext.Prelude
import qualified Test.Core.Text.Corpus.Query as CorpusQuery
import qualified Test.Core.Utils as Utils
import qualified Test.Core.Text.Tokenize as Tokenize
import qualified Test.Graph.Clustering as Graph
import qualified Test.Ngrams.NLP as NLP
import qualified Test.Ngrams.Query as NgramsQuery
import qualified Test.Offline.JSON as JSON
import qualified Test.Offline.Errors as Errors
import qualified Test.Offline.Phylo as Phylo
import qualified Test.Parsers.Date as PD
import qualified Test.Utils.Crypto as Crypto
import qualified Test.Utils.Jobs as Jobs
import qualified Test.Core.Text.Corpus.Query as CorpusQuery
import qualified Test.Core.Utils as Utils
import qualified Test.Graph.Clustering as Graph
import qualified Test.Ngrams.NLP as NLP
import qualified Test.Ngrams.Query as NgramsQuery
import qualified Test.Offline.JSON as JSON
import qualified Test.Offline.Errors as Errors
import qualified Test.Offline.Phylo as Phylo
import qualified Test.Offline.Stemming.Lancaster as Lancaster
import qualified Test.Parsers.Date as PD
import qualified Test.Utils.Crypto as Crypto
import qualified Test.Utils.Jobs as Jobs
import qualified Test.Core.Similarity as Similarity
import Test.Tasty
import Test.Tasty.Hspec
......@@ -38,6 +40,7 @@ main = do
nlpSpec <- testSpec "NLP" NLP.test
jobsSpec <- testSpec "Jobs" Jobs.test
tokenizeSpec <- testSpec "Tokenize" Tokenize.test
similaritySpec <- testSpec "Similarity" Similarity.test
defaultMain $ testGroup "Gargantext"
[ utilSpec
......@@ -52,5 +55,7 @@ main = do
, CorpusQuery.tests
, JSON.tests
, Errors.tests
, similaritySpec
, Phylo.tests
, testGroup "Stemming" [ Lancaster.tests ]
]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment