Commit 5742cfee authored by Karen Konou's avatar Karen Konou

Merge branch 'dev' into 548-dev-node-url-share

parents f2a561a8 fe201115
...@@ -4,6 +4,10 @@ ...@@ -4,6 +4,10 @@
# Profiling # Profiling
*.prof *.prof
*.prof.html
*.hp
*.eventlog
*.eventlog.html
profiling profiling
# Stack # Stack
...@@ -38,3 +42,6 @@ tmp*repo*json ...@@ -38,3 +42,6 @@ tmp*repo*json
data data
devops/docker/js-cache devops/docker/js-cache
cabal.project.local
gargantext_profile_out.dot
...@@ -50,7 +50,7 @@ bench: ...@@ -50,7 +50,7 @@ bench:
- .cabal/ - .cabal/
policy: pull-push policy: pull-push
script: script:
- nix-shell --run "./bin/update-project-dependencies $CABAL_STORE_DIR && cabal --store-dir=$CABAL_STORE_DIR v2-bench --ghc-options='-O2 -fclear-plugins'" - nix-shell --run "./bin/update-project-dependencies $CABAL_STORE_DIR && cabal --store-dir=$CABAL_STORE_DIR v2-bench --flags +no-phylo-debug-logs --ghc-options='-O2 -fclear-plugins'"
allow_failure: true allow_failure: true
test: test:
...@@ -84,7 +84,7 @@ test: ...@@ -84,7 +84,7 @@ test:
mkdir -p /builds/gargantext/haskell-gargantext/devops/coreNLP/stanford-corenlp-current mkdir -p /builds/gargantext/haskell-gargantext/devops/coreNLP/stanford-corenlp-current
cp -R /root/devops/coreNLP/stanford-corenlp-${CORENLP}/* /builds/gargantext/haskell-gargantext/devops/coreNLP/stanford-corenlp-current/ cp -R /root/devops/coreNLP/stanford-corenlp-${CORENLP}/* /builds/gargantext/haskell-gargantext/devops/coreNLP/stanford-corenlp-current/
nix-shell --run "chown -R test:test /root/.config/ && su -m test -c \"export PATH=$PATH:$TEST_NIX_PATH && cd /builds/gargantext/haskell-gargantext; $CABAL --store-dir=$CABAL_STORE_DIR v2-test --test-show-details=streaming --flags test-crypto --ghc-options='-O0 -fclear-plugins'\"" nix-shell --run "chown -R test:test /root/.config/ && su -m test -c \"export PATH=$PATH:$TEST_NIX_PATH && cd /builds/gargantext/haskell-gargantext; $CABAL --store-dir=$CABAL_STORE_DIR v2-test --test-show-details=streaming --flags 'test-crypto no-phylo-debug-logs' --ghc-options='-O0 -fclear-plugins'\""
chown -R root:root dist-newstyle/ chown -R root:root dist-newstyle/
chown -R root:root /root/ chown -R root:root /root/
chown -R root:root $CABAL_STORE_DIR chown -R root:root $CABAL_STORE_DIR
......
## Version 0.0.6.9.9.9.6.2 [Release Candidate for 007]
* [BACK][FIX][Node stories insertion error (SqlError violates foreign key constraint) (#303)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/303)
* [BACK][DOC][Welcome: Door To enter the project (#177)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/177)
* [BACK][OPTIM][Improve Phylo robustness and performance (#292)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/292)
## Version 0.0.6.9.9.9.6.1 ## Version 0.0.6.9.9.9.6.1
* [BACK][FEAT][Removing Order2_A and Order2_B and use Order2 only instead (#308)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/308) * [BACK][FEAT][Removing Order2_A and Order2_B and use Order2 only instead (#308)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/308)
......
...@@ -72,6 +72,52 @@ This will take a bit of time as it has to download/build the dependencies, but t ...@@ -72,6 +72,52 @@ This will take a bit of time as it has to download/build the dependencies, but t
#### With Cabal (recommanded) #### With Cabal (recommanded)
##### Turning off optimization flags
Create a `cabal.project.local` file (don't commit it to git!):
```
package gargantext
ghc-options: -fwrite-ide-info -hiedir=".stack-work/hiedb" -O0
package gargantext-admin
ghc-options: -O0
package gargantext-cli
ghc-options: -O0
package gargantext-db-obfuscation
ghc-options: -O0
package gargantext-import
ghc-options: -O0
package gargantext-init
ghc-options: -O0
package gargantext-invitations
ghc-options: -O0
package gargantext-phylo
ghc-options: -O0
package gargantext-server
ghc-options: -O0
package gargantext-upgrade
ghc-options: -O0
package gargantext-graph
ghc-options: -O0
package hmatrix
ghc-options: -O0
package sparse-linear
ghc-options: -O0
```
##### Building
First, into `nix-shell`: First, into `nix-shell`:
```shell ```shell
cabal update cabal update
...@@ -140,7 +186,7 @@ The good news is that you don't have to do all of this manually; during developm ...@@ -140,7 +186,7 @@ The good news is that you don't have to do all of this manually; during developm
``` sh ``` sh
# If docker is not installed: # If docker is not installed:
# curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/dev/devops/docker/docker-install | sh # curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/dev/devops/docker/install_docker | sh
cd devops/docker cd devops/docker
docker compose up docker compose up
``` ```
......
This source diff could not be displayed because it is too large. You can view the blob instead.
status label forms
map %
map % ci
candidate 17α-ethinylestradiol
map 17β-estradiol
candidate 2-ethylhexyl
candidate 4-tert-octylphenol
candidate absence
candidate action
candidate activation
candidate activities activity
candidate activity
map addition
map additional studies
map adsorption
map adult zebrafish
candidate adults
map adverse effect
map adverse effects adverse effect
candidate adverse health effects
map aim
map aim was
candidate algae
map alteration
map alterations alteration
candidate alternatives
map analogs
candidate analogues
map analysis
map androgen receptor
map animal animals
map animal models
map animals
map apoptosis
candidate application applications
candidate applications
candidate aquatic ecosystems
map aquatic environment
candidate aquatic plants
candidate ar
candidate area
candidate areas area
map article is protected
candidate assay
candidate assays assay
candidate association
candidate associations association
map balloon pulmonary angioplasty
candidate basis
map better understanding
candidate beverages
candidate binding affinity
map bioaccumulation potential
candidate biological effects
map birth outcomes
map birth weight
map bisphenol bisphenols
map bisphenol a
map bisphenol a action
map bisphenol a analogues
candidate bisphenol a exposure
candidate bisphenol a.
map bisphenol af
map bisphenol analogues
map bisphenol b
candidate bisphenol compounds
map bisphenol f
map bisphenol s bisphenol-s
candidate bisphenol-a
map bisphenol-s
map bisphenols
candidate bnct
map body weight
map boron neutron capture therapy
map bp bps
map bpa
candidate bpa analogues
map bpa caused
map bpa degradation
candidate bpa did
map bpa disrupts
map bpa exposure
map bpa exposure was
candidate bpa group
candidate bpa had
map bpa increased
map bpa is
candidate bpa leads
candidate bpa replacements
candidate bpa stimulated
map bpa treatment
map bpa was bpa is
candidate bpa was determined
candidate bpa were found
candidate bpa-bnct
candidate bpa-exposed rats
map bpaf
map bpaf was
map bpb
map bpf
map bps
map breast cancer
map c-bisphenol a
candidate caffeine
candidate carbon nanotubes
map case
map cash register receipts
candidate cat
candidate catalase
candidate cck-8
candidate cd
map cell counting kit-8
map cell proliferation
map cell viability
candidate changes
map chemical chemicals
candidate chemical analysis
map chemical exposure
map chemical oxygen demand
map chemicals
candidate chemicals were
candidate children
candidate china
candidate chromosomes
map chronic exposure
map chronic thromboembolic pulmonary hypertension
candidate cm
candidate co
candidate cod
map coexistence
map color developer
candidate combination
candidate common sources
candidate comparison
candidate complications
map compounds
map concentration
map concentration-dependent manner
map concentrations concentration
map concern
map concerns concern
map conclusion
candidate consumer products
candidate contamination
candidate contrast
map control
map control group control groups
map control groups
map controls control
map copyright
candidate cr
map critical period
map critical windows
candidate cross-sectional study
candidate crucial role
candidate cteph
candidate cu
candidate current knowledge
map cyp11a1
map danio rerio
map data
map day
map days day
map dbp
candidate decrease decreases
candidate decreases
candidate degradation
map degradation rate
candidate dehp
candidate deleterious effects
map delivery
map dermal absorption
map dermal exposure
candidate desorption
map desorption time
map detection
candidate detection limit
map determination
map development
map developmental periods
candidate diabetes
map dibutyl phthalate
candidate dietary exposure
candidate different effects
map different products
candidate different regions
map distribution distributions
map distributions
candidate dna damage
map dna methylation
candidate dose
map dose-limiting tissue
candidate doses dose
map drinking water
map e1
map e2
map ecological and human health risk
map edc
map edcs edc
candidate ee2
map effect
map effects effect
map effects are effects were
map effects were
candidate efsa
candidate embryos
map emt
map emt process
candidate endocrine disrupter
candidate endocrine disruption
map endocrine disruptor
map endocrine disruptors endocrine disruptor
map endocrine system
candidate endocrine-disrupting chemical
candidate environment
candidate environmental behaviors
candidate environmental contaminant
candidate environmental endocrine disruptors
map environmental exposure
map environmental remediation
map environmental toxicants
candidate epidemiological studies
map epoxy resins
map er
map er-dependent pathway
candidate erk1/2
map ers er
map estriol
candidate estrogen
map estrogen receptor
map estrogen receptors estrogen receptor
map estrogenic activity
candidate estrogenic properties
candidate estrogens estrogen
map estrone
candidate europe
map european food safety authority
candidate evaluation
candidate evidence
candidate evidence indicates
candidate experimental data
map exposure
map exposure was
map exposures exposure
map expression expressions
map expressions
candidate extent
map extraction
candidate fact
map females
map fetal development
map findings indicate
candidate findings suggest
map first time
candidate fish
map flow rate
map food foods
candidate food packaging
map foods
candidate formation
map fourier transform infrared spectroscopy
candidate freshwater
map freundlich model
candidate ftir
candidate function
map g creatinine
candidate g protein-coupled estrogen receptor
map gene expression
map general population
candidate gestation
candidate glucose homeostasis
map glutathione peroxidase
candidate good linear relationship
map group
map groups group
candidate growth
candidate h
map head circumference
map health effects
candidate high expression
candidate high risk
candidate high sensitivity
candidate high-resolution transmission electron microscopy
map higher bpa levels
candidate higher concentrations
map higher rates
map hippocampus
candidate hong kong
map hormonal regulation
candidate hours
candidate hplc-uv
candidate hrtem
map human exposure
map human health
candidate human plasma
candidate humans
map humans are
map humic acid
map humic acids humic acid
map hydroxyl radicals
candidate hypothalamus
candidate ici
candidate il-6
candidate immune system
candidate impact
candidate impacts impact
candidate impairment
map implantation failure
candidate important role
candidate increase
map induction
candidate influence
candidate inhibition
map initial concentration
map insulin resistance
candidate interaction interactions
candidate interactions
candidate involvement
map john wiley
map juvenile sprague-dawley rats
candidate key role
candidate kg
candidate kinetics
map l
candidate lack
candidate learning
candidate length
map level
map levels level
map limit
candidate limited number
map limits limit
candidate liver
map lod
map lods lod
candidate loq
map low concentrations
candidate low-dose bisphenol a
map low-dose bpa
map low-dose exposure
map ltd.
map major source
map male
map male mice
map male offspring
map males male
candidate manufacture
candidate mapk
candidate mass spectrometry
candidate maternal diet
map matrix effect
map mcf-7 breast cancer cells
map mcf-7 cells
candidate mechanism
candidate mechanisms mechanism
candidate method
map method was
map method was applied
candidate methods method
map mg g
candidate mg kg
candidate mg l
candidate mg/kg
candidate mg/l
candidate mice
map migration
candidate min
map mitogen-activated protein kinase
candidate ml
map mobile phase
candidate modulation
map molecular docking
candidate molecular interactions
candidate motility
map mrna expression
map myelin basic protein
map n
candidate naphthalene
candidate natural waters
map negative effect
candidate neuroendocrine disruption
map new mechanism
candidate ng/l
candidate ni
map nonylphenol
candidate np
candidate number
candidate obesity
candidate objective
candidate objective was
candidate occupational exposure
map occurrence
candidate onset
candidate order
map organic contaminants
map organic pollutants
candidate organisms
map ovarian reserve
map oxidative stress
candidate p
candidate papillary thyroid carcinoma
candidate parabens
candidate part
candidate participants
candidate pathophysiology
candidate pb
candidate pergafast
map perinatal exposure
candidate pfoa
candidate pfos
candidate ph
candidate phenol phenols
candidate phenolic compounds
candidate phenols
candidate phosphate
candidate phosphorylation
map photocatalytic mechanism
map photocatalytic properties
candidate photoluminescence
map phthalate
map phthalates phthalate
candidate physical activity
map physicochemical properties
candidate pl
map plastic industry
map plastic production
candidate plastics
candidate pnd
candidate pnd90
candidate pollutants
map polycarbonate plastic
map polycarbonate plastics polycarbonate plastic
map positive effect
candidate possible effects
map postnatal day
candidate potential
candidate potential applications
candidate potential effects
candidate potential mechanism
candidate potential risk
map potential sources
map pregnancy
candidate preliminary results
candidate prenatal bisphenol a
map prenatal exposure
map presence
map present study
map present study is present study was
map present study was
candidate previous study
map pro-inflammatory cytokines
candidate production
candidate products
candidate proliferation
map protein expression
map protein levels
candidate pseudo-second-order kinetic model
map pubertal development
candidate puberty
candidate quantification
map r
map range
map ranges range
map rat offspring
map reactive oxygen species
candidate regulation
map relation
map relative standard deviations
candidate release
candidate relevant levels
map removal
map removal efficiencies
map removal efficiency removal efficiencies
candidate reproductive development
map reproductive function
map reproductive hormones
candidate response
map result
map results result
candidate results demonstrated
map results indicate
map results indicated results indicate
candidate results revealed
map results show
map results showed results show
map results suggest
candidate review
map rights reserved
map risk
map risk assessment
map risks risk
map role
map ros
candidate s/n
map sample samples
map samples
candidate samples were
map sampling sites showed
candidate scanning electron microscope
map scanning electron microscopy
map sediment
map sediments sediment
candidate sem
map sensitivities
map sensitivity sensitivities
candidate series
candidate serum
map signal-to-noise ratio
map significant decrease
candidate significant differences
map significant increase
map simultaneous determination
map simultaneous removal
candidate soil
map sons
map sorbent
map source
map sources source
map spatial distribution
map spatial memory
candidate species
map stem cells
map structural analogues
candidate structure
map studies study
candidate studies showed
map study
map study aims
map study evaluated
map study investigated
candidate study suggests
map study was
map superoxide dismutase
candidate surface
map surface water surface waters
map surface waters
candidate susceptibility
map synaptic plasticity
map synthesis
candidate t
map taihu lake
candidate tcs
candidate testis
map tetrabromobisphenol a
map thermal paper
map thermal paper contains
candidate thermal paper receipts
map toxicity
map transcriptional activity
candidate treatment
map triclosan
candidate type types
candidate types
candidate ubiquitous endocrine-disrupting chemical
map urinary levels
map urine samples
map urine samples collected
candidate use
map utero development
map utero windows
candidate value
candidate values value
map vi
map visible light irradiation
map wastewater
map water
candidate water sources
candidate water treatment
candidate week weeks
candidate weeks
map wide range
map women
candidate work
map work was
candidate wt %
map x-ray diffraction
candidate xrd
candidate young children
candidate zebrafish
map zebrafish embryos
map μg
map μg kg
candidate μgl
{
"corpusPath" : "Gargantext_DocsList-nodeId-185487.csv",
"listPath" : "Gargantext_NgramsList-185488.csv",
"outputPath" : "data",
"corpusParser" : {
"tag" : "Csv",
"_csv_limit" : 1500000
},
"listParser" : "V3",
"phyloName" : "bpa",
"phyloScale" : 2,
"similarity" : {
"tag" : "WeightedLogJaccard",
"_wlj_sensibility" : 0.5,
"_wlj_minSharedNgrams" : 1
},
"seaElevation" : {
"tag" : "Evolving",
"_evol_neighborhood" : true
},
"defaultMode" : false,
"findAncestors" : true,
"phyloSynchrony" : {
"tag" : "ByProximityThreshold",
"_bpt_threshold" : 0.6,
"_bpt_sensibility" : 0,
"_bpt_scope" : "AllBranches",
"_bpt_strategy" : "MergeAllGroups"
},
"phyloQuality" : {
"tag" : "Quality",
"_qua_granularity" : 0.1,
"_qua_minBranch" : 2
},
"timeUnit" : {
"tag" : "Week",
"_week_period" : 4,
"_week_step" : 2,
"_week_matchingFrame" : 5
},
"clique" : {
"tag" : "Fis",
"_fis_support" : 3,
"_fis_size" : 1
},
"exportLabel" : [
{
"tag" : "BranchLabel",
"_branch_labelTagger" : "MostEmergentTfIdf",
"_branch_labelSize" : 2
},
{
"tag" : "GroupLabel",
"_group_labelTagger" : "MostEmergentInclusive",
"_group_labelSize" : 2
}
],
"exportSort" : {
"tag" : "ByHierarchy",
"_sort_order" : "Desc"
},
"exportFilter" : [
{
"tag" : "ByBranchSize",
"_branch_size" : 2
}
]
}
{-# LANGUAGE OverloadedStrings #-}
module Main where
import Common
import Data.Aeson
import Data.List (nub)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig, toPeriods, getTimePeriod, getTimeStep)
import GHC.IO.Encoding
import GHC.Stack
import Paths_gargantext
import Prelude
import qualified Data.Text as T
import Shelly
import System.Directory
--------------
-- | Main | --
--------------
phyloConfig :: FilePath -> PhyloConfig
phyloConfig outdir = PhyloConfig {
corpusPath = "corpus.csv"
, listPath = "list.csv"
, outputPath = outdir
, corpusParser = Csv {_csv_limit = 150000}
, listParser = V4
, phyloName = "phylo_profile_test"
, phyloScale = 2
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = True
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
, clique = MaxClique {_mcl_size = 5, _mcl_threshold = 1.0e-4, _mcl_filter = ByThreshold}
, exportLabel = [ BranchLabel {_branch_labelTagger = MostEmergentTfIdf, _branch_labelSize = 2}
, GroupLabel {_group_labelTagger = MostEmergentInclusive, _group_labelSize = 2}
]
, exportSort = ByHierarchy {_sort_order = Desc}
, exportFilter = [ByBranchSize {_branch_size = 3.0}]
}
main :: HasCallStack => IO ()
main = do
shelly $ escaping False $ withTmpDir $ \tdir -> do
curDir <- pwd
let output = curDir <> "/" <> "gargantext_profile_out.dot"
chdir tdir $ do
liftIO $ setLocaleEncoding utf8
bpaConfig <- liftIO $ getDataFileName "bench-data/phylo/bpa-config.json"
corpusPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_DocsList-nodeId-185487.csv"
listPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_NgramsList-185488.csv"
(Right config) <- fmap (\pcfg -> pcfg { outputPath = tdir
, corpusPath = corpusPath'
, listPath = listPath'
}) <$> liftIO (eitherDecodeFileStrict' bpaConfig)
mapList <- liftIO $ fileToList (listParser config) (listPath config)
corpus <- liftIO $ if (defaultMode config)
then fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList
else fileToDocsAdvanced (corpusParser config) (corpusPath config) (timeUnit config) mapList
liftIO $ do
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOComment (show (length $ nub $ concat $ map text corpus) <> " Size ngs_coterms")
printIOComment (show (length mapList) <> " Size ngs_terms List Map Ngrams")
printIOMsg "Reconstruct the phylo"
-- check the existing backup files
let backupPhyloWithoutLink = (outputPath config) <> "backupPhyloWithoutLink_" <> (configToSha BackupPhyloWithoutLink config) <> ".json"
let backupPhylo = (outputPath config) <> "backupPhylo_" <> (configToSha BackupPhylo config) <> ".json"
phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
phyloExists <- doesFileExist backupPhylo
-- reconstruct the phylo
phylo <- if phyloExists
then do
printIOMsg "Reconstruct the phylo from an existing file"
readPhylo backupPhylo
else do
if phyloWithoutLinkExists
then do
printIOMsg "Reconstruct the phylo from an existing file without links"
phyloWithoutLink <- readPhylo backupPhyloWithoutLink
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithoutLink <- pure $ toPhyloWithoutLink corpus config
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
writePhylo backupPhylo phylo
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport (setConfig config phylo)
dotToFile output dot
echo "Done."
...@@ -46,210 +46,7 @@ import Gargantext.Database.Schema.Ngrams (NgramsType(..)) ...@@ -46,210 +46,7 @@ import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash, replace) import Gargantext.Prelude hiding (hash, replace)
import Prelude qualified import Prelude qualified
import System.Directory (listDirectory,doesFileExist) import System.Directory (listDirectory,doesFileExist)
import Common
data Backup = BackupPhyloWithoutLink | BackupPhylo deriving (Show)
---------------
-- | Tools | --
---------------
-- | To get all the files in a directory or just a file
getFilesFromPath :: FilePath -> IO [FilePath]
getFilesFromPath path = do
if (isSuffixOf "/" path)
then (listDirectory path)
else return [path]
----------------
-- | Parser | --
----------------
-- | To filter the Ngrams of a document based on the termList
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs :: Int -> Patterns -> TimeUnit -> FilePath -> IO [Document]
wosToDocs limit patterns time path = do
files <- getFilesFromPath path
take limit
<$> map (\d -> let title = fromJust $ _hd_title d
abstr = if (isJust $ _hd_abstract d)
then fromJust $ _hd_abstract d
else ""
in Document (toPhyloDate
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d) time)
(toPhyloDate'
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d) time)
(termsInText patterns $ title <> " " <> abstr) Nothing [] time)
<$> concat
<$> mapConcurrently (\file ->
filter (\d -> (isJust $ _hd_publication_year d)
&& (isJust $ _hd_title d))
<$> fromRight [] <$> parseFile WOS Plain (path <> file) ) files
-- To transform a Csv file into a list of Document
csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
csvToDocs parser patterns time path =
case parser of
Wos _ -> Prelude.error "csvToDocs: unimplemented"
Csv limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time)
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
time
) <$> snd <$> either (\err -> panicTrace $ "CSV error" <> (show err)) identity <$> Csv.readCSVFile path
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row)
(map (T.strip . pack) $ splitOn ";" (unpack $ (csv'_source row)))
time
) <$> snd <$> Csv.readWeightedCsv path
-- To parse a file into a list of Document
fileToDocsAdvanced :: CorpusParser -> FilePath -> TimeUnit -> TermList -> IO [Document]
fileToDocsAdvanced parser path time lst = do
let patterns = buildPatterns lst
case parser of
Wos limit -> wosToDocs limit patterns time path
Csv _ -> csvToDocs parser patterns time path
Csv' _ -> csvToDocs parser patterns time path
fileToDocsDefault :: CorpusParser -> FilePath -> [TimeUnit] -> TermList -> IO [Document]
fileToDocsDefault parser path timeUnits lst =
if length timeUnits > 0
then
do
let timeUnit = (head' "fileToDocsDefault" timeUnits)
docs <- fileToDocsAdvanced parser path timeUnit lst
let periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod timeUnit) (getTimeStep timeUnit)
if (length periods < 3)
then fileToDocsDefault parser path (tail timeUnits) lst
else pure docs
else panicTrace "this corpus is incompatible with the phylomemy reconstruction"
-- on passe à passer la time unit dans la conf envoyé au phyloMaker
-- dans le phyloMaker si default est true alors dans le setDefault ou pense à utiliser la TimeUnit de la conf
---------------
-- | Label | --
---------------
-- Config time parameters to label
timeToLabel :: PhyloConfig -> [Char]
timeToLabel config = case (timeUnit config) of
Epoch p s f -> ("time_epochs" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Month p s f -> ("time_months" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: PhyloConfig -> [Char]
seaToLabel config = case (seaElevation config) of
Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step))
Adaptative granularity -> ("sea_adapt" <> (show granularity))
Evolving _ -> ("sea_evolv")
sensToLabel :: PhyloConfig -> [Char]
sensToLabel config = case (similarity config) of
Hamming _ _ -> Prelude.error "sensToLabel: unimplemented"
WeightedLogJaccard s _ -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s _ -> ( "WeightedLogSim-sens_" <> show s)
cliqueToLabel :: PhyloConfig -> [Char]
cliqueToLabel config = case (clique config) of
Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
MaxClique s t f -> "clique_" <> (show s)<> "_" <> (show f)<> "_" <> (show t)
syncToLabel :: PhyloConfig -> [Char]
syncToLabel config = case (phyloSynchrony config) of
ByProximityThreshold scl sync_sens scope _ -> ("scale_" <> (show scope) <> "_" <> (show sync_sens) <> "_" <> (show scl))
ByProximityDistribution _ _ -> "syncToLabel: unimplemented"
qualToConfig :: PhyloConfig -> [Char]
qualToConfig config = case (phyloQuality config) of
Quality g m -> "quality_" <> (show g) <> "_" <> (show m)
-- To set up the export file's label from the configuration
configToLabel :: PhyloConfig -> [Char]
configToLabel config = outputPath config
<> (unpack $ phyloName config)
<> "-" <> (timeToLabel config)
<> "-scale_" <> (show (phyloScale config))
<> "-" <> (seaToLabel config)
<> "-" <> (sensToLabel config)
<> "-" <> (cliqueToLabel config)
<> "-level_" <> (show (_qua_granularity $ phyloQuality config))
<> "-" <> (syncToLabel config)
<> ".dot"
-- To write a sha256 from a set of config's parameters
configToSha :: Backup -> PhyloConfig -> [Char]
configToSha stage config = unpack
$ replace "/" "-"
$ T.pack (show (hash $ C8.pack label))
where
label :: [Char]
label = case stage of
BackupPhyloWithoutLink -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
BackupPhylo -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
<> (sensToLabel config)
<> (seaToLabel config)
<> (syncToLabel config)
<> (qualToConfig config)
<> (show (phyloScale config))
readListV4 :: [Char] -> IO NgramsList
readListV4 path = do
listJson <- (eitherDecode <$> readJson path) :: IO (Either Prelude.String NgramsList)
case listJson of
Left err -> do
putStrLn err
Prelude.error "readListV4 unimplemented"
Right listV4 -> pure listV4
fileToList :: ListParser -> FilePath -> IO TermList
fileToList parser path =
case parser of
V3 -> csvMapTermList path
V4 -> fromJust
<$> toTermList MapTerm NgramsTerms
<$> readListV4 path
--------------
-- | Main | --
--------------
main :: IO () main :: IO ()
main = do main = do
...@@ -273,7 +70,7 @@ main = do ...@@ -273,7 +70,7 @@ main = do
corpus <- if (defaultMode config) corpus <- if (defaultMode config)
then fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList then fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList
else fileToDocsAdvanced (corpusParser config) (corpusPath config) (timeUnit config) mapList else fileToDocsAdvanced (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus") printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOComment (show (length $ nub $ concat $ map text corpus) <> " Size ngs_coterms") printIOComment (show (length $ nub $ concat $ map text corpus) <> " Size ngs_coterms")
......
{-# LANGUAGE OverloadedStrings #-}
module Common where
import Control.Concurrent.Async (mapConcurrently)
import Crypto.Hash.SHA256 (hash)
import Data.Aeson
import Data.ByteString.Char8 qualified as C8
import Data.List (nub, tail)
import Data.List.Split
import Data.Maybe (fromJust)
import Data.Text (unpack, replace, pack)
import Data.Text qualified as T
import Data.Vector qualified as Vector
import Gargantext.API.Ngrams.Prelude (toTermList)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseFile)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloTools (toPeriods, getTimePeriod, getTimeStep)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash, replace)
import Prelude qualified
import System.Directory (listDirectory)
data Backup = BackupPhyloWithoutLink | BackupPhylo deriving (Show)
---------------
-- | Tools | --
---------------
-- | To get all the files in a directory or just a file
getFilesFromPath :: FilePath -> IO [FilePath]
getFilesFromPath path = do
if (isSuffixOf "/" path)
then (listDirectory path)
else return [path]
----------------
-- | Parser | --
----------------
-- | To filter the Ngrams of a document based on the termList
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs :: Int -> Patterns -> TimeUnit -> FilePath -> IO [Document]
wosToDocs limit patterns time path = do
files <- getFilesFromPath path
take limit
<$> map (\d -> let title = fromJust $ _hd_title d
abstr = if (isJust $ _hd_abstract d)
then fromJust $ _hd_abstract d
else ""
in Document (toPhyloDate
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d) time)
(toPhyloDate'
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d) time)
(termsInText patterns $ title <> " " <> abstr) Nothing [] time)
<$> concat
<$> mapConcurrently (\file ->
filter (\d -> (isJust $ _hd_publication_year d)
&& (isJust $ _hd_title d))
<$> fromRight [] <$> parseFile WOS Plain (path <> file) ) files
-- To transform a Csv file into a list of Document
csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
csvToDocs parser patterns time path =
case parser of
Wos _ -> Prelude.error "csvToDocs: unimplemented"
Csv limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time)
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
time
) <$> snd <$> either (\err -> panicTrace $ "CSV error" <> (show err)) identity <$> Csv.readCSVFile path
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row)
(map (T.strip . pack) $ splitOn ";" (unpack $ (csv'_source row)))
time
) <$> snd <$> Csv.readWeightedCsv path
-- To parse a file into a list of Document
fileToDocsAdvanced :: CorpusParser -> FilePath -> TimeUnit -> TermList -> IO [Document]
fileToDocsAdvanced parser path time lst = do
let patterns = buildPatterns lst
case parser of
Wos limit -> wosToDocs limit patterns time path
Csv _ -> csvToDocs parser patterns time path
Csv' _ -> csvToDocs parser patterns time path
fileToDocsDefault :: CorpusParser -> FilePath -> [TimeUnit] -> TermList -> IO [Document]
fileToDocsDefault parser path timeUnits lst =
if length timeUnits > 0
then
do
let timeUnit = (head' "fileToDocsDefault" timeUnits)
docs <- fileToDocsAdvanced parser path timeUnit lst
let periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod timeUnit) (getTimeStep timeUnit)
if (length periods < 3)
then fileToDocsDefault parser path (tail timeUnits) lst
else pure docs
else panicTrace "this corpus is incompatible with the phylomemy reconstruction"
-- on passe à passer la time unit dans la conf envoyé au phyloMaker
-- dans le phyloMaker si default est true alors dans le setDefault ou pense à utiliser la TimeUnit de la conf
readListV4 :: [Char] -> IO NgramsList
readListV4 path = do
listJson <- (eitherDecode <$> readJson path) :: IO (Either Prelude.String NgramsList)
case listJson of
Left err -> do
putStrLn err
Prelude.error "readListV4 unimplemented"
Right listV4 -> pure listV4
fileToList :: ListParser -> FilePath -> IO TermList
fileToList parser path =
case parser of
V3 -> csvMapTermList path
V4 -> fromJust
<$> toTermList MapTerm NgramsTerms
<$> readListV4 path
---------------
-- | Label | --
---------------
-- Config time parameters to label
timeToLabel :: PhyloConfig -> [Char]
timeToLabel config = case (timeUnit config) of
Epoch p s f -> ("time_epochs" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Month p s f -> ("time_months" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: PhyloConfig -> [Char]
seaToLabel config = case (seaElevation config) of
Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step))
Adaptative granularity -> ("sea_adapt" <> (show granularity))
Evolving _ -> ("sea_evolv")
sensToLabel :: PhyloConfig -> [Char]
sensToLabel config = case (similarity config) of
Hamming _ _ -> Prelude.error "sensToLabel: unimplemented"
WeightedLogJaccard s _ -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s _ -> ( "WeightedLogSim-sens_" <> show s)
cliqueToLabel :: PhyloConfig -> [Char]
cliqueToLabel config = case (clique config) of
Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
MaxClique s t f -> "clique_" <> (show s)<> "_" <> (show f)<> "_" <> (show t)
syncToLabel :: PhyloConfig -> [Char]
syncToLabel config = case (phyloSynchrony config) of
ByProximityThreshold scl sync_sens scope _ -> ("scale_" <> (show scope) <> "_" <> (show sync_sens) <> "_" <> (show scl))
ByProximityDistribution _ _ -> "syncToLabel: unimplemented"
qualToConfig :: PhyloConfig -> [Char]
qualToConfig config = case (phyloQuality config) of
Quality g m -> "quality_" <> (show g) <> "_" <> (show m)
-- To set up the export file's label from the configuration
configToLabel :: PhyloConfig -> [Char]
configToLabel config = outputPath config
<> (unpack $ phyloName config)
<> "-" <> (timeToLabel config)
<> "-scale_" <> (show (phyloScale config))
<> "-" <> (seaToLabel config)
<> "-" <> (sensToLabel config)
<> "-" <> (cliqueToLabel config)
<> "-level_" <> (show (_qua_granularity $ phyloQuality config))
<> "-" <> (syncToLabel config)
<> ".dot"
-- To write a sha256 from a set of config's parameters
configToSha :: Backup -> PhyloConfig -> [Char]
configToSha stage config = unpack
$ replace "/" "-"
$ T.pack (show (hash $ C8.pack label))
where
label :: [Char]
label = case stage of
BackupPhyloWithoutLink -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
BackupPhylo -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
<> (sensToLabel config)
<> (seaToLabel config)
<> (syncToLabel config)
<> (qualToConfig config)
<> (show (phyloScale config))
...@@ -18,8 +18,8 @@ fi ...@@ -18,8 +18,8 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and # with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI # `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in. # cache can kick in.
expected_cabal_project_hash="3f5d6b7f26cac4aa5a7f87ba0227a7671041dfe46643ddef79512eb49bd876ec" expected_cabal_project_hash="c9fe39301e8b60bfd183e60e7e25a14cd1c9c66d8739bf9041ca3f4db89db7c6"
expected_cabal_project_freeze_hash="745c65c246998cfda4d2a7a22df44a9f1f7fb0927e2afc2f16712861bf552c76" expected_cabal_project_freeze_hash="db24c7d3006167102532e3101e2b49bae13d478003459c7d3f1d66590e57740a"
cabal --store-dir=$STORE_DIR v2-build --dry-run cabal --store-dir=$STORE_DIR v2-build --dry-run
cabal2stack --system-ghc --allow-newer --resolver lts-21.17 --resolver-file devops/stack/lts-21.17.yaml -o stack.yaml cabal2stack --system-ghc --allow-newer --resolver lts-21.17 --resolver-file devops/stack/lts-21.17.yaml -o stack.yaml
......
...@@ -31,8 +31,8 @@ source-repository-package ...@@ -31,8 +31,8 @@ source-repository-package
source-repository-package source-repository-package
type: git type: git
location: https://github.com/adinapoli/haskell-opaleye.git location: https://github.com/garganscript/haskell-opaleye.git
tag: e9a29582ac66198dd2c2fdc3f8c8a4b1e6fbe004 tag: 6cf1bcfe215143efac17919cfd0abdd60e0f717c
source-repository-package source-repository-package
type: git type: git
...@@ -183,12 +183,9 @@ allow-newer: * ...@@ -183,12 +183,9 @@ allow-newer: *
package gargantext package gargantext
ghc-options: -fwrite-ide-info -hiedir=".stack-work/hiedb" ghc-options: -fwrite-ide-info -hiedir=".stack-work/hiedb"
package gargantext-graph
ghc-options: -O2 -fsimpl-tick-factor=10000 -fdicts-cheap -fdicts-strict -flate-dmd-anal -fno-state-hack
package hmatrix package hmatrix
ghc-options: -O2 -fsimpl-tick-factor=10000 -fdicts-cheap -fdicts-strict -flate-dmd-anal -fno-state-hack ghc-options: -O2 -fsimpl-tick-factor=10000 -fdicts-cheap -fdicts-strict -flate-dmd-anal -fno-state-hack
package sparse-linear package sparse-linear
ghc-options: -O2 -fsimpl-tick-factor=10000 -fdicts-cheap -fdicts-strict -flate-dmd-anal -fno-state-hack ghc-options: -O2 -fsimpl-tick-factor=10000 -fdicts-cheap -fdicts-strict -flate-dmd-anal -fno-state-hack
...@@ -178,6 +178,7 @@ constraints: any.Cabal ==3.8.1.0, ...@@ -178,6 +178,7 @@ constraints: any.Cabal ==3.8.1.0,
any.digest ==0.0.1.7, any.digest ==0.0.1.7,
digest +pkg-config, digest +pkg-config,
any.directory ==1.3.7.1, any.directory ==1.3.7.1,
any.discrimination ==0.5,
any.distributive ==0.6.2.1, any.distributive ==0.6.2.1,
distributive +semigroups +tagged, distributive +semigroups +tagged,
any.dlist ==1.0, any.dlist ==1.0,
...@@ -454,6 +455,7 @@ constraints: any.Cabal ==3.8.1.0, ...@@ -454,6 +455,7 @@ constraints: any.Cabal ==3.8.1.0,
any.process ==1.6.17.0, any.process ==1.6.17.0,
any.product-profunctors ==0.11.1.1, any.product-profunctors ==0.11.1.1,
any.profunctors ==5.6.2, any.profunctors ==5.6.2,
any.promises ==0.3,
any.protolude ==0.3.3, any.protolude ==0.3.3,
any.psqueues ==0.2.7.3, any.psqueues ==0.2.7.3,
any.pureMD5 ==2.1.4, any.pureMD5 ==2.1.4,
......
cabal-version: 2.0 cabal-version: 3.4
-- This file has been generated from package.yaml by hpack version 0.35.1. -- This file has been generated from package.yaml by hpack version 0.35.1.
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.6.9.9.9.6.1 version: 0.0.6.9.9.9.6.2
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -13,10 +13,13 @@ homepage: https://gargantext.org ...@@ -13,10 +13,13 @@ homepage: https://gargantext.org
author: Gargantext Team author: Gargantext Team
maintainer: team@gargantext.org maintainer: team@gargantext.org
copyright: Copyright: (c) 2017-Present: see git logs and README copyright: Copyright: (c) 2017-Present: see git logs and README
license: AGPL-3 license: AGPL-3.0-or-later
license-file: LICENSE license-file: LICENSE
build-type: Simple build-type: Simple
data-files: data-files:
bench-data/phylo/bpa-config.json
bench-data/phylo/GarganText_DocsList-nodeId-185487.csv
bench-data/phylo/GarganText_NgramsList-185488.csv
bench-data/phylo/issue-290.json bench-data/phylo/issue-290.json
bench-data/phylo/issue-290-small.json bench-data/phylo/issue-290-small.json
devops/postgres/extensions.sql devops/postgres/extensions.sql
...@@ -39,6 +42,37 @@ data-files: ...@@ -39,6 +42,37 @@ data-files:
gargantext-cors-settings.toml gargantext-cors-settings.toml
.clippy.dhall .clippy.dhall
-- common options
-- https://vrom911.github.io/blog/common-stanzas
common defaults
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
default-language: Haskell2010
build-depends:
base >=4.7 && <5
-- optimizations
common optimized
ghc-options:
-O2
-threaded
-rtsopts
-with-rtsopts=-N
-Wmissing-signatures
-- When enabled, it swaps the hashing algorithm -- When enabled, it swaps the hashing algorithm
-- with a quicker (and less secure) version, which -- with a quicker (and less secure) version, which
-- runs faster in tests. -- runs faster in tests.
...@@ -50,7 +84,16 @@ flag disable-db-obfuscation-executable ...@@ -50,7 +84,16 @@ flag disable-db-obfuscation-executable
default: False default: False
manual: True manual: True
-- When enabled, it suppresses at compile time the
-- debug output for the phylo code, so that it doesn't
-- hinder its performance.
flag no-phylo-debug-logs
default: False
manual: True
library library
import:
defaults
exposed-modules: exposed-modules:
Gargantext Gargantext
Gargantext.API Gargantext.API
...@@ -90,6 +133,8 @@ library ...@@ -90,6 +133,8 @@ library
Gargantext.Core.Methods.Similarities Gargantext.Core.Methods.Similarities
Gargantext.Core.NLP Gargantext.Core.NLP
Gargantext.Core.NodeStory Gargantext.Core.NodeStory
Gargantext.Core.NodeStory.DB
Gargantext.Core.NodeStory.Types
Gargantext.Core.Text Gargantext.Core.Text
Gargantext.Core.Text.Context Gargantext.Core.Text.Context
Gargantext.Core.Text.Corpus.API Gargantext.Core.Text.Corpus.API
...@@ -308,6 +353,7 @@ library ...@@ -308,6 +353,7 @@ library
Gargantext.Database Gargantext.Database
Gargantext.Database.Action.Delete Gargantext.Database.Action.Delete
Gargantext.Database.Action.Flow.Annuaire Gargantext.Database.Action.Flow.Annuaire
Gargantext.Database.Action.Flow.Extract
Gargantext.Database.Action.Flow.List Gargantext.Database.Action.Flow.List
Gargantext.Database.Action.Flow.Pairing Gargantext.Database.Action.Flow.Pairing
Gargantext.Database.Action.Flow.Utils Gargantext.Database.Action.Flow.Utils
...@@ -385,26 +431,13 @@ library ...@@ -385,26 +431,13 @@ library
Gargantext.Utils.Servant Gargantext.Utils.Servant
Gargantext.Utils.UTCTime Gargantext.Utils.UTCTime
Paths_gargantext Paths_gargantext
ghc-options: -Wall -Wincomplete-uni-patterns -Wincomplete-record-updates -Wmissing-signatures -Wunused-binds -Wunused-imports -Werror -freduction-depth=300 -fprint-potential-instances
hs-source-dirs: hs-source-dirs:
src src
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -Wincomplete-uni-patterns -Wincomplete-record-updates -Wmissing-signatures -Wunused-binds -Wunused-imports -Werror -freduction-depth=300 -fprint-potential-instances
if flag(test-crypto) if flag(test-crypto)
cpp-options: -DTEST_CRYPTO cpp-options: -DTEST_CRYPTO
if flag(no-phylo-debug-logs)
cpp-options: -DNO_PHYLO_DEBUG_LOGS
build-depends: build-depends:
HSvm ^>= 0.1.1.3.22 HSvm ^>= 0.1.1.3.22
, KMP ^>= 0.2.0.0 , KMP ^>= 0.2.0.0
...@@ -423,7 +456,6 @@ library ...@@ -423,7 +456,6 @@ library
, async ^>= 2.2.4 , async ^>= 2.2.4
, attoparsec ^>= 0.13.2.5 , attoparsec ^>= 0.13.2.5
, auto-update ^>= 0.1.6 , auto-update ^>= 0.1.6
, base >=4.7 && <5
, base16-bytestring ^>= 1.0.2.0 , base16-bytestring ^>= 1.0.2.0
, base64-bytestring ^>= 1.1.0.0 , base64-bytestring ^>= 1.1.0.0
, bimap >= 0.5.0 , bimap >= 0.5.0
...@@ -450,6 +482,7 @@ library ...@@ -450,6 +482,7 @@ library
, data-time-segment ^>= 0.1.0.0 , data-time-segment ^>= 0.1.0.0
, deepseq ^>= 1.4.4.0 , deepseq ^>= 1.4.4.0
, directory ^>= 1.3.6.0 , directory ^>= 1.3.6.0
, discrimination >= 0.5
, duckling ^>= 0.2.0.0 , duckling ^>= 0.2.0.0
, ekg-core ^>= 0.1.1.7 , ekg-core ^>= 0.1.1.7
, ekg-json ^>= 0.1.0.7 , ekg-json ^>= 0.1.0.7
...@@ -599,98 +632,35 @@ library ...@@ -599,98 +632,35 @@ library
, yaml ^>= 0.11.8.0 , yaml ^>= 0.11.8.0
, zip ^>= 1.7.2 , zip ^>= 1.7.2
, zlib ^>= 0.6.2.3 , zlib ^>= 0.6.2.3
default-language: Haskell2010
executable gargantext-admin executable gargantext-admin
import:
defaults
, optimized
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
bin/gargantext-admin bin/gargantext-admin
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends: build-depends:
base extra
, extra
, gargantext , gargantext
, gargantext-prelude , gargantext-prelude
, text , text
default-language: Haskell2010
executable gargantext-cbor2json
main-is: Main.hs
other-modules:
Paths_gargantext
hs-source-dirs:
bin/gargantext-cbor2json
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
aeson ^>= 1.5.6.0
, base ^>= 4.14.3
, bytestring ^>= 0.10.12.0
, extra ^>= 1.7.9
, gargantext
, gargantext-prelude
, serialise ^>= 0.2.4.0
, text ^>= 1.2.4.1
default-language: Haskell2010
executable gargantext-cli executable gargantext-cli
import:
defaults
, optimized
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
CleanCsvCorpus CleanCsvCorpus
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
bin/gargantext-cli bin/gargantext-cli
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends: build-depends:
aeson ^>= 1.5.6.0 aeson ^>= 1.5.6.0
, async ^>= 2.2.4 , async ^>= 2.2.4
, base ^>= 4.14.3.0
, bytestring ^>= 0.10.12.0 , bytestring ^>= 0.10.12.0
, cassava ^>= 0.5.2.0 , cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1 , containers ^>= 0.6.5.1
...@@ -705,141 +675,86 @@ executable gargantext-cli ...@@ -705,141 +675,86 @@ executable gargantext-cli
, text ^>= 1.2.4.1 , text ^>= 1.2.4.1
, unordered-containers ^>= 0.2.16.0 , unordered-containers ^>= 0.2.16.0
, vector ^>= 0.12.3.0 , vector ^>= 0.12.3.0
default-language: Haskell2010
executable gargantext-db-obfuscation executable gargantext-db-obfuscation
import:
defaults
, optimized
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
bin/gargantext-db-obfuscation bin/gargantext-db-obfuscation
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
if flag(disable-db-obfuscation-executable) if flag(disable-db-obfuscation-executable)
buildable: False buildable: False
else else
build-depends: build-depends:
base extra
, extra
, gargantext , gargantext
, gargantext-prelude , gargantext-prelude
, optparse-simple , optparse-simple
, postgresql-simple ^>= 0.6.4 , postgresql-simple ^>= 0.6.4
, text , text
default-language: Haskell2010
executable gargantext-import executable gargantext-import
import:
defaults
, optimized
main-is: Main.hs main-is: Main.hs
default-extensions:
TypeOperators
other-modules: other-modules:
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
bin/gargantext-import bin/gargantext-import
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends: build-depends:
base ^>= 4.14.3.0 extra ^>= 1.7.9
, extra ^>= 1.7.9
, gargantext , gargantext
, gargantext-prelude , gargantext-prelude
, servant-server ^>= 0.18.3 , servant-server ^>= 0.18.3
, text ^>= 1.2.4.1 , text ^>= 1.2.4.1
default-language: Haskell2010
executable gargantext-init executable gargantext-init
import:
defaults
, optimized
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
bin/gargantext-init bin/gargantext-init
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends: build-depends:
base ^>= 4.14.3.0 cron ^>= 0.7.0
, cron ^>= 0.7.0
, extra ^>= 1.7.9 , extra ^>= 1.7.9
, gargantext , gargantext
, gargantext-prelude , gargantext-prelude
, text ^>= 1.2.4.1 , text ^>= 1.2.4.1
default-language: Haskell2010
executable gargantext-invitations executable gargantext-invitations
import:
defaults
, optimized
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
bin/gargantext-invitations bin/gargantext-invitations
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends: build-depends:
base ^>= 4.14.3.0 extra ^>= 1.7.9
, extra ^>= 1.7.9
, gargantext , gargantext
, gargantext-prelude , gargantext-prelude
, text ^>= 1.2.4.1 , text ^>= 1.2.4.1
default-language: Haskell2010
executable gargantext-phylo executable gargantext-phylo
import:
defaults
, optimized
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Paths_gargantext Paths_gargantext
Common
hs-source-dirs: hs-source-dirs:
bin/gargantext-phylo bin/gargantext-phylo bin/gargantext-phylo/Phylo
default-extensions: default-extensions:
DataKinds DataKinds
DeriveGeneric DeriveGeneric
...@@ -859,7 +774,6 @@ executable gargantext-phylo ...@@ -859,7 +774,6 @@ executable gargantext-phylo
build-depends: build-depends:
aeson ^>= 1.5.6.0 aeson ^>= 1.5.6.0
, async ^>= 2.2.4 , async ^>= 2.2.4
, base ^>= 4.14.3.0
, bytestring ^>= 0.10.12.0 , bytestring ^>= 0.10.12.0
, cassava ^>= 0.5.2.0 , cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1 , containers ^>= 0.6.5.1
...@@ -876,33 +790,18 @@ executable gargantext-phylo ...@@ -876,33 +790,18 @@ executable gargantext-phylo
, time ^>= 1.9.3 , time ^>= 1.9.3
, unordered-containers ^>= 0.2.16.0 , unordered-containers ^>= 0.2.16.0
, vector ^>= 0.7.3 , vector ^>= 0.7.3
default-language: Haskell2010
executable gargantext-server executable gargantext-server
import:
defaults
, optimized
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
bin/gargantext-server bin/gargantext-server
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -O2 -Wcompat -Wmissing-signatures -rtsopts -threaded -with-rtsopts=-N -with-rtsopts=-T -fprof-auto
build-depends: build-depends:
base ^>= 4.14.3.0 cassava ^>= 0.5.2.0
, cassava ^>= 0.5.2.0
, containers ^>= 0.6.5.1 , containers ^>= 0.6.5.1
, extra ^>= 1.7.9 , extra ^>= 1.7.9
, full-text-search ^>= 0.2.1.4 , full-text-search ^>= 0.2.1.4
...@@ -914,41 +813,27 @@ executable gargantext-server ...@@ -914,41 +813,27 @@ executable gargantext-server
, text ^>= 1.2.4.1 , text ^>= 1.2.4.1
, unordered-containers ^>= 0.2.16.0 , unordered-containers ^>= 0.2.16.0
, vector ^>= 0.7.3 , vector ^>= 0.7.3
default-language: Haskell2010
executable gargantext-upgrade executable gargantext-upgrade
import:
defaults
, optimized
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
bin/gargantext-upgrade bin/gargantext-upgrade
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends: build-depends:
base ^>= 4.14.3.0 cron ^>= 0.7.0
, cron ^>= 0.7.0
, extra ^>= 1.7.9 , extra ^>= 1.7.9
, gargantext , gargantext
, gargantext-prelude , gargantext-prelude
, postgresql-simple ^>= 0.6.4 , postgresql-simple ^>= 0.6.4
, text ^>= 1.2.4.1 , text ^>= 1.2.4.1
default-language: Haskell2010
test-suite garg-test-tasty test-suite garg-test-tasty
import:
defaults
type: exitcode-stdio-1.0 type: exitcode-stdio-1.0
main-is: drivers/tasty/Main.hs main-is: drivers/tasty/Main.hs
other-modules: other-modules:
...@@ -985,28 +870,12 @@ test-suite garg-test-tasty ...@@ -985,28 +870,12 @@ test-suite garg-test-tasty
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
test test
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends: build-depends:
QuickCheck ^>= 2.14.2 QuickCheck ^>= 2.14.2
, aeson ^>= 1.5.6.0 , aeson ^>= 1.5.6.0
, aeson-qq , aeson-qq
, async ^>= 2.2.4 , async ^>= 2.2.4
, base ^>= 4.14.3.0
, boolexpr ^>= 0.2 , boolexpr ^>= 0.2
, bytestring ^>= 0.10.12.0 , bytestring ^>= 0.10.12.0
, case-insensitive , case-insensitive
...@@ -1064,9 +933,10 @@ test-suite garg-test-tasty ...@@ -1064,9 +933,10 @@ test-suite garg-test-tasty
, wai , wai
, wai-extra , wai-extra
, warp , warp
default-language: Haskell2010
test-suite garg-test-hspec test-suite garg-test-hspec
import:
defaults
type: exitcode-stdio-1.0 type: exitcode-stdio-1.0
main-is: drivers/hspec/Main.hs main-is: drivers/hspec/Main.hs
other-modules: other-modules:
...@@ -1086,38 +956,12 @@ test-suite garg-test-hspec ...@@ -1086,38 +956,12 @@ test-suite garg-test-hspec
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
test test
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NoImplicitPrelude
OverloadedStrings
RankNTypes
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends: build-depends:
QuickCheck ^>= 2.14.2 QuickCheck ^>= 2.14.2
, aeson ^>= 1.5.6.0 , aeson ^>= 1.5.6.0
, aeson-qq , aeson-qq
, async ^>= 2.2.4 , async ^>= 2.2.4
, base ^>= 4.14.3.0
, boolexpr ^>= 0.2 , boolexpr ^>= 0.2
, bytestring ^>= 0.10.12.0 , bytestring ^>= 0.10.12.0
, case-insensitive , case-insensitive
...@@ -1174,7 +1018,6 @@ test-suite garg-test-hspec ...@@ -1174,7 +1018,6 @@ test-suite garg-test-hspec
, wai , wai
, wai-extra , wai-extra
, warp , warp
default-language: Haskell2010
benchmark garg-bench benchmark garg-bench
main-is: Main.hs main-is: Main.hs
...@@ -1191,3 +1034,27 @@ benchmark garg-bench ...@@ -1191,3 +1034,27 @@ benchmark garg-bench
ghc-options: "-with-rtsopts=-T -A32m" ghc-options: "-with-rtsopts=-T -A32m"
if impl(ghc >= 8.6) if impl(ghc >= 8.6)
ghc-options: "-with-rtsopts=--nonmoving-gc" ghc-options: "-with-rtsopts=--nonmoving-gc"
executable gargantext-phylo-profile
main-is: Main.hs
other-modules:
Paths_gargantext
Common
hs-source-dirs:
bin/gargantext-phylo-profile bin/gargantext-phylo/Phylo
default-extensions: GHC2021
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
, bytestring
, gargantext
, gargantext-prelude
, shelly
, text
, async
, cryptohash
, aeson
, split
, vector
, directory
default-language: Haskell2010
...@@ -48,10 +48,8 @@ import Gargantext.API.Admin.Settings.CORS ...@@ -48,10 +48,8 @@ import Gargantext.API.Admin.Settings.CORS
import Gargantext.API.Admin.Types (FireWall(..), PortNumber, cookieSettings, jwtSettings, settings, corsSettings) import Gargantext.API.Admin.Types (FireWall(..), PortNumber, cookieSettings, jwtSettings, settings, corsSettings)
import Gargantext.API.EKG import Gargantext.API.EKG
import Gargantext.API.Middleware (logStdoutDevSanitised) import Gargantext.API.Middleware (logStdoutDevSanitised)
import Gargantext.API.Ngrams (saveNodeStoryImmediate)
import Gargantext.API.Routes import Gargantext.API.Routes
import Gargantext.API.Server (server) import Gargantext.API.Server (server)
import Gargantext.Core.NodeStory
import Gargantext.Database.Prelude qualified as DB import Gargantext.Database.Prelude qualified as DB
import Gargantext.Prelude hiding (putStrLn) import Gargantext.Prelude hiding (putStrLn)
import Gargantext.System.Logging import Gargantext.System.Logging
...@@ -74,7 +72,7 @@ startGargantext mode port file = withLoggerHoisted mode $ \logger -> do ...@@ -74,7 +72,7 @@ startGargantext mode port file = withLoggerHoisted mode $ \logger -> do
app <- makeApp env app <- makeApp env
mid <- makeGargMiddleware (env ^. settings.corsSettings) mode mid <- makeGargMiddleware (env ^. settings.corsSettings) mode
periodicActions <- schedulePeriodicActions env periodicActions <- schedulePeriodicActions env
run port (mid app) `finally` stopGargantext env periodicActions run port (mid app) `finally` stopGargantext periodicActions
where runDbCheck env = do where runDbCheck env = do
r <- runExceptT (runReaderT DB.dbCheck env) `catch` r <- runExceptT (runReaderT DB.dbCheck env) `catch`
...@@ -94,11 +92,10 @@ portRouteInfo port = do ...@@ -94,11 +92,10 @@ portRouteInfo port = do
-- | Stops the gargantext server and cancels all the periodic actions -- | Stops the gargantext server and cancels all the periodic actions
-- scheduled to run up to that point. -- scheduled to run up to that point.
-- TODO clean this Monad condition (more generic) ? -- TODO clean this Monad condition (more generic) ?
stopGargantext :: HasNodeStoryImmediateSaver env => env -> [ThreadId] -> IO () stopGargantext :: [ThreadId] -> IO ()
stopGargantext env scheduledPeriodicActions = do stopGargantext scheduledPeriodicActions = do
forM_ scheduledPeriodicActions killThread forM_ scheduledPeriodicActions killThread
putStrLn "----- Stopping gargantext -----" putStrLn "----- Stopping gargantext -----"
runReaderT saveNodeStoryImmediate env
-- | Schedules all sorts of useful periodic actions to be run while -- | Schedules all sorts of useful periodic actions to be run while
-- the server is alive accepting requests. -- the server is alive accepting requests.
......
...@@ -136,9 +136,6 @@ instance HasConnectionPool Env where ...@@ -136,9 +136,6 @@ instance HasConnectionPool Env where
instance HasNodeStoryEnv Env where instance HasNodeStoryEnv Env where
hasNodeStory = env_nodeStory hasNodeStory = env_nodeStory
instance HasNodeStoryVar Env where
hasNodeStoryVar = hasNodeStory . nse_getter
instance HasNodeStoryImmediateSaver Env where instance HasNodeStoryImmediateSaver Env where
hasNodeStoryImmediateSaver = hasNodeStory . nse_saver_immediate hasNodeStoryImmediateSaver = hasNodeStory . nse_saver_immediate
...@@ -314,9 +311,6 @@ instance HasSettings DevEnv where ...@@ -314,9 +311,6 @@ instance HasSettings DevEnv where
instance HasNodeStoryEnv DevEnv where instance HasNodeStoryEnv DevEnv where
hasNodeStory = dev_env_nodeStory hasNodeStory = dev_env_nodeStory
instance HasNodeStoryVar DevEnv where
hasNodeStoryVar = hasNodeStory . nse_getter
instance HasNodeStoryImmediateSaver DevEnv where instance HasNodeStoryImmediateSaver DevEnv where
hasNodeStoryImmediateSaver = hasNodeStory . nse_saver_immediate hasNodeStoryImmediateSaver = hasNodeStory . nse_saver_immediate
......
...@@ -16,7 +16,6 @@ import Control.Monad (fail) ...@@ -16,7 +16,6 @@ import Control.Monad (fail)
import Gargantext.API.Admin.EnvTypes import Gargantext.API.Admin.EnvTypes
import Gargantext.API.Admin.Settings import Gargantext.API.Admin.Settings
import Gargantext.API.Errors.Types import Gargantext.API.Errors.Types
import Gargantext.API.Ngrams (saveNodeStoryImmediate)
import Gargantext.API.Prelude import Gargantext.API.Prelude
import Gargantext.Core.NLP (nlpServerMap) import Gargantext.Core.NLP (nlpServerMap)
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
...@@ -72,9 +71,7 @@ runCmdDev env f = ...@@ -72,9 +71,7 @@ runCmdDev env f =
runCmdGargDev :: DevEnv -> GargM DevEnv BackendInternalError a -> IO a runCmdGargDev :: DevEnv -> GargM DevEnv BackendInternalError a -> IO a
runCmdGargDev env cmd = runCmdGargDev env cmd =
(either (fail . show) pure =<< runExceptT (runReaderT cmd env)) either (fail . show) pure =<< runExceptT (runReaderT cmd env)
`finally`
runReaderT saveNodeStoryImmediate env
runCmdDevNoErr :: DevEnv -> Cmd' DevEnv () a -> IO a runCmdDevNoErr :: DevEnv -> Cmd' DevEnv () a -> IO a
runCmdDevNoErr = runCmdDev runCmdDevNoErr = runCmdDev
......
...@@ -60,7 +60,6 @@ module Gargantext.API.Ngrams ...@@ -60,7 +60,6 @@ module Gargantext.API.Ngrams
, r_history , r_history
, NgramsRepoElement(..) , NgramsRepoElement(..)
, saveNodeStory , saveNodeStory
, saveNodeStoryImmediate
, initRepo , initRepo
, TabType(..) , TabType(..)
...@@ -87,7 +86,7 @@ module Gargantext.API.Ngrams ...@@ -87,7 +86,7 @@ module Gargantext.API.Ngrams
) )
where where
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, non, ifolded, to, withIndex, over) import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
import Control.Monad.Reader import Control.Monad.Reader
import Data.Aeson.Text qualified as DAT import Data.Aeson.Text qualified as DAT
import Data.Foldable import Data.Foldable
...@@ -105,10 +104,10 @@ import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs) ...@@ -105,10 +104,10 @@ import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings) import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types import Gargantext.API.Errors.Types
import Gargantext.API.Metrics qualified as Metrics import Gargantext.API.Metrics qualified as Metrics
import Gargantext.API.Ngrams.Tools import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude import Gargantext.API.Prelude
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory (ArchiveList, HasNodeStory, HasNodeArchiveStoryImmediateSaver(..), HasNodeStoryImmediateSaver(..), NgramsStatePatch', a_history, a_state, a_version, currentVersion)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasValidationError, ContextId) import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasValidationError, ContextId)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..)) import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast) import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
...@@ -123,7 +122,6 @@ import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id) ...@@ -123,7 +122,6 @@ import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
import Gargantext.Prelude hiding (log, to, toLower, (%), isInfixOf) import Gargantext.Prelude hiding (log, to, toLower, (%), isInfixOf)
import Gargantext.Prelude.Clock (hasTime, getTime) import Gargantext.Prelude.Clock (hasTime, getTime)
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..)) import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import GHC.Conc (readTVar, writeTVar)
import Servant hiding (Patch) import Servant hiding (Patch)
{- {-
...@@ -174,23 +172,10 @@ mkChildrenGroups addOrRem nt patches = ...@@ -174,23 +172,10 @@ mkChildrenGroups addOrRem nt patches =
------------------------------------------------------------------------ ------------------------------------------------------------------------
saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env ) saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
=> m () => NodeId -> ArchiveList -> m ()
saveNodeStory = do saveNodeStory nId a = do
saver <- view hasNodeStoryImmediateSaver saver <- view hasNodeStoryImmediateSaver
liftBase $ do liftBase $ saver nId a
--Gargantext.Prelude.putStrLn "---- Running node story saver ----"
saver
--Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
=> m ()
saveNodeStoryImmediate = do
saver <- view hasNodeStoryImmediateSaver
liftBase $ do
--Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
saver
--Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
listTypeConflictResolution :: ListType -> ListType -> ListType listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType listTypeConflictResolution _ _ = undefined -- TODO Use Map User ListType
...@@ -256,19 +241,23 @@ setListNgrams :: HasNodeStory env err m ...@@ -256,19 +241,23 @@ setListNgrams :: HasNodeStory env err m
-> m () -> m ()
setListNgrams listId ngramsType ns = do setListNgrams listId ngramsType ns = do
-- printDebug "[setListNgrams]" (listId, ngramsType) -- printDebug "[setListNgrams]" (listId, ngramsType)
var <- getNodeStoryVar [listId] a <- getNodeStory listId
liftBase $ atomically $ do let a' = a & a_state . at ngramsType %~ (\mns' -> case mns' of
nls <- readTVar var Nothing -> Just ns
writeTVar var $ Just ns' -> Just $ ns <> ns')
( unNodeStory saveNodeStory listId a'
. at listId . _Just -- liftBase $ atomically $ do
. a_state -- nls <- readTVar var
. at ngramsType -- writeTVar var $
%~ (\mns' -> case mns' of -- ( unNodeStory
Nothing -> Just ns -- . at listId . _Just
Just ns' -> Just $ ns <> ns') -- . a_state
) nls -- . at ngramsType
saveNodeStory -- %~ (\mns' -> case mns' of
-- Nothing -> Just ns
-- Just ns' -> Just $ ns <> ns')
-- ) nls
-- saveNodeStory
newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams] newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
...@@ -292,11 +281,11 @@ commitStatePatch :: ( HasNodeStory env err m ...@@ -292,11 +281,11 @@ commitStatePatch :: ( HasNodeStory env err m
-> m (Versioned NgramsStatePatch') -> m (Versioned NgramsStatePatch')
commitStatePatch listId (Versioned _p_version p) = do commitStatePatch listId (Versioned _p_version p) = do
-- printDebug "[commitStatePatch]" listId -- printDebug "[commitStatePatch]" listId
var <- getNodeStoryVar [listId] a <- getNodeStory listId
archiveSaver <- view hasNodeArchiveStoryImmediateSaver archiveSaver <- view hasNodeArchiveStoryImmediateSaver
ns <- liftBase $ atomically $ readTVar var -- ns <- liftBase $ atomically $ readTVar var
let let
a = ns ^. unNodeStory . at listId . non initArchive -- a = ns ^. unNodeStory . at listId . non initArchive
-- apply patches from version p_version to a ^. a_version -- apply patches from version p_version to a ^. a_version
-- TODO Check this -- TODO Check this
--q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history) --q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
...@@ -327,10 +316,12 @@ commitStatePatch listId (Versioned _p_version p) = do ...@@ -327,10 +316,12 @@ commitStatePatch listId (Versioned _p_version p) = do
-} -}
-- printDebug "[commitStatePatch] a version" (a ^. a_version) -- printDebug "[commitStatePatch] a version" (a ^. a_version)
-- printDebug "[commitStatePatch] a' version" (a' ^. a_version) -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
let newNs = ( ns & unNodeStory . at listId .~ (Just a') -- let newNs = ( ns & unNodeStory . at listId .~ (Just a')
, Versioned (a' ^. a_version) q' -- , Versioned (a' ^. a_version) q'
) -- )
let newA = Versioned (a' ^. a_version) q'
-- NOTE Now is the only good time to save the archive history. We -- NOTE Now is the only good time to save the archive history. We
-- have the handle to the MVar and we need to save its exact -- have the handle to the MVar and we need to save its exact
-- snapshot. Node Story archive is a linear table, so it's only -- snapshot. Node Story archive is a linear table, so it's only
...@@ -353,16 +344,15 @@ commitStatePatch listId (Versioned _p_version p) = do ...@@ -353,16 +344,15 @@ commitStatePatch listId (Versioned _p_version p) = do
-- archive was saved and applied) -- archive was saved and applied)
-- newNs' <- archiveSaver $ fst newNs -- newNs' <- archiveSaver $ fst newNs
liftBase $ do liftBase $ do
newNs' <- archiveSaver $ fst newNs -- newNs' <- archiveSaver $ fst newNs
atomically $ writeTVar var newNs' -- atomically $ writeTVar var newNs'
void $ archiveSaver listId a'
-- Save new ngrams -- Save new ngrams
_ <- insertNgrams (newNgramsFromNgramsStatePatch p) _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
-- NOTE State (i.e. `NodeStory` can be saved asynchronously, i.e. with debounce) saveNodeStory listId a'
-- saveNodeStory
saveNodeStoryImmediate
pure $ snd newNs pure newA
...@@ -374,11 +364,11 @@ tableNgramsPull :: HasNodeStory env err m ...@@ -374,11 +364,11 @@ tableNgramsPull :: HasNodeStory env err m
-> m (Versioned NgramsTablePatch) -> m (Versioned NgramsTablePatch)
tableNgramsPull listId ngramsType p_version = do tableNgramsPull listId ngramsType p_version = do
-- printDebug "[tableNgramsPull]" (listId, ngramsType) -- printDebug "[tableNgramsPull]" (listId, ngramsType)
var <- getNodeStoryVar [listId] a <- getNodeStory listId
r <- liftBase $ atomically $ readTVar var -- r <- liftBase $ atomically $ readTVar var
let let
a = r ^. unNodeStory . at listId . non initArchive -- a = r ^. unNodeStory . at listId . non initArchive
q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history) q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
q_table = q ^. _PatchMap . at ngramsType . _Just q_table = q ^. _PatchMap . at ngramsType . _Just
...@@ -502,10 +492,9 @@ getNgramsTableMap :: HasNodeStory env err m ...@@ -502,10 +492,9 @@ getNgramsTableMap :: HasNodeStory env err m
-> TableNgrams.NgramsType -> TableNgrams.NgramsType
-> m (Versioned NgramsTableMap) -> m (Versioned NgramsTableMap)
getNgramsTableMap nodeId ngramsType = do getNgramsTableMap nodeId ngramsType = do
v <- getNodeStoryVar [nodeId] a <- getNodeStory nodeId
repo <- liftBase $ atomically $ readTVar v pure $ Versioned (a ^. a_version)
pure $ Versioned (repo ^. unNodeStory . at nodeId . _Just . a_version) (a ^. a_state . at ngramsType . _Just)
(repo ^. unNodeStory . at nodeId . _Just . a_state . at ngramsType . _Just)
dumpJsonTableMap :: HasNodeStory env err m dumpJsonTableMap :: HasNodeStory env err m
......
...@@ -23,7 +23,7 @@ import Data.HashMap.Strict qualified as HM ...@@ -23,7 +23,7 @@ import Data.HashMap.Strict qualified as HM
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Set qualified as Set import Data.Set qualified as Set
import Data.Validity import Data.Validity
import GHC.Conc (TVar, readTVar) -- import GHC.Conc (TVar, readTVar)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Core.Types (ListType(..), NodeId, ListId) import Gargantext.Core.Types (ListType(..), NodeId, ListId)
...@@ -40,10 +40,11 @@ type RootTerm = NgramsTerm ...@@ -40,10 +40,11 @@ type RootTerm = NgramsTerm
getRepo :: HasNodeStory env err m getRepo :: HasNodeStory env err m
=> [ListId] -> m NodeListStory => [ListId] -> m NodeListStory
getRepo listIds = do getRepo listIds = do
f <- getNodeListStory f <- getNodeListStoryMulti
v <- liftBase $ f listIds liftBase $ f listIds
v' <- liftBase $ atomically $ readTVar v -- v <- liftBase $ f listIds
pure $ v' -- v' <- liftBase $ atomically $ readTVar v
-- pure $ v'
repoSize :: Ord k1 => NodeStory (Map.Map k1 (Map.Map k2 a)) p repoSize :: Ord k1 => NodeStory (Map.Map k1 (Map.Map k2 a)) p
...@@ -56,21 +57,29 @@ repoSize repo node_id = Map.map Map.size state' ...@@ -56,21 +57,29 @@ repoSize repo node_id = Map.map Map.size state'
. a_state . a_state
getNodeStoryVar :: HasNodeStory env err m getNodeStory :: HasNodeStory env err m
=> [ListId] -> m (TVar NodeListStory) => ListId -> m ArchiveList
getNodeStoryVar l = do getNodeStory l = do
f <- getNodeListStory f <- getNodeListStory
v <- liftBase $ f l liftBase $ f l
pure v -- v <- liftBase $ f l
-- pure v
getNodeListStory :: HasNodeStory env err m getNodeListStory :: HasNodeStory env err m
=> m ([NodeId] -> IO (TVar NodeListStory)) => m (NodeId -> IO ArchiveList)
getNodeListStory = do getNodeListStory = do
env <- view hasNodeStory env <- view hasNodeStory
pure $ view nse_getter env pure $ view nse_getter env
getNodeListStoryMulti :: HasNodeStory env err m
=> m ([NodeId] -> IO NodeListStory)
getNodeListStoryMulti = do
env <- view hasNodeStory
pure $ view nse_getter_multi env
listNgramsFromRepo :: [ListId] listNgramsFromRepo :: [ListId]
-> NgramsType -> NgramsType
......
{-| {-|
Module : Gargantext.Core.NodeStory Module : Gargantext.Core.NodeStory
Description : Node API generation Description : NodeStory
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
...@@ -40,51 +40,20 @@ TODO: ...@@ -40,51 +40,20 @@ TODO:
- charger les listes - charger les listes
-} -}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-} {-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.NodeStory module Gargantext.Core.NodeStory
( HasNodeStory ( module Gargantext.Core.NodeStory.Types
, HasNodeStoryEnv
, hasNodeStory
, HasNodeStoryVar
, hasNodeStoryVar
, HasNodeStoryImmediateSaver
, hasNodeStoryImmediateSaver
, HasNodeArchiveStoryImmediateSaver
, hasNodeArchiveStoryImmediateSaver
, NodeStory(..)
, NgramsStatePatch'
, NodeListStory
, ArchiveList
, initNodeListStoryMock
, NodeStoryEnv(..)
, initNodeStory
, nse_getter
, nse_saver_immediate
, nse_archive_saver_immediate
, nse_var
, unNodeStory
, getNodesArchiveHistory , getNodesArchiveHistory
, Archive(..) , Archive(..)
, initArchive
, archiveAdvance
, unionArchives
, a_history
, a_state
, a_version
, nodeExists , nodeExists
, runPGSQuery
, runPGSAdvisoryLock
, runPGSAdvisoryUnlock
, runPGSAdvisoryXactLock
, getNodesIdWithType , getNodesIdWithType
, fromDBNodeStoryEnv , fromDBNodeStoryEnv
, upsertNodeStories , upsertNodeStories
, getNodeStory -- , getNodeStory
, nodeStoriesQuery , nodeStoriesQuery
, currentVersion , currentVersion
, archiveStateFromList , archiveStateFromList
...@@ -92,282 +61,28 @@ module Gargantext.Core.NodeStory ...@@ -92,282 +61,28 @@ module Gargantext.Core.NodeStory
, fixNodeStoryVersions ) , fixNodeStoryVersions )
where where
import Codec.Serialise.Class import Control.Lens ((^.), (.~), (%~), non, _Just, at, view)
import Control.Lens (makeLenses, Getter, (^.), (.~), (%~), non, _Just, at, view)
import Control.Monad.Except import Control.Monad.Except
import Control.Monad.Reader
import Data.Aeson hiding ((.=), decode)
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Data.Monoid import Data.Monoid
import Data.Pool (Pool, withResource) import Data.Pool (Pool, withResource)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Semigroup
import Data.Set qualified as Set import Data.Set qualified as Set
import Database.PostgreSQL.Simple qualified as PGS import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.FromField (FromField(fromField), fromJSONField)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.ToField qualified as PGS import Database.PostgreSQL.Simple.ToField qualified as PGS
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import GHC.Conc (TVar, newTVar, readTVar, writeTVar)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core (toDBid) import Gargantext.Core.NodeStory.DB
import Gargantext.Core.Types (ListId, NodeId(..), NodeType) import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Types (ListId, NodeId(..))
import Gargantext.Database.Admin.Config () import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Prelude (DbCmd', HasConnectionPool(..)) import Gargantext.Database.Prelude (HasConnectionPool(..))
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude hiding (to) import Gargantext.Prelude hiding (to)
import Gargantext.Prelude.Database import Gargantext.Prelude.Database
import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField)
------------------------------------------------------------------------
data NodeStoryEnv = NodeStoryEnv
{ _nse_var :: !(TVar NodeListStory)
, _nse_saver_immediate :: !(IO ())
, _nse_archive_saver_immediate :: !(NodeListStory -> IO NodeListStory)
, _nse_getter :: !([NodeId] -> IO (TVar NodeListStory))
--, _nse_cleaner :: !(IO ()) -- every 12 hours: cleans the repos of unused NodeStories
-- , _nse_lock :: !FileLock -- TODO (it depends on the option: if with database or file only)
}
deriving (Generic)
type HasNodeStory env err m = ( DbCmd' env err m
, MonadReader env m
, MonadError err m
, HasNodeStoryEnv env
, HasNodeError err
)
class (HasNodeStoryVar env, HasNodeStoryImmediateSaver env)
=> HasNodeStoryEnv env where
hasNodeStory :: Getter env NodeStoryEnv
class HasNodeStoryVar env where
hasNodeStoryVar :: Getter env ([NodeId] -> IO (TVar NodeListStory))
class HasNodeStoryImmediateSaver env where
hasNodeStoryImmediateSaver :: Getter env (IO ())
class HasNodeArchiveStoryImmediateSaver env where
hasNodeArchiveStoryImmediateSaver :: Getter env (NodeListStory -> IO NodeListStory)
------------------------------------------------------------------------
{- | Node Story for each NodeType where the Key of the Map is NodeId
TODO : generalize for any NodeType, let's start with NodeList which
is implemented already
-}
newtype NodeStory s p = NodeStory { _unNodeStory :: Map NodeId (Archive s p) }
deriving (Generic, Show, Eq)
instance (FromJSON s, FromJSON p) => FromJSON (NodeStory s p)
instance (ToJSON s, ToJSON p) => ToJSON (NodeStory s p)
instance (Serialise s, Serialise p) => Serialise (NodeStory s p)
data Archive s p = Archive
{ _a_version :: !Version
, _a_state :: !s
, _a_history :: ![p]
-- first patch in the list is the most recent
-- We use `take` in `commitStatePatch`, that's why.
-- History is immutable, we just insert things on top of existing
-- list.
-- We don't need to store the whole history in memory, this
-- structure holds only recent history, the one that will be
-- inserted to the DB.
}
deriving (Generic, Show, Eq)
instance (Serialise s, Serialise p) => Serialise (Archive s p)
type NodeListStory = NodeStory NgramsState' NgramsStatePatch'
-- NOTE: 'type NgramsTableMap = Map NgramsTerm NgramsRepoElement'
type NgramsState' = Map TableNgrams.NgramsType NgramsTableMap
type NgramsStatePatch' = PatchMap TableNgrams.NgramsType NgramsTablePatch
instance Serialise NgramsStatePatch'
instance FromField (Archive NgramsState' NgramsStatePatch')
where
fromField = fromJSONField
instance DefaultFromField SqlJsonb (Archive NgramsState' NgramsStatePatch')
where
defaultFromField = fromPGSFromField
-- | Combine `NgramsState'`. This is because the structure is (Map
-- NgramsType (Map ...)) and the default `(<>)` operator is
-- left-biased
-- (https://hackage.haskell.org/package/containers-0.6.6/docs/Data-Map-Internal.html#v:union)
combineState :: NgramsState' -> NgramsState' -> NgramsState'
combineState = Map.unionWith (<>)
-- This is not a typical Semigroup instance. The state is not
-- appended, instead it is replaced with the second entry. This is
-- because state changes with each version. We have to take into
-- account the removal of terms as well.
-- instance (Semigroup s, Semigroup p) => Semigroup (Archive s p) where
-- (<>) (Archive { _a_history = p }) (Archive { _a_version = v'
-- , _a_state = s'
-- , _a_history = p' }) =
-- Archive { _a_version = v'
-- , _a_state = s'
-- , _a_history = p' <> p }
-- instance (Monoid s, Semigroup p) => Monoid (Archive s p) where
-- mempty = Archive { _a_version = 0
-- , _a_state = mempty
-- , _a_history = [] }
instance (FromJSON s, FromJSON p) => FromJSON (Archive s p) where
parseJSON = genericParseJSON $ unPrefix "_a_"
instance (ToJSON s, ToJSON p) => ToJSON (Archive s p) where
toJSON = genericToJSON $ unPrefix "_a_"
toEncoding = genericToEncoding $ unPrefix "_a_"
-- | This is the normal way to update archive state, bumping the
-- version and history. Resulting state is taken directly from new
-- archive, omitting old archive completely.
archiveAdvance :: (Semigroup s, Semigroup p) => Archive s p -> Archive s p -> Archive s p
archiveAdvance aOld aNew = aNew { _a_history = _a_history aNew <> _a_history aOld }
-- | This is to merge archive states.
unionArchives :: (Semigroup s, Semigroup p) => Archive s p -> Archive s p -> Archive s p
unionArchives aOld aNew = aNew { _a_state = _a_state aOld <> _a_state aNew
, _a_history = _a_history aNew <> _a_history aOld }
------------------------------------------------------------------------
initNodeStory :: (Monoid s, Semigroup p) => NodeId -> NodeStory s p
initNodeStory ni = NodeStory $ Map.singleton ni initArchive
initArchive :: (Monoid s, Semigroup p) => Archive s p
initArchive = Archive { _a_version = 0
, _a_state = mempty
, _a_history = [] }
initNodeListStoryMock :: NodeListStory
initNodeListStoryMock = NodeStory $ Map.singleton nodeListId archive
where
nodeListId = 0
archive = Archive { _a_version = 0
, _a_state = ngramsTableMap
, _a_history = [] }
ngramsTableMap = Map.singleton TableNgrams.NgramsTerms
$ Map.fromList
[ (n ^. ne_ngrams, ngramsElementToRepo n)
| n <- mockTable ^. _NgramsTable
]
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Lenses at the bottom of the file because Template Haskell would reorder order of execution in others cases
makeLenses ''NodeStoryEnv
makeLenses ''NodeStory
makeLenses ''Archive
----------------------------------------------------------------------
data NodeStoryPoly nid v ngtid ngid nre =
NodeStoryDB { node_id :: !nid
, version :: !v
, ngrams_type_id :: !ngtid
, ngrams_id :: !ngid
, ngrams_repo_element :: !nre }
deriving (Eq)
data NodeStoryArchivePoly nid a =
NodeStoryArchiveDB { a_node_id :: !nid
, archive :: !a }
deriving (Eq)
$(makeAdaptorAndInstance "pNodeStory" ''NodeStoryPoly)
$(makeAdaptorAndInstance "pNodeArchiveStory" ''NodeStoryArchivePoly)
-- type NodeStoryWrite = NodeStoryPoly (Column SqlInt4) (Column SqlInt4) (Column SqlInt4) (Column SqlInt4) (Column SqlJsonb)
-- type NodeStoryRead = NodeStoryPoly (Column SqlInt4) (Column SqlInt4) (Column SqlInt4) (Column SqlInt4) (Column SqlJsonb)
-- type NodeStoryArchiveWrite = NodeStoryArchivePoly (Column SqlInt4) (Column SqlJsonb)
-- type NodeStoryArchiveRead = NodeStoryArchivePoly (Column SqlInt4) (Column SqlJsonb)
type ArchiveList = Archive NgramsState' NgramsStatePatch'
-- DB stuff
nodeExists :: PGS.Connection -> NodeId -> IO Bool
nodeExists c nId = (== [PGS.Only True])
<$> runPGSQuery c [sql| SELECT true FROM nodes WHERE id = ? LIMIT 1 |]
(PGS.Only nId)
getNodesIdWithType :: PGS.Connection -> NodeType -> IO [NodeId]
getNodesIdWithType c nt = do
ns <- runPGSQuery c query (PGS.Only $ toDBid nt)
pure $ map (\(PGS.Only nId) -> UnsafeMkNodeId nId) ns
where
query :: PGS.Query
query = [sql| SELECT id FROM nodes WHERE typename = ? |]
-- /!\ This function is using an hard coded parameter
-- which depends on the Ngrams List Flow
-- Version > 5 is hard coded because by default
-- first version of history of manual change is 6
getNodesArchiveHistory :: PGS.Connection
-> [NodeId]
-> IO [(NodeId, (Map NgramsType [HashMap NgramsTerm NgramsPatch]))]
getNodesArchiveHistory c nodesId = do
as <- runPGSQuery c query (PGS.Only $ Values fields nodesId)
:: IO [(Int, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
pure $ map (\(nId, ngramsType, terms, patch)
-> ( UnsafeMkNodeId nId
, Map.singleton ngramsType [HashMap.singleton terms patch]
)
) as
where
fields = [QualifiedIdentifier Nothing "int4"]
query :: PGS.Query
query = [sql| WITH nodes_id(nid) as (?)
SELECT node_id, ngrams_type_id, terms, patch
FROM node_story_archive_history
JOIN ngrams ON ngrams.id = ngrams_id
JOIN nodes_id n ON node_id = n.nid
WHERE version > 5
ORDER BY (version, node_story_archive_history.id) DESC
|]
insertNodeArchiveHistory :: PGS.Connection -> NodeId -> Version -> [NgramsStatePatch'] -> IO ()
insertNodeArchiveHistory _ _ _ [] = pure ()
insertNodeArchiveHistory c nodeId version (h:hs) = do
let tuples = mconcat $ (\(nType, NgramsTablePatch patch) ->
(\(term, p) ->
(nodeId, nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
tuplesM <- mapM (\(nId, nType, term, patch) -> do
[PGS.Only ngramsId] <- runPGSReturning c qInsert [PGS.Only term] :: IO [PGS.Only Int]
pure (nId, nType, ngramsId, term, patch)
) tuples :: IO [(NodeId, TableNgrams.NgramsType, Int, NgramsTerm, NgramsPatch)]
_ <- runPGSExecuteMany c query $ ((\(nId, nType, termId, _term, patch) -> (nId, nType, termId, patch, version)) <$> tuplesM)
_ <- insertNodeArchiveHistory c nodeId version hs
pure ()
where
qInsert :: PGS.Query
qInsert = [sql|INSERT INTO ngrams (terms) VALUES (?)
ON CONFLICT (terms) DO UPDATE SET terms = excluded.terms
RETURNING id|]
-- https://stackoverflow.com/questions/39224438/postgresql-insert-if-foreign-key-exists
query :: PGS.Query
query = [sql| INSERT INTO node_story_archive_history(node_id, ngrams_type_id, ngrams_id, patch, version)
VALUES (?, ?, ?, ?, ?)
|]
getNodeStory :: PGS.Connection -> NodeId -> IO NodeListStory getNodeStory' :: PGS.Connection -> NodeId -> IO ArchiveList
getNodeStory c nId = do getNodeStory' c nId = do
--res <- withResource pool $ \c -> runSelect c query :: IO [NodeStoryPoly NodeId Version Int Int NgramsRepoElement] --res <- withResource pool $ \c -> runSelect c query :: IO [NodeStoryPoly NodeId Version Int Int NgramsRepoElement]
res <- runPGSQuery c nodeStoriesQuery (PGS.Only $ PGS.toField nId) :: IO [(Version, TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)] res <- runPGSQuery c nodeStoriesQuery (PGS.Only $ PGS.toField nId) :: IO [(Version, TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)]
-- We have multiple rows with same node_id and different (ngrams_type_id, ngrams_id). -- We have multiple rows with same node_id and different (ngrams_type_id, ngrams_id).
...@@ -390,21 +105,17 @@ getNodeStory c nId = do ...@@ -390,21 +105,17 @@ getNodeStory c nId = do
pure () pure ()
-} -}
pure $ NodeStory $ Map.singleton nId $ foldl combine initArchive dbData pure $ foldl combine initArchive dbData
where where
-- NOTE (<>) for Archive doesn't concatenate states, so we have to use `combine` -- NOTE (<>) for Archive doesn't concatenate states, so we have to use `combine`
combine a1 a2 = a1 & a_state %~ combineState (a2 ^. a_state) combine a1 a2 = a1 & a_state %~ combineState (a2 ^. a_state)
& a_version .~ (a2 ^. a_version) -- version should be updated from list, not taken from the empty Archive & a_version .~ (a2 ^. a_version) -- version should be updated from list, not taken from the empty Archive
nodeStoriesQuery :: PGS.Query
nodeStoriesQuery = [sql| SELECT version, ngrams_type_id, terms, ngrams_repo_element
FROM node_stories
JOIN ngrams ON ngrams.id = ngrams_id
WHERE node_id = ?
|]
type ArchiveStateList = [(TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)] getNodeStory :: PGS.Connection -> NodeId -> IO NodeListStory
type ArchiveStateSet = Set.Set (TableNgrams.NgramsType, NgramsTerm) getNodeStory c nId = do
a <- getNodeStory' c nId
pure $ NodeStory $ Map.singleton nId a
-- |Functions to convert archive state (which is a `Map NgramsType -- |Functions to convert archive state (which is a `Map NgramsType
-- (Map NgramsTerm NgramsRepoElement`)) to/from a flat list -- (Map NgramsTerm NgramsRepoElement`)) to/from a flat list
...@@ -426,53 +137,6 @@ insertNodeStory :: PGS.Connection -> NodeId -> ArchiveList -> IO () ...@@ -426,53 +137,6 @@ insertNodeStory :: PGS.Connection -> NodeId -> ArchiveList -> IO ()
insertNodeStory c nId a = do insertNodeStory c nId a = do
insertArchiveStateList c nId (a ^. a_version) (archiveStateToList $ a ^. a_state) insertArchiveStateList c nId (a ^. a_version) (archiveStateToList $ a ^. a_state)
insertArchiveStateList :: PGS.Connection -> NodeId -> Version -> ArchiveStateList -> IO ()
insertArchiveStateList c nodeId version as = do
mapM_ performInsert as
where
performInsert (ngramsType, ngrams, ngramsRepoElement) = do
[PGS.Only ngramsId] <- tryInsertTerms ngrams
_ <- case ngramsRepoElement ^. nre_root of
Nothing -> pure []
Just r -> tryInsertTerms r
mapM_ tryInsertTerms $ ngramsRepoElement ^. nre_children
runPGSExecute c query (nodeId, ngramsId, version, ngramsType, ngramsRepoElement)
tryInsertTerms :: NgramsTerm -> IO [PGS.Only Int]
tryInsertTerms t = runPGSReturning c qInsert [PGS.Only t]
qInsert :: PGS.Query
qInsert = [sql|INSERT INTO ngrams (terms) VALUES (?)
ON CONFLICT (terms) DO UPDATE SET terms = excluded.terms
RETURNING id|]
query :: PGS.Query
query = [sql|INSERT INTO node_stories(node_id, ngrams_id, version, ngrams_type_id, ngrams_repo_element)
VALUES (?, ?, ?, ?, ? :: jsonb)
|]
deleteArchiveStateList :: PGS.Connection -> NodeId -> ArchiveStateList -> IO ()
deleteArchiveStateList c nodeId as = do
mapM_ (\(nt, n, _) -> runPGSExecute c query (nodeId, nt, n)) as
where
query :: PGS.Query
query = [sql| DELETE FROM node_stories
WHERE node_id = ? AND ngrams_type_id = ?
AND ngrams_id IN (SELECT id FROM ngrams WHERE terms = ?)
|]
updateArchiveStateList :: PGS.Connection -> NodeId -> Version -> ArchiveStateList -> IO ()
updateArchiveStateList c nodeId version as = do
let params = (\(nt, n, nre) -> (nre, version, nodeId, nt, n)) <$> as
mapM_ (runPGSExecute c query) params
where
query :: PGS.Query
query = [sql| UPDATE node_stories
SET ngrams_repo_element = ?, version = ?
WHERE node_id = ? AND ngrams_type_id = ?
AND ngrams_id IN (SELECT id FROM ngrams WHERE terms = ?)
|]
-- | This function updates the node story and archive for given node_id. -- | This function updates the node story and archive for given node_id.
updateNodeStory :: PGS.Connection -> NodeId -> ArchiveList -> ArchiveList -> IO () updateNodeStory :: PGS.Connection -> NodeId -> ArchiveList -> ArchiveList -> IO ()
updateNodeStory c nodeId currentArchive newArchive = do updateNodeStory c nodeId currentArchive newArchive = do
...@@ -523,13 +187,6 @@ updateNodeStory c nodeId currentArchive newArchive = do ...@@ -523,13 +187,6 @@ updateNodeStory c nodeId currentArchive newArchive = do
-- , uWhere = (\row -> node_id row .== sqlInt4 nId) -- , uWhere = (\row -> node_id row .== sqlInt4 nId)
-- , uReturning = rCount } -- , uReturning = rCount }
-- nodeStoryRemove :: Pool PGS.Connection -> NodeId -> IO Int64
-- nodeStoryRemove pool (NodeId nId) = withResource pool $ \c -> runDelete c delete
-- where
-- delete = Delete { dTable = nodeStoryTable
-- , dWhere = (\row -> node_id row .== sqlInt4 nId)
-- , dReturning = rCount }
upsertNodeStories :: PGS.Connection -> NodeId -> ArchiveList -> IO () upsertNodeStories :: PGS.Connection -> NodeId -> ArchiveList -> IO ()
upsertNodeStories c nodeId newArchive = do upsertNodeStories c nodeId newArchive = do
-- printDebug "[upsertNodeStories] START nId" nId -- printDebug "[upsertNodeStories] START nId" nId
...@@ -551,21 +208,6 @@ upsertNodeStories c nodeId newArchive = do ...@@ -551,21 +208,6 @@ upsertNodeStories c nodeId newArchive = do
-- printDebug "[upsertNodeStories] STOP nId" nId -- printDebug "[upsertNodeStories] STOP nId" nId
updateNodeStoryVersion :: PGS.Connection -> NodeId -> ArchiveList -> IO ()
updateNodeStoryVersion c nodeId newArchive = do
let ngramsTypes = Map.keys $ newArchive ^. a_state
mapM_ (\nt -> runPGSExecute c query (newArchive ^. a_version, nodeId, nt)) ngramsTypes
where
query :: PGS.Query
query = [sql|UPDATE node_stories
SET version = ?
WHERE node_id = ?
AND ngrams_type_id = ?|]
writeNodeStories :: PGS.Connection -> NodeListStory -> IO ()
writeNodeStories c (NodeStory nls) = do
mapM_ (\(nId, a) -> upsertNodeStories c nId a) $ Map.toList nls
-- | Returns a `NodeListStory`, updating the given one for given `NodeId` -- | Returns a `NodeListStory`, updating the given one for given `NodeId`
nodeStoryInc :: PGS.Connection -> NodeListStory -> NodeId -> IO NodeListStory nodeStoryInc :: PGS.Connection -> NodeListStory -> NodeId -> IO NodeListStory
nodeStoryInc c ns@(NodeStory nls) nId = do nodeStoryInc c ns@(NodeStory nls) nId = do
...@@ -575,34 +217,10 @@ nodeStoryInc c ns@(NodeStory nls) nId = do ...@@ -575,34 +217,10 @@ nodeStoryInc c ns@(NodeStory nls) nId = do
pure $ NodeStory $ Map.unionWith archiveAdvance nls' nls pure $ NodeStory $ Map.unionWith archiveAdvance nls' nls
Just _ -> pure ns Just _ -> pure ns
nodeStoryIncrementalRead :: PGS.Connection -> Maybe NodeListStory -> [NodeId] -> IO NodeListStory
nodeStoryIncrementalRead _ Nothing [] = pure $ NodeStory $ Map.empty
nodeStoryIncrementalRead c Nothing (ni:ns) = do
m <- getNodeStory c ni
nodeStoryIncrementalRead c (Just m) ns
nodeStoryIncrementalRead c (Just nls) ns = foldM (\m n -> nodeStoryInc c m n) nls ns
-- nodeStoryDec :: Pool PGS.Connection -> NodeListStory -> NodeId -> IO NodeListStory
-- nodeStoryDec pool ns@(NodeStory nls) ni = do
-- case Map.lookup ni nls of
-- Nothing -> do
-- _ <- nodeStoryRemove pool ni
-- pure ns
-- Just _ -> do
-- let ns' = Map.filterWithKey (\k _v -> k /= ni) nls
-- _ <- nodeStoryRemove pool ni
-- pure $ NodeStory ns'
------------------------------------
-- | NgramsRepoElement contains, in particular, `nre_list`, -- | NgramsRepoElement contains, in particular, `nre_list`,
-- `nre_parent` and `nre_children`. We want to make sure that all -- `nre_parent` and `nre_children`. We want to make sure that all
-- children entries (i.e. ones that have `nre_parent`) have the same -- children entries (i.e. ones that have `nre_parent`) have the same
-- `list` as their parent entry. -- `list` as their parent entry.
fixChildrenTermTypes :: NodeListStory -> NodeListStory
fixChildrenTermTypes (NodeStory nls) =
NodeStory $ Map.fromList [ (nId, a & a_state %~ fixChildrenInNgramsStatePatch)
| (nId, a) <- Map.toList nls ]
fixChildrenInNgramsStatePatch :: NgramsState' -> NgramsState' fixChildrenInNgramsStatePatch :: NgramsState' -> NgramsState'
fixChildrenInNgramsStatePatch ns = archiveStateFromList $ nsParents <> nsChildrenFixed fixChildrenInNgramsStatePatch ns = archiveStateFromList $ nsParents <> nsChildrenFixed
where where
...@@ -623,11 +241,6 @@ fixChildrenInNgramsStatePatch ns = archiveStateFromList $ nsParents <> nsChildre ...@@ -623,11 +241,6 @@ fixChildrenInNgramsStatePatch ns = archiveStateFromList $ nsParents <> nsChildre
-- | Sometimes, when we upload a new list, a child can be left without -- | Sometimes, when we upload a new list, a child can be left without
-- a parent. Find such ngrams and set their 'root' and 'parent' to -- a parent. Find such ngrams and set their 'root' and 'parent' to
-- 'Nothing'. -- 'Nothing'.
fixChildrenWithNoParent :: NodeListStory -> NodeListStory
fixChildrenWithNoParent (NodeStory nls) =
NodeStory $ Map.fromList [ (nId, a & a_state %~ fixChildrenWithNoParentStatePatch)
| (nId, a) <- Map.toList nls ]
fixChildrenWithNoParentStatePatch :: NgramsState' -> NgramsState' fixChildrenWithNoParentStatePatch :: NgramsState' -> NgramsState'
fixChildrenWithNoParentStatePatch ns = archiveStateFromList $ nsParents <> nsChildrenFixed fixChildrenWithNoParentStatePatch ns = archiveStateFromList $ nsParents <> nsChildrenFixed
where where
...@@ -654,50 +267,37 @@ fixChildrenWithNoParentStatePatch ns = archiveStateFromList $ nsParents <> nsChi ...@@ -654,50 +267,37 @@ fixChildrenWithNoParentStatePatch ns = archiveStateFromList $ nsParents <> nsChi
fromDBNodeStoryEnv :: Pool PGS.Connection -> IO NodeStoryEnv fromDBNodeStoryEnv :: Pool PGS.Connection -> IO NodeStoryEnv
fromDBNodeStoryEnv pool = do fromDBNodeStoryEnv pool = do
tvar <- nodeStoryVar pool Nothing [] -- tvar <- nodeStoryVar pool Nothing []
let saver_immediate = do let saver_immediate nId a = do
ns <- atomically $ -- ns <- atomically $
readTVar tvar -- readTVar tvar
-- fix children so their 'list' is the same as their parents' -- -- fix children so their 'list' is the same as their parents'
>>= pure . fixChildrenTermTypes -- >>= pure . fixChildrenTermTypes
-- fix children that don't have a parent anymore -- -- fix children that don't have a parent anymore
>>= pure . fixChildrenWithNoParent -- >>= pure . fixChildrenWithNoParent
>>= writeTVar tvar -- >>= writeTVar tvar
>> readTVar tvar -- >> readTVar tvar
withResource pool $ \c -> do withResource pool $ \c -> do
--printDebug "[mkNodeStorySaver] will call writeNodeStories, ns" ns --printDebug "[mkNodeStorySaver] will call writeNodeStories, ns" ns
writeNodeStories c ns -- writeNodeStories c $ fixChildrenWithNoParent $ fixChildrenTermTypes ns
let archive_saver_immediate ns@(NodeStory nls) = withResource pool $ \c -> do upsertNodeStories c nId $
mapM_ (\(nId, a) -> do a & a_state %~ (fixChildrenInNgramsStatePatch . fixChildrenWithNoParentStatePatch)
insertNodeArchiveHistory c nId (a ^. a_version) $ reverse $ a ^. a_history let archive_saver_immediate nId a = withResource pool $ \c -> do
) $ Map.toList nls insertNodeArchiveHistory c nId (a ^. a_version) $ reverse $ a ^. a_history
pure $ clearHistory ns pure $ a & a_history .~ []
-- mapM_ (\(nId, a) -> do
-- insertNodeArchiveHistory c nId (a ^. a_version) $ reverse $ a ^. a_history
-- ) $ Map.toList nls
-- pure $ clearHistory ns
pure $ NodeStoryEnv { _nse_var = tvar pure $ NodeStoryEnv { _nse_saver_immediate = saver_immediate
, _nse_saver_immediate = saver_immediate
, _nse_archive_saver_immediate = archive_saver_immediate , _nse_archive_saver_immediate = archive_saver_immediate
, _nse_getter = nodeStoryVar pool (Just tvar) , _nse_getter = \nId -> withResource pool $ \c ->
getNodeStory' c nId
, _nse_getter_multi = \nIds -> withResource pool $ \c ->
foldM (\m nId -> nodeStoryInc c m nId) (NodeStory Map.empty) nIds
} }
nodeStoryVar :: Pool PGS.Connection
-> Maybe (TVar NodeListStory)
-> [NodeId]
-> IO (TVar NodeListStory)
nodeStoryVar pool Nothing nIds = do
state' <- withResource pool $ \c -> nodeStoryIncrementalRead c Nothing nIds
atomically $ newTVar state'
nodeStoryVar pool (Just tv) nIds = do
nls <- atomically $ readTVar tv
nls' <- withResource pool
$ \c -> nodeStoryIncrementalRead c (Just nls) nIds
_ <- atomically $ writeTVar tv nls'
pure tv
clearHistory :: NodeListStory -> NodeListStory
clearHistory (NodeStory ns) = NodeStory $ ns & (traverse . a_history) .~ emptyHistory
where
emptyHistory = [] :: [NgramsStatePatch']
currentVersion :: (HasNodeStory env err m) => ListId -> m Version currentVersion :: (HasNodeStory env err m) => ListId -> m Version
currentVersion listId = do currentVersion listId = do
pool <- view connPool pool <- view connPool
...@@ -748,3 +348,72 @@ fixNodeStoryVersions = do ...@@ -748,3 +348,72 @@ fixNodeStoryVersions = do
_ <- runPGSExecute c updateVerQuery (maxVersion, nId, ngramsType) _ <- runPGSExecute c updateVerQuery (maxVersion, nId, ngramsType)
pure () pure ()
_ -> panicTrace "Should get only 1 result!" _ -> panicTrace "Should get only 1 result!"
-----------------------------------------
-- DEPRECATED
-- nodeStoryVar :: Pool PGS.Connection
-- -> Maybe (TVar NodeListStory)
-- -> [NodeId]
-- -> IO (TVar NodeListStory)
-- nodeStoryVar pool Nothing nIds = do
-- state' <- withResource pool $ \c -> nodeStoryIncrementalRead c Nothing nIds
-- atomically $ newTVar state'
-- nodeStoryVar pool (Just tv) nIds = do
-- nls <- atomically $ readTVar tv
-- nls' <- withResource pool
-- $ \c -> nodeStoryIncrementalRead c (Just nls) nIds
-- _ <- atomically $ writeTVar tv nls'
-- pure tv
-- clearHistory :: NodeListStory -> NodeListStory
-- clearHistory (NodeStory ns) = NodeStory $ ns & (traverse . a_history) .~ emptyHistory
-- where
-- emptyHistory = [] :: [NgramsStatePatch']
-- fixChildrenWithNoParent :: NodeListStory -> NodeListStory
-- fixChildrenWithNoParent (NodeStory nls) =
-- NodeStory $ Map.fromList [ (nId, a & a_state %~ fixChildrenWithNoParentStatePatch)
-- | (nId, a) <- Map.toList nls ]
-- fixChildrenTermTypes :: NodeListStory -> NodeListStory
-- fixChildrenTermTypes (NodeStory nls) =
-- NodeStory $ Map.fromList [ (nId, a & a_state %~ fixChildrenInNgramsStatePatch)
-- | (nId, a) <- Map.toList nls ]
-- nodeStoryIncrementalRead :: PGS.Connection -> Maybe NodeListStory -> [NodeId] -> IO NodeListStory
-- nodeStoryIncrementalRead _ Nothing [] = pure $ NodeStory Map.empty
-- nodeStoryIncrementalRead c Nothing (ni:ns) = do
-- m <- getNodeStory c ni
-- nodeStoryIncrementalRead c (Just m) ns
-- nodeStoryIncrementalRead c (Just nls) ns = foldM (\m n -> nodeStoryInc c m n) nls ns
-- nodeStoryDec :: Pool PGS.Connection -> NodeListStory -> NodeId -> IO NodeListStory
-- nodeStoryDec pool ns@(NodeStory nls) ni = do
-- case Map.lookup ni nls of
-- Nothing -> do
-- _ <- nodeStoryRemove pool ni
-- pure ns
-- Just _ -> do
-- let ns' = Map.filterWithKey (\k _v -> k /= ni) nls
-- _ <- nodeStoryRemove pool ni
-- pure $ NodeStory ns'
------------------------------------
-- writeNodeStories :: PGS.Connection -> NodeListStory -> IO ()
-- writeNodeStories c (NodeStory nls) = do
-- mapM_ (\(nId, a) -> upsertNodeStories c nId a) $ Map.toList nls
-- nodeStoryRemove :: Pool PGS.Connection -> NodeId -> IO Int64
-- nodeStoryRemove pool (NodeId nId) = withResource pool $ \c -> runDelete c delete
-- where
-- delete = Delete { dTable = nodeStoryTable
-- , dWhere = (\row -> node_id row .== sqlInt4 nId)
-- , dReturning = rCount }
{-|
Module : Gargantext.Core.NodeStory.DB
Description : NodeStory DB functions
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.NodeStory.DB
( nodeExists
, getNodesIdWithType
, getNodesArchiveHistory
, insertNodeArchiveHistory
, nodeStoriesQuery
, insertArchiveStateList
, deleteArchiveStateList
, updateArchiveStateList
, updateNodeStoryVersion )
where
import Control.Lens ((^.))
import Control.Monad.Except
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Data.Monoid
import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.API.Ngrams.Types
import Gargantext.Core (toDBid)
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Types (NodeId(..), NodeType)
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude hiding (to)
import Gargantext.Prelude.Database
nodeExists :: PGS.Connection -> NodeId -> IO Bool
nodeExists c nId = (== [PGS.Only True])
<$> runPGSQuery c [sql| SELECT true FROM nodes WHERE id = ? LIMIT 1 |]
(PGS.Only nId)
getNodesIdWithType :: PGS.Connection -> NodeType -> IO [NodeId]
getNodesIdWithType c nt = do
ns <- runPGSQuery c query (PGS.Only $ toDBid nt)
pure $ map (\(PGS.Only nId) -> UnsafeMkNodeId nId) ns
where
query :: PGS.Query
query = [sql| SELECT id FROM nodes WHERE typename = ? |]
-- /!\ This function is using an hard coded parameter
-- which depends on the Ngrams List Flow
-- Version > 5 is hard coded because by default
-- first version of history of manual change is 6
getNodesArchiveHistory :: PGS.Connection
-> [NodeId]
-> IO [(NodeId, (Map NgramsType [HashMap NgramsTerm NgramsPatch]))]
getNodesArchiveHistory c nodesId = do
as <- runPGSQuery c query (PGS.Only $ Values fields nodesId)
:: IO [(Int, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
pure $ map (\(nId, ngramsType, terms, patch)
-> ( UnsafeMkNodeId nId
, Map.singleton ngramsType [HashMap.singleton terms patch]
)
) as
where
fields = [QualifiedIdentifier Nothing "int4"]
query :: PGS.Query
query = [sql| WITH nodes_id(nid) as (?)
SELECT node_id, ngrams_type_id, terms, patch
FROM node_story_archive_history
JOIN ngrams ON ngrams.id = ngrams_id
JOIN nodes_id n ON node_id = n.nid
WHERE version > 5
ORDER BY (version, node_story_archive_history.id) DESC
|]
insertNodeArchiveHistory :: PGS.Connection -> NodeId -> Version -> [NgramsStatePatch'] -> IO ()
insertNodeArchiveHistory _ _ _ [] = pure ()
insertNodeArchiveHistory c nodeId version (h:hs) = do
let tuples = mconcat $ (\(nType, NgramsTablePatch patch) ->
(\(term, p) ->
(nodeId, nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
tuplesM <- mapM (\(nId, nType, term, patch) -> do
[PGS.Only ngramsId] <- runPGSReturning c qInsert [PGS.Only term] :: IO [PGS.Only Int]
pure (nId, nType, ngramsId, term, patch)
) tuples :: IO [(NodeId, TableNgrams.NgramsType, Int, NgramsTerm, NgramsPatch)]
_ <- runPGSExecuteMany c query $ ((\(nId, nType, termId, _term, patch) -> (nId, nType, termId, patch, version)) <$> tuplesM)
_ <- insertNodeArchiveHistory c nodeId version hs
pure ()
where
qInsert :: PGS.Query
qInsert = [sql|INSERT INTO ngrams (terms) VALUES (?)
ON CONFLICT (terms) DO UPDATE SET terms = excluded.terms
RETURNING id|]
-- https://stackoverflow.com/questions/39224438/postgresql-insert-if-foreign-key-exists
query :: PGS.Query
query = [sql| INSERT INTO node_story_archive_history(node_id, ngrams_type_id, ngrams_id, patch, version)
VALUES (?, ?, ?, ?, ?)
|]
nodeStoriesQuery :: PGS.Query
nodeStoriesQuery = [sql| SELECT version, ngrams_type_id, terms, ngrams_repo_element
FROM node_stories
JOIN ngrams ON ngrams.id = ngrams_id
WHERE node_id = ?
|]
-- Archive
insertArchiveStateList :: PGS.Connection -> NodeId -> Version -> ArchiveStateList -> IO ()
insertArchiveStateList c nodeId version as = do
mapM_ performInsert as
where
performInsert (ngramsType, ngrams, ngramsRepoElement) = do
[PGS.Only ngramsId] <- tryInsertTerms ngrams
_ <- case ngramsRepoElement ^. nre_root of
Nothing -> pure []
Just r -> tryInsertTerms r
mapM_ tryInsertTerms $ ngramsRepoElement ^. nre_children
runPGSExecute c query (nodeId, ngramsId, version, ngramsType, ngramsRepoElement)
tryInsertTerms :: NgramsTerm -> IO [PGS.Only Int]
tryInsertTerms t = runPGSReturning c qInsert [PGS.Only t]
qInsert :: PGS.Query
qInsert = [sql|INSERT INTO ngrams (terms) VALUES (?)
ON CONFLICT (terms) DO UPDATE SET terms = excluded.terms
RETURNING id|]
query :: PGS.Query
query = [sql|INSERT INTO node_stories(node_id, ngrams_id, version, ngrams_type_id, ngrams_repo_element)
VALUES (?, ?, ?, ?, ? :: jsonb)
|]
deleteArchiveStateList :: PGS.Connection -> NodeId -> ArchiveStateList -> IO ()
deleteArchiveStateList c nodeId as = do
mapM_ (\(nt, n, _) -> runPGSExecute c query (nodeId, nt, n)) as
where
query :: PGS.Query
query = [sql| DELETE FROM node_stories
WHERE node_id = ? AND ngrams_type_id = ?
AND ngrams_id IN (SELECT id FROM ngrams WHERE terms = ?)
|]
updateArchiveStateList :: PGS.Connection -> NodeId -> Version -> ArchiveStateList -> IO ()
updateArchiveStateList c nodeId version as = do
let params = (\(nt, n, nre) -> (nre, version, nodeId, nt, n)) <$> as
mapM_ (runPGSExecute c query) params
where
query :: PGS.Query
query = [sql| UPDATE node_stories
SET ngrams_repo_element = ?, version = ?
WHERE node_id = ? AND ngrams_type_id = ?
AND ngrams_id IN (SELECT id FROM ngrams WHERE terms = ?)
|]
updateNodeStoryVersion :: PGS.Connection -> NodeId -> ArchiveList -> IO ()
updateNodeStoryVersion c nodeId newArchive = do
let ngramsTypes = Map.keys $ newArchive ^. a_state
mapM_ (\nt -> runPGSExecute c query (newArchive ^. a_version, nodeId, nt)) ngramsTypes
where
query :: PGS.Query
query = [sql|UPDATE node_stories
SET version = ?
WHERE node_id = ?
AND ngrams_type_id = ?|]
{-|
Module : Gargantext.Core.NodeStory.Types
Description : Node API generation
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.NodeStory.Types
( HasNodeStory
, HasNodeStoryEnv
, hasNodeStory
, HasNodeStoryImmediateSaver
, hasNodeStoryImmediateSaver
, HasNodeArchiveStoryImmediateSaver
, hasNodeArchiveStoryImmediateSaver
, NodeStory(..)
, NgramsState'
, NgramsStatePatch'
, NodeListStory
, ArchiveList
, initNodeListStoryMock
, NodeStoryEnv(..)
, initNodeStory
, nse_getter
, nse_getter_multi
, nse_saver_immediate
, nse_archive_saver_immediate
-- , nse_var
, unNodeStory
, Archive(..)
, initArchive
, archiveAdvance
, unionArchives
, a_history
, a_state
, a_version
, combineState
, ArchiveStateSet
, ArchiveStateList )
where
import Codec.Serialise.Class
import Control.Lens (makeLenses, Getter, (^.))
import Control.Monad.Except
import Control.Monad.Reader
import Data.Aeson hiding ((.=), decode)
import Data.Map.Strict qualified as Map
import Data.Monoid
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Semigroup
import Data.Set qualified as Set
import Database.PostgreSQL.Simple.FromField (FromField(fromField), fromJSONField)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (NodeId(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Prelude (DbCmd')
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Prelude hiding (to)
import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField)
------------------------------------------------------------------------
{- | Node Story for each NodeType where the Key of the Map is NodeId
TODO : generalize for any NodeType, let's start with NodeList which
is implemented already
-}
newtype NodeStory s p = NodeStory { _unNodeStory :: Map NodeId (Archive s p) }
deriving (Generic, Show, Eq)
instance (FromJSON s, FromJSON p) => FromJSON (NodeStory s p)
instance (ToJSON s, ToJSON p) => ToJSON (NodeStory s p)
instance (Serialise s, Serialise p) => Serialise (NodeStory s p)
data Archive s p = Archive
{ _a_version :: !Version
, _a_state :: !s
, _a_history :: ![p]
-- first patch in the list is the most recent
-- We use `take` in `commitStatePatch`, that's why.
-- History is immutable, we just insert things on top of existing
-- list.
-- We don't need to store the whole history in memory, this
-- structure holds only recent history, the one that will be
-- inserted to the DB.
}
deriving (Generic, Show, Eq)
instance (Serialise s, Serialise p) => Serialise (Archive s p)
type NodeListStory = NodeStory NgramsState' NgramsStatePatch'
-- NOTE: 'type NgramsTableMap = Map NgramsTerm NgramsRepoElement'
type NgramsState' = Map TableNgrams.NgramsType NgramsTableMap
type NgramsStatePatch' = PatchMap TableNgrams.NgramsType NgramsTablePatch
-- instance Serialise NgramsStatePatch'
instance FromField (Archive NgramsState' NgramsStatePatch')
where
fromField = fromJSONField
instance DefaultFromField SqlJsonb (Archive NgramsState' NgramsStatePatch')
where
defaultFromField = fromPGSFromField
-- | Combine `NgramsState'`. This is because the structure is (Map
-- NgramsType (Map ...)) and the default `(<>)` operator is
-- left-biased
-- (https://hackage.haskell.org/package/containers-0.6.6/docs/Data-Map-Internal.html#v:union)
combineState :: NgramsState' -> NgramsState' -> NgramsState'
combineState = Map.unionWith (<>)
-- This is not a typical Semigroup instance. The state is not
-- appended, instead it is replaced with the second entry. This is
-- because state changes with each version. We have to take into
-- account the removal of terms as well.
-- instance (Semigroup s, Semigroup p) => Semigroup (Archive s p) where
-- (<>) (Archive { _a_history = p }) (Archive { _a_version = v'
-- , _a_state = s'
-- , _a_history = p' }) =
-- Archive { _a_version = v'
-- , _a_state = s'
-- , _a_history = p' <> p }
-- instance (Monoid s, Semigroup p) => Monoid (Archive s p) where
-- mempty = Archive { _a_version = 0
-- , _a_state = mempty
-- , _a_history = [] }
instance (FromJSON s, FromJSON p) => FromJSON (Archive s p) where
parseJSON = genericParseJSON $ unPrefix "_a_"
instance (ToJSON s, ToJSON p) => ToJSON (Archive s p) where
toJSON = genericToJSON $ unPrefix "_a_"
toEncoding = genericToEncoding $ unPrefix "_a_"
-- | This is the normal way to update archive state, bumping the
-- version and history. Resulting state is taken directly from new
-- archive, omitting old archive completely.
archiveAdvance :: (Semigroup s, Semigroup p) => Archive s p -> Archive s p -> Archive s p
archiveAdvance aOld aNew = aNew { _a_history = _a_history aNew <> _a_history aOld }
-- | This is to merge archive states.
unionArchives :: (Semigroup s, Semigroup p) => Archive s p -> Archive s p -> Archive s p
unionArchives aOld aNew = aNew { _a_state = _a_state aOld <> _a_state aNew
, _a_history = _a_history aNew <> _a_history aOld }
------------------------------------------------------------------------
initNodeStory :: (Monoid s, Semigroup p) => NodeId -> NodeStory s p
initNodeStory ni = NodeStory $ Map.singleton ni initArchive
initArchive :: (Monoid s, Semigroup p) => Archive s p
initArchive = Archive { _a_version = 0
, _a_state = mempty
, _a_history = [] }
initNodeListStoryMock :: NodeListStory
initNodeListStoryMock = NodeStory $ Map.singleton nodeListId archive
where
nodeListId = 0
archive = Archive { _a_version = 0
, _a_state = ngramsTableMap
, _a_history = [] }
ngramsTableMap = Map.singleton TableNgrams.NgramsTerms
$ Map.fromList
[ (n ^. ne_ngrams, ngramsElementToRepo n)
| n <- mockTable ^. _NgramsTable
]
----------------------------------------------------------------------
data NodeStoryPoly nid v ngtid ngid nre =
NodeStoryDB { node_id :: !nid
, version :: !v
, ngrams_type_id :: !ngtid
, ngrams_id :: !ngid
, ngrams_repo_element :: !nre }
deriving (Eq)
data NodeStoryArchivePoly nid a =
NodeStoryArchiveDB { a_node_id :: !nid
, archive :: !a }
deriving (Eq)
$(makeAdaptorAndInstance "pNodeStory" ''NodeStoryPoly)
$(makeAdaptorAndInstance "pNodeArchiveStory" ''NodeStoryArchivePoly)
-- type NodeStoryWrite = NodeStoryPoly (Column SqlInt4) (Column SqlInt4) (Column SqlInt4) (Column SqlInt4) (Column SqlJsonb)
-- type NodeStoryRead = NodeStoryPoly (Column SqlInt4) (Column SqlInt4) (Column SqlInt4) (Column SqlInt4) (Column SqlJsonb)
-- type NodeStoryArchiveWrite = NodeStoryArchivePoly (Column SqlInt4) (Column SqlJsonb)
-- type NodeStoryArchiveRead = NodeStoryArchivePoly (Column SqlInt4) (Column SqlJsonb)
type ArchiveList = Archive NgramsState' NgramsStatePatch'
------------------------------------------------------------------------
data NodeStoryEnv = NodeStoryEnv
{ _nse_saver_immediate :: !(NodeId -> ArchiveList -> IO ())
, _nse_archive_saver_immediate :: !(NodeId -> ArchiveList -> IO ArchiveList)
, _nse_getter :: !(NodeId -> IO ArchiveList)
, _nse_getter_multi :: !([NodeId] -> IO NodeListStory)
--, _nse_cleaner :: !(IO ()) -- every 12 hours: cleans the repos of unused NodeStories
-- , _nse_lock :: !FileLock -- TODO (it depends on the option: if with database or file only)
}
deriving (Generic)
type HasNodeStory env err m = ( DbCmd' env err m
, MonadReader env m
, MonadError err m
, HasNodeStoryEnv env
, HasNodeError err
)
class (HasNodeStoryImmediateSaver env)
=> HasNodeStoryEnv env where
hasNodeStory :: Getter env NodeStoryEnv
class HasNodeStoryImmediateSaver env where
hasNodeStoryImmediateSaver :: Getter env (NodeId -> ArchiveList -> IO ())
class HasNodeArchiveStoryImmediateSaver env where
hasNodeArchiveStoryImmediateSaver :: Getter env (NodeId -> ArchiveList -> IO ArchiveList)
type ArchiveStateList = [(TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)]
type ArchiveStateSet = Set.Set (TableNgrams.NgramsType, NgramsTerm)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Lenses at the bottom of the file because Template Haskell would reorder order of execution in others cases
makeLenses ''NodeStoryEnv
makeLenses ''NodeStory
makeLenses ''Archive
...@@ -64,10 +64,11 @@ instance ToNamedRecord CsvList where ...@@ -64,10 +64,11 @@ instance ToNamedRecord CsvList where
] ]
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance FromField CsvListType where instance FromField CsvListType where
parseField "map" = pure CsvMap parseField "map" = pure CsvMap
parseField "main" = pure CsvCandidate parseField "main" = pure CsvCandidate
parseField "stop" = pure CsvStop parseField "candidate" = pure CsvCandidate -- backward compat
parseField _ = mzero parseField "stop" = pure CsvStop
parseField _ = mzero
instance ToField CsvListType where instance ToField CsvListType where
toField CsvMap = "map" toField CsvMap = "map"
......
...@@ -195,7 +195,7 @@ toPhyloDate' y m d _ = pack $ showGregorian $ fromGregorian (toIntege ...@@ -195,7 +195,7 @@ toPhyloDate' y m d _ = pack $ showGregorian $ fromGregorian (toIntege
-- Utils -- Utils
writePhylo :: [Char] -> Phylo -> IO () writePhylo :: HasCallStack => [Char] -> Phylo -> IO ()
writePhylo path phylo = Lazy.writeFile path $ encode phylo writePhylo path phylo = Lazy.writeFile path $ encode phylo
......
...@@ -702,17 +702,17 @@ toPhyloExport phylo = exportToDot phylo ...@@ -702,17 +702,17 @@ toPhyloExport phylo = exportToDot phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch] traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
traceExportBranches branches = traceExportBranches branches =
trace ("\n" tracePhylo ("\n"
<> "-- | Export " <> show(length branches) <> " branches" :: Text) branches <> "-- | Export " <> show(length branches) <> " branches" :: Text) branches
tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]] tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePhyloAncestors groups = tracePhyloAncestors groups =
trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) tracePhylo ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups)
<> " ancestors" :: Text) groups <> " ancestors" :: Text) groups
tracePhyloInfo :: Phylo -> Phylo tracePhyloInfo :: Phylo -> Phylo
tracePhyloInfo phylo = tracePhyloInfo phylo =
trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = " tracePhylo ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
<> show(getLevel phylo) <> " applied to " <> show(getLevel phylo) <> " applied to "
<> show(length $ Vector.toList $ getRoots phylo) <> " foundations" :: Text <> show(length $ Vector.toList $ getRoots phylo) <> " foundations" :: Text
) phylo ) phylo
...@@ -720,7 +720,7 @@ tracePhyloInfo phylo = ...@@ -720,7 +720,7 @@ tracePhyloInfo phylo =
traceExportGroups :: [PhyloGroup] -> [PhyloGroup] traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
traceExportGroups groups = traceExportGroups groups =
trace ("\n" <> "-- | Export " tracePhylo ("\n" <> "-- | Export "
<> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, " <> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
<> show(length groups) <> " groups and " <> show(length groups) <> " groups and "
<> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms" :: Text <> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms" :: Text
......
...@@ -10,19 +10,23 @@ Portability : POSIX ...@@ -10,19 +10,23 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-} {-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE BangPatterns #-}
module Gargantext.Core.Viz.Phylo.PhyloMaker where module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parMap, rpar)
import Data.List (nub, partition, intersect, tail) import Data.Containers.ListUtils (nubOrd)
import Data.Discrimination qualified as D
import Data.List (partition, intersect, tail)
import Data.List qualified as List import Data.List qualified as List
import Data.Map (fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, insert) import Data.Map (fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, insert)
import Data.Map qualified as Map import Data.Map qualified as Map
import Data.Set qualified as Set import Data.Set qualified as Set
import Data.Vector (Vector) import Data.Text qualified as T
import Data.Vector qualified as Vector import Data.Vector qualified as Vector
import Data.Vector (Vector)
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques) import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Methods.Similarities (Similarity(Conditional)) import Gargantext.Core.Methods.Similarities (Similarity(Conditional))
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..)) import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
...@@ -131,9 +135,9 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd ...@@ -131,9 +135,9 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
-------- --------
-- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality -- 1.1) for each measure of similarity, prune the flat phylo, compute the branches and estimate the quality
qua :: [Double] qua :: [Double]
qua = map (\thr -> qua = parMap rpar (\thr ->
let edges = filter (\edge -> snd edge >= thr) graph let edges = filter (\edge -> snd edge >= thr) graph
nodes = nub $ concat $ map (\((n,n'),_) -> [n,n']) edges nodes = nubOrd $ concat $ map (\((n,n'),_) -> [n,n']) edges
branches = toRelatedComponents nodes edges branches = toRelatedComponents nodes edges
in toPhyloQuality nbFdt lambda freq branches in toPhyloQuality nbFdt lambda freq branches
) $ (Set.toList similarities) ) $ (Set.toList similarities)
...@@ -146,7 +150,7 @@ findSeaLadder :: Phylo -> Phylo ...@@ -146,7 +150,7 @@ findSeaLadder :: Phylo -> Phylo
findSeaLadder phylo = case getSeaElevation phylo of findSeaLadder phylo = case getSeaElevation phylo of
Constante start gap -> phylo & phylo_seaLadder .~ (constSeaLadder start gap Set.empty) Constante start gap -> phylo & phylo_seaLadder .~ (constSeaLadder start gap Set.empty)
Adaptative steps -> phylo & phylo_seaLadder .~ (squareLadder $ adaptSeaLadder steps similarities Set.empty) Adaptative steps -> phylo & phylo_seaLadder .~ (squareLadder $ adaptSeaLadder steps similarities Set.empty)
Evolving _ -> let ladder = evolvSeaLadder Evolving _ -> let !ladder = evolvSeaLadder
(fromIntegral $ Vector.length $ getRoots phylo) (fromIntegral $ Vector.length $ getRoots phylo)
(getLevel phylo) (getLevel phylo)
(getRootsFreq phylo) (getRootsFreq phylo)
...@@ -172,7 +176,7 @@ findSeaLadder phylo = case getSeaElevation phylo of ...@@ -172,7 +176,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
docs = filterDocs (getDocsByDate phylo) ([period] ++ next) docs = filterDocs (getDocsByDate phylo) ([period] ++ next)
diagos = filterDiago (getCoocByDate phylo) ([period] ++ next) diagos = filterDiago (getCoocByDate phylo) ([period] ++ next)
-- 1.2) compute the kinship similarities between pairs of source & target in parallel -- 1.2) compute the kinship similarities between pairs of source & target in parallel
pairs = map (\source -> pairs = parMap rpar (\source ->
let candidates = filter (\target -> (> 2) $ length let candidates = filter (\target -> (> 2) $ length
$ intersect (getGroupNgrams source) (getGroupNgrams target)) targets $ intersect (getGroupNgrams source) (getGroupNgrams target)) targets
in map (\target -> in map (\target ->
...@@ -183,8 +187,7 @@ findSeaLadder phylo = case getSeaElevation phylo of ...@@ -183,8 +187,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
in ((source,target),toSimilarity nbDocs diago (getSimilarity phylo) (getGroupNgrams source) (getGroupNgrams target) (getGroupNgrams target)) in ((source,target),toSimilarity nbDocs diago (getSimilarity phylo) (getGroupNgrams source) (getGroupNgrams target) (getGroupNgrams target))
) candidates ) candidates
) sources ) sources
pairs' = pairs `using` parList rdeepseq in acc ++ (concat pairs)
in acc ++ (concat pairs')
) [] $ keys $ phylo ^. phylo_periods ) [] $ keys $ phylo ^. phylo_periods
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
...@@ -311,15 +314,14 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >= ...@@ -311,15 +314,14 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. clustering_roots) >=
-- To filter nested Fis -- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering] filterCliqueByNested :: Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
filterCliqueByNested m = filterCliqueByNested m =
let clq = map (\l -> let clq = parMap rpar (\l ->
foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem) foldl (\mem f -> if (any (\f' -> isNested (f' ^. clustering_roots) (f ^. clustering_roots)) mem)
then mem then mem
else else
let fMax = filter (\f' -> not $ isNested (f ^. clustering_roots) (f' ^. clustering_roots)) mem let fMax = filter (\f' -> not $ isNested (f ^. clustering_roots) (f' ^. clustering_roots)) mem
in fMax ++ [f] ) [] l) in fMax ++ [f] ) [] l)
$ elems m $ elems m
clq' = clq `using` parList rdeepseq in fromList $ zip (keys m) clq
in fromList $ zip (keys m) clq'
-- | To transform a time map of docs into a time map of Fis with some filters -- | To transform a time map of docs into a time map of Fis with some filters
...@@ -340,7 +342,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -340,7 +342,7 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
seriesOfClustering :: Map (Date,Date) [Clustering] seriesOfClustering :: Map (Date,Date) [Clustering]
seriesOfClustering = case (clique $ getConfig phylo) of seriesOfClustering = case (clique $ getConfig phylo) of
Fis _ _ -> Fis _ _ ->
let fis = map (\(prd,docs) -> let fis = parMap rpar (\(prd,docs) ->
case (corpusParser $ getConfig phylo) of case (corpusParser $ getConfig phylo) of
Csv' _ -> let lst = toList Csv' _ -> let lst = toList
$ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs) $ fisWithSizePolyMap' (Segment 1 20) 1 (map (\d -> (ngramsToIdx (text d) (getRoots phylo), (weight d, (sourcesToIdx (sources d) (getSources phylo))))) docs)
...@@ -350,18 +352,16 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -350,18 +352,16 @@ toSeriesOfClustering phylo phyloDocs = case (clique $ getConfig phylo) of
in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst) in (prd, map (\f -> Clustering (Set.toList $ fst f) (snd f) prd (Just $ fromIntegral $ snd f) []) lst)
) )
$ toList phyloDocs $ toList phyloDocs
fis' = fis `using` parList rdeepseq in fromList fis
in fromList fis'
MaxClique _ thr filterType -> MaxClique _ thr filterType ->
let mcl = map (\(prd,docs) -> let mcl = parMap rpar (\(prd,docs) ->
let cooc = map round let cooc = map round
$ foldl sumCooc empty $ foldl sumCooc empty
$ map listToMatrix $ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs $ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc)) in (prd, map (\cl -> Clustering cl 0 prd Nothing []) $ getMaxCliques filterType Conditional thr cooc))
$ toList phyloDocs $ toList phyloDocs
mcl' = mcl `using` parList rdeepseq in fromList mcl
in fromList mcl'
-------------------------------------- --------------------------------------
-- dev viz graph maxClique getMaxClique -- dev viz graph maxClique getMaxClique
...@@ -377,7 +377,7 @@ docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc ...@@ -377,7 +377,7 @@ docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc docs fdt = docsToTimeScaleCooc docs fdt =
let mCooc = fromListWith sumCooc let mCooc = fromListWith sumCooc
$ map (\(_d,l) -> (_d, listToMatrix l)) $ map (\(_d,l) -> (_d, listToMatrix l))
$ map (\doc -> (date doc, sort $ ngramsToIdx (text doc) fdt)) docs $ map (\doc -> (date doc, D.sort $ ngramsToIdx (text doc) fdt)) docs
mCooc' = fromList mCooc' = fromList
$ map (\t -> (t,empty)) $ map (\t -> (t,empty))
$ toTimeScale (map date docs) 1 $ toTimeScale (map date docs) 1
...@@ -406,13 +406,12 @@ groupDocsByPeriodRec f prds docs acc = ...@@ -406,13 +406,12 @@ groupDocsByPeriodRec f prds docs acc =
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs = groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = map (inPeriode f docs') pds periods = parMap rpar (inPeriode f docs') pds
periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " in trace ("\n" <> "-- | Group "
<> show(length docs) <> show(length docs)
<> " docs by " <> " docs by "
<> show(length pds) <> " periods" <> "\n" :: Text) <> show(length pds) <> " periods" <> "\n" :: Text)
$ fromList $ zip pds periods' $ fromList $ zip pds periods
where where
-------------------------------------- --------------------------------------
inPeriode :: Ord b => (t -> b) -> [[t]] -> (b, b) -> [t] inPeriode :: Ord b => (t -> b) -> [[t]] -> (b, b) -> [t]
...@@ -425,13 +424,12 @@ groupDocsByPeriod' f pds docs = ...@@ -425,13 +424,12 @@ groupDocsByPeriod' f pds docs =
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods" groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds let periods = parMap rpar (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
in trace ("\n" <> "-- | Group " in trace ("\n" <> "-- | Group "
<> show(length es) <> " docs by " <> show(length es) <> " docs by "
<> show(length pds) <> " periods" <> "\n" :: Text) <> show(length pds) <> " periods" <> "\n" :: Text)
$ fromList $ zip pds periods' $ fromList $ zip pds periods
where where
-------------------------------------- --------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t] inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
...@@ -446,7 +444,7 @@ docsToTermFreq docs fdt = ...@@ -446,7 +444,7 @@ docsToTermFreq docs fdt =
freqs = map (/(nbDocs)) freqs = map (/(nbDocs))
$ fromList $ fromList
$ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) docs $ group $ D.sort $ concat $ map (\d -> D.nub $ ngramsToIdx (text d) fdt) docs
sumFreqs = sum $ elems freqs sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs in map (/sumFreqs) freqs
...@@ -454,28 +452,28 @@ docsToTermFreq docs fdt = ...@@ -454,28 +452,28 @@ docsToTermFreq docs fdt =
docsToTermCount :: [Document] -> Vector Ngrams -> Map Int Double docsToTermCount :: [Document] -> Vector Ngrams -> Map Int Double
docsToTermCount docs roots = fromList docsToTermCount docs roots = fromList
$ map (\lst -> (head' "docsToTermCount" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToTermCount" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) roots) docs $ group $ D.sort $ concat $ map (\d -> D.nub $ ngramsToIdx (text d) roots) docs
docsToTimeTermCount :: [Document] -> Vector Ngrams -> (Map Date (Map Int Double)) docsToTimeTermCount :: [Document] -> Vector Ngrams -> (Map Date (Map Int Double))
docsToTimeTermCount docs roots = docsToTimeTermCount docs roots =
let docs' = Map.map (\l -> fromList $ map (\lst -> (head' "docsToTimeTermCount" lst, fromIntegral $ length lst)) let docs' = Map.map (\l -> fromList $ map (\lst -> (head' "docsToTimeTermCount" lst, fromIntegral $ length lst))
$ group $ sort l) $ group $ D.sort l)
$ fromListWith (++) $ fromListWith (++)
$ map (\d -> (date d, nub $ ngramsToIdx (text d) roots)) docs $ map (\d -> (date d, D.nub $ ngramsToIdx (text d) roots)) docs
time = fromList $ map (\t -> (t,Map.empty)) $ toTimeScale (keys docs') 1 time = fromList $ map (\t -> (t,Map.empty)) $ toTimeScale (keys docs') 1
in unionWith (Map.union) time docs' in unionWith (Map.union) time docs'
docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
docsToLastTermFreq n docs fdt = docsToLastTermFreq n docs fdt =
let last = take n $ reverse $ sort $ map date docs let last = take n $ reverse $ D.sort $ map date docs
nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs nbDocs = fromIntegral $ length $ filter (\d -> elem (date d) last) docs
freqs = map (/(nbDocs)) freqs = map (/(nbDocs))
$ fromList $ fromList
$ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst))
$ group $ sort $ concat $ map (\d -> nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs $ group $ D.sort $ concat $ map (\d -> D.nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs
sumFreqs = sum $ elems freqs sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs in map (/sumFreqs) freqs
...@@ -527,10 +525,10 @@ setDefault conf timeScale nbDocs = defaultConfig ...@@ -527,10 +525,10 @@ setDefault conf timeScale nbDocs = defaultConfig
-- --
initPhylo :: [Document] -> PhyloConfig -> Phylo initPhylo :: [Document] -> PhyloConfig -> Phylo
initPhylo docs conf = initPhylo docs conf =
let roots = Vector.fromList $ nub $ concat $ map text docs let roots = Vector.fromList $ D.nubWith T.unpack $ concat $ map text docs
timeScale = head' "initPhylo" $ map docTime docs timeScale = head' "initPhylo" $ map docTime docs
foundations = PhyloFoundations roots empty foundations = PhyloFoundations roots empty
docsSources = PhyloSources (Vector.fromList $ nub $ concat $ map sources docs) docsSources = PhyloSources (Vector.fromList $ nubOrd $ concat $ map sources docs)
docsCounts = PhyloCounts (docsToTimeScaleCooc docs (foundations ^. foundations_roots)) docsCounts = PhyloCounts (docsToTimeScaleCooc docs (foundations ^. foundations_roots))
(docsToTimeScaleNb docs) (docsToTimeScaleNb docs)
(docsToTimeTermCount docs (foundations ^. foundations_roots)) (docsToTimeTermCount docs (foundations ^. foundations_roots))
...@@ -540,7 +538,7 @@ initPhylo docs conf = ...@@ -540,7 +538,7 @@ initPhylo docs conf =
params = if (defaultMode conf) params = if (defaultMode conf)
then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) } then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) }
else defaultPhyloParam { _phyloParam_config = conf } else defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale) periods = toPeriods (D.sort $ D.nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale)
in trace ("\n" <> "-- | Init a phylo out of " in trace ("\n" <> "-- | Init a phylo out of "
<> show(length docs) <> " docs \n" :: Text) <> show(length docs) <> " docs \n" :: Text)
$ trace ("\n" <> "-- | lambda " $ trace ("\n" <> "-- | lambda "
......
...@@ -10,7 +10,8 @@ Portability : POSIX ...@@ -10,7 +10,8 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-} {-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE CPP #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.PhyloTools where module Gargantext.Core.Viz.Phylo.PhyloTools where
...@@ -232,6 +233,14 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l) ...@@ -232,6 +233,14 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
then keepFilled f (thr - 1) l then keepFilled f (thr - 1) l
else f thr l else f thr l
-- | General workhorse to use in lieu of /trace/. It decides at compile
-- time whether or not debug logs are enabled.
tracePhylo :: (Print s, IsString s) => s -> a -> a
#if NO_PHYLO_DEBUG_LOGS
tracePhylo _ p = p
#else
tracePhylo msg p = trace msg p
#endif
traceClique :: Map (Date, Date) [Clustering] -> String traceClique :: Map (Date, Date) [Clustering] -> String
traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6] traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
...@@ -252,7 +261,7 @@ traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> " ...@@ -252,7 +261,7 @@ traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> "
traceFis :: [Char] -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering] traceFis :: [Char] -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n" traceFis msg mFis = tracePhylo ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> traceSupport mFis <> "\n" <> "Support : " <> traceSupport mFis <> "\n"
<> "Nb Ngrams : " <> traceClique mFis <> "\n" <> "Nb Ngrams : " <> traceClique mFis <> "\n"
) mFis ) mFis
...@@ -636,7 +645,7 @@ updateLevel level phylo = phylo { _phylo_level = level } ...@@ -636,7 +645,7 @@ updateLevel level phylo = phylo { _phylo_level = level }
traceToPhylo :: Scale -> Phylo -> Phylo traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo = traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at scale " <> show (lvl) <> " with " tracePhylo ("\n" <> "-- | End of phylo making at scale " <> show (lvl) <> " with "
<> show (length $ getGroupsFromScale lvl phylo) <> " groups and " <> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo)
<> " branches" <> "\n" :: Text <> " branches" <> "\n" :: Text
...@@ -702,7 +711,7 @@ toRelatedComponents nodes edges = ...@@ -702,7 +711,7 @@ toRelatedComponents nodes edges =
traceSynchronyEnd :: Phylo -> Phylo traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo = traceSynchronyEnd phylo =
trace ( "-- | End synchronic clustering at scale " <> show (getLastLevel phylo) tracePhylo ( "-- | End synchronic clustering at scale " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo)
<> " branches" <> "\n" :: Text <> " branches" <> "\n" :: Text
...@@ -710,7 +719,7 @@ traceSynchronyEnd phylo = ...@@ -710,7 +719,7 @@ traceSynchronyEnd phylo =
traceSynchronyStart :: Phylo -> Phylo traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo = traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start synchronic clustering at scale " <> show (getLastLevel phylo) tracePhylo ( "\n" <> "-- | Start synchronic clustering at scale " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo)
<> " branches" <> "\n" :: Text <> " branches" <> "\n" :: Text
...@@ -754,7 +763,7 @@ ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgram ...@@ -754,7 +763,7 @@ ngramsInBranches branches = nub $ foldl (\acc g -> acc ++ (g ^. phylo_groupNgram
traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]] traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
traceMatchSuccess thr qua qua' nextBranches = traceMatchSuccess thr qua qua' nextBranches =
trace ( "\n" <> "-- local branches : " tracePhylo ( "\n" <> "-- local branches : "
<> (Text.pack $ init $ show ((init . init . snd) <> (Text.pack $ init $ show ((init . init . snd)
$ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId)) $ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length nextBranches) <> ")]" <> ",(1.." <> show (length nextBranches) <> ")]"
...@@ -767,7 +776,7 @@ traceMatchSuccess thr qua qua' nextBranches = ...@@ -767,7 +776,7 @@ traceMatchSuccess thr qua qua' nextBranches =
traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]] traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchFailure thr qua qua' branches = traceMatchFailure thr qua qua' branches =
trace ( "\n" <> "-- local branches : " tracePhylo ( "\n" <> "-- local branches : "
<> (Text.pack $ init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId)) <> (Text.pack $ init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]" <> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n" <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
...@@ -778,7 +787,7 @@ traceMatchFailure thr qua qua' branches = ...@@ -778,7 +787,7 @@ traceMatchFailure thr qua qua' branches =
traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]] traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchNoSplit branches = traceMatchNoSplit branches =
trace ( "\n" <> "-- local branches : " tracePhylo ( "\n" <> "-- local branches : "
<> (Text.pack $ init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId)) <> (Text.pack $ init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]" <> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n" <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
...@@ -788,7 +797,7 @@ traceMatchNoSplit branches = ...@@ -788,7 +797,7 @@ traceMatchNoSplit branches =
traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]] traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchLimit branches = traceMatchLimit branches =
trace ( "\n" <> "-- local branches : " tracePhylo ( "\n" <> "-- local branches : "
<> (Text.pack $ init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId)) <> (Text.pack $ init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]" <> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n" <> " | " <> show (length $ concat branches) <> " groups" <> "\n"
...@@ -798,15 +807,15 @@ traceMatchLimit branches = ...@@ -798,15 +807,15 @@ traceMatchLimit branches =
traceMatchEnd :: [PhyloGroup] -> [PhyloGroup] traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
traceMatchEnd groups = traceMatchEnd groups =
trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) tracePhylo ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
<> " branches and " <> show (length groups) <> " groups" <> "\n" :: Text) groups <> " branches and " <> show (length groups) <> " groups" <> "\n" :: Text) groups
traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup] traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching groups = traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n" :: Text ) groups tracePhylo ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n" :: Text ) groups
traceGroupsProxi :: [Double] -> [Double] traceGroupsProxi :: [Double] -> [Double]
traceGroupsProxi l = traceGroupsProxi l =
trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups Similarity" <> "\n" :: Text ) l tracePhylo ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups Similarity" <> "\n" :: Text ) l
...@@ -662,7 +662,7 @@ seaLevelRise fdt similarity lambda minBranch frequency ladder rise frame periods ...@@ -662,7 +662,7 @@ seaLevelRise fdt similarity lambda minBranch frequency ladder rise frame periods
else else
-- start breaking up all the possible branches for the current similarity threshold -- start breaking up all the possible branches for the current similarity threshold
let thr = List.head ladder let thr = List.head ladder
branches' = trace ( "threshold = " <> (T.pack $ printf "%.3f" thr) branches' = tracePhylo ( "threshold = " <> (T.pack $ printf "%.3f" thr)
<> " F(λ) = " <> (T.pack $ printf "%.5f" (toPhyloQuality fdt lambda frequency (map fst branches))) <> " F(λ) = " <> (T.pack $ printf "%.5f" (toPhyloQuality fdt lambda frequency (map fst branches)))
<> " ξ = " <> (T.pack $ printf "%.5f" (globalAccuracy frequency (map fst branches))) <> " ξ = " <> (T.pack $ printf "%.5f" (globalAccuracy frequency (map fst branches)))
<> " ρ = " <> (T.pack $ printf "%.5f" (globalRecall frequency (map fst branches))) <> " ρ = " <> (T.pack $ printf "%.5f" (globalRecall frequency (map fst branches)))
......
...@@ -15,8 +15,6 @@ Portability : POSIX ...@@ -15,8 +15,6 @@ Portability : POSIX
-- TODO-EVENTS: InsertedNodes -- TODO-EVENTS: InsertedNodes
-} -}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ConstrainedClassMethods #-} {-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE InstanceSigs #-} {-# LANGUAGE InstanceSigs #-}
...@@ -41,7 +39,6 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) ...@@ -41,7 +39,6 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
, addDocumentsToHyperCorpus , addDocumentsToHyperCorpus
, reIndexWith , reIndexWith
, docNgrams
, getOrMkRoot , getOrMkRoot
, getOrMk_RootWithCorpus , getOrMk_RootWithCorpus
...@@ -50,53 +47,44 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) ...@@ -50,53 +47,44 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
, allDataOrigins , allDataOrigins
, do_api , do_api
, indexAllDocumentsWithPosTag
) )
where where
import Conduit import Conduit
import Control.Lens hiding (elements, Indexed) import Control.Lens hiding (elements, Indexed)
import Data.Aeson.TH (deriveJSON) import Data.Bifunctor qualified as B
import Data.Conduit qualified as C import Data.Conduit qualified as C
import Data.Conduit.Internal (zipSources) import Data.Conduit.Internal (zipSources)
import Data.Conduit.List qualified as CL import Data.Conduit.List qualified as CL
import Data.Conduit.List qualified as CList import Data.Conduit.List qualified as CList
import Data.HashMap.Strict qualified as HashMap import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List import Data.List qualified as List
import Data.Map.Strict (lookup)
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Proxy import Data.Proxy
import Data.Set qualified as Set import Data.Set qualified as Set
import Data.Swagger
import Data.Text qualified as T import Data.Text qualified as T
import EPO.API.Client.Types qualified as EPO import EPO.API.Client.Types qualified as EPO
import Gargantext.API.Ngrams.Tools (getTermsWith) import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types qualified as NT import Gargantext.Core (Lang(..), NLPServerConfig)
import Gargantext.Core (Lang(..), PosTagAlgo(..), NLPServerConfig)
import Gargantext.Core (withDefaultLanguage) import Gargantext.Core (withDefaultLanguage)
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire) import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.Flow.Types
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet) import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NodeStory (HasNodeStory) import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.Text
import Gargantext.Core.Text.Corpus.API qualified as API import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat, FileType, splitOn) import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat, FileType)
import Gargantext.Core.Text.List (buildNgramsLists) import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..)) import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..)) import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Text.Terms.WithList (MatchedText, buildPatternsWith, termsInText) import Gargantext.Core.Types (HasValidationError, TermsCount)
import Gargantext.Core.Types (HasValidationError, POS(NP), TermsCount)
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Core.Types.Query (Limit) import Gargantext.Core.Types.Query (Limit)
import Gargantext.Core.Utils (addTuples) import Gargantext.Database.Action.Flow.Extract () -- ExtractNgramsT instances
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap
import Gargantext.Database.Action.Flow.List import Gargantext.Database.Action.Flow.List
import Gargantext.Database.Action.Flow.Utils (docNgrams, documentIdWithNgrams, insertDocNgrams, insertDocs, mapNodeIdNgrams)
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, DocumentIdWithNgrams(..))
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore) import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Action.Search (searchDocInDatabase) import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName) import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
...@@ -112,42 +100,23 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..)) ...@@ -112,42 +100,23 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId) import Gargantext.Database.Query.Table.NodeNgrams (listInsertDb , getCgramsId)
import Gargantext.Database.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus) import Gargantext.Database.Query.Tree.Root (getOrMkRoot, getOrMk_RootWithCorpus)
import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.Node (NodePoly(..), node_id)
import Gargantext.Database.Schema.Node (node_hyperdata) import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Database.Types
import Gargantext.Prelude hiding (to) import Gargantext.Prelude hiding (to)
import Gargantext.Prelude.Config (GargConfig(..)) import Gargantext.Prelude.Config (GargConfig(..))
import Gargantext.Prelude.Crypto.Hash (Hash)
import Gargantext.System.Logging import Gargantext.System.Logging
import Gargantext.Utils.Jobs (JobHandle, MonadJobStatus(..)) import Gargantext.Utils.Jobs (JobHandle, MonadJobStatus(..))
import PUBMED.Types qualified as PUBMED import PUBMED.Types qualified as PUBMED
import qualified Data.Bifunctor as B
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Imports for upgrade function -- Imports for upgrade function
import Gargantext.Database.Query.Tree.Root (getRootId) import Gargantext.Database.Query.Tree (HasTreeError)
import Gargantext.Database.Query.Tree (findNodesId, HasTreeError)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO use internal with API name (could be old data)
data DataOrigin = InternalOrigin { _do_api :: API.ExternalAPIs }
| ExternalOrigin { _do_api :: API.ExternalAPIs }
-- TODO Web
deriving (Generic, Eq)
makeLenses ''DataOrigin
deriveJSON (unPrefix "_do_") ''DataOrigin
instance ToSchema DataOrigin where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_")
allDataOrigins :: [DataOrigin] allDataOrigins :: [DataOrigin]
allDataOrigins = map InternalOrigin API.externalAPIs <> map ExternalOrigin API.externalAPIs allDataOrigins = map InternalOrigin API.externalAPIs <> map ExternalOrigin API.externalAPIs
--------------- ---------------
data DataText = DataOld ![NodeId]
| DataNew !(Maybe Integer, ConduitT () HyperdataDocument IO ())
--- | DataNew ![[HyperdataDocument]]
-- Show instance is not possible because of IO -- Show instance is not possible because of IO
printDataText :: DataText -> IO () printDataText :: DataText -> IO ()
...@@ -311,9 +280,9 @@ flow c u cn la mfslw (count, docsC) jobHandle = do ...@@ -311,9 +280,9 @@ flow c u cn la mfslw (count, docsC) jobHandle = do
-- TODO if public insertMasterDocs else insertUserDocs -- TODO if public insertMasterDocs else insertUserDocs
nlpServer <- view $ nlpServerGet (_tt_lang la) nlpServer <- view $ nlpServerGet (_tt_lang la)
runConduit $ zipSources (yieldMany ([1..] :: [Int])) docsC runConduit $ zipSources (yieldMany ([1..] :: [Int])) docsC
.| CList.chunksOf 100 .| CList.chunksOf 100
.| mapM_C (addDocumentsWithProgress nlpServer userCorpusId) .| mapM_C (addDocumentsWithProgress nlpServer userCorpusId)
.| sinkNull .| sinkNull
$(logLocM) DEBUG "Calling flowCorpusUser" $(logLocM) DEBUG "Calling flowCorpusUser"
flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw flowCorpusUser (la ^. tt_lang) u userCorpusId listId c mfslw
...@@ -384,26 +353,8 @@ flowCorpusUser :: ( HasNodeError err ...@@ -384,26 +353,8 @@ flowCorpusUser :: ( HasNodeError err
-> Maybe FlowSocialListWith -> Maybe FlowSocialListWith
-> m CorpusId -> m CorpusId
flowCorpusUser l user userCorpusId listId ctype mfslw = do flowCorpusUser l user userCorpusId listId ctype mfslw = do
server <- view (nlpServerGet l) buildSocialList l user userCorpusId listId ctype mfslw
-- User List Flow
(masterUserId, _masterRootId, masterCorpusId)
<- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
--let gp = (GroupParams l 2 3 (StopSize 3))
-- Here the PosTagAlgo should be chosen according to the Lang
_ <- case mfslw of
(Just (NoList _)) -> do
-- printDebug "Do not build list" mfslw
pure ()
_ -> do
ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw
$ GroupWithPosTag l server HashMap.empty
-- printDebug "flowCorpusUser:ngs" ngs
_userListId <- flowList_DbRepo listId ngs
_mastListId <- getOrMkList masterCorpusId masterUserId
pure ()
-- _ <- insertOccsUpdates userCorpusId mastListId -- _ <- insertOccsUpdates userCorpusId mastListId
--_ <- mkPhylo userCorpusId userId --_ <- mkPhylo userCorpusId userId
-- Annuaire Flow -- Annuaire Flow
...@@ -415,6 +366,39 @@ flowCorpusUser l user userCorpusId listId ctype mfslw = do ...@@ -415,6 +366,39 @@ flowCorpusUser l user userCorpusId listId ctype mfslw = do
pure userCorpusId pure userCorpusId
buildSocialList :: ( HasNodeError err
, HasValidationError err
, HasNLPServer env
, HasTreeError err
, HasNodeStory env err m
, MkCorpus c
)
=> Lang
-> User
-> CorpusId
-> ListId
-> Maybe c
-> Maybe FlowSocialListWith
-> m ()
buildSocialList _l _user _userCorpusId _listId _ctype (Just (NoList _)) = pure ()
buildSocialList l user userCorpusId listId ctype mfslw = do
-- User List Flow
(masterUserId, _masterRootId, masterCorpusId)
<- getOrMk_RootWithCorpus (UserName userMaster) (Left "") ctype
nlpServer <- view (nlpServerGet l)
--let gp = (GroupParams l 2 3 (StopSize 3))
-- Here the PosTagAlgo should be chosen according to the Lang
ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw
$ GroupWithPosTag l nlpServer HashMap.empty
-- printDebug "flowCorpusUser:ngs" ngs
_userListId <- flowList_DbRepo listId ngs
_mastListId <- getOrMkList masterCorpusId masterUserId
pure ()
insertMasterDocs :: ( DbCmd' env err m insertMasterDocs :: ( DbCmd' env err m
, HasNodeError err , HasNodeError err
, FlowCorpus a , FlowCorpus a
...@@ -483,170 +467,6 @@ saveDocNgramsWith lId mapNgramsDocs' = do ...@@ -483,170 +467,6 @@ saveDocNgramsWith lId mapNgramsDocs' = do
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO Type NodeDocumentUnicised
insertDocs :: ( DbCmd' env err m
-- , FlowCorpus a
, FlowInsertDB a
, HasNodeError err
)
=> UserId
-> CorpusId
-> [a]
-> m ([ContextId], [Indexed ContextId a])
insertDocs uId cId hs = do
let docs = map addUniqId hs
newIds <- insertDb uId Nothing docs
-- printDebug "newIds" newIds
let
newIds' = map (nodeId2ContextId . reId) newIds
documentsWithId = mergeData (toInserted newIds) (Map.fromList $ map viewUniqId' docs)
_ <- Doc.add cId newIds'
pure (newIds', map (B.first nodeId2ContextId) documentsWithId)
------------------------------------------------------------------------
viewUniqId' :: UniqId a
=> a
-> (Hash, a)
viewUniqId' d = maybe err (\h -> (h,d)) (view uniqId d)
where
err = panicTrace "[ERROR] Database.Flow.toInsert"
toInserted :: [ReturnId]
-> Map Hash ReturnId
toInserted =
Map.fromList . map (\r -> (reUniqId r, r) )
. filter (\r -> reInserted r == True)
mergeData :: Map Hash ReturnId
-> Map Hash a
-> [Indexed NodeId a]
mergeData rs = catMaybes . map toDocumentWithId . Map.toList
where
toDocumentWithId (sha,hpd) =
Indexed <$> fmap reId (lookup sha rs)
<*> Just hpd
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
documentIdWithNgrams :: HasNodeError err
=> (a
-> DBCmd err (HashMap.HashMap b (Map NgramsType Int, TermsCount)))
-> [Indexed NodeId a]
-> DBCmd err [DocumentIdWithNgrams a b]
documentIdWithNgrams f = traverse toDocumentIdWithNgrams
where
toDocumentIdWithNgrams d = do
e <- f $ _unIndex d
pure $ DocumentIdWithNgrams d e
-- | TODO check optimization
mapNodeIdNgrams :: (Ord b, Hashable b)
=> [DocumentIdWithNgrams a b]
-> HashMap.HashMap b
(Map NgramsType
(Map NodeId (Int, TermsCount))
)
mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith addTuples)) . fmap f
where
-- | NOTE We are somehow multiplying 'TermsCount' here: If the
-- same ngrams term has different ngrams types, the 'TermsCount'
-- for it (which is the number of times the terms appears in a
-- document) is copied over to all its types.
f :: DocumentIdWithNgrams a b
-> HashMap.HashMap b (Map NgramsType (Map NodeId (Int, TermsCount)))
f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\i -> Map.singleton nId (i, cnt)) ngramsTypeMap) $ documentNgrams d
where
nId = _index $ documentWithId d
------------------------------------------------------------------------
instance ExtractNgramsT HyperdataContact
where
extractNgramsT _ncs l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc
where
extract :: TermType Lang -> HyperdataContact
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
extract _l hc' = do
let authors = map text2ngrams
$ maybe ["Nothing"] (\a -> [a])
$ view (hc_who . _Just . cw_lastName) hc'
pure $ HashMap.fromList $ [(SimpleNgrams a', (Map.singleton Authors 1, 1)) | a' <- authors ]
instance ExtractNgramsT HyperdataDocument
where
extractNgramsT :: NLPServerConfig
-> TermType Lang
-> HyperdataDocument
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
extractNgramsT ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd
where
extractNgramsT' :: HyperdataDocument
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
extractNgramsT' doc = do
let source = text2ngrams
$ maybe "Nothing" identity
$ _hd_source doc
institutes = map text2ngrams
$ maybe ["Nothing"] (splitOn Institutes (doc^. hd_bdd))
$ _hd_institutes doc
authors = map text2ngrams
$ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd))
$ _hd_authors doc
termsWithCounts' <- map (\(t, cnt) -> (enrichedTerms (lang ^. tt_lang) CoreNLP NP t, cnt))
<$> concat
<$> liftBase (extractTerms ncs lang $ hasText doc)
pure $ HashMap.fromList
$ [(SimpleNgrams source, (Map.singleton Sources 1, 1)) ]
<> [(SimpleNgrams i', (Map.singleton Institutes 1, 1)) | i' <- institutes ]
<> [(SimpleNgrams a', (Map.singleton Authors 1, 1)) | a' <- authors ]
<> [(EnrichedNgrams t', (Map.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ]
instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
where
extractNgramsT ncs l (Node { _node_hyperdata = h }) = extractNgramsT ncs l h
instance HasText a => HasText (Node a)
where
hasText (Node { _node_hyperdata = h }) = hasText h
-- | TODO putelsewhere
-- | Upgrade function
-- Suppose all documents are English (this is the case actually)
indexAllDocumentsWithPosTag :: ( HasNodeStory env err m
, HasNLPServer env )
=> m ()
indexAllDocumentsWithPosTag = do
rootId <- getRootId (UserName userMaster)
corpusIds <- findNodesId rootId [NodeCorpus]
docs <- List.concat <$> mapM getDocumentsWithParentId corpusIds
_ <- mapM extractInsert (splitEvery 1000 docs)
pure ()
extractInsert :: ( HasNodeStory env err m
, HasNLPServer env )
=> [Node HyperdataDocument] -> m ()
extractInsert docs = do
let documentsWithId = map (\doc -> Indexed (doc ^. node_id) doc) docs
let lang = EN
ncs <- view $ nlpServerGet lang
mapNgramsDocs' <- mapNodeIdNgrams
<$> documentIdWithNgrams
(extractNgramsT ncs $ withLang (Multi lang) documentsWithId)
documentsWithId
_ <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
pure ()
...@@ -680,22 +500,3 @@ reIndexWith cId lId nt lts = do ...@@ -680,22 +500,3 @@ reIndexWith cId lId nt lts = do
-- Saving the indexation in database -- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc _ <- mapM (saveDocNgramsWith lId) ngramsByDoc
pure () pure ()
docNgrams :: Lang
-> NgramsType
-> [NT.NgramsTerm]
-> Gargantext.Database.Admin.Types.Node.Context HyperdataDocument
-> [((MatchedText, TermsCount),
Map NgramsType (Map NodeId Int))]
docNgrams lang nt ts doc =
List.zip
(termsInText lang (buildPatternsWith lang ts)
$ T.unlines $ catMaybes
[ doc ^. context_hyperdata . hd_title
, doc ^. context_hyperdata . hd_abstract
]
)
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
{-|
Module : Gargantext.Database.Flow.Extract
Description : Database Flow
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Database.Action.Flow.Extract
where
import Control.Lens ((^.), _Just, view)
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as DM
import Gargantext.Core (Lang, NLPServerConfig, PosTagAlgo(CoreNLP))
import Gargantext.Core.Text (HasText(..))
import Gargantext.Core.Text.Corpus.Parsers (splitOn)
import Gargantext.Core.Text.Terms (ExtractNgramsT, ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractNgramsT, extractTerms, tt_lang)
import Gargantext.Core.Types (POS(NP), TermsCount)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataContact, HyperdataDocument, cw_lastName, hc_who, hd_authors, hd_bdd, hd_institutes, hd_source)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude
------------------------------------------------------------------------
instance ExtractNgramsT HyperdataContact
where
extractNgramsT _ncs l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc
where
extract :: TermType Lang -> HyperdataContact
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
extract _l hc' = do
let authors = map text2ngrams
$ maybe ["Nothing"] (\a -> [a])
$ view (hc_who . _Just . cw_lastName) hc'
pure $ HashMap.fromList $ [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
instance ExtractNgramsT HyperdataDocument
where
extractNgramsT :: NLPServerConfig
-> TermType Lang
-> HyperdataDocument
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
extractNgramsT ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd
where
extractNgramsT' :: HyperdataDocument
-> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
extractNgramsT' doc = do
let source = text2ngrams
$ maybe "Nothing" identity
$ doc ^. hd_source
institutes = map text2ngrams
$ maybe ["Nothing"] (splitOn Institutes (doc^. hd_bdd))
$ doc ^. hd_institutes
authors = map text2ngrams
$ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd))
$ doc ^. hd_authors
termsWithCounts' <- map (\(t, cnt) -> (enrichedTerms (lang ^. tt_lang) CoreNLP NP t, cnt))
<$> concat
<$> liftBase (extractTerms ncs lang $ hasText doc)
pure $ HashMap.fromList
$ [(SimpleNgrams source, (DM.singleton Sources 1, 1)) ]
<> [(SimpleNgrams i', (DM.singleton Institutes 1, 1)) | i' <- institutes ]
<> [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]
<> [(EnrichedNgrams t', (DM.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ]
instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
where
extractNgramsT ncs l (Node { _node_hyperdata = h }) = extractNgramsT ncs l h
instance HasText a => HasText (Node a)
where
hasText (Node { _node_hyperdata = h }) = hasText h
-- Apparently unused functions
-- extractInsert :: ( HasNodeStory env err m
-- , HasNLPServer env )
-- => [Node HyperdataDocument] -> m ()
-- extractInsert docs = do
-- let documentsWithId = map (\doc -> Indexed (doc ^. node_id) doc) docs
-- let lang = EN
-- ncs <- view $ nlpServerGet lang
-- mapNgramsDocs' <- mapNodeIdNgrams
-- <$> documentIdWithNgrams
-- (extractNgramsT ncs $ withLang (Multi lang) documentsWithId)
-- documentsWithId
-- _ <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
-- pure ()
...@@ -17,16 +17,16 @@ Portability : POSIX ...@@ -17,16 +17,16 @@ Portability : POSIX
module Gargantext.Database.Action.Flow.List module Gargantext.Database.Action.Flow.List
where where
import Control.Lens ((^.), (+~), (%~), at, (.~), _Just) import Control.Lens ((^.), (+~), (%~), at, (.~))
import Control.Monad.Reader import Control.Monad.Reader
import Data.List qualified as List import Data.List qualified as List
import Data.Map.Strict (toList) import Data.Map.Strict (toList)
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM import Data.Map.Strict.Patch qualified as PM
import Gargantext.API.Ngrams (saveNodeStory) import Gargantext.API.Ngrams (saveNodeStory)
import Gargantext.API.Ngrams.Tools (getNodeStoryVar) import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory (HasNodeStory, a_history, a_state, a_version)
import Gargantext.Core.Types (HasValidationError(..), assertValid) import Gargantext.Core.Types (HasValidationError(..), assertValid)
import Gargantext.Core.Types.Main (ListType(CandidateTerm)) import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
...@@ -34,7 +34,6 @@ import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams ...@@ -34,7 +34,6 @@ import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb,{- getCgramsId -}) import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb,{- getCgramsId -})
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (toList) import Gargantext.Prelude hiding (toList)
import GHC.Conc (readTVar, writeTVar)
-- FLOW LIST -- FLOW LIST
-- 1. select specific terms of the corpus when compared with others langs -- 1. select specific terms of the corpus when compared with others langs
...@@ -201,11 +200,14 @@ putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m ...@@ -201,11 +200,14 @@ putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
-- The modifyMVar_ would test the patch with applicable first. -- The modifyMVar_ would test the patch with applicable first.
-- If valid the rest would be atomic and no merge is required. -- If valid the rest would be atomic and no merge is required.
-} -}
var <- getNodeStoryVar [listId] a <- getNodeStory listId
liftBase $ atomically $ do let a' = a & a_version +~ 1
r <- readTVar var & a_history %~ (p :)
writeTVar var $ & a_state . at ngramsType' .~ Just ns
r & unNodeStory . at listId . _Just . a_version +~ 1 -- liftBase $ atomically $ do
& unNodeStory . at listId . _Just . a_history %~ (p :) -- r <- readTVar var
& unNodeStory . at listId . _Just . a_state . at ngramsType' .~ Just ns -- writeTVar var $
saveNodeStory -- r & unNodeStory . at listId . _Just . a_version +~ 1
-- & unNodeStory . at listId . _Just . a_history %~ (p :)
-- & unNodeStory . at listId . _Just . a_state . at ngramsType' .~ Just ns
saveNodeStory listId a'
...@@ -9,29 +9,40 @@ Portability : POSIX ...@@ -9,29 +9,40 @@ Portability : POSIX
-} -}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstrainedClassMethods #-} {-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE InstanceSigs #-} {-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Action.Flow.Types module Gargantext.Database.Action.Flow.Types
where where
import Conduit (ConduitT)
import Control.Lens (makeLenses)
import Data.Aeson (ToJSON) import Data.Aeson (ToJSON)
import Data.Aeson.TH (deriveJSON)
import Gargantext.Core.Types (HasValidationError) import Data.HashMap.Strict (HashMap)
import Data.Swagger (ToSchema(..), genericDeclareNamedSchema)
import Gargantext.Core.Flow.Types import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Core.Text
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Core.Types (HasValidationError, TermsCount)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Prelude (CmdM) import Gargantext.Database.Prelude (CmdM)
import Gargantext.Database.Query.Table.Node.Document.Insert import Gargantext.Database.Query.Table.Node.Document.Insert
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Tree.Error (HasTreeError) import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Types (Indexed)
import Gargantext.Prelude
import Gargantext.System.Logging import Gargantext.System.Logging
type FlowCmdM env err m = type FlowCmdM env err m =
( CmdM env err m ( CmdM env err m
, HasNodeStory env err m , HasNodeStory env err m
...@@ -56,3 +67,27 @@ type FlowInsertDB a = ( AddUniqId a ...@@ -56,3 +67,27 @@ type FlowInsertDB a = ( AddUniqId a
, UniqParameters a , UniqParameters a
, InsertDb a , InsertDb a
) )
data DocumentIdWithNgrams a b =
DocumentIdWithNgrams
{ documentWithId :: Indexed NodeId a
, documentNgrams :: HashMap b (Map NgramsType Int, TermsCount)
} deriving (Show)
-- TODO use internal with API name (could be old data)
data DataOrigin = InternalOrigin { _do_api :: API.ExternalAPIs }
| ExternalOrigin { _do_api :: API.ExternalAPIs }
-- TODO Web
deriving (Generic, Eq)
makeLenses ''DataOrigin
deriveJSON (unPrefix "_do_") ''DataOrigin
instance ToSchema DataOrigin where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_")
data DataText = DataOld ![NodeId]
| DataNew !(Maybe Integer, ConduitT () HyperdataDocument IO ())
--- | DataNew ![[HyperdataDocument]]
...@@ -9,30 +9,47 @@ Portability : POSIX ...@@ -9,30 +9,47 @@ Portability : POSIX
-} -}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Database.Action.Flow.Utils module Gargantext.Database.Action.Flow.Utils
where ( docNgrams
, documentIdWithNgrams
, insertDocNgrams
, insertDocs
, mapNodeIdNgrams )
where
import Control.Lens ((^.)) import Control.Lens ((^.))
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict qualified as DM import Data.Map.Strict qualified as DM
import Data.Text qualified as T
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core (Lang, toDBid)
import Gargantext.Core.Flow.Types (UniqId, uniqId)
import Gargantext.Core.Text.Terms.WithList (MatchedText, buildPatternsWith, termsInText)
import Gargantext.Core.Types (TermsCount) import Gargantext.Core.Types (TermsCount)
import Gargantext.Core.Utils (addTuples)
import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap
import Gargantext.Database.Action.Flow.Types (DocumentIdWithNgrams(..), FlowInsertDB)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument, hd_abstract, hd_title)
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (DBCmd) import Gargantext.Database.Prelude (DBCmd, DbCmd')
import Gargantext.Database.Query.Table.ContextNodeNgrams import Gargantext.Database.Query.Table.ContextNodeNgrams
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Schema.Context (context_hyperdata, context_id)
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Types import Gargantext.Database.Types
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core (toDBid) import Gargantext.Prelude.Crypto.Hash (Hash)
data DocumentIdWithNgrams a b =
DocumentIdWithNgrams
{ documentWithId :: Indexed NodeId a
, documentNgrams :: HashMap b (Map NgramsType Int, TermsCount)
} deriving (Show)
insertDocNgrams :: ListId insertDocNgrams :: ListId
-> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId (Int, TermsCount))) -> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId (Int, TermsCount)))
-> DBCmd err Int -> DBCmd err Int
...@@ -52,3 +69,122 @@ insertDocNgrams lId m = do ...@@ -52,3 +69,122 @@ insertDocNgrams lId m = do
-- [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})] -- [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})]
-- {Ngrams: {NgramsType: {NodeId: (Int, TermsCount)}}} -- {Ngrams: {NgramsType: {NodeId: (Int, TermsCount)}}}
docNgrams :: Lang
-> NgramsType
-> [NT.NgramsTerm]
-> Gargantext.Database.Admin.Types.Node.Context HyperdataDocument
-> [((MatchedText, TermsCount),
Map NgramsType (Map NodeId Int))]
docNgrams lang nt ts doc =
List.zip
(termsInText lang (buildPatternsWith lang ts)
$ T.unlines $ catMaybes
[ doc ^. context_hyperdata . hd_title
, doc ^. context_hyperdata . hd_abstract
]
)
(List.cycle [DM.fromList $ [(nt, DM.singleton (doc ^. context_id) 1 )]])
documentIdWithNgrams :: HasNodeError err
=> (a
-> DBCmd err (HashMap.HashMap b (Map NgramsType Int, TermsCount)))
-> [Indexed NodeId a]
-> DBCmd err [DocumentIdWithNgrams a b]
documentIdWithNgrams f = traverse toDocumentIdWithNgrams
where
toDocumentIdWithNgrams d = do
e <- f $ _unIndex d
pure $ DocumentIdWithNgrams d e
-- | TODO check optimization
mapNodeIdNgrams :: (Ord b, Hashable b)
=> [DocumentIdWithNgrams a b]
-> HashMap.HashMap b
(Map NgramsType
(Map NodeId (Int, TermsCount))
)
mapNodeIdNgrams = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f
where
-- | NOTE We are somehow multiplying 'TermsCount' here: If the
-- same ngrams term has different ngrams types, the 'TermsCount'
-- for it (which is the number of times the terms appears in a
-- document) is copied over to all its types.
f :: DocumentIdWithNgrams a b
-> HashMap.HashMap b (Map NgramsType (Map NodeId (Int, TermsCount)))
f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\i -> DM.singleton nId (i, cnt)) ngramsTypeMap) $ documentNgrams d
where
nId = _index $ documentWithId d
-- TODO Type NodeDocumentUnicised
insertDocs :: ( DbCmd' env err m
-- , FlowCorpus a
, FlowInsertDB a
, HasNodeError err
)
=> UserId
-> CorpusId
-> [a]
-> m ([ContextId], [Indexed ContextId a])
insertDocs uId cId hs = do
let docs = map addUniqId hs
newIds <- insertDb uId Nothing docs
-- printDebug "newIds" newIds
let
newIds' = map (nodeId2ContextId . reId) newIds
documentsWithId = mergeData (toInserted newIds) (DM.fromList $ map viewUniqId' docs)
_ <- Doc.add cId newIds'
pure (newIds', map (first nodeId2ContextId) documentsWithId)
------------------------------------------------------------------------
viewUniqId' :: UniqId a
=> a
-> (Hash, a)
viewUniqId' d = maybe err (\h -> (h,d)) (d ^. uniqId)
where
err = panicTrace "[ERROR] Database.Flow.toInsert"
mergeData :: Map Hash ReturnId
-> Map Hash a
-> [Indexed NodeId a]
mergeData rs = catMaybes . map toDocumentWithId . DM.toList
where
toDocumentWithId (sha,hpd) =
Indexed <$> fmap reId (DM.lookup sha rs)
<*> Just hpd
toInserted :: [ReturnId]
-> Map Hash ReturnId
toInserted =
DM.fromList . map (\r -> (reUniqId r, r) )
. filter (\r -> reInserted r == True)
-- Apparently unused functions
-- | TODO putelsewhere
-- | Upgrade function
-- Suppose all documents are English (this is the case actually)
-- indexAllDocumentsWithPosTag :: ( HasNodeStory env err m
-- , HasNLPServer env )
-- => m ()
-- indexAllDocumentsWithPosTag = do
-- rootId <- getRootId (UserName userMaster)
-- corpusIds <- findNodesId rootId [NodeCorpus]
-- docs <- List.concat <$> mapM getDocumentsWithParentId corpusIds
-- _ <- mapM extractInsert (splitEvery 1000 docs)
-- pure ()
...@@ -30,7 +30,7 @@ import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) ...@@ -30,7 +30,7 @@ import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo) import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo)
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..)) import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
import Gargantext.Core (HasDBid(toDBid)) import Gargantext.Core (HasDBid(toDBid))
import Gargantext.Core.NodeStory hiding (runPGSQuery) import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-}) import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import Gargantext.Core.Types (ListType(..), NodeType(..), ContextId, contextId2NodeId) import Gargantext.Core.Types (ListType(..), NodeType(..), ContextId, contextId2NodeId)
import Gargantext.Core.Types.Query (Limit(..)) import Gargantext.Core.Types.Query (Limit(..))
...@@ -74,7 +74,8 @@ getNgramsCooc cId lId tabType maybeLimit = do ...@@ -74,7 +74,8 @@ getNgramsCooc cId lId tabType maybeLimit = do
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
updateNgramsOccurrences :: (HasNodeStory env err m) updateNgramsOccurrences :: (HasNodeStory env err m)
=> CorpusId -> ListId => CorpusId
-> ListId
-> m () -> m ()
updateNgramsOccurrences cId lId = do updateNgramsOccurrences cId lId = do
_ <- mapM (updateNgramsOccurrences' cId lId Nothing) [Terms, Sources, Authors, Institutes] _ <- mapM (updateNgramsOccurrences' cId lId Nothing) [Terms, Sources, Authors, Institutes]
......
...@@ -95,7 +95,7 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t) ...@@ -95,7 +95,7 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
queryDocInDatabase :: ParentId -> Text -> O.Select (Column SqlInt4, Column SqlJsonb) queryDocInDatabase :: ParentId -> Text -> O.Select (Column SqlInt4, Column SqlJsonb)
queryDocInDatabase _p q = proc () -> do queryDocInDatabase _p q = proc () -> do
row <- queryNodeSearchTable -< () row <- queryNodeSearchTable -< ()
restrict -< (_ns_search row) @@ (sqlToTSQuery (unpack q)) restrict -< (_ns_search row) @@ (sqlPlainToTSQuery (unpack q))
restrict -< (_ns_typename row) .== (sqlInt4 $ toDBid NodeDocument) restrict -< (_ns_typename row) .== (sqlInt4 $ toDBid NodeDocument)
returnA -< (_ns_id row, _ns_hyperdata row) returnA -< (_ns_id row, _ns_hyperdata row)
......
...@@ -71,10 +71,6 @@ ...@@ -71,10 +71,6 @@
git: "https://github.com/adinapoli/duckling.git" git: "https://github.com/adinapoli/duckling.git"
subdirs: subdirs:
- . - .
- commit: e9a29582ac66198dd2c2fdc3f8c8a4b1e6fbe004
git: "https://github.com/adinapoli/haskell-opaleye.git"
subdirs:
- .
- commit: 7533a9ccd3bfe77141745f6b61039a26aaf5c83b - commit: 7533a9ccd3bfe77141745f6b61039a26aaf5c83b
git: "https://github.com/adinapoli/llvm-hs.git" git: "https://github.com/adinapoli/llvm-hs.git"
subdirs: subdirs:
...@@ -119,6 +115,10 @@ ...@@ -119,6 +115,10 @@
git: "https://github.com/delanoe/patches-map" git: "https://github.com/delanoe/patches-map"
subdirs: subdirs:
- . - .
- commit: 6cf1bcfe215143efac17919cfd0abdd60e0f717c
git: "https://github.com/garganscript/haskell-opaleye.git"
subdirs:
- .
- commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4 - commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
git: "https://github.com/robstewart57/rdf4h.git" git: "https://github.com/robstewart57/rdf4h.git"
subdirs: subdirs:
...@@ -147,7 +147,7 @@ ...@@ -147,7 +147,7 @@
git: "https://gitlab.iscpif.fr/gargantext/crawlers/istex.git" git: "https://gitlab.iscpif.fr/gargantext/crawlers/istex.git"
subdirs: subdirs:
- . - .
- commit: 35a95e7e8da655f868d5420aa29e835a813fa3a2 - commit: cd179f6dda15d77a085c0176284c921b7bc50c46
git: "https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git" git: "https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git"
subdirs: subdirs:
- . - .
...@@ -324,6 +324,7 @@ flags: ...@@ -324,6 +324,7 @@ flags:
"build-search-demo": false "build-search-demo": false
gargantext: gargantext:
"disable-db-obfuscation-executable": false "disable-db-obfuscation-executable": false
"no-phylo-debug-logs": false
"test-crypto": false "test-crypto": false
"generic-deriving": "generic-deriving":
"base-4-9": true "base-4-9": true
......
...@@ -68,6 +68,7 @@ tests = sequential $ aroundAll withTestDB $ describe "Database" $ do ...@@ -68,6 +68,7 @@ tests = sequential $ aroundAll withTestDB $ describe "Database" $ do
it "Can perform search by author in documents" corpusSearch02 it "Can perform search by author in documents" corpusSearch02
it "Can perform more complex searches using the boolean API" corpusSearch03 it "Can perform more complex searches using the boolean API" corpusSearch03
it "Can correctly count doc score" corpusScore01 it "Can correctly count doc score" corpusScore01
it "Can perform search with spaces for doc in db" corpusSearchDB01
nodeStoryTests :: Spec nodeStoryTests :: Spec
nodeStoryTests = sequential $ nodeStoryTests = sequential $
......
...@@ -208,3 +208,16 @@ corpusScore01 env = do ...@@ -208,3 +208,16 @@ corpusScore01 env = do
liftIO $ do liftIO $ do
map facetDoc_score results `shouldBe` [Just 0.0, Just 0.0] map facetDoc_score results `shouldBe` [Just 0.0, Just 0.0]
-- | Check that we support search with tsquery
corpusSearchDB01 :: TestEnv -> Assertion
corpusSearchDB01 env = do
flip runReaderT env $ runTestMonad $ do
parentId <- getRootId (UserName userMaster)
[corpus] <- getCorporaWithParentId parentId
results <- searchDocInDatabase (_node_id corpus) ("first second")
liftIO $ do
length results `shouldBe` 0 -- doesn't exist, we just check that proper to_tsquery is called
...@@ -20,9 +20,9 @@ import Data.Map.Strict.Patch qualified as PM ...@@ -20,9 +20,9 @@ import Data.Map.Strict.Patch qualified as PM
import Data.Set qualified as Set import Data.Set qualified as Set
import Database.PostgreSQL.Simple qualified as PSQL import Database.PostgreSQL.Simple qualified as PSQL
import Database.PostgreSQL.Simple.SqlQQ import Database.PostgreSQL.Simple.SqlQQ
import Gargantext.API.Ngrams (commitStatePatch, mSetFromList, setListNgrams, saveNodeStoryImmediate) import Gargantext.API.Ngrams (commitStatePatch, mSetFromList, setListNgrams, saveNodeStory)
import Gargantext.API.Ngrams.Types (MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTerm(..), Versioned(..), mkNgramsTablePatch, nre_children, nre_list, nre_parent, nre_root) import Gargantext.API.Ngrams.Types (MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTerm(..), Versioned(..), mkNgramsTablePatch, nre_children, nre_list, nre_parent, nre_root)
import Gargantext.API.Ngrams.Tools (getNodeStoryVar) import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.Core.NodeStory hiding (runPGSQuery) import Gargantext.Core.NodeStory hiding (runPGSQuery)
import Gargantext.Core.Types.Individu import Gargantext.Core.Types.Individu
import Gargantext.Core.Types (ListType(..), ListId, NodeId, UserId) import Gargantext.Core.Types (ListType(..), ListId, NodeId, UserId)
...@@ -35,13 +35,12 @@ import Gargantext.Database.Query.Tree.Root ...@@ -35,13 +35,12 @@ import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node (NodePoly(..)) import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude import Gargantext.Prelude
import GHC.Conc (TVar, readTVar)
import Test.Database.Types import Test.Database.Types
import Test.Hspec.Expectations import Test.Hspec.Expectations
import Test.Tasty.HUnit import Test.Tasty.HUnit
commonInitialization :: TestMonad ( UserId, NodeId, ListId, TVar NodeListStory ) commonInitialization :: TestMonad ( UserId, NodeId, ListId, ArchiveList )
commonInitialization = do commonInitialization = do
let user = UserName userMaster let user = UserName userMaster
parentId <- getRootId user parentId <- getRootId user
...@@ -52,9 +51,9 @@ commonInitialization = do ...@@ -52,9 +51,9 @@ commonInitialization = do
listId <- getOrMkList corpusId userId listId <- getOrMkList corpusId userId
v <- getNodeStoryVar [listId] a <- getNodeStory listId
pure $ (userId, corpusId, listId, v) pure $ (userId, corpusId, listId, a)
initArchiveList :: ArchiveList initArchiveList :: ArchiveList
...@@ -90,7 +89,7 @@ simpleChildTerm = ( simpleChildTerm' ...@@ -90,7 +89,7 @@ simpleChildTerm = ( simpleChildTerm'
createListTest :: TestEnv -> Assertion createListTest :: TestEnv -> Assertion
createListTest env = do createListTest env = do
flip runReaderT env $ runTestMonad $ do flip runReaderT env $ runTestMonad $ do
(userId, corpusId, listId, _v) <- commonInitialization (userId, corpusId, listId, _a) <- commonInitialization
listId' <- getOrMkList corpusId userId listId' <- getOrMkList corpusId userId
...@@ -100,28 +99,32 @@ createListTest env = do ...@@ -100,28 +99,32 @@ createListTest env = do
queryNodeStoryTest :: TestEnv -> Assertion queryNodeStoryTest :: TestEnv -> Assertion
queryNodeStoryTest env = do queryNodeStoryTest env = do
flip runReaderT env $ runTestMonad $ do flip runReaderT env $ runTestMonad $ do
(_userId, _corpusId, listId, v) <- commonInitialization (_userId, _corpusId, listId, a) <- commonInitialization
liftIO $ do
a `shouldBe` initArchiveList
saveNodeStory listId a
a' <- getNodeStory listId
saveNodeStoryImmediate
liftIO $ do liftIO $ do
ns <- atomically $ readTVar v a' `shouldBe` a
ns `shouldBe` (NodeStory $ Map.singleton listId initArchiveList)
insertNewTermsToNodeStoryTest :: TestEnv -> Assertion insertNewTermsToNodeStoryTest :: TestEnv -> Assertion
insertNewTermsToNodeStoryTest env = do insertNewTermsToNodeStoryTest env = do
flip runReaderT env $ runTestMonad $ do flip runReaderT env $ runTestMonad $ do
(_userId, _corpusId, listId, v) <- commonInitialization (_userId, _corpusId, listId, _a) <- commonInitialization
let (terms, nre) = simpleTerm let (terms, nre) = simpleTerm
let nls = Map.singleton terms nre let nls = Map.singleton terms nre
setListNgrams listId NgramsTerms nls setListNgrams listId NgramsTerms nls
a <- getNodeStory listId
liftIO $ do liftIO $ do
ns <- atomically $ readTVar v a `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nls })
ns `shouldBe` (NodeStory $ Map.singleton listId
(initArchiveList { _a_state = Map.singleton NgramsTerms nls }))
-- check that the ngrams are in the DB as well -- check that the ngrams are in the DB as well
ngramsMap <- selectNgramsId [unNgramsTerm terms] ngramsMap <- selectNgramsId [unNgramsTerm terms]
liftIO $ (snd <$> Map.toList ngramsMap) `shouldBe` [unNgramsTerm terms] liftIO $ (snd <$> Map.toList ngramsMap) `shouldBe` [unNgramsTerm terms]
...@@ -139,7 +142,7 @@ insertNewTermsToNodeStoryTest env = do ...@@ -139,7 +142,7 @@ insertNewTermsToNodeStoryTest env = do
insertNewTermsWithChildrenToNodeStoryTest :: TestEnv -> Assertion insertNewTermsWithChildrenToNodeStoryTest :: TestEnv -> Assertion
insertNewTermsWithChildrenToNodeStoryTest env = do insertNewTermsWithChildrenToNodeStoryTest env = do
flip runReaderT env $ runTestMonad $ do flip runReaderT env $ runTestMonad $ do
(_userId, _corpusId, listId, v) <- commonInitialization (_userId, _corpusId, listId, _a) <- commonInitialization
let (tParent, nreParent) = simpleParentTerm let (tParent, nreParent) = simpleParentTerm
let (tChild, nreChild) = simpleChildTerm let (tChild, nreChild) = simpleChildTerm
...@@ -148,10 +151,10 @@ insertNewTermsWithChildrenToNodeStoryTest env = do ...@@ -148,10 +151,10 @@ insertNewTermsWithChildrenToNodeStoryTest env = do
let nls = Map.fromList [(tParent, nreParent), (tChild, nreChild)] let nls = Map.fromList [(tParent, nreParent), (tChild, nreChild)]
setListNgrams listId NgramsTerms nls setListNgrams listId NgramsTerms nls
a <- getNodeStory listId
liftIO $ do liftIO $ do
ns <- atomically $ readTVar v a `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nls })
ns `shouldBe` (NodeStory $ Map.singleton listId
(initArchiveList { _a_state = Map.singleton NgramsTerms nls }))
-- `setListNgrams` calls saveNodeStory already so we should have -- `setListNgrams` calls saveNodeStory already so we should have
-- the terms in the DB by now -- the terms in the DB by now
...@@ -178,7 +181,7 @@ insertNewTermsWithChildrenToNodeStoryTest env = do ...@@ -178,7 +181,7 @@ insertNewTermsWithChildrenToNodeStoryTest env = do
insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest :: TestEnv -> Assertion insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest :: TestEnv -> Assertion
insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest env = do insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest env = do
flip runReaderT env $ runTestMonad $ do flip runReaderT env $ runTestMonad $ do
(_userId, _corpusId, listId, v) <- commonInitialization (_userId, _corpusId, listId, _a) <- commonInitialization
let (tParent, nreParent) = simpleParentTerm let (tParent, nreParent) = simpleParentTerm
let (tChild, nreChildGoodType) = simpleChildTerm let (tChild, nreChildGoodType) = simpleChildTerm
...@@ -190,10 +193,10 @@ insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest env = do ...@@ -190,10 +193,10 @@ insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest env = do
setListNgrams listId NgramsTerms nls setListNgrams listId NgramsTerms nls
a <- getNodeStory listId
liftIO $ do liftIO $ do
ns <- atomically $ readTVar v a `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nlsWithChildFixed })
ns `shouldBe` (NodeStory $ Map.singleton listId
(initArchiveList { _a_state = Map.singleton NgramsTerms nlsWithChildFixed }))
ngramsMap <- selectNgramsId terms ngramsMap <- selectNgramsId terms
liftIO $ (snd <$> Map.toList ngramsMap) `shouldBe` terms liftIO $ (snd <$> Map.toList ngramsMap) `shouldBe` terms
...@@ -216,16 +219,16 @@ insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest env = do ...@@ -216,16 +219,16 @@ insertNodeStoryChildrenWithDifferentNgramsTypeThanParentTest env = do
setListNgramsUpdatesNodeStoryTest :: TestEnv -> Assertion setListNgramsUpdatesNodeStoryTest :: TestEnv -> Assertion
setListNgramsUpdatesNodeStoryTest env = do setListNgramsUpdatesNodeStoryTest env = do
flip runReaderT env $ runTestMonad $ do flip runReaderT env $ runTestMonad $ do
(_userId, _corpusId, listId, v) <- commonInitialization (_userId, _corpusId, listId, _a) <- commonInitialization
let (terms, nre) = simpleTerm let (terms, nre) = simpleTerm
let nls = Map.singleton terms nre let nls = Map.singleton terms nre
setListNgrams listId NgramsTerms nls setListNgrams listId NgramsTerms nls
a <- getNodeStory listId
liftIO $ do liftIO $ do
ns <- atomically $ readTVar v a `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nls })
ns `shouldBe` (NodeStory $ Map.singleton listId
(initArchiveList { _a_state = Map.singleton NgramsTerms nls }))
-- check that the ngrams are in the DB as well -- check that the ngrams are in the DB as well
ngramsMap <- selectNgramsId [unNgramsTerm terms] ngramsMap <- selectNgramsId [unNgramsTerm terms]
liftIO $ (snd <$> Map.toList ngramsMap) `shouldBe` [unNgramsTerm terms] liftIO $ (snd <$> Map.toList ngramsMap) `shouldBe` [unNgramsTerm terms]
...@@ -238,27 +241,27 @@ setListNgramsUpdatesNodeStoryTest env = do ...@@ -238,27 +241,27 @@ setListNgramsUpdatesNodeStoryTest env = do
let terms2 = "WORLD" let terms2 = "WORLD"
let nls2 = Map.singleton (NgramsTerm terms2) nre2 let nls2 = Map.singleton (NgramsTerm terms2) nre2
setListNgrams listId NgramsTerms nls2 setListNgrams listId NgramsTerms nls2
a' <- getNodeStory listId
liftIO $ do liftIO $ do
ns <- atomically $ readTVar v a' `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms $ nls <> nls2 })
ns `shouldBe` (NodeStory $ Map.singleton listId
(initArchiveList { _a_state = Map.singleton NgramsTerms $ nls <> nls2 }))
setListNgramsUpdatesNodeStoryWithChildrenTest :: TestEnv -> Assertion setListNgramsUpdatesNodeStoryWithChildrenTest :: TestEnv -> Assertion
setListNgramsUpdatesNodeStoryWithChildrenTest env = do setListNgramsUpdatesNodeStoryWithChildrenTest env = do
flip runReaderT env $ runTestMonad $ do flip runReaderT env $ runTestMonad $ do
(_userId, _corpusId, listId, v) <- commonInitialization (_userId, _corpusId, listId, _a) <- commonInitialization
let (tChild, nreChild) = simpleChildTerm let (tChild, nreChild) = simpleChildTerm
let (tParent, nreParent) = simpleParentTerm let (tParent, nreParent) = simpleParentTerm
let nls = Map.fromList [(tParent, nreParent), (tChild, nreChild)] let nls = Map.fromList [(tParent, nreParent), (tChild, nreChild)]
setListNgrams listId NgramsTerms nls setListNgrams listId NgramsTerms nls
a <- getNodeStory listId
liftIO $ do liftIO $ do
ns <- atomically $ readTVar v a `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nls })
ns `shouldBe` (NodeStory $ Map.singleton listId
(initArchiveList { _a_state = Map.singleton NgramsTerms nls }))
-- OK, now we substitute parent with no children, the parent of -- OK, now we substitute parent with no children, the parent of
-- 'nreChild' should become Nothing -- 'nreChild' should become Nothing
...@@ -270,22 +273,20 @@ setListNgramsUpdatesNodeStoryWithChildrenTest env = do ...@@ -270,22 +273,20 @@ setListNgramsUpdatesNodeStoryWithChildrenTest env = do
, _nre_root = Nothing } , _nre_root = Nothing }
let nlsNew = Map.fromList [(tParent, nreParentNew), (tChild, nreChildNew)] let nlsNew = Map.fromList [(tParent, nreParentNew), (tChild, nreChildNew)]
a' <- getNodeStory listId
liftIO $ do liftIO $ do
ns <- atomically $ readTVar v a' `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nlsNew })
ns `shouldBe` (NodeStory $ Map.singleton listId
(initArchiveList { _a_state = Map.singleton NgramsTerms nlsNew }))
commitPatchSimpleTest :: TestEnv -> Assertion commitPatchSimpleTest :: TestEnv -> Assertion
commitPatchSimpleTest env = do commitPatchSimpleTest env = do
flip runReaderT env $ runTestMonad $ do flip runReaderT env $ runTestMonad $ do
(_userId, _corpusId, listId, v) <- commonInitialization (_userId, _corpusId, listId, a) <- commonInitialization
-- initially, the node story table is empty -- initially, the node story table is empty
liftIO $ do liftIO $ do
ns <- atomically $ readTVar v a `shouldBe` (initArchiveList { _a_state = Map.empty })
ns `shouldBe` (NodeStory $ Map.singleton listId
(initArchiveList { _a_state = Map.empty }))
let (term, nre) = simpleTerm let (term, nre) = simpleTerm
let tPatch = NgramsReplace { _patch_old = Nothing let tPatch = NgramsReplace { _patch_old = Nothing
...@@ -298,9 +299,9 @@ commitPatchSimpleTest env = do ...@@ -298,9 +299,9 @@ commitPatchSimpleTest env = do
_patchApplied <- commitStatePatch listId patch _patchApplied <- commitStatePatch listId patch
let nls = Map.fromList [(term, nre)] let nls = Map.fromList [(term, nre)]
a' <- getNodeStory listId
liftIO $ do liftIO $ do
ns <- atomically $ readTVar v a' `shouldBe` (initArchiveList { _a_state = Map.singleton NgramsTerms nls
ns `shouldBe` (NodeStory $ Map.singleton listId , _a_version = ver + 1 })
(initArchiveList { _a_state = Map.singleton NgramsTerms nls
, _a_version = ver + 1 }))
...@@ -115,10 +115,6 @@ instance HasMail TestEnv where ...@@ -115,10 +115,6 @@ instance HasMail TestEnv where
instance HasNodeStoryEnv TestEnv where instance HasNodeStoryEnv TestEnv where
hasNodeStory = to test_nodeStory hasNodeStory = to test_nodeStory
instance HasNodeStoryVar TestEnv where
hasNodeStoryVar = hasNodeStory . nse_getter
instance HasNodeStoryImmediateSaver TestEnv where instance HasNodeStoryImmediateSaver TestEnv where
hasNodeStoryImmediateSaver = hasNodeStory . nse_saver_immediate hasNodeStoryImmediateSaver = hasNodeStory . nse_saver_immediate
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment