Commit 3d90bcc8 authored by Alfredo Di Napoli

Add Nadal canned Phylo corpus for testing

parent 1bff83b6
Pipeline #5851 passed with stages
in 153 minutes and 54 seconds
...@@ -37,6 +37,9 @@ data-files: ...@@ -37,6 +37,9 @@ data-files:
test-data/ngrams/simple.csv test-data/ngrams/simple.csv
test-data/phylo/bpa_phylo_test.json test-data/phylo/bpa_phylo_test.json
test-data/phylo/cleopatre.golden.json test-data/phylo/cleopatre.golden.json
test-data/phylo/issue-290-small.golden.json test-data/phylo/issue-290-small.golden.json
test-data/phylo/open_science.json test-data/phylo/open_science.json
test-data/phylo/small-phylo.golden.json test-data/phylo/small-phylo.golden.json
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
status label forms
candidate ability
map abstentionists
candidate access
candidate account
map accuracy
candidate ace
candidate aci
candidate acquisition
candidate activity
map adaptive thresholds
candidate addition
map additive output noise
map additive utility
candidate advance
candidate advantage
stop agent
map agent based
stop agents agent
map aggregated dynamics
candidate aid
candidate aim
map algorithm
map algorithme
map allophonic rules
map allophony
map analogies
candidate analyse
stop analysis
candidate analytical results
map animal
map animals animal
map anisotropic
map anisotropy
map anomalous fluctuation effects
stop application applications
stop applications
map apprentissage symbolique
stop approach
stop approaches approach
candidate april
map architecture
candidate article
map associative memory
candidate attraction
map attractiveness
map attractor network dynamics
map attractor neural network
map attractor neural networks attractor neural network
candidate authors
map available experimental and theoretical techniques
candidate avalanches
map average opinion
map axelrod
candidate background
map backpropagation
map bandwagon goods
map bandwagon properties
candidate barlow
candidate basic evidences
map basins
candidate basis
map battery
map behavior
map behaviors behavior
candidate behaviour
map benevolent individuals
map benevolentbased organizations
map binary channel
map binary choice
map binary inputs
map binary neurons
map binary opinion
map binary strings
map biological organisms
map biology
map biophysical modeling
map birthrate
map blind source separation
map body
map boundaries boundary|&|bounds
map boundary
map bounded confidence
map bounds
map brain
map brain injury‬
map buyers
candidate calculation
candidate capacity
stop case cases
stop cases
map catastrophic deterioration
map categorical perception
map categories
map categorization categories
map causal inference
map cayley tree
candidate ce travail
map cell
map cell cycles
map cell signalling
map cellular automata
candidate centre national de la recherche scientifique
map cerebellar learning
map cerebellum
candidate challenges
candidate chapter
candidate choice
map choice function
map choice functions
map choices
map cities
map city
map civic involvement
candidate classes
map classical economics
candidate classical methods
candidate classical techniques search
map classification
map classification method classification|&|classifying
map classifying
map climate
map cluster growth
candidate clusters
candidate code
map coding cells
map coding efficiency
map coding errors
map cognitive economics
map cognitive science
candidate collection
map collective behavior collective behaviour|&|collective overall behavior
map collective behaviour
map collective dynamics
map collective overall behavior
map collective phenomena phénomenes collectifs
map collective trends
map combined support
map communication network
map community
map community size
stop comparison
candidate complex adaptive systems
map complex interactive system
map complex social systems
candidate complex systems
candidate complexes
map complexities
map complexity complexities
candidate components
candidate composantes
candidate composantes indépendantes
map computacional social science‬
candidate computation
map computational economics
map computational neuroscience
map computational social science
candidate concepts
candidate conditions
map confidence judgment
candidate conjecture
candidate consequence consequences
candidate consequences
candidate consideration
candidate constraint
candidate construction
map constructive algorithm
map contagion
candidate context
map continuous opinions
map continuous opinions dynamics
map contour enhancement
candidate contrast
candidate contribution
candidate contributions
candidate convergence
map cooperation cooperativeness|&|cooperators
map cooperative work
map cooperativeness cooperative work
map cooperators
map coordination coordination process
map coordination process
map cost
candidate countries
map coupling couplings
map couplings
candidate course
map covid 19
map crime criminal act
map crime rate
map criminal act
map criminal activity
map criminality
candidate criteria
candidate criterion
map critical care beds
map critical exponents
map critical phenomena
map critical state
candidate critical value
map cult
candidate cumulants
candidate curse
map customers
candidate cycles
candidate data
candidate decision
map decision trials
map decorrelated components
candidate definition
map demand
map democracies
candidate dependence
candidate dependency
candidate description
stop detailed study
candidate development
candidate difference
candidate different approaches
candidate different constraints
candidate different level
candidate different levels different level
candidate different magnitudes
candidate different regimes
candidate different states
candidate different types
map diffusion
map dilemma
map dilute lattice
map diluted asymmetric models
map diluted neural networks
candidate dimension
candidate dimensions
candidate directed
map directed animals directed compact animals
map directed compact animals
map directed models
map directed percolation
map directed systems directed models
candidate direction
map discharging cycles
map discrete choice model
map discrete choices discrete choice model
map discrete set
map discrimination task
map disorder
map disordered systems
candidate distance
candidate distribution distributions
candidate distributions
map diversity
candidate downs
map duality
map dynamical systems
stop dynamics
map dynamics of neural networks
candidate ecole normale supérieure
candidate ecole polytechnique
map economic burden
map economic theory
map economics
map economy
map eden growth
map eden model
map edge detectors
map education
stop effect
map effectors
stop effects effect
candidate efficacies
candidate efficiency
candidate ehess
map electoral behavior
map emergence
map emergent organization
candidate emile borel center
candidate emphasis
map empirical data
map endogenous bursts
map endogenous externalities
map entanglement
map entropy
candidate environment
map epidemiological modelling
map equilibria equilibrium
map equilibrium
map error backpropagation
map error signals
candidate errors
candidate est introduit
map ethnic turnovers
candidate evidence
map evolution
candidate exact results
candidate example
candidate examples
candidate existence
candidate expansion
candidate experiment
map experimental conditions
candidate experimental data
map experimental measurements experimental conditions
map experiments
map exploration exploitation compromise
map exponents
candidate expression
map extensive use
map externalities
map externalities‬
candidate fact
map factorial code
map factorial codehence
map family
candidate features
map feedforward layered networks
map feedforward network
map feedforward neural network feedforward network
candidate few individual entities
candidate field
candidate final organization
map finance financial market
map financial crises
map financial market
map finite size scaling
map finite temperature
candidate finite width
map first order phase transition
stop first time
map fish market
map fisher information
map fixed cost
stop foci
stop focus foci
map foreign affairs
map foreign experts
map forgetting
candidate form
map formal neural networks
map formal neurons
candidate formalism
candidate former case
map fractal
map fractal components
candidate fraction
candidate framework
candidate france
map freeriders
candidate freeriding
map french ministry
map french riots
candidate french students
candidate function
candidate functioning
candidate functions
map fuzzy neural networks
map game theory
map gene
stop general framework
candidate generalisation
map generic properties
map genes
map geophysical community
map geophysical data
map geophysical time series analysis
map global trend
candidate goal
candidate good model
map goods
map gradient descent
map grammatical status
map grammaticalization
candidate group
map group effects
map group members
candidate groups
map growth
map growth algorithm
candidate guarantee
map handwritten character classification
candidate help
candidate henri poincaré institute
map herschkowitz
map heterogeneities
map heterogeneity heterogeneous|&|heterogenous
map heterogeneous
map heterogeneous agents
map heterogeneous media
map heterogenous
map hidden units
map hierarchical neural network
map hierarchical neural networks‬
candidate high price
candidate high thresholds
map honesty
map hopfield model
map hot spots
map housing
map human machine interaction
map human confidence
map human vision
map humanities
candidate humans
map hybrid neural trees
map hybrid trees
candidate hypothesis
map hypothèse linéaire
map hysteretic effects
map ica
map ica algorithm
candidate idea
stop identification identifications
stop identifications
map idiosyncratic component
map idiosyncratic preferences
map idiosyncratic willingness idiosyncratic willingnesses
map idiosyncratic willingnesses
map illegal activities
candidate image
map image coding
map image representation
map images
stop importance
map important open issues
map impunity
map income
map incomplete confounding factors
map independent component analysis
map independent components
map independent sources
map individual behavior
map individual decisions
map individual dynamics
map individual preferences
map individuals
map inequality
map infants
candidate information
map information capacity
map information processing
map information storage
map information storage capacity
map information theoretic approach
map information theory
map information transfer
map information transmission
candidate input
candidate input data
map input layer
map input noise
map input space
candidate input units
candidate inputs
candidate insight insights
candidate insights
stop integration
map intelligence
map intelligent battery
map interacting individuals
candidate interaction
map interaction network
map interaction structures
candidate interactions
map interconnected global society
map interdisciplinary approach
map interdisciplinary team
map interface
map international epidemics
candidate international symposia
map interpretation
stop introduction
map invasion percolation
map invertible transfer functions
map ion channels
map ising model ising problem
map ising problem
candidate issues
candidate j nadal
candidate january
candidate joint cognitive science phd program
candidate journal
candidate jp nadal
candidate junior researchers
map k means
map k means algorithm
candidate kind
candidate kirman
candidate knowledge
candidate l
map language
map language change
candidate languages
candidate large fraction
candidate large n limit
candidate large number
map large systems
map lattice
candidate law
map law enforcement
candidate layer
map layered networks‬
map layered neural network
map learning
map learning algorithm learning algorithms|&|learning process|&|learning rule
map learning algorithms
map learning process learning processes
map learning processes
map learning rule
candidate lecture
candidate level
map lexicon
map life
candidate light
candidate limit limits
candidate limited number
candidate limits
candidate linear hypothesis
map linear mixture
map linear noisy network
map linguistic
map linguistic change
candidate link
candidate links
candidate linsker
candidate literature
map lives life
map local and global properties
map local behavior
map local dynamics
map localised externalities
map low price
candidate low thresholds
candidate lyon
map machine learning
map magnetic field
stop main result
stop main results
stop major advantages
map mammals
map management
map manifesto
candidate many experimental and theoretical studies
candidate many other works
candidate many socioeconomic phenomena
map market
map market mechanisms
map market model
map market organisation
map market organization
map markets
map master equations
candidate mathematical description
map mathematicians
map mathematics mathematicians
candidate maximal capacity
stop maximal number
candidate maximization
map maximum entropy principle
map maximum information
candidate mcfadden
map mean field approximation
candidate mechanism mechanisms
candidate mechanisms
candidate members
map memories
map memory memories
stop method
map methodological foundations
stop methods method
map microarray
map microarrays
candidate mixing
map mixing‬
map mixture
candidate model
stop modeling
candidate models model
candidate models interactions
candidate modifications
stop modélisation des
map monkeys
map monopolist
map monopoly case
map monopoly market
map motley crowd
map motor outputs
candidate movements
stop much information
stop much research effort
map multi agent simulation
map multiagent approach
map multiagent simulation
map multiclass classification
map multifractal
map multifractal signals
map multimodal sensors
map multiple equilibria
map multivariate time series
map municipalities
map mutual influences
map mutual information
candidate n
candidate nadal
map nash equilibria
map national elections
map natural images
stop nature
candidate need
candidate neighborhood
map neighbourhood
map neocortex
map network
map network externality
map networks network
map neural codes
map neural coding
map neural decision making process
map neural decision process
map neural information processing
map neural net
map neural network neural net|&|neural networks|&|neural networks‬
map neural networks
map neural networks‬
map neural tree
map neural trees neural tree
map neurobiology
map neuron
map neuronal dynamics
map neurones
map neurones formels
map neurons neuron|&|neurones
map neurons spiking
map neuroscience
map neuroscience phd program
candidate new algorithm
candidate new avenues
candidate new discipline
candidate new method
candidate new scientific direction
map noise noisy
map noisy
map noisy conditions
map noisy environment
map nonlinear analogue channel
map nonlinear neural network nonlinear neurons
map nonlinear neurons
map nonlinearities
map norm
candidate notion
candidate number
map numerical simulations
candidate numerical study
candidate numerical tests
map obesity
candidate objective
candidate objectives
candidate observation
candidate observations
map onelayer feedforward neural network
stop ones
map opinion
map opinion dynamics
map opinions opinion
map opper
candidate opportunity
map optimal information processors
map optimal information storage
candidate optimization
stop oral presentations
candidate order
map oriented cluster
map oriented graph
map oriented percolation
map oriented textures‬
candidate origin
stop other forms
stop other hand
stop other means
candidate output outputs
candidate output distribution
map output layer
map output neuron
map output noises
candidate outputs
stop paper
candidate parameter
candidate parameter estimation
candidate parameters
map paris
candidate parisian science universities
map part
map partial differential equations
stop particular emphasis
map pattern storage
map patterns
map pde
map perception
map perceptron
map perceptron architecture
map perceptrons perceptron
map perceptual decision
map perceptual decisions‬
map perceptual tasks
map percolation
candidate performance
candidate performances
map perishable good
map perturbation theory
map perturbations
map phase diagram
map phase diagrams phase diagram
map phase transition phase transitions
map phase transitions
map phases
map phenomena
map phenomenological renormalization
map phonemes
map phonetic
map physical causes
map physicists
candidate physics
map physics literature
map physiology
map physique statistique
candidate physiques
map phénomenes collectifs
map place
map planning
map plasticity
candidate point
map poisson process
map policy
map policy issues
map polymorphic equilibrium
candidate population
map population coding
map population dynamics
map porous medium
map positive externalities
map possibility
map possible learning theories
candidate postgraduate program
map potts perceptron
candidate practice
map pre attentive
candidate prediction
map predictions
candidate preferences
stop presence
stop present issue
candidate previous results
candidate previous work
map price
map price distribution
map price system
map prices
map prices adjustments
map pricing
map pricing strategies
candidate principal component analysis
candidate probability
candidate probability distribution
stop problem
stop problems problem
candidate procedure
candidate production
map profit
map profit maximization
map profit optimization
candidate propagation
map propensity to offend
candidate properties
map pseudo language
map psychologie
map psychophysics
map punishment
map pure consumers
map purkinje cell
candidate q
map q different states
map quantitative understanding
candidate question
map random binary encounters
map random environment
map random field
map random field ising model
map random medium
map random patterns
map random systems
map randomly dilute lattice
map rare events
candidate ratio
candidate reason
stop recent work
map receptive fields
map rechargeable battery
map redundancy reduction
candidate regime
candidate regions
candidate relation
map relationship
map relationships
map relevance
map repetitive learning
map representation
candidate research
candidate researchdedicated program
map researchers
map resistor
map resistor networks
stop respect
stop result
stop results result
map retarded learning
candidate retrieval
map reward
map riot
map rioters
map rioting
map riots riot|&|rioters|&|rioting
stop role
stop rough partition
candidate rule
candidate rules
map réseau de neurones
map réseau de neurones flous‬
map réseaux
candidate same model
stop same solution
stop same time period
candidate satisfactory account
candidate scale
map scale invariant
map scaling
map schelling
candidate schemes
map science sciences
map sciences
map sciences sociales‬
map sciences économiques
map segmentation
map segmentation analysis
map segregation
map selection
map self avoiding walk
map self coupling‬
map self organised self organized states
map self organized states
map self similar
map self similarity self similar|&|selfsimilarity properties
map self avoiding walks self avoiding walk
map selfsimilarity properties
map seller
map sellers
map semantic bleaching
map seminar
map seminars
map sensorimotor
map sensory coding
map sensory inputs
candidate sequence
map sequences
stop series
candidate set
map several opinion clusters
map short term memories
map sign
map signal
map signal configurations‬
map simple market model
stop simple model
map simple neural network
map simplest market model
map simulated market mechanisms
map simulation
map simulations simulation
map single homogeneous product
map single seller
candidate situations
candidate size
stop small fraction
stop small number small numbers
stop small numbers
candidate smallworlds
map smart battery
map social convention
map social diversity
map social influence
map social influence‬
map social interactions
map social outbursts
candidate social science
map social sciences
map social system
map social systems social system
map society
map socio spatial segregation
map sociology
candidate sole description
candidate solution solutions
candidate solutions
map source separation
map sources
candidate space
map sparse coding limit
map sparsely coded
map sparsely encoded data
map spatiality
map spin glass
map spin glasses
map square lattice
map stability
candidate standard
map standard microeconomic theory
map statistical inference
map statistical learning
map statistical mechanics
candidate statistical mechanics techniques
map statistical physics
candidate statistical physics approach
map statistics
candidate statistique
candidate steps
map stimuli
map stimulus
map stimulus space
candidate storage
map storage capacity
stop straightforward way
candidate strategic nature
stop strategies
stop strategy strategies
map strengths
candidate stress
candidate strip
candidate strong constraints
map strong dilution
map strongly textured
candidate structure
candidate subject
map suburb
map success
candidate such systems
candidate sum
candidate supervised and unsupervised learning tasks
map supervised clustering
map supervised learning
map supply
candidate support
map symbolic data analysis
map symbolic learning
map symmetries
map symmetry
map synapse
map synapses synapse
map synaptic coefficients
map synaptic couplings
map synaptic efficacies
map synaptic plasticity
map synaptic weights
candidate synthesis
candidate system
candidate systems
candidate systems neuroscience
map système dynamique
map séries temporelles
map tagging
candidate task
map teaching
candidate teaching faculty
candidate techniques
candidate techniques classiques
stop techniques classiques cherchent uniquement
candidate techniques statistiques d'extraction
map technology
candidate temperature
map temporal sequences
map temporal series
candidate terms
candidate text
map textural characteristics
map texture
map texture synthesis
map texture synthesizer texture synthesis
map textured images
candidate theoretical analysis
candidate theoretical and modeling approaches
map theoretical brain
candidate theoretical framework
candidate theoretical physics
candidate theoretical tools
candidate theoretical understanding
candidate theory
candidate threshold
map threshold heterogeneity
candidate thurstone
map tiling algorithm
candidate time
candidate tool
candidate tools
map topics
stop total number
map trading
map transfer matrix
candidate transitions
map treatment effect
candidate tree
map tropical sea surface temperature
map tropical sst variability
map turbulent flows
map turbulent flows‬
candidate type
candidate types
map typical mutual information
map unbiased patterns
map uncivil
candidate understanding
map unemployment
candidate unit units
candidate units
map university
candidate unprecedented volumes
map unsupervised learning
map unusual enterprise
candidate ups
map urban citizen
map urban housing
map urban voters
map urgent need
candidate use
map user behaviours
map user profiling
candidate utilities
candidate value
candidate values
map variability
candidate variance
stop variants
candidate variety
candidate version
map victimisation effects
map view
map visual pathway
map volatility
map voters
map voting
map vowels
map wave of riots
map wavelet basis
map wavelets
candidate way
map weaker constraint
map wealth distribution
map wetting fluid
candidate whole series
stop wide variety
candidate width
candidate willingness
map willingness to pay
map willshaw model
candidate words
candidate work works
map working memories
candidate works
candidate world
map yeast
...@@ -58,6 +58,7 @@ tests = testGroup "Phylo" [ ...@@ -58,6 +58,7 @@ tests = testGroup "Phylo" [
, testGroup "toPhyloWithoutLink" [ , testGroup "toPhyloWithoutLink" [
testCase "returns expected data" testSmallPhyloWithoutLinkExpectedOutput testCase "returns expected data" testSmallPhyloWithoutLinkExpectedOutput
, testCase "phyloCleopatre returns expected data" testCleopatreWithoutLinkExpectedOutput , testCase "phyloCleopatre returns expected data" testCleopatreWithoutLinkExpectedOutput
, testCase "Nadal canned corpus returns expected data" testNadalWithoutLinkExpectedOutput
] ]
, testGroup "phylo2dot2json" [ , testGroup "phylo2dot2json" [
testCase "is deterministic" testPhylo2dot2json testCase "is deterministic" testPhylo2dot2json
...@@ -76,6 +77,23 @@ testCleopatreWithoutLinkExpectedOutput = do ...@@ -76,6 +77,23 @@ testCleopatreWithoutLinkExpectedOutput = do
expected <- readPhylo =<< getDataFileName "test-data/phylo/cleopatre.golden.json" expected <- readPhylo =<< getDataFileName "test-data/phylo/cleopatre.golden.json"
assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual) assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual)
-- | Golden test for 'toPhyloWithoutLink' on the canned Nadal corpus.
--
-- Loads the docs list and the ngrams list shipped as data files under
-- @test-data/phylo@, builds a phylo from them with 'phyloTestConfig'
-- (overriding only the corpus path, the list path and the list parser),
-- and compares the result with @test-data/phylo/nadal.golden.json@.
-- On mismatch the failure message is the compact edit-diff between the
-- expected and the actual value.
testNadalWithoutLinkExpectedOutput :: Assertion
testNadalWithoutLinkExpectedOutput = do
  corpusPath' <- getDataFileName "test-data/phylo/nadal_docslist.golden.csv"
  listPath'   <- getDataFileName "test-data/phylo/nadal_ngramslist.golden.csv"
  let config = phyloTestConfig { corpusPath = corpusPath'
                               , listPath   = listPath'
                               , listParser = V3
                               }
  mapList <- fileToList (listParser config) (listPath config)
  -- The term list is the last argument of 'fileToDocsDefault', as in the
  -- small-phylo test of this module.
  corpus  <- fileToDocsDefault (corpusParser config)
                               (corpusPath config)
                               [Year 3 1 5, Month 3 1 5, Week 4 2 5]
                               mapList
  -- NOTE(review): both sides are re-stamped with 'phyloTestConfig',
  -- presumably so the resolved data-file paths in 'config' do not take
  -- part in the comparison -- confirm against 'setConfig'.
  let actual = setConfig phyloTestConfig $ toPhyloWithoutLink corpus config
  expected <- setConfig phyloTestConfig <$> (readPhylo =<< getDataFileName "test-data/phylo/nadal.golden.json")
  assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual)
testSmallPhyloWithoutLinkExpectedOutput :: Assertion testSmallPhyloWithoutLinkExpectedOutput :: Assertion
testSmallPhyloWithoutLinkExpectedOutput = do testSmallPhyloWithoutLinkExpectedOutput = do
bpaConfig <- getDataFileName "bench-data/phylo/bpa-config.json" bpaConfig <- getDataFileName "bench-data/phylo/bpa-config.json"
...@@ -91,7 +109,7 @@ testSmallPhyloWithoutLinkExpectedOutput = do ...@@ -91,7 +109,7 @@ testSmallPhyloWithoutLinkExpectedOutput = do
mapList mapList
let actual = setConfig phyloTestConfig $ toPhyloWithoutLink corpus config let actual = setConfig phyloTestConfig $ toPhyloWithoutLink corpus config
expected <- setConfig phyloTestConfig <$> (readPhylo =<< getDataFileName "test-data/phylo/small-phylo.golden.json") expected <- setConfig phyloTestConfig <$> (readPhylo =<< getDataFileName "test-data/phylo/small-phylo.golden.json")
assertBool (show $ ansiWlEditExpr $ ediff expected actual) (expected == actual) assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual)
testPhylo2dot2json :: Assertion testPhylo2dot2json :: Assertion
testPhylo2dot2json = do testPhylo2dot2json = do
...@@ -103,7 +121,7 @@ testPhylo2dot2json = do ...@@ -103,7 +121,7 @@ testPhylo2dot2json = do
case actual_e of case actual_e of
Left err -> fail err Left err -> fail err
Right (actual :: GraphData) -> do Right (actual :: GraphData) -> do
assertBool ("Phylo mismatch!" <> show (ansiWlEditExpr $ ediff expected actual)) (expected `compareGraphDataFuzzy` actual) assertBool ("Phylo mismatch!" <> show (ansiWlEditExprCompact $ ediff expected actual)) (expected `compareGraphDataFuzzy` actual)
compareGraphDataFuzzy :: GraphData -> GraphData -> Bool compareGraphDataFuzzy :: GraphData -> GraphData -> Bool
compareGraphDataFuzzy gd1 gd2 = compareGraphDataFuzzy gd1 gd2 =
