Commit 19071e4b authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into dev-doc-table-optimization

parents 4caaf612 63e3a6fd
Pipeline #922 failed with stage
......@@ -6,15 +6,15 @@ category: Data
author: Gargantext Team
maintainer: team@gargantext.org
copyright:
- ! 'Copyright: (c) 2017-2018: see git logs and README'
license: BSD3
- ! 'Copyright: (c) 2017-Present: see git logs and README'
license: AGPL-3
homepage: https://gargantext.org
ghc-options: -Wall
extra-libraries:
- gfortran
- gfortran
dependencies:
- extra
- text
- extra
- text
default-extensions:
- DataKinds
- DeriveGeneric
......@@ -59,6 +59,7 @@ library:
- Gargantext.Text
- Gargantext.Text.Context
- Gargantext.Text.Corpus.Parsers
- Gargantext.Text.Corpus.Parsers.Date.Parsec
- Gargantext.Text.Corpus.API
- Gargantext.Text.Corpus.Parsers.CSV
- Gargantext.Text.Examples
......@@ -364,36 +365,58 @@ executables:
tests:
# garg-test:
# main: Main.hs
# source-dirs: src-test
# ghc-options:
# - -threaded
# - -rtsopts
# - -with-rtsopts=-N
# dependencies:
# - base
# - gargantext
# - hspec
# - QuickCheck
# - quickcheck-instances
# - time
# - parsec
# - duckling
# - text
garg-doctest:
main: Main.hs
source-dirs: src-doctest
ghc-options:
- -O2
- -Wcompat
- -Wmissing-signatures
- -rtsopts
- -threaded
- -with-rtsopts=-N
dependencies:
- doctest
- Glob
- QuickCheck
- base
- gargantext
garg-test:
main: Main.hs
source-dirs: src-test
default-extensions:
- DataKinds
- DeriveGeneric
- FlexibleContexts
- FlexibleInstances
- GeneralizedNewtypeDeriving
- MultiParamTypeClasses
- NoImplicitPrelude
- OverloadedStrings
- RankNTypes
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- base
- gargantext
- hspec
- QuickCheck
- quickcheck-instances
- time
- parsec
- duckling
- text
# garg-doctest:
# main: Main.hs
# source-dirs: src-doctest
# ghc-options:
# - -O2
# - -Wcompat
# - -Wmissing-signatures
# - -rtsopts
# - -threaded
# - -with-rtsopts=-N
# dependencies:
# - doctest
# - Glob
# - QuickCheck
# - base
# - gargantext
# default-extensions:
# - DataKinds
# - DeriveGeneric
# - FlexibleContexts
# - FlexibleInstances
# - GeneralizedNewtypeDeriving
# - MultiParamTypeClasses
# - NoImplicitPrelude
# - OverloadedStrings
# - RankNTypes
#
import System.FilePath.Glob
import Test.DocTest
import Gargantext.Prelude
main :: IO ()
main = glob "src/Gargantext/" >>= doctest
......
......@@ -12,15 +12,17 @@ Portability : POSIX
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
--import qualified Ngrams.Lang.Fr as Fr
import qualified Ngrams.Lang as Lang
--import qualified Ngrams.Lang as Lang
import qualified Ngrams.Lang.Occurrences as Occ
import qualified Ngrams.Metrics as Metrics
import qualified Parsers.Date as PD
import qualified Graph.Distance as GD
main :: IO ()
main = do
Occ.parsersTest
Lang.ngramsExtractionTest FR
Lang.ngramsExtractionTest EN
Metrics.main
-- Occ.parsersTest
-- Lang.ngramsExtractionTest FR
-- Lang.ngramsExtractionTest EN
-- Metrics.main
PD.testFromRFC3339
GD.test
......@@ -15,6 +15,7 @@ commentary with @some markup@.
module Ngrams.Lang where
{-
import Gargantext.Prelude (IO())
import Gargantext.Core (Lang(..))
......@@ -24,4 +25,4 @@ import qualified Ngrams.Lang.En as En
ngramsExtractionTest :: Lang -> IO ()
ngramsExtractionTest FR = Fr.ngramsExtractionTest
ngramsExtractionTest EN = En.ngramsExtractionTest
-}
......@@ -15,6 +15,7 @@ commentary with @some markup@.
module Ngrams.Lang.En where
{-
import Data.List ((!!))
import Data.Text (Text)
......@@ -22,8 +23,11 @@ import Test.Hspec
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
-- TODO this import is not used anymore
import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
-- use instead
-- import Gargantext.Text.Terms (extractNgramsT)
ngramsExtractionTest :: IO ()
ngramsExtractionTest = hspec $ do
......@@ -43,4 +47,4 @@ ngramsExtractionTest = hspec $ do
t2 <- map (selectNgrams EN) <$> extractNgrams EN t
t2 `shouldBe` [[("Donald Trump","NNP","PERSON"),("president of the United-States of America","NN","LOCATION")]]
-}
......@@ -15,12 +15,15 @@ commentary with @some markup@.
module Ngrams.Lang.Fr where
{-
import Test.Hspec
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
-- TODO this import is not used anymore
import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
-- use instead
-
ngramsExtractionTest :: IO ()
ngramsExtractionTest = hspec $ do
describe "Behavioral tests: ngrams extraction in French Language" $ do
......@@ -61,4 +64,4 @@ ngramsExtractionTest = hspec $ do
let textFr1 = "L'heure d'arrivée des coureurs dépend de la météo du jour."
testFr1 <- map (selectNgrams FR) <$> (extractNgrams FR) textFr1
testFr1 `shouldBe` [[("heure d' arrivée des coureurs","NC","O"),("météo du jour","NC","O")]]
-}
......@@ -15,6 +15,7 @@ commentary with @some markup@.
module Ngrams.Lang.Occurrences where
{-
import Test.Hspec
import Data.Either (Either(Right))
......@@ -59,4 +60,4 @@ parsersTest = hspec $ do
-- describe "Parser for nodes" $ do
-- it "returns the result of one parsing after space" $ do
-- occOfCorpus 249509 "sciences" `shouldReturn` 7
-}
......@@ -15,8 +15,10 @@ commentary with @some markup@.
{-# LANGUAGE CPP #-}
module Ngrams.Metrics (main) where
--module Ngrams.Metrics (main) where
module Ngrams.Metrics where
{-
import Data.Text (Text)
import qualified Data.Text as T
import Data.Ratio
......@@ -139,3 +141,5 @@ testPair :: (Eq a, Show a)
-> SpecWith ()
testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $
f a b `shouldBe` r
-}
......@@ -28,7 +28,7 @@ import Duckling.Time.Types (toRFC3339)
-----------------------------------------------------------
import Gargantext.Prelude
import Gargantext.Text.Parsers.Date (fromRFC3339)
import Gargantext.Text.Corpus.Parsers.Date.Parsec (fromRFC3339)
import Parsers.Types
-----------------------------------------------------------
......
......@@ -27,12 +27,12 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
arbitrary = JobOutput <$> arbitrary
-- | Main Types
-- TODO IsidoreAuth
data ExternalAPIs = All
| PubMed
| HAL
| IsTex
| Isidore
-- | IsidoreAuth
deriving (Show, Eq, Enum, Bounded, Generic)
......
......@@ -81,7 +81,7 @@ updateNode uId nId (UpdateNodeParamsGraph metric) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 2
, _scst_remaining = Just 1
, _scst_events = Just []
}
......
......@@ -175,6 +175,6 @@ newTries n t = buildTries n (fmap toToken $ uniText t)
uniText :: Text -> [[Text]]
uniText = map (List.filter (not . isPunctuation))
. map tokenize
. sentences -- | TODO get sentences according to lang
. sentences -- TODO get sentences according to lang
. Text.toLower
......@@ -73,12 +73,12 @@ subst (src, dst) x | sim src x = dst
| otherwise = x
------------------------------------------------------------------------
-- | TODO: Show Instance only used for debugging
type Entropy e =
( Fractional e
, Floating e
, P.RealFloat e
, Show e
-- ^ TODO: only used for debugging
)
------------------------------------------------------------------------
-- | Example and tests for development
......
......@@ -65,11 +65,13 @@ data SeaElevation =
data Proximity =
WeightedLogJaccard
{ _wlj_sensibility :: Double
{ _wlj_sensibility :: Double
{-
-- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
-}
}
| Hamming
deriving (Show,Generic,Eq)
......
......@@ -147,8 +147,9 @@ computeGraph cId d nt repo = do
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
-- TODO split diagonal
myCooc <- Map.filter (>1)
<$> getCoocByNgrams (Diagonal True)
<$> getCoocByNgrams (Diagonal False)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
......
......@@ -27,6 +27,7 @@ import Test.QuickCheck.Arbitrary
------------------------------------------------------------------------
data Distance = Conditional | Distributional
deriving (Show)
measure :: Distance -> Matrix Int -> Matrix Double
measure Conditional = measureConditional
......
......@@ -18,18 +18,15 @@ module Gargantext.Viz.Graph.Distances.Distributional
where
import Data.Matrix hiding (identity)
import qualified Data.Map as M
import Data.Vector (Vector)
import qualified Data.Vector as V
import Gargantext.Prelude
import Gargantext.Viz.Graph.Utils
distributional :: (Floating a, Ord a) => Matrix a -> [((Int, Int), a)]
distributional m = filter (\((x,y), d) -> foldl' (&&) True (conditions x y d) ) distriList
distributional' :: (Floating a, Ord a) => Matrix a -> [((Int, Int), a)]
distributional' m = filter (\((x,y), d) -> foldl' (&&) True (conditions x y d) ) distriList
where
conditions x y d = [ (x /= y)
, (d > miniMax')
......@@ -51,7 +48,6 @@ ri m = matrix c r doRi
$ V.zip (ax Col x y mi') (ax Row x y mi')
(c,r) = (nOf Col m, nOf Row m)
mi :: (Ord a, Floating a) => Matrix a -> Matrix a
mi m = matrix c r createMat
where
......
......@@ -17,14 +17,6 @@ Implementation use Accelerate library which enables GPU and CPU computation:
[Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
* Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
[Optimising Purely Functional GPU Programs][MCKL13].
In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
* Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
[Embedding Foreign Code][CMCK14].
In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
* Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
[Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
......@@ -34,13 +26,14 @@ Implementation use Accelerate library which enables GPU and CPU computation:
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Viz.Graph.Distances.Matrice
where
import Debug.Trace (trace)
import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import qualified Gargantext.Prelude as P
......@@ -49,8 +42,8 @@ import qualified Gargantext.Prelude as P
--
-- >>> vector 3
-- Vector (Z :. 3) [0,1,2]
vector :: Int -> (Array (Z :. Int) Int)
vector n = fromList (Z :. n) [0..n]
vector :: Elt c => Int -> [c] -> (Array (Z :. Int) c)
vector n l = fromList (Z :. n) l
-- | Define a matrix
--
......@@ -85,16 +78,26 @@ dim m = n
-- indexTail (arrayShape m)
-----------------------------------------------------------------------
-- TODO move to Utils
runExp :: Elt e => Exp e -> e
runExp e = indexArray (run (unit e)) Z
-----------------------------------------------------------------------
-- | Sum of a Matrix by Column
--
-- >>> run $ matSum 3 (use $ matrix 3 [1..])
-- >>> run $ matSumCol 3 (use $ matrix 3 [1..])
-- Matrix (Z :. 3 :. 3)
-- [ 12.0, 15.0, 18.0,
-- 12.0, 15.0, 18.0,
-- 12.0, 15.0, 18.0]
matSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
matSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
matSumCol :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
matSumCol' :: Matrix Double -> Matrix Double
matSumCol' m = run $ matSumCol n m'
where
n = dim m
m' = use m
-- | Proba computes de probability matrix: all cells divided by thee sum of its column
......@@ -106,14 +109,16 @@ matSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose m
-- 0.3333333333333333, 0.3333333333333333, 0.3333333333333333,
-- 0.5833333333333334, 0.5333333333333333, 0.5]
matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
matProba r mat = zipWith (/) mat (matSum r mat)
matProba r mat = zipWith (/) mat (matSumCol r mat)
-- | Diagonal of the matrix
--
-- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
-- Vector (Z :. 3) [1,5,9]
diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
diag m = backpermute (indexTail (shape m))
(lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int))))
m
-- | Divide by the Diagonal of the matrix
--
......@@ -145,8 +150,8 @@ matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
-- [ 0.0, 0.0, 7.0,
-- 0.0, 0.0, 8.0,
-- 0.0, 6.0, 9.0]
matFilter :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
matFilter t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
filter' :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
filter' t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
-----------------------------------------------------------------------
-- * Measures of proximity
......@@ -164,7 +169,9 @@ matFilter t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
-- interactions of 2 terms in the corpus.
measureConditional :: Matrix Int -> Matrix Double
--measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
measureConditional m = run (matProba (dim m) $ map fromIntegral $ use m)
measureConditional m = run $ matProba (dim m)
$ map fromIntegral
$ use m
-- *** Conditional distance (advanced)
......@@ -196,9 +203,9 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
r = dim m
xs :: Acc (Matrix Double) -> Acc (Matrix Double)
xs mat = zipWith (-) (matSum r $ matProba r mat) (matProba r mat)
xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = zipWith (-) (matSum r $ transpose $ matProba r mat) (matProba r mat)
ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
-----------------------------------------------------------------------
-- ** Distributional Distance
......@@ -206,11 +213,11 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
-- | Distributional Distance Measure
--
-- Distributional measure is a relative measure which depends on the
-- selected list, it represents structural equivalence.
-- selected list, it represents structural equivalence of mutual information.
--
-- The distributional measure P(c) of @i@ and @j@ terms is: \[
-- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
-- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}}^{}} \]
-- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
--
-- Mutual information
-- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
......@@ -228,26 +235,120 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
-- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
--
distributional :: Matrix Int -> Matrix Double
distributional m = run $ matMiniMax $ ri (map fromIntegral $ use m)
distributional m = run {- -- $ matMiniMax
-- $ ri
-- $ myMin
-}
$ filter' 0
$ s_mi
$ map fromIntegral
{- from Int to Double -}
$ use m
{- push matrix in Accelerate type -}
where
-- filter m = zipWith (\a b -> max a b) m (transpose m)
ri mat = zipWith (/) mat1 mat2
{-
ri :: Acc (Matrix Double) -> Acc (Matrix Double)
ri mat = mat1 -- zipWith (/) mat1 mat2
where
mat1 = matSum n $ zipWith min (s_mi mat) (s_mi $ transpose mat)
mat2 = matSum n mat
s_mi m' = zipWith (\a b -> log (a/b)) m'
$ zipWith (/) (crossProduct m') (total m')
mat1 = matSumCol n $ zipWith min' (myMin mat) (myMin $ transpose mat)
mat2 = total mat
myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
myMin = replicate (constant (Z :. n :. All)) . minimum
-}
-- TODO fix NaN
-- Quali TEST: OK
s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
s_mi m' = zipWith (\x y -> log (x / y)) (diagNull n m')
$ zipWith (/) (crossProduct n m') (total m')
-- crossProduct n m'
total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
total :: Acc (Matrix Double) -> Acc (Matrix Double)
total = replicate (constant (Z :. n :. n)) . sum . sum
n :: Dim
n = dim m
crossProduct m''' = zipWith (*) (cross m''' ) (cross (transpose m'''))
cross mat = zipWith (-) (matSum n mat) (mat)
-- run $ (identityMatrix (DAA.constant (10::Int)) :: DAA.Acc (DAA.Matrix Int)) Matrix (Z :. 10 :. 10)
identityMatrix :: Num a => Exp Int -> Acc (Matrix a)
identityMatrix n =
let zeros = fill (index2 n n) 0
ones = fill (index1 n) 1
in
permute const zeros (\(unindex1 -> i) -> index2 i i) ones
eyeMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
eyeMatrix n' _m =
let ones = fill (index2 n n) 1
zeros = fill (index1 n) 0
n = constant n'
in
permute const ones (\(unindex1 -> i) -> index2 i i) zeros
selfMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
selfMatrix n' _m =
let zeros = fill (index2 n n) 0
ones = fill (index2 n n) 1
n = constant n'
in
permute const ones ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
-> -- ifThenElse (i /= j)
-- (Z :. i :. j)
(Z :. i :. i)
)) zeros
selfMatrix' :: (Elt a, P.Num (Exp a)) => Array DIM2 a -> Matrix a
selfMatrix' m' = run $ selfMatrix n m
where
n = dim m'
m = use m'
-------------------------------------------------
diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
diagNull n m = zipWith (*) m eye
where
eye = eyeMatrix n m
-------------------------------------------------
crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
crossProduct n m = trace (P.show (run m',run m'')) $ zipWith (*) m' m''
where
m' = cross n m
m'' = cross n (transpose m)
crossT :: Matrix Double -> Matrix Double
crossT = run . transpose . use
crossProduct' :: Matrix Double -> Matrix Double
crossProduct' m = run $ crossProduct n m'
where
n = dim m
m' = use m
runWith :: (Arrays c, Elt a1)
=> (Dim -> Acc (Matrix a1) -> a2 -> Acc c)
-> Matrix a1
-> a2
-> c
runWith f m = run . f (dim m) (use m)
-- | cross
cross :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
cross n mat = diagNull n (matSumCol n $ diagNull n mat)
cross' :: Matrix Double -> Matrix Double
cross' mat = run $ cross n mat'
where
mat' = use mat
n = dim mat
-----------------------------------------------------------------------
-----------------------------------------------------------------------
......
......@@ -56,6 +56,7 @@ cooc2graph :: Distance
-> (Map (Text, Text) Int)
-> IO Graph
cooc2graph distance threshold myCooc = do
printDebug "cooc2graph" distance
let
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
......
......@@ -207,7 +207,7 @@ toNthLevel lvlMax prox clus p
| otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1)
-- $ transposePeriodLinks (lvl + 1)
-- \$ transposePeriodLinks (lvl + 1)
$ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant
......@@ -230,15 +230,15 @@ toNthLevel lvlMax prox clus p
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of
Fis (FisParams k s t) -> traceBranches 1
-- $ reLinkPhyloBranches 1
-- $ traceBranches 1
-- \$ reLinkPhyloBranches 1
-- \$ traceBranches 1
$ setPhyloBranches 1
$ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox
$ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox
$ tracePhyloN 1
-- $ setLevelLinks (0,1)
-- \$ setLevelLinks (0,1)
$ addPhyloLevel 1 (getPhyloFis phyloFis)
$ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
where
......
......@@ -126,15 +126,15 @@ phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
phyloGroupMatching periods g p = case pointers of
Nothing -> []
Just pts -> head' "phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
-- Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd pts
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
-- Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
pointers :: Maybe [Pointer]
pointers = find (not . null)
-- | For each time frame, process the Proximity on relevant pairs of targeted groups
-- For each time frame, process the Proximity on relevant pairs of targeted groups
$ scanl (\acc frame ->
let pairs = makePairs frame g p
in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p))
......@@ -145,7 +145,7 @@ phyloGroupMatching periods g p = case pointers of
if (t == t')
then [(getGroupId t,proxi)]
else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) []
-- | [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
-- [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
$ inits periods
--------------------------------------
......@@ -218,8 +218,6 @@ interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
------------------------------------------------------------------------
-- | Make links from Period to Period after level 1
listToTuple :: (a -> b) -> [a] -> [(b,a)]
listToTuple f l = map (\x -> (f x, x)) l
......
......@@ -90,13 +90,13 @@ findDynamics n pv pn m =
bid = fromJust $ (pn ^. pn_bid)
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | emergence
-- emergence
then 2
else if ((fst prd) == (fst $ m ! n))
-- | recombination
-- recombination
then 0
else if (not $ sharedWithParents (fst prd) bid n pv)
-- | decrease
-- decrease
then 1
else 3
......@@ -175,9 +175,3 @@ hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (s
inter :: Map (Int, Int) Double
inter = intersection f1 f2
--------------------------------------
......@@ -171,12 +171,12 @@ exportToDot phylo export =
<> "##########################") $
digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
-- | 1) init the dot graph
{- 1) init the dot graph -}
graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
<> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]]
-- | home made attributes
{-- home made attributes -}
<> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
......@@ -185,36 +185,36 @@ exportToDot phylo export =
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
])
{-
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v
-- | 2) create a layer for the branches labels
-- 2) create a layer for the branches labels -}
subgraph (Str "Branches peaks") $ do
graphAttrs [Rank SameRank]
-- | 3) group the branches by hierarchy
{-
-- 3) group the branches by hierarchy
-- mapM (\branches ->
-- subgraph (Str "Branches clade") $ do
-- graphAttrs [Rank SameRank]
-- -- | 4) create a node for each branch
-- -- 4) create a node for each branch
-- mapM branchToDotNode branches
-- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
-}
mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
-- | 5) create a layer for each period
{-- 5) create a layer for each period -}
_ <- mapM (\period ->
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
graphAttrs [Rank SameRank]
periodToDotNode period
-- | 6) create a node for each group
{-- 6) create a node for each group -}
mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
) $ getPeriodIds phylo
-- | 7) create the edges between a branch and its first groups
{-- 7) create the edges between a branch and its first groups -}
_ <- mapM (\(bId,groups) ->
mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
)
......@@ -224,31 +224,29 @@ exportToDot phylo export =
$ sortOn (fst . _phylo_groupPeriod) groups)
$ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
-- | 8) create the edges between the groups
{- 8) create the edges between the groups -}
_ <- mapM (\((k,k'),_) ->
toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
) $ (toList . mergePointers) $ export ^. export_groups
-- | 7) create the edges between the periods
{- 7) create the edges between the periods -}
_ <- mapM (\(prd,prd') ->
toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
-- | 8) create the edges between the branches
{- 8) create the edges between the branches
-- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId'
-- (getThresholdInit $ phyloProximity $ getConfig phylo)
-- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
-}
graphAttrs [Rank SameRank]
----------------
-- | Filter | --
----------------
......@@ -439,13 +437,13 @@ toDynamics n parents g m =
let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | decrease
{- decrease -}
then 2
else if ((fst prd) == (fst $ m ! n))
-- | recombination
{- recombination -}
then 0
else if isNew
-- | emergence
{- emergence -}
then 1
else 3
where
......
......@@ -115,7 +115,7 @@ cliqueToGroup fis pId lvl idx fdt coocs =
(fis ^. phyloClique_support)
ngrams
(ngramsToCooc ngrams coocs)
(1,[0]) -- | branchid (lvl,[path in the branching tree])
(1,[0]) -- branchid (lvl,[path in the branching tree])
(fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] []
......@@ -142,24 +142,24 @@ toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
---------------------------
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
-- To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterClique keep thr f m = case keep of
False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support
-- To filter Fis with small Support
filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l
-- | To filter Fis with small Clique size
-- To filter Fis with small Clique size
filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l
-- | To filter nested Fis
-- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterCliqueByNested m =
let clq = map (\l ->
......@@ -173,16 +173,16 @@ filterCliqueByNested m =
in fromList $ zip (keys m) clq'
-- | To transform a time map of docs innto a time map of Fis with some filters
-- To transform a time map of docs innto a time map of Fis with some filters
toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique]
toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested
-- $ traceFis "Filtered by clique size"
{- \$ traceFis "Filtered by clique size" -}
$ filterClique True s' (filterCliqueBySize)
-- $ traceFis "Filtered by support"
{- \$ traceFis "Filtered by support" -}
$ filterClique True s (filterCliqueBySupport)
-- $ traceFis "Unfiltered Fis"
{- \$ traceFis "Unfiltered Fis" -}
phyloClique
MaxClique _ -> undefined
where
......@@ -204,7 +204,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
--------------------
-- | To transform the docs into a time map of coocurency matrix
-- To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc docs fdt =
let mCooc = fromListWith sumCooc
......@@ -221,7 +221,7 @@ docsToTimeScaleCooc docs fdt =
-- | to Phylo Base | --
-----------------------
-- | To group a list of Documents by fixed periods
-- To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
......@@ -237,7 +237,7 @@ groupDocsByPeriod' f pds docs =
-- | To group a list of Documents by fixed periods
-- To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
......@@ -265,7 +265,7 @@ docsToTermFreq docs fdt =
in map (/sumFreqs) freqs
-- | To count the number of docs by unit of time
-- To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double
docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
......@@ -279,7 +279,7 @@ initPhyloLevels lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax]
-- | To init the basic elements of a Phylo
-- To init the basic elements of a Phylo
toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
......
......@@ -36,13 +36,13 @@ import qualified Data.Set as Set
mergeBranchIds :: [[Int]] -> [Int]
mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
-- 1) find the most frequent ids
mostFreq' :: [[Int]] -> [[Int]]
mostFreq' ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
......@@ -58,12 +58,12 @@ mergeMeta bId groups =
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups =
-- | run the related component algorithm
-- run the related component algorithm
let egos = map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
graph = relatedComponents egos
-- | update each group's branch id
-- update each group's branch id
in map (\ids ->
let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
......@@ -103,26 +103,26 @@ toNextLevel' phylo groups =
newGroups = concat $ groupsToBranches'
$ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' ->
-- | 4) create the parent group
-- 4) create the parent group
let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
in acc ++ [parent]) []
-- | 3) group the current groups by parentId
-- 3) group the current groups by parentId
$ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
in traceSynchronyEnd
$ over ( phylo_periods . traverse . phylo_periodLevels . traverse
-- | 6) update each period at curLvl + 1
-- 6) update each period at curLvl + 1
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
-- | 7) by adding the parents
-- 7) by adding the parents
(\phyloLvl ->
if member (phyloLvl ^. phylo_levelPeriod) newPeriods
then phyloLvl & phylo_levelGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod))
else phyloLvl)
-- | 2) add the curLvl + 1 phyloLevel to the phylo
-- 2) add the curLvl + 1 phyloLevel to the phylo
$ addPhyloLevel (curLvl + 1)
-- | 1) update the current groups (with level parent pointers) in the phylo
-- 1) update the current groups (with level parent pointers) in the phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
--------------------
......@@ -187,19 +187,19 @@ toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1),
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs diagos branch =
-- | 1) reduce a branch as a set of periods & groups
-- 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems)
$ mapWithKey (\prd groups ->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd]
edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
in map (\comp ->
-- | 4) add to each groups their futur level parent group
-- 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp)
in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
-- |3) reduce the graph a a set of related components
-- 3) reduce the graph a a set of related components
$ toRelatedComponents groups edges) periods
......@@ -251,4 +251,4 @@ synchronicClustering phylo =
-- <> "\n"
-- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
\ No newline at end of file
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
......@@ -117,7 +117,7 @@ makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs d
if (null periods)
then []
else removeOldPointers oldPointers fil thr prox lastPrd
-- | at least on of the pair candidates should be from the last added period
{- at least on of the pair candidates should be from the last added period -}
$ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
$ listToKeys
$ filter (\(id,ngrams) ->
......@@ -143,36 +143,36 @@ phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map
-> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
if (null $ filterPointers proxi thr oldPointers)
-- | let's find new pointers
{- let's find new pointers -}
then if null nextPointers
then []
else head' "phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
{- Keep only the best set of pointers grouped by proximity -}
$ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd $ head' "pointers" nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
{- Find the first time frame where at leats one pointer satisfies the proximity threshold -}
else oldPointers
where
nextPointers :: [[Pointer]]
nextPointers = take 1
$ dropWhile (null)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
{- for each time frame, process the proximity on relevant pairs of targeted groups -}
$ scanl (\acc groups ->
let periods = nub $ map (fst . fst . fst) $ concat groups
nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
diago = reduceDiagos
$ filterDiago diagos ([(fst . fst) id] ++ periods)
-- | important resize nbdocs et diago dans le make pairs
{- important resize nbdocs et diago dans le make pairs -}
pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
in acc ++ ( filterPointers proxi thr
$ concat
$ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates
{- process the proximity between the current group and a pair of candidates -}
let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
in if (c == c')
then [(fst c,proximity)]
else [(fst c,proximity),(fst c',proximity)] ) pairs )) []
$ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$ inits candidates {- groups from [[1900],[1900,1901],[1900,1901,1902],...] -}
filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
......@@ -205,19 +205,19 @@ matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date
matchGroupsToGroups frame periods proximity thr docs coocs groups =
let groups' = groupByField _phylo_groupPeriod groups
in foldl' (\acc prd ->
let -- | 1) find the parents/childs matching periods
let -- 1) find the parents/childs matching periods
periodsPar = getNextPeriods ToParents frame prd periods
periodsChi = getNextPeriods ToChilds frame prd periods
-- | 2) find the parents/childs matching candidates
-- 2) find the parents/childs matching candidates
candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
-- | 3) find the parents/child number of docs by years
-- 3) find the parents/child number of docs by years
docsPar = filterDocs docs ([prd] ++ periodsPar)
docsChi = filterDocs docs ([prd] ++ periodsChi)
-- | 4) find the parents/child diago by years
-- 4) find the parents/child diago by years
diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
-- | 5) match in parallel all the groups (egos) to their possible candidates
-- 5) match in parallel all the groups (egos) to their possible candidates
egos = map (\ego ->
let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
......@@ -272,19 +272,19 @@ toPhyloQuality' beta freq branches =
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches groups =
-- | run the related component algorithm
-- run the related component algorithm
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs)
$ map (\group -> [getGroupId group]
++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- | first find the related components by inside each ego's period
-- | a supprimer
-- first find the related components by inside each ego's period
-- a supprimer
graph' = map relatedComponents egos
-- | then run it for the all the periods
-- then run it for the all the periods
graph = zip [1..]
$ relatedComponents $ concat (graph' `using` parList rdeepseq)
-- | update each group's branch id
-- update each group's branch id
in map (\(bId,ids) ->
let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
$ elems $ restrictKeys groups (Set.fromList ids)
......@@ -300,14 +300,14 @@ updateThr thr branches = map (\b -> map (\g ->
g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
-- | Sequentially break each branch of a phylo where
-- Sequentially break each branch of a phylo where
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
-> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
-- | 1) keep or not the new division of ego
-- 1) keep or not the new division of ego
let done' = done ++ (if snd ego
then
(if ((null (fst ego')) || (quality > quality'))
......@@ -325,7 +325,7 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs
((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
else [ego])
in
-- | 2) if there is no more branches in rest then return else continue
-- 2) if there is no more branches in rest then return else continue
if null rest
then done'
else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
......@@ -352,11 +352,11 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs
seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
-- | if there is no branch to break or if seaLvl level > 1 then end
-- if there is no branch to break or if seaLvl level > 1 then end
if (thr >= 1) || ((not . or) $ map snd branches)
then branches
else
-- | break all the possible branches at the current seaLvl level
-- break all the possible branches at the current seaLvl level
let branches' = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
......@@ -368,7 +368,7 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
phylo
where
-- | 2) process the temporal matching by elevating seaLvl level
-- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]]
branches = map fst
$ seaLevelMatching (phyloProximity $ getConfig phylo)
......@@ -383,8 +383,8 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
groups
-- | 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],Bool)]
groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
......@@ -441,7 +441,7 @@ adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,Ph
-> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
-> [([PhyloGroup],(Bool,[Double]))]
adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
-- | 1) keep or not the new division of ego
-- 1) keep or not the new division of ego
let done' = done ++ (if (fst . snd) ego
then (if ((null (fst ego')) || (quality > quality'))
then
......@@ -451,13 +451,13 @@ adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency min
++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
in
-- | uncomment let .. in for debugging
-- uncomment let .. in for debugging
-- let part1 = partition (snd) done'
-- part2 = partition (snd) rest
-- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
-- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
-- ) $
-- | 2) if there is no more branches in rest then return else continue
-- 2) if there is no more branches in rest then return else continue
if null rest
then done'
else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
......@@ -489,11 +489,11 @@ adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
-- | if there is no branch to break or if seaLvl level >= depth then end
-- if there is no branch to break or if seaLvl level >= depth then end
if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
then branches
else
-- | break all the possible branches at the current seaLvl level
-- break all the possible branches at the current seaLvl level
let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches')
......@@ -511,7 +511,7 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
phylo
where
-- | 2) process the temporal matching by elevating seaLvl level
-- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]]
branches = map fst
$ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
......@@ -526,8 +526,8 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc)
groups
-- | 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],(Bool,[Double]))]
groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
......
......@@ -58,7 +58,6 @@ extra-deps:
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- Unique-0.4.7.6@sha256:a1ff411f4d68c756e01e8d532fbe8e57f1ac77f2cc0ee8a999770be2bca185c5,2723
- KMP-0.1.0.2
- accelerate-1.2.0.1
- aeson-lens-0.5.0.0
- deepseq-th-0.1.0.4
- duckling-0.1.3.0
......@@ -84,3 +83,7 @@ extra-deps:
- password-2.0.1.1
- base64-0.4.2@sha256:e9523e18bdadc3cab9dc32dfe3ac09c718fe792076326d6d353437b8b255cb5b,2888
- ghc-byteorder-4.11.0.0.10@sha256:5ee4a907279bfec27b0f9de7b8fba4cecfd34395a0235a7784494de70ad4e98f,1535
# Matrix Computation
- accelerate-1.2.0.1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment