Commit 19071e4b authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into dev-doc-table-optimization

parents 4caaf612 63e3a6fd
Pipeline #922 failed with stage
...@@ -6,15 +6,15 @@ category: Data ...@@ -6,15 +6,15 @@ category: Data
author: Gargantext Team author: Gargantext Team
maintainer: team@gargantext.org maintainer: team@gargantext.org
copyright: copyright:
- ! 'Copyright: (c) 2017-2018: see git logs and README' - ! 'Copyright: (c) 2017-Present: see git logs and README'
license: BSD3 license: AGPL-3
homepage: https://gargantext.org homepage: https://gargantext.org
ghc-options: -Wall ghc-options: -Wall
extra-libraries: extra-libraries:
- gfortran - gfortran
dependencies: dependencies:
- extra - extra
- text - text
default-extensions: default-extensions:
- DataKinds - DataKinds
- DeriveGeneric - DeriveGeneric
...@@ -59,6 +59,7 @@ library: ...@@ -59,6 +59,7 @@ library:
- Gargantext.Text - Gargantext.Text
- Gargantext.Text.Context - Gargantext.Text.Context
- Gargantext.Text.Corpus.Parsers - Gargantext.Text.Corpus.Parsers
- Gargantext.Text.Corpus.Parsers.Date.Parsec
- Gargantext.Text.Corpus.API - Gargantext.Text.Corpus.API
- Gargantext.Text.Corpus.Parsers.CSV - Gargantext.Text.Corpus.Parsers.CSV
- Gargantext.Text.Examples - Gargantext.Text.Examples
...@@ -364,36 +365,58 @@ executables: ...@@ -364,36 +365,58 @@ executables:
tests: tests:
# garg-test: garg-test:
# main: Main.hs main: Main.hs
# source-dirs: src-test source-dirs: src-test
# ghc-options: default-extensions:
# - -threaded - DataKinds
# - -rtsopts - DeriveGeneric
# - -with-rtsopts=-N - FlexibleContexts
# dependencies: - FlexibleInstances
# - base - GeneralizedNewtypeDeriving
# - gargantext - MultiParamTypeClasses
# - hspec - NoImplicitPrelude
# - QuickCheck - OverloadedStrings
# - quickcheck-instances - RankNTypes
# - time ghc-options:
# - parsec - -threaded
# - duckling - -rtsopts
# - text - -with-rtsopts=-N
garg-doctest: dependencies:
main: Main.hs - base
source-dirs: src-doctest - gargantext
ghc-options: - hspec
- -O2 - QuickCheck
- -Wcompat - quickcheck-instances
- -Wmissing-signatures - time
- -rtsopts - parsec
- -threaded - duckling
- -with-rtsopts=-N - text
dependencies: # garg-doctest:
- doctest # main: Main.hs
- Glob # source-dirs: src-doctest
- QuickCheck # ghc-options:
- base # - -O2
- gargantext # - -Wcompat
# - -Wmissing-signatures
# - -rtsopts
# - -threaded
# - -with-rtsopts=-N
# dependencies:
# - doctest
# - Glob
# - QuickCheck
# - base
# - gargantext
# default-extensions:
# - DataKinds
# - DeriveGeneric
# - FlexibleContexts
# - FlexibleInstances
# - GeneralizedNewtypeDeriving
# - MultiParamTypeClasses
# - NoImplicitPrelude
# - OverloadedStrings
# - RankNTypes
#
import System.FilePath.Glob import System.FilePath.Glob
import Test.DocTest import Test.DocTest
import Gargantext.Prelude
main :: IO () main :: IO ()
main = glob "src/Gargantext/" >>= doctest main = glob "src/Gargantext/" >>= doctest
......
...@@ -12,15 +12,17 @@ Portability : POSIX ...@@ -12,15 +12,17 @@ Portability : POSIX
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
--import qualified Ngrams.Lang.Fr as Fr --import qualified Ngrams.Lang.Fr as Fr
import qualified Ngrams.Lang as Lang --import qualified Ngrams.Lang as Lang
import qualified Ngrams.Lang.Occurrences as Occ import qualified Ngrams.Lang.Occurrences as Occ
import qualified Ngrams.Metrics as Metrics import qualified Ngrams.Metrics as Metrics
import qualified Parsers.Date as PD import qualified Parsers.Date as PD
import qualified Graph.Distance as GD
main :: IO () main :: IO ()
main = do main = do
Occ.parsersTest -- Occ.parsersTest
Lang.ngramsExtractionTest FR -- Lang.ngramsExtractionTest FR
Lang.ngramsExtractionTest EN -- Lang.ngramsExtractionTest EN
Metrics.main -- Metrics.main
PD.testFromRFC3339 PD.testFromRFC3339
GD.test
...@@ -15,6 +15,7 @@ commentary with @some markup@. ...@@ -15,6 +15,7 @@ commentary with @some markup@.
module Ngrams.Lang where module Ngrams.Lang where
{-
import Gargantext.Prelude (IO()) import Gargantext.Prelude (IO())
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
...@@ -24,4 +25,4 @@ import qualified Ngrams.Lang.En as En ...@@ -24,4 +25,4 @@ import qualified Ngrams.Lang.En as En
ngramsExtractionTest :: Lang -> IO () ngramsExtractionTest :: Lang -> IO ()
ngramsExtractionTest FR = Fr.ngramsExtractionTest ngramsExtractionTest FR = Fr.ngramsExtractionTest
ngramsExtractionTest EN = En.ngramsExtractionTest ngramsExtractionTest EN = En.ngramsExtractionTest
-}
...@@ -15,6 +15,7 @@ commentary with @some markup@. ...@@ -15,6 +15,7 @@ commentary with @some markup@.
module Ngrams.Lang.En where module Ngrams.Lang.En where
{-
import Data.List ((!!)) import Data.List ((!!))
import Data.Text (Text) import Data.Text (Text)
...@@ -22,8 +23,11 @@ import Test.Hspec ...@@ -22,8 +23,11 @@ import Test.Hspec
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
-- TODO this import is not used anymore
import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
-- use instead
-- import Gargantext.Text.Terms (extractNgramsT)
ngramsExtractionTest :: IO () ngramsExtractionTest :: IO ()
ngramsExtractionTest = hspec $ do ngramsExtractionTest = hspec $ do
...@@ -43,4 +47,4 @@ ngramsExtractionTest = hspec $ do ...@@ -43,4 +47,4 @@ ngramsExtractionTest = hspec $ do
t2 <- map (selectNgrams EN) <$> extractNgrams EN t t2 <- map (selectNgrams EN) <$> extractNgrams EN t
t2 `shouldBe` [[("Donald Trump","NNP","PERSON"),("president of the United-States of America","NN","LOCATION")]] t2 `shouldBe` [[("Donald Trump","NNP","PERSON"),("president of the United-States of America","NN","LOCATION")]]
-}
...@@ -15,12 +15,15 @@ commentary with @some markup@. ...@@ -15,12 +15,15 @@ commentary with @some markup@.
module Ngrams.Lang.Fr where module Ngrams.Lang.Fr where
{-
import Test.Hspec import Test.Hspec
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
-- TODO this import is not used anymore
import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams) import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
-- use instead
-
ngramsExtractionTest :: IO () ngramsExtractionTest :: IO ()
ngramsExtractionTest = hspec $ do ngramsExtractionTest = hspec $ do
describe "Behavioral tests: ngrams extraction in French Language" $ do describe "Behavioral tests: ngrams extraction in French Language" $ do
...@@ -61,4 +64,4 @@ ngramsExtractionTest = hspec $ do ...@@ -61,4 +64,4 @@ ngramsExtractionTest = hspec $ do
let textFr1 = "L'heure d'arrivée des coureurs dépend de la météo du jour." let textFr1 = "L'heure d'arrivée des coureurs dépend de la météo du jour."
testFr1 <- map (selectNgrams FR) <$> (extractNgrams FR) textFr1 testFr1 <- map (selectNgrams FR) <$> (extractNgrams FR) textFr1
testFr1 `shouldBe` [[("heure d' arrivée des coureurs","NC","O"),("météo du jour","NC","O")]] testFr1 `shouldBe` [[("heure d' arrivée des coureurs","NC","O"),("météo du jour","NC","O")]]
-}
...@@ -15,6 +15,7 @@ commentary with @some markup@. ...@@ -15,6 +15,7 @@ commentary with @some markup@.
module Ngrams.Lang.Occurrences where module Ngrams.Lang.Occurrences where
{-
import Test.Hspec import Test.Hspec
import Data.Either (Either(Right)) import Data.Either (Either(Right))
...@@ -59,4 +60,4 @@ parsersTest = hspec $ do ...@@ -59,4 +60,4 @@ parsersTest = hspec $ do
-- describe "Parser for nodes" $ do -- describe "Parser for nodes" $ do
-- it "returns the result of one parsing after space" $ do -- it "returns the result of one parsing after space" $ do
-- occOfCorpus 249509 "sciences" `shouldReturn` 7 -- occOfCorpus 249509 "sciences" `shouldReturn` 7
-}
...@@ -15,8 +15,10 @@ commentary with @some markup@. ...@@ -15,8 +15,10 @@ commentary with @some markup@.
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
module Ngrams.Metrics (main) where --module Ngrams.Metrics (main) where
module Ngrams.Metrics where
{-
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Ratio import Data.Ratio
...@@ -139,3 +141,5 @@ testPair :: (Eq a, Show a) ...@@ -139,3 +141,5 @@ testPair :: (Eq a, Show a)
-> SpecWith () -> SpecWith ()
testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $ testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $
f a b `shouldBe` r f a b `shouldBe` r
-}
...@@ -28,7 +28,7 @@ import Duckling.Time.Types (toRFC3339) ...@@ -28,7 +28,7 @@ import Duckling.Time.Types (toRFC3339)
----------------------------------------------------------- -----------------------------------------------------------
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Parsers.Date (fromRFC3339) import Gargantext.Text.Corpus.Parsers.Date.Parsec (fromRFC3339)
import Parsers.Types import Parsers.Types
----------------------------------------------------------- -----------------------------------------------------------
......
...@@ -27,12 +27,12 @@ instance Arbitrary a => Arbitrary (JobOutput a) where ...@@ -27,12 +27,12 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
arbitrary = JobOutput <$> arbitrary arbitrary = JobOutput <$> arbitrary
-- | Main Types -- | Main Types
-- TODO IsidoreAuth
data ExternalAPIs = All data ExternalAPIs = All
| PubMed | PubMed
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
-- | IsidoreAuth
deriving (Show, Eq, Enum, Bounded, Generic) deriving (Show, Eq, Enum, Bounded, Generic)
......
...@@ -81,7 +81,7 @@ updateNode uId nId (UpdateNodeParamsGraph metric) logStatus = do ...@@ -81,7 +81,7 @@ updateNode uId nId (UpdateNodeParamsGraph metric) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1 logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 2 , _scst_remaining = Just 1
, _scst_events = Just [] , _scst_events = Just []
} }
......
...@@ -175,6 +175,6 @@ newTries n t = buildTries n (fmap toToken $ uniText t) ...@@ -175,6 +175,6 @@ newTries n t = buildTries n (fmap toToken $ uniText t)
uniText :: Text -> [[Text]] uniText :: Text -> [[Text]]
uniText = map (List.filter (not . isPunctuation)) uniText = map (List.filter (not . isPunctuation))
. map tokenize . map tokenize
. sentences -- | TODO get sentences according to lang . sentences -- TODO get sentences according to lang
. Text.toLower . Text.toLower
...@@ -73,12 +73,12 @@ subst (src, dst) x | sim src x = dst ...@@ -73,12 +73,12 @@ subst (src, dst) x | sim src x = dst
| otherwise = x | otherwise = x
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO: Show Instance only used for debugging
type Entropy e = type Entropy e =
( Fractional e ( Fractional e
, Floating e , Floating e
, P.RealFloat e , P.RealFloat e
, Show e , Show e
-- ^ TODO: only used for debugging
) )
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Example and tests for development -- | Example and tests for development
......
...@@ -65,11 +65,13 @@ data SeaElevation = ...@@ -65,11 +65,13 @@ data SeaElevation =
data Proximity = data Proximity =
WeightedLogJaccard WeightedLogJaccard
{ _wlj_sensibility :: Double { _wlj_sensibility :: Double
{-
-- , _wlj_thresholdInit :: Double -- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double -- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching -- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double -- , _wlj_elevation :: Double
-}
} }
| Hamming | Hamming
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
......
...@@ -147,8 +147,9 @@ computeGraph cId d nt repo = do ...@@ -147,8 +147,9 @@ computeGraph cId d nt repo = do
lIds <- selectNodesWithUsername NodeList userMaster lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
-- TODO split diagonal
myCooc <- Map.filter (>1) myCooc <- Map.filter (>1)
<$> getCoocByNgrams (Diagonal True) <$> getCoocByNgrams (Diagonal False)
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs) <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
......
...@@ -27,6 +27,7 @@ import Test.QuickCheck.Arbitrary ...@@ -27,6 +27,7 @@ import Test.QuickCheck.Arbitrary
------------------------------------------------------------------------ ------------------------------------------------------------------------
data Distance = Conditional | Distributional data Distance = Conditional | Distributional
deriving (Show)
measure :: Distance -> Matrix Int -> Matrix Double measure :: Distance -> Matrix Int -> Matrix Double
measure Conditional = measureConditional measure Conditional = measureConditional
......
...@@ -18,18 +18,15 @@ module Gargantext.Viz.Graph.Distances.Distributional ...@@ -18,18 +18,15 @@ module Gargantext.Viz.Graph.Distances.Distributional
where where
import Data.Matrix hiding (identity) import Data.Matrix hiding (identity)
import qualified Data.Map as M import qualified Data.Map as M
import Data.Vector (Vector) import Data.Vector (Vector)
import qualified Data.Vector as V import qualified Data.Vector as V
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Graph.Utils import Gargantext.Viz.Graph.Utils
distributional :: (Floating a, Ord a) => Matrix a -> [((Int, Int), a)] distributional' :: (Floating a, Ord a) => Matrix a -> [((Int, Int), a)]
distributional m = filter (\((x,y), d) -> foldl' (&&) True (conditions x y d) ) distriList distributional' m = filter (\((x,y), d) -> foldl' (&&) True (conditions x y d) ) distriList
where where
conditions x y d = [ (x /= y) conditions x y d = [ (x /= y)
, (d > miniMax') , (d > miniMax')
...@@ -51,7 +48,6 @@ ri m = matrix c r doRi ...@@ -51,7 +48,6 @@ ri m = matrix c r doRi
$ V.zip (ax Col x y mi') (ax Row x y mi') $ V.zip (ax Col x y mi') (ax Row x y mi')
(c,r) = (nOf Col m, nOf Row m) (c,r) = (nOf Col m, nOf Row m)
mi :: (Ord a, Floating a) => Matrix a -> Matrix a mi :: (Ord a, Floating a) => Matrix a -> Matrix a
mi m = matrix c r createMat mi m = matrix c r createMat
where where
......
...@@ -17,14 +17,6 @@ Implementation use Accelerate library which enables GPU and CPU computation: ...@@ -17,14 +17,6 @@ Implementation use Accelerate library which enables GPU and CPU computation:
[Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11]. [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011. In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
* Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
[Optimising Purely Functional GPU Programs][MCKL13].
In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
* Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
[Embedding Foreign Code][CMCK14].
In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
* Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton. * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
[Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15]. [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015. In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.
...@@ -34,13 +26,14 @@ Implementation use Accelerate library which enables GPU and CPU computation: ...@@ -34,13 +26,14 @@ Implementation use Accelerate library which enables GPU and CPU computation:
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Viz.Graph.Distances.Matrice module Gargantext.Viz.Graph.Distances.Matrice
where where
import Debug.Trace (trace)
import Data.Array.Accelerate import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run) import Data.Array.Accelerate.Interpreter (run)
import qualified Gargantext.Prelude as P import qualified Gargantext.Prelude as P
...@@ -49,8 +42,8 @@ import qualified Gargantext.Prelude as P ...@@ -49,8 +42,8 @@ import qualified Gargantext.Prelude as P
-- --
-- >>> vector 3 -- >>> vector 3
-- Vector (Z :. 3) [0,1,2] -- Vector (Z :. 3) [0,1,2]
vector :: Int -> (Array (Z :. Int) Int) vector :: Elt c => Int -> [c] -> (Array (Z :. Int) c)
vector n = fromList (Z :. n) [0..n] vector n l = fromList (Z :. n) l
-- | Define a matrix -- | Define a matrix
-- --
...@@ -85,16 +78,26 @@ dim m = n ...@@ -85,16 +78,26 @@ dim m = n
-- indexTail (arrayShape m) -- indexTail (arrayShape m)
----------------------------------------------------------------------- -----------------------------------------------------------------------
-- TODO move to Utils
runExp :: Elt e => Exp e -> e
runExp e = indexArray (run (unit e)) Z
-----------------------------------------------------------------------
-- | Sum of a Matrix by Column -- | Sum of a Matrix by Column
-- --
-- >>> run $ matSum 3 (use $ matrix 3 [1..]) -- >>> run $ matSumCol 3 (use $ matrix 3 [1..])
-- Matrix (Z :. 3 :. 3) -- Matrix (Z :. 3 :. 3)
-- [ 12.0, 15.0, 18.0, -- [ 12.0, 15.0, 18.0,
-- 12.0, 15.0, 18.0, -- 12.0, 15.0, 18.0,
-- 12.0, 15.0, 18.0] -- 12.0, 15.0, 18.0]
matSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double) matSumCol :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
matSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
matSumCol' :: Matrix Double -> Matrix Double
matSumCol' m = run $ matSumCol n m'
where
n = dim m
m' = use m
-- | Proba computes de probability matrix: all cells divided by thee sum of its column -- | Proba computes de probability matrix: all cells divided by thee sum of its column
...@@ -106,14 +109,16 @@ matSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose m ...@@ -106,14 +109,16 @@ matSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose m
-- 0.3333333333333333, 0.3333333333333333, 0.3333333333333333, -- 0.3333333333333333, 0.3333333333333333, 0.3333333333333333,
-- 0.5833333333333334, 0.5333333333333333, 0.5] -- 0.5833333333333334, 0.5333333333333333, 0.5]
matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double) matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
matProba r mat = zipWith (/) mat (matSum r mat) matProba r mat = zipWith (/) mat (matSumCol r mat)
-- | Diagonal of the matrix -- | Diagonal of the matrix
-- --
-- >>> run $ diag (use $ matrix 3 ([1..] :: [Int])) -- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
-- Vector (Z :. 3) [1,5,9] -- Vector (Z :. 3) [1,5,9]
diag :: Elt e => Acc (Matrix e) -> Acc (Vector e) diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m diag m = backpermute (indexTail (shape m))
(lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int))))
m
-- | Divide by the Diagonal of the matrix -- | Divide by the Diagonal of the matrix
-- --
...@@ -145,8 +150,8 @@ matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m) ...@@ -145,8 +150,8 @@ matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
-- [ 0.0, 0.0, 7.0, -- [ 0.0, 0.0, 7.0,
-- 0.0, 0.0, 8.0, -- 0.0, 0.0, 8.0,
-- 0.0, 6.0, 9.0] -- 0.0, 6.0, 9.0]
matFilter :: Double -> Acc (Matrix Double) -> Acc (Matrix Double) filter' :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
matFilter t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m) filter' t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
----------------------------------------------------------------------- -----------------------------------------------------------------------
-- * Measures of proximity -- * Measures of proximity
...@@ -164,7 +169,9 @@ matFilter t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m) ...@@ -164,7 +169,9 @@ matFilter t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
-- interactions of 2 terms in the corpus. -- interactions of 2 terms in the corpus.
measureConditional :: Matrix Int -> Matrix Double measureConditional :: Matrix Int -> Matrix Double
--measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m) --measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
measureConditional m = run (matProba (dim m) $ map fromIntegral $ use m) measureConditional m = run $ matProba (dim m)
$ map fromIntegral
$ use m
-- *** Conditional distance (advanced) -- *** Conditional distance (advanced)
...@@ -196,9 +203,9 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m ...@@ -196,9 +203,9 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
r = dim m r = dim m
xs :: Acc (Matrix Double) -> Acc (Matrix Double) xs :: Acc (Matrix Double) -> Acc (Matrix Double)
xs mat = zipWith (-) (matSum r $ matProba r mat) (matProba r mat) xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
ys :: Acc (Matrix Double) -> Acc (Matrix Double) ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = zipWith (-) (matSum r $ transpose $ matProba r mat) (matProba r mat) ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)
----------------------------------------------------------------------- -----------------------------------------------------------------------
-- ** Distributional Distance -- ** Distributional Distance
...@@ -206,11 +213,11 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m ...@@ -206,11 +213,11 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
-- | Distributional Distance Measure -- | Distributional Distance Measure
-- --
-- Distributional measure is a relative measure which depends on the -- Distributional measure is a relative measure which depends on the
-- selected list, it represents structural equivalence. -- selected list, it represents structural equivalence of mutual information.
-- --
-- The distributional measure P(c) of @i@ and @j@ terms is: \[ -- The distributional measure P(c) of @i@ and @j@ terms is: \[
-- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik}, -- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
-- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}}^{}} \] -- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
-- --
-- Mutual information -- Mutual information
-- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\] -- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
...@@ -228,26 +235,120 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m ...@@ -228,26 +235,120 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
-- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\] -- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
-- --
distributional :: Matrix Int -> Matrix Double distributional :: Matrix Int -> Matrix Double
distributional m = run $ matMiniMax $ ri (map fromIntegral $ use m) distributional m = run {- -- $ matMiniMax
-- $ ri
-- $ myMin
-}
$ filter' 0
$ s_mi
$ map fromIntegral
{- from Int to Double -}
$ use m
{- push matrix in Accelerate type -}
where where
-- filter m = zipWith (\a b -> max a b) m (transpose m) -- filter m = zipWith (\a b -> max a b) m (transpose m)
ri mat = zipWith (/) mat1 mat2 {-
ri :: Acc (Matrix Double) -> Acc (Matrix Double)
ri mat = mat1 -- zipWith (/) mat1 mat2
where where
mat1 = matSum n $ zipWith min (s_mi mat) (s_mi $ transpose mat) mat1 = matSumCol n $ zipWith min' (myMin mat) (myMin $ transpose mat)
mat2 = matSum n mat mat2 = total mat
myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
s_mi m' = zipWith (\a b -> log (a/b)) m' myMin = replicate (constant (Z :. n :. All)) . minimum
$ zipWith (/) (crossProduct m') (total m')
-}
-- TODO fix NaN
-- Quali TEST: OK
s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
s_mi m' = zipWith (\x y -> log (x / y)) (diagNull n m')
$ zipWith (/) (crossProduct n m') (total m')
-- crossProduct n m'
total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
total :: Acc (Matrix Double) -> Acc (Matrix Double)
total = replicate (constant (Z :. n :. n)) . sum . sum
n :: Dim n :: Dim
n = dim m n = dim m
crossProduct m''' = zipWith (*) (cross m''' ) (cross (transpose m''')) -- run $ (identityMatrix (DAA.constant (10::Int)) :: DAA.Acc (DAA.Matrix Int)) Matrix (Z :. 10 :. 10)
cross mat = zipWith (-) (matSum n mat) (mat) identityMatrix :: Num a => Exp Int -> Acc (Matrix a)
identityMatrix n =
let zeros = fill (index2 n n) 0
ones = fill (index1 n) 1
in
permute const zeros (\(unindex1 -> i) -> index2 i i) ones
eyeMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
eyeMatrix n' _m =
let ones = fill (index2 n n) 1
zeros = fill (index1 n) 0
n = constant n'
in
permute const ones (\(unindex1 -> i) -> index2 i i) zeros
selfMatrix :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
selfMatrix n' _m =
let zeros = fill (index2 n n) 0
ones = fill (index2 n n) 1
n = constant n'
in
permute const ones ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
-> -- ifThenElse (i /= j)
-- (Z :. i :. j)
(Z :. i :. i)
)) zeros
selfMatrix' :: (Elt a, P.Num (Exp a)) => Array DIM2 a -> Matrix a
selfMatrix' m' = run $ selfMatrix n m
where
n = dim m'
m = use m'
-------------------------------------------------
diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
diagNull n m = zipWith (*) m eye
where
eye = eyeMatrix n m
-------------------------------------------------
crossProduct :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
crossProduct n m = trace (P.show (run m',run m'')) $ zipWith (*) m' m''
where
m' = cross n m
m'' = cross n (transpose m)
crossT :: Matrix Double -> Matrix Double
crossT = run . transpose . use
crossProduct' :: Matrix Double -> Matrix Double
crossProduct' m = run $ crossProduct n m'
where
n = dim m
m' = use m
runWith :: (Arrays c, Elt a1)
=> (Dim -> Acc (Matrix a1) -> a2 -> Acc c)
-> Matrix a1
-> a2
-> c
runWith f m = run . f (dim m) (use m)
-- | cross
cross :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
cross n mat = diagNull n (matSumCol n $ diagNull n mat)
cross' :: Matrix Double -> Matrix Double
cross' mat = run $ cross n mat'
where
mat' = use mat
n = dim mat
----------------------------------------------------------------------- -----------------------------------------------------------------------
----------------------------------------------------------------------- -----------------------------------------------------------------------
......
...@@ -56,6 +56,7 @@ cooc2graph :: Distance ...@@ -56,6 +56,7 @@ cooc2graph :: Distance
-> (Map (Text, Text) Int) -> (Map (Text, Text) Int)
-> IO Graph -> IO Graph
cooc2graph distance threshold myCooc = do cooc2graph distance threshold myCooc = do
printDebug "cooc2graph" distance
let let
(ti, _) = createIndices myCooc (ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc myCooc' = toIndex ti myCooc
......
...@@ -207,7 +207,7 @@ toNthLevel lvlMax prox clus p ...@@ -207,7 +207,7 @@ toNthLevel lvlMax prox clus p
| otherwise = toNthLevel lvlMax prox clus | otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1) $ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1) $ setPhyloBranches (lvl + 1)
-- $ transposePeriodLinks (lvl + 1) -- \$ transposePeriodLinks (lvl + 1)
$ traceTranspose (lvl + 1) Descendant $ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant $ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant $ traceTranspose (lvl + 1) Ascendant
...@@ -230,15 +230,15 @@ toNthLevel lvlMax prox clus p ...@@ -230,15 +230,15 @@ toNthLevel lvlMax prox clus p
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of toPhylo1 clus prox d p = case clus of
Fis (FisParams k s t) -> traceBranches 1 Fis (FisParams k s t) -> traceBranches 1
-- $ reLinkPhyloBranches 1 -- \$ reLinkPhyloBranches 1
-- $ traceBranches 1 -- \$ traceBranches 1
$ setPhyloBranches 1 $ setPhyloBranches 1
$ traceTempoMatching Descendant 1 $ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox $ interTempoMatching Descendant 1 prox
$ traceTempoMatching Ascendant 1 $ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox $ interTempoMatching Ascendant 1 prox
$ tracePhyloN 1 $ tracePhyloN 1
-- $ setLevelLinks (0,1) -- \$ setLevelLinks (0,1)
$ addPhyloLevel 1 (getPhyloFis phyloFis) $ addPhyloLevel 1 (getPhyloFis phyloFis)
$ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis $ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
where where
......
...@@ -126,15 +126,15 @@ phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer] ...@@ -126,15 +126,15 @@ phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
phyloGroupMatching periods g p = case pointers of phyloGroupMatching periods g p = case pointers of
Nothing -> [] Nothing -> []
Just pts -> head' "phyloGroupMatching" Just pts -> head' "phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity -- Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> snd pt == snd pt') $ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd pts $ reverse $ sortOn snd pts
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold -- Find the first time frame where at leats one pointer satisfies the proximity threshold
where where
-------------------------------------- --------------------------------------
pointers :: Maybe [Pointer] pointers :: Maybe [Pointer]
pointers = find (not . null) pointers = find (not . null)
-- | For each time frame, process the Proximity on relevant pairs of targeted groups -- For each time frame, process the Proximity on relevant pairs of targeted groups
$ scanl (\acc frame -> $ scanl (\acc frame ->
let pairs = makePairs frame g p let pairs = makePairs frame g p
in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p)) in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p))
...@@ -145,7 +145,7 @@ phyloGroupMatching periods g p = case pointers of ...@@ -145,7 +145,7 @@ phyloGroupMatching periods g p = case pointers of
if (t == t') if (t == t')
then [(getGroupId t,proxi)] then [(getGroupId t,proxi)]
else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) [] else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) []
-- | [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years -- [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
$ inits periods $ inits periods
-------------------------------------- --------------------------------------
...@@ -218,8 +218,6 @@ interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p ...@@ -218,8 +218,6 @@ interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Make links from Period to Period after level 1 -- | Make links from Period to Period after level 1
listToTuple :: (a -> b) -> [a] -> [(b,a)] listToTuple :: (a -> b) -> [a] -> [(b,a)]
listToTuple f l = map (\x -> (f x, x)) l listToTuple f l = map (\x -> (f x, x)) l
......
...@@ -90,13 +90,13 @@ findDynamics n pv pn m = ...@@ -90,13 +90,13 @@ findDynamics n pv pn m =
bid = fromJust $ (pn ^. pn_bid) bid = fromJust $ (pn ^. pn_bid)
end = last' "dynamics" (sort $ map snd $ elems m) end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | emergence -- emergence
then 2 then 2
else if ((fst prd) == (fst $ m ! n)) else if ((fst prd) == (fst $ m ! n))
-- | recombination -- recombination
then 0 then 0
else if (not $ sharedWithParents (fst prd) bid n pv) else if (not $ sharedWithParents (fst prd) bid n pv)
-- | decrease -- decrease
then 1 then 1
else 3 else 3
...@@ -175,9 +175,3 @@ hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (s ...@@ -175,9 +175,3 @@ hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (s
inter :: Map (Int, Int) Double inter :: Map (Int, Int) Double
inter = intersection f1 f2 inter = intersection f1 f2
-------------------------------------- --------------------------------------
...@@ -171,12 +171,12 @@ exportToDot phylo export = ...@@ -171,12 +171,12 @@ exportToDot phylo export =
<> "##########################") $ <> "##########################") $
digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do
-- | 1) init the dot graph {- 1) init the dot graph -}
graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))] graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
<> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio , Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]] , Style [SItem Filled []],Color [toWColor White]]
-- | home made attributes {-- home made attributes -}
<> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo)) <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups)) ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs)) ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))
...@@ -185,36 +185,36 @@ exportToDot phylo export = ...@@ -185,36 +185,36 @@ exportToDot phylo export =
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups)) ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
]) ])
{-
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
-- | 2) create a layer for the branches labels -- 2) create a layer for the branches labels -}
subgraph (Str "Branches peaks") $ do subgraph (Str "Branches peaks") $ do
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
{-
-- | 3) group the branches by hierarchy -- 3) group the branches by hierarchy
-- mapM (\branches -> -- mapM (\branches ->
-- subgraph (Str "Branches clade") $ do -- subgraph (Str "Branches clade") $ do
-- graphAttrs [Rank SameRank] -- graphAttrs [Rank SameRank]
-- -- | 4) create a node for each branch -- -- 4) create a node for each branch
-- mapM branchToDotNode branches -- mapM branchToDotNode branches
-- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
-}
mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches
-- | 5) create a layer for each period {-- 5) create a layer for each period -}
_ <- mapM (\period -> _ <- mapM (\period ->
subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
periodToDotNode period periodToDotNode period
-- | 6) create a node for each group {-- 6) create a node for each group -}
mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups) mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
) $ getPeriodIds phylo ) $ getPeriodIds phylo
-- | 7) create the edges between a branch and its first groups {-- 7) create the edges between a branch and its first groups -}
_ <- mapM (\(bId,groups) -> _ <- mapM (\(bId,groups) ->
mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
) )
...@@ -224,31 +224,29 @@ exportToDot phylo export = ...@@ -224,31 +224,29 @@ exportToDot phylo export =
$ sortOn (fst . _phylo_groupPeriod) groups) $ sortOn (fst . _phylo_groupPeriod) groups)
$ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
-- | 8) create the edges between the groups {- 8) create the edges between the groups -}
_ <- mapM (\((k,k'),_) -> _ <- mapM (\((k,k'),_) ->
toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
) $ (toList . mergePointers) $ export ^. export_groups ) $ (toList . mergePointers) $ export ^. export_groups
-- | 7) create the edges between the periods {- 7) create the edges between the periods -}
_ <- mapM (\(prd,prd') -> _ <- mapM (\(prd,prd') ->
toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo
-- | 8) create the edges between the branches {- 8) create the edges between the branches
-- _ <- mapM (\(bId,bId') -> -- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId') -- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId' -- (Text.pack $ show(branchIdsToProximity bId bId'
-- (getThresholdInit $ phyloProximity $ getConfig phylo) -- (getThresholdInit $ phyloProximity $ getConfig phylo)
-- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch -- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches -- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
-}
graphAttrs [Rank SameRank] graphAttrs [Rank SameRank]
---------------- ----------------
-- | Filter | -- -- | Filter | --
---------------- ----------------
...@@ -439,13 +437,13 @@ toDynamics n parents g m = ...@@ -439,13 +437,13 @@ toDynamics n parents g m =
let prd = g ^. phylo_groupPeriod let prd = g ^. phylo_groupPeriod
end = last' "dynamics" (sort $ map snd $ elems m) end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end)) in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- | decrease {- decrease -}
then 2 then 2
else if ((fst prd) == (fst $ m ! n)) else if ((fst prd) == (fst $ m ! n))
-- | recombination {- recombination -}
then 0 then 0
else if isNew else if isNew
-- | emergence {- emergence -}
then 1 then 1
else 3 else 3
where where
......
...@@ -115,7 +115,7 @@ cliqueToGroup fis pId lvl idx fdt coocs = ...@@ -115,7 +115,7 @@ cliqueToGroup fis pId lvl idx fdt coocs =
(fis ^. phyloClique_support) (fis ^. phyloClique_support)
ngrams ngrams
(ngramsToCooc ngrams coocs) (ngramsToCooc ngrams coocs)
(1,[0]) -- | branchid (lvl,[path in the branching tree]) (1,[0]) -- branchid (lvl,[path in the branching tree])
(fromList [("breaks",[0]),("seaLevels",[0])]) (fromList [("breaks",[0]),("seaLevels",[0])])
[] [] [] [] [] [] [] []
...@@ -142,24 +142,24 @@ toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of ...@@ -142,24 +142,24 @@ toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
--------------------------- ---------------------------
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False) -- To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique] filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique]) -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterClique keep thr f m = case keep of filterClique keep thr f m = case keep of
False -> map (\l -> f thr l) m False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support -- To filter Fis with small Support
filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique] filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l
-- | To filter Fis with small Clique size -- To filter Fis with small Clique size
filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique] filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l
-- | To filter nested Fis -- To filter nested Fis
filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique] filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
filterCliqueByNested m = filterCliqueByNested m =
let clq = map (\l -> let clq = map (\l ->
...@@ -173,16 +173,16 @@ filterCliqueByNested m = ...@@ -173,16 +173,16 @@ filterCliqueByNested m =
in fromList $ zip (keys m) clq' in fromList $ zip (keys m) clq'
-- | To transform a time map of docs innto a time map of Fis with some filters -- To transform a time map of docs innto a time map of Fis with some filters
toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique] toPhyloClique :: Phylo -> Map (Date, Date) [Document] -> Map (Date,Date) [PhyloClique]
toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
Fis s s' -> -- traceFis "Filtered Fis" Fis s s' -> -- traceFis "Filtered Fis"
filterCliqueByNested filterCliqueByNested
-- $ traceFis "Filtered by clique size" {- \$ traceFis "Filtered by clique size" -}
$ filterClique True s' (filterCliqueBySize) $ filterClique True s' (filterCliqueBySize)
-- $ traceFis "Filtered by support" {- \$ traceFis "Filtered by support" -}
$ filterClique True s (filterCliqueBySupport) $ filterClique True s (filterCliqueBySupport)
-- $ traceFis "Unfiltered Fis" {- \$ traceFis "Unfiltered Fis" -}
phyloClique phyloClique
MaxClique _ -> undefined MaxClique _ -> undefined
where where
...@@ -204,7 +204,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of ...@@ -204,7 +204,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
-------------------- --------------------
-- | To transform the docs into a time map of coocurency matrix -- To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
docsToTimeScaleCooc docs fdt = docsToTimeScaleCooc docs fdt =
let mCooc = fromListWith sumCooc let mCooc = fromListWith sumCooc
...@@ -221,7 +221,7 @@ docsToTimeScaleCooc docs fdt = ...@@ -221,7 +221,7 @@ docsToTimeScaleCooc docs fdt =
-- | to Phylo Base | -- -- | to Phylo Base | --
----------------------- -----------------------
-- | To group a list of Documents by fixed periods -- To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs = groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
...@@ -237,7 +237,7 @@ groupDocsByPeriod' f pds docs = ...@@ -237,7 +237,7 @@ groupDocsByPeriod' f pds docs =
-- | To group a list of Documents by fixed periods -- To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods" groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es = groupDocsByPeriod f pds es =
...@@ -265,7 +265,7 @@ docsToTermFreq docs fdt = ...@@ -265,7 +265,7 @@ docsToTermFreq docs fdt =
in map (/sumFreqs) freqs in map (/sumFreqs) freqs
-- | To count the number of docs by unit of time -- To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double docsToTimeScaleNb :: [Document] -> Map Date Double
docsToTimeScaleNb docs = docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs
...@@ -279,7 +279,7 @@ initPhyloLevels lvlMax pId = ...@@ -279,7 +279,7 @@ initPhyloLevels lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax] fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax]
-- | To init the basic elements of a Phylo -- To init the basic elements of a Phylo
toPhyloBase :: [Document] -> TermList -> Config -> Phylo toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase docs lst conf = toPhyloBase docs lst conf =
let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
......
...@@ -36,13 +36,13 @@ import qualified Data.Set as Set ...@@ -36,13 +36,13 @@ import qualified Data.Set as Set
mergeBranchIds :: [[Int]] -> [Int] mergeBranchIds :: [[Int]] -> [Int]
mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where where
-- | 2) find the most Up Left ids in the hierarchy of similarity -- 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]] -- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' = -- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids' -- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds -- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds -- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids -- 1) find the most frequent ids
mostFreq' :: [[Int]] -> [[Int]] mostFreq' :: [[Int]] -> [[Int]]
mostFreq' ids' = mostFreq' ids' =
let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids' let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'
...@@ -58,12 +58,12 @@ mergeMeta bId groups = ...@@ -58,12 +58,12 @@ mergeMeta bId groups =
groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches' groups = groupsToBranches' groups =
-- | run the related component algorithm -- run the related component algorithm
let egos = map (\g -> [getGroupId g] let egos = map (\g -> [getGroupId g]
++ (map fst $ g ^. phylo_groupPeriodParents) ++ (map fst $ g ^. phylo_groupPeriodParents)
++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups ++ (map fst $ g ^. phylo_groupPeriodChilds) ) $ elems groups
graph = relatedComponents egos graph = relatedComponents egos
-- | update each group's branch id -- update each group's branch id
in map (\ids -> in map (\ids ->
let groups' = elems $ restrictKeys groups (Set.fromList ids) let groups' = elems $ restrictKeys groups (Set.fromList ids)
bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups' bId = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'
...@@ -103,26 +103,26 @@ toNextLevel' phylo groups = ...@@ -103,26 +103,26 @@ toNextLevel' phylo groups =
newGroups = concat $ groupsToBranches' newGroups = concat $ groupsToBranches'
$ fromList $ map (\g -> (getGroupId g, g)) $ fromList $ map (\g -> (getGroupId g, g))
$ foldlWithKey (\acc id groups' -> $ foldlWithKey (\acc id groups' ->
-- | 4) create the parent group -- 4) create the parent group
let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups' let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
in acc ++ [parent]) [] in acc ++ [parent]) []
-- | 3) group the current groups by parentId -- 3) group the current groups by parentId
$ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups $ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups
newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
in traceSynchronyEnd in traceSynchronyEnd
$ over ( phylo_periods . traverse . phylo_periodLevels . traverse $ over ( phylo_periods . traverse . phylo_periodLevels . traverse
-- | 6) update each period at curLvl + 1 -- 6) update each period at curLvl + 1
. filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1))) . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
-- | 7) by adding the parents -- 7) by adding the parents
(\phyloLvl -> (\phyloLvl ->
if member (phyloLvl ^. phylo_levelPeriod) newPeriods if member (phyloLvl ^. phylo_levelPeriod) newPeriods
then phyloLvl & phylo_levelGroups then phyloLvl & phylo_levelGroups
.~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod)) .~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod))
else phyloLvl) else phyloLvl)
-- | 2) add the curLvl + 1 phyloLevel to the phylo -- 2) add the curLvl + 1 phyloLevel to the phylo
$ addPhyloLevel (curLvl + 1) $ addPhyloLevel (curLvl + 1)
-- | 1) update the current groups (with level parent pointers) in the phylo -- 1) update the current groups (with level parent pointers) in the phylo
$ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo
-------------------- --------------------
...@@ -187,19 +187,19 @@ toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1), ...@@ -187,19 +187,19 @@ toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1),
reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
reduceGroups prox sync docs diagos branch = reduceGroups prox sync docs diagos branch =
-- | 1) reduce a branch as a set of periods & groups -- 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++) let periods = fromListWith (++)
$ map (\g -> (g ^. phylo_groupPeriod,[g])) branch $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
in (concat . concat . elems) in (concat . concat . elems)
$ mapWithKey (\prd groups -> $ mapWithKey (\prd groups ->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold -- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd] let diago = reduceDiagos $ filterDiago diagos [prd]
edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
in map (\comp -> in map (\comp ->
-- | 4) add to each groups their futur level parent group -- 4) add to each groups their futur level parent group
let parentId = toParentId (head' "parentId" comp) let parentId = toParentId (head' "parentId" comp)
in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp ) in map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
-- |3) reduce the graph a a set of related components -- 3) reduce the graph a a set of related components
$ toRelatedComponents groups edges) periods $ toRelatedComponents groups edges) periods
...@@ -251,4 +251,4 @@ synchronicClustering phylo = ...@@ -251,4 +251,4 @@ synchronicClustering phylo =
-- <> "\n" -- <> "\n"
-- ) "" edges -- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch) -- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo -- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
\ No newline at end of file
...@@ -117,7 +117,7 @@ makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs d ...@@ -117,7 +117,7 @@ makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs d
if (null periods) if (null periods)
then [] then []
else removeOldPointers oldPointers fil thr prox lastPrd else removeOldPointers oldPointers fil thr prox lastPrd
-- | at least on of the pair candidates should be from the last added period {- at least on of the pair candidates should be from the last added period -}
$ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd)) $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
$ listToKeys $ listToKeys
$ filter (\(id,ngrams) -> $ filter (\(id,ngrams) ->
...@@ -143,36 +143,36 @@ phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map ...@@ -143,36 +143,36 @@ phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map
-> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer] -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) = phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
if (null $ filterPointers proxi thr oldPointers) if (null $ filterPointers proxi thr oldPointers)
-- | let's find new pointers {- let's find new pointers -}
then if null nextPointers then if null nextPointers
then [] then []
else head' "phyloGroupMatching" else head' "phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity {- Keep only the best set of pointers grouped by proximity -}
$ groupBy (\pt pt' -> snd pt == snd pt') $ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd $ head' "pointers" nextPointers $ reverse $ sortOn snd $ head' "pointers" nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold {- Find the first time frame where at leats one pointer satisfies the proximity threshold -}
else oldPointers else oldPointers
where where
nextPointers :: [[Pointer]] nextPointers :: [[Pointer]]
nextPointers = take 1 nextPointers = take 1
$ dropWhile (null) $ dropWhile (null)
-- | for each time frame, process the proximity on relevant pairs of targeted groups {- for each time frame, process the proximity on relevant pairs of targeted groups -}
$ scanl (\acc groups -> $ scanl (\acc groups ->
let periods = nub $ map (fst . fst . fst) $ concat groups let periods = nub $ map (fst . fst . fst) $ concat groups
nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods)) nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
diago = reduceDiagos diago = reduceDiagos
$ filterDiago diagos ([(fst . fst) id] ++ periods) $ filterDiago diagos ([(fst . fst) id] ++ periods)
-- | important resize nbdocs et diago dans le make pairs {- important resize nbdocs et diago dans le make pairs -}
pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos pairs = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
in acc ++ ( filterPointers proxi thr in acc ++ ( filterPointers proxi thr
$ concat $ concat
$ map (\(c,c') -> $ map (\(c,c') ->
-- | process the proximity between the current group and a pair of candidates {- process the proximity between the current group and a pair of candidates -}
let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c') let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
in if (c == c') in if (c == c')
then [(fst c,proximity)] then [(fst c,proximity)]
else [(fst c,proximity),(fst c',proximity)] ) pairs )) [] else [(fst c,proximity),(fst c',proximity)] ) pairs )) []
$ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...] $ inits candidates {- groups from [[1900],[1900,1901],[1900,1901,1902],...] -}
filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double
...@@ -205,19 +205,19 @@ matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date ...@@ -205,19 +205,19 @@ matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date
matchGroupsToGroups frame periods proximity thr docs coocs groups = matchGroupsToGroups frame periods proximity thr docs coocs groups =
let groups' = groupByField _phylo_groupPeriod groups let groups' = groupByField _phylo_groupPeriod groups
in foldl' (\acc prd -> in foldl' (\acc prd ->
let -- | 1) find the parents/childs matching periods let -- 1) find the parents/childs matching periods
periodsPar = getNextPeriods ToParents frame prd periods periodsPar = getNextPeriods ToParents frame prd periods
periodsChi = getNextPeriods ToChilds frame prd periods periodsChi = getNextPeriods ToChilds frame prd periods
-- | 2) find the parents/childs matching candidates -- 2) find the parents/childs matching candidates
candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
-- | 3) find the parents/child number of docs by years -- 3) find the parents/child number of docs by years
docsPar = filterDocs docs ([prd] ++ periodsPar) docsPar = filterDocs docs ([prd] ++ periodsPar)
docsChi = filterDocs docs ([prd] ++ periodsChi) docsChi = filterDocs docs ([prd] ++ periodsChi)
-- | 4) find the parents/child diago by years -- 4) find the parents/child diago by years
diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar) diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar) diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
-- | 5) match in parallel all the groups (egos) to their possible candidates -- 5) match in parallel all the groups (egos) to their possible candidates
egos = map (\ego -> egos = map (\ego ->
let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams) thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)
...@@ -272,19 +272,19 @@ toPhyloQuality' beta freq branches = ...@@ -272,19 +272,19 @@ toPhyloQuality' beta freq branches =
groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]] groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
groupsToBranches groups = groupsToBranches groups =
-- | run the related component algorithm -- run the related component algorithm
let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs')) let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
$ sortOn (\gs -> fst $ fst $ head' "egos" gs) $ sortOn (\gs -> fst $ fst $ head' "egos" gs)
$ map (\group -> [getGroupId group] $ map (\group -> [getGroupId group]
++ (map fst $ group ^. phylo_groupPeriodParents) ++ (map fst $ group ^. phylo_groupPeriodParents)
++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups ++ (map fst $ group ^. phylo_groupPeriodChilds) ) $ elems groups
-- | first find the related components by inside each ego's period -- first find the related components by inside each ego's period
-- | a supprimer -- a supprimer
graph' = map relatedComponents egos graph' = map relatedComponents egos
-- | then run it for the all the periods -- then run it for the all the periods
graph = zip [1..] graph = zip [1..]
$ relatedComponents $ concat (graph' `using` parList rdeepseq) $ relatedComponents $ concat (graph' `using` parList rdeepseq)
-- | update each group's branch id -- update each group's branch id
in map (\(bId,ids) -> in map (\(bId,ids) ->
let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId]))) let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
$ elems $ restrictKeys groups (Set.fromList ids) $ elems $ restrictKeys groups (Set.fromList ids)
...@@ -300,14 +300,14 @@ updateThr thr branches = map (\b -> map (\g -> ...@@ -300,14 +300,14 @@ updateThr thr branches = map (\b -> map (\g ->
g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches
-- | Sequentially break each branch of a phylo where -- Sequentially break each branch of a phylo where
-- done = all the allready broken branches -- done = all the allready broken branches
-- ego = the current branch we want to break -- ego = the current branch we want to break
-- rest = the branches we still have to break -- rest = the branches we still have to break
breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
-> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId] -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
-- | 1) keep or not the new division of ego -- 1) keep or not the new division of ego
let done' = done ++ (if snd ego let done' = done ++ (if snd ego
then then
(if ((null (fst ego')) || (quality > quality')) (if ((null (fst ego')) || (quality > quality'))
...@@ -325,7 +325,7 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs ...@@ -325,7 +325,7 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs
((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego')))) ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
else [ego]) else [ego])
in in
-- | 2) if there is no more branches in rest then return else continue -- 2) if there is no more branches in rest then return else continue
if null rest if null rest
then done' then done'
else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
...@@ -352,11 +352,11 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs ...@@ -352,11 +352,11 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs
seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)] -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches = seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
-- | if there is no branch to break or if seaLvl level > 1 then end -- if there is no branch to break or if seaLvl level > 1 then end
if (thr >= 1) || ((not . or) $ map snd branches) if (thr >= 1) || ((not . or) $ map snd branches)
then branches then branches
else else
-- | break all the possible branches at the current seaLvl level -- break all the possible branches at the current seaLvl level
let branches' = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods let branches' = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches) [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches') frequency' = reduceFrequency frequency (map fst branches')
...@@ -368,7 +368,7 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1 ...@@ -368,7 +368,7 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches) (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
phylo phylo
where where
-- | 2) process the temporal matching by elevating seaLvl level -- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]] branches :: [[PhyloGroup]]
branches = map fst branches = map fst
$ seaLevelMatching (phyloProximity $ getConfig phylo) $ seaLevelMatching (phyloProximity $ getConfig phylo)
...@@ -383,8 +383,8 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1 ...@@ -383,8 +383,8 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc) (phylo ^. phylo_timeCooc)
groups groups
-- | 1) for each group process an initial temporal Matching -- 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch -- here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],Bool)] groups :: [([PhyloGroup],Bool)]
groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo))) groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
...@@ -441,7 +441,7 @@ adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,Ph ...@@ -441,7 +441,7 @@ adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,Ph
-> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))] -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double])) -> [([PhyloGroup],(Bool,[Double]))]
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
-- | 1) keep or not the new division of ego -- 1) keep or not the new division of ego
let done' = done ++ (if (fst . snd) ego let done' = done ++ (if (fst . snd) ego
then (if ((null (fst ego')) || (quality > quality')) then (if ((null (fst ego')) || (quality > quality'))
then then
...@@ -451,13 +451,13 @@ adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency min ...@@ -451,13 +451,13 @@ adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency min
++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego')))) ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
else [(concat $ thrToMeta thr $ [fst ego], snd ego)]) else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
in in
-- | uncomment let .. in for debugging -- uncomment let .. in for debugging
-- let part1 = partition (snd) done' -- let part1 = partition (snd) done'
-- part2 = partition (snd) rest -- part2 = partition (snd) rest
-- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] " -- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
-- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]" -- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
-- ) $ -- ) $
-- | 2) if there is no more branches in rest then return else continue -- 2) if there is no more branches in rest then return else continue
if null rest if null rest
then done' then done'
else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
...@@ -489,11 +489,11 @@ adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId ...@@ -489,11 +489,11 @@ adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId
-> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
-> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches = adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
-- | if there is no branch to break or if seaLvl level >= depth then end -- if there is no branch to break or if seaLvl level >= depth then end
if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches) if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
then branches then branches
else else
-- | break all the possible branches at the current seaLvl level -- break all the possible branches at the current seaLvl level
let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods let branches' = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
[] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches) [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
frequency' = reduceFrequency frequency (map fst branches') frequency' = reduceFrequency frequency (map fst branches')
...@@ -511,7 +511,7 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1 ...@@ -511,7 +511,7 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches) (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches)
phylo phylo
where where
-- | 2) process the temporal matching by elevating seaLvl level -- 2) process the temporal matching by elevating seaLvl level
branches :: [[PhyloGroup]] branches :: [[PhyloGroup]]
branches = map fst branches = map fst
$ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo) $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)
...@@ -526,8 +526,8 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1 ...@@ -526,8 +526,8 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(phylo ^. phylo_timeDocs) (phylo ^. phylo_timeDocs)
(phylo ^. phylo_timeCooc) (phylo ^. phylo_timeCooc)
groups groups
-- | 1) for each group process an initial temporal Matching -- 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch -- here we suppose that all the groups of level 1 are part of the same big branch
groups :: [([PhyloGroup],(Bool,[Double]))] groups :: [([PhyloGroup],(Bool,[Double]))]
groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr]))) groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
$ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
......
...@@ -58,7 +58,6 @@ extra-deps: ...@@ -58,7 +58,6 @@ extra-deps:
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- Unique-0.4.7.6@sha256:a1ff411f4d68c756e01e8d532fbe8e57f1ac77f2cc0ee8a999770be2bca185c5,2723 - Unique-0.4.7.6@sha256:a1ff411f4d68c756e01e8d532fbe8e57f1ac77f2cc0ee8a999770be2bca185c5,2723
- KMP-0.1.0.2 - KMP-0.1.0.2
- accelerate-1.2.0.1
- aeson-lens-0.5.0.0 - aeson-lens-0.5.0.0
- deepseq-th-0.1.0.4 - deepseq-th-0.1.0.4
- duckling-0.1.3.0 - duckling-0.1.3.0
...@@ -84,3 +83,7 @@ extra-deps: ...@@ -84,3 +83,7 @@ extra-deps:
- password-2.0.1.1 - password-2.0.1.1
- base64-0.4.2@sha256:e9523e18bdadc3cab9dc32dfe3ac09c718fe792076326d6d353437b8b255cb5b,2888 - base64-0.4.2@sha256:e9523e18bdadc3cab9dc32dfe3ac09c718fe792076326d6d353437b8b255cb5b,2888
- ghc-byteorder-4.11.0.0.10@sha256:5ee4a907279bfec27b0f9de7b8fba4cecfd34395a0235a7784494de70ad4e98f,1535 - ghc-byteorder-4.11.0.0.10@sha256:5ee4a907279bfec27b0f9de7b8fba4cecfd34395a0235a7784494de70ad4e98f,1535
# Matrix Computation
- accelerate-1.2.0.1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment