Commit 1a051cf1 authored by david Chavalarias's avatar david Chavalarias

New similarity measure for inter-temporal matching added named WeightedLogSim...

New similarity measure for inter-temporal matching added, named WeightedLogSim. Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics with NEViewer: a new method based on dynamic co-word networks. Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y (log added in the formula + pair comparison).
parent 49ac7b06
......@@ -72,6 +72,15 @@ data Proximity =
-- , _wlj_elevation :: Double
-}
}
| WeightedLogSim
{ _wlj_sensibility :: Double
{-
-- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
-}
}
| Hamming
deriving (Show,Generic,Eq)
......
......@@ -261,11 +261,12 @@ data LouvainParams = LouvainParams
-- | Proximity measures. Each constructor carries the parameter record of its
-- measure, except 'Filiation', which takes no parameters.
data Proximity
  = WeightedLogJaccard WLJParams
  | WeightedLogSim WLJParams
  | Hamming HammingParams
  | Filiation
  deriving (Generic, Show, Eq, Read)
-- | Parameters for WeightedLogJaccard and WeightedLogSim proximity
data WLJParams = WLJParams
{ _wlj_threshold :: !Double
, _wlj_sensibility :: !Double
......
......@@ -295,12 +295,14 @@ filterProximity :: Proximity -> Double -> Double -> Bool
-- Keep a local proximity score only when it reaches the threshold.
-- Both weighted measures use the same test; 'Hamming' is left undefined.
filterProximity (WeightedLogJaccard _) thr local = thr <= local
filterProximity (WeightedLogSim _)     thr local = thr <= local
filterProximity Hamming                _   _     = undefined
-- | Name of a proximity measure, as a 'String'.
getProximityName :: Proximity -> String
getProximityName (WeightedLogJaccard _) = "WLJaccard"
getProximityName (WeightedLogSim _)     = "WeightedLogSim"
getProximityName Hamming                = "Hamming"
---------------
......@@ -484,6 +486,7 @@ traceSynchronyStart phylo =
-- | Extract the sensibility value carried by a proximity measure.
-- 'Hamming' carries no value here, so that case is left 'undefined'.
getSensibility :: Proximity -> Double
getSensibility (WeightedLogJaccard sens) = sens
getSensibility (WeightedLogSim sens)     = sens
getSensibility Hamming                   = undefined
----------------
......
......@@ -73,19 +73,49 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams'
diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
--------------------------------------
-- | Weighted similarity between two clusters of ngrams. Adapted from
-- Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics
-- with NEViewer: a new method based on dynamic co-word networks.
-- Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y
-- (log added in the formula + pair comparison).
--
-- Result by case, checked in this order:
--
--   * no shared ngram: 0
--   * intersection equal to union: 1
--   * @sens == 0@: plain 'jaccard' of intersection and union
--   * @sens > 0@: 'sumInvLog'' of the shared weights divided by the smaller
--     of the ego and target 'sumInvLog'' values
--   * otherwise: same ratio computed with 'sumLog''
--
-- Tests not conclusive.
weightedLogSim' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
  | null shared      = 0
  | shared == merged = 1
  | sens == 0        = jaccard shared merged
  | sens > 0         = ratio (sumInvLog' sens nbDocs)
  | otherwise        = ratio (sumLog' sens nbDocs)
  where
    -- Ngrams present in both clusters.
    shared :: [Int]
    shared = intersect ego_ngrams target_ngrams

    -- Ngrams present in at least one cluster.
    merged :: [Int]
    merged = union ego_ngrams target_ngrams

    -- Values of @diago@ restricted to the given ngram keys.
    weightsOf :: [Int] -> [Double]
    weightsOf ngrams = elems (restrictKeys diago (Set.fromList ngrams))

    -- Shared weight over the smaller of the two cluster weights.
    ratio :: ([Double] -> Double) -> Double
    ratio weigh =
      weigh (weightsOf shared)
        / minimum [weigh (weightsOf ego_ngrams), weigh (weightsOf target_ngrams)]
--------------------------------------
-- | To process the proximity between a current group and a pair of targets
-- group. 'WeightedLogJaccard' delegates to 'weightedLogJaccard''; 'WeightedLogSim'
-- delegates to 'weightedLogSim'' (the similarity adapted from Wang et al.).
-- 'Hamming' is left 'undefined'.
toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
  case proximity of
    WeightedLogJaccard sens -> weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
    WeightedLogSim sens     -> weightedLogSim' sens nbDocs diago egoNgrams pairNgrams
    Hamming                 -> undefined
  where
    -- Ngrams of the target pair, shared by both weighted measures. The union
    -- is skipped when both targets are the same list.
    pairNgrams :: [Int]
    pairNgrams
      | targetNgrams == targetNgrams' = targetNgrams
      | otherwise                     = union targetNgrams targetNgrams'
------------------------
-- | Local Matching | --
------------------------
......
......@@ -796,6 +796,7 @@ getPeriodSteps q = q ^. q_periodSteps
-- | Threshold stored in the parameters of a proximity measure.
-- 'Filiation' carries no parameters, so asking for its threshold panics.
getThreshold :: Proximity -> Double
getThreshold (WeightedLogJaccard (WLJParams thr _)) = thr
getThreshold (WeightedLogSim (WLJParams thr _))     = thr
getThreshold (Hamming (HammingParams thr))          = thr
getThreshold Filiation = panic "[ERR][Viz.Phylo.Tools.getThreshold] Filiation"
......@@ -834,6 +835,8 @@ initRelatedComponents (def defaultWeightedLogJaccard -> proxi) = RCParams proxi
-- | Build the 'WLJParams' for WeightedLogJaccard from an optional threshold and
-- an optional sensibility. Each argument goes through 'def' with 0.3 and 20.0
-- respectively (presumably the fallback used for 'Nothing' — confirm against 'def').
initWeightedLogJaccard :: Maybe Double -> Maybe Double -> WLJParams
initWeightedLogJaccard mThr mSens = WLJParams (def 0.3 mThr) (def 20.0 mSens)
-- | Build the 'WLJParams' for WeightedLogSim from an optional threshold and an
-- optional sensibility. Each argument goes through 'def' with 0.3 and 20.0
-- respectively (presumably the fallback used for 'Nothing' — confirm against 'def').
initWeightedLogSim :: Maybe Double -> Maybe Double -> WLJParams
initWeightedLogSim mThr mSens = WLJParams (def 0.3 mThr) (def 20.0 mSens)
-- | To initialize a PhyloQueryBuild from given and default parameters
initPhyloQueryBuild :: Text -> Text -> Maybe Int
......@@ -896,6 +899,9 @@ defaultHamming = Hamming (initHamming Nothing)
-- | 'WeightedLogJaccard' proximity built from 'initWeightedLogJaccard' with no
-- explicit threshold or sensibility.
defaultWeightedLogJaccard :: Proximity
defaultWeightedLogJaccard = WeightedLogJaccard params
  where
    params = initWeightedLogJaccard Nothing Nothing
-- | 'WeightedLogSim' proximity built from 'initWeightedLogSim' with no explicit
-- threshold or sensibility.
defaultWeightedLogSim :: Proximity
defaultWeightedLogSim = WeightedLogSim params
  where
    params = initWeightedLogSim Nothing Nothing
-- Queries
type Title = Text
type Desc = Text
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment