Commit 1a051cf1 authored by david Chavalarias's avatar david Chavalarias

New similarity measure for inter-temporal matching added named WeightedLogSim...

New similarity measure for inter-temporal matching added, named WeightedLogSim. Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of research topics with NEViewer: a new method based on dynamic co-word networks. Scientometrics 101, 1253–1271. https://doi.org/10.1007/s11192-014-1347-y (log added in the formula + pair comparison).
parent 49ac7b06
...@@ -70,6 +70,15 @@ data Proximity = ...@@ -70,6 +70,15 @@ data Proximity =
-- , _wlj_thresholdStep :: Double -- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching -- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double -- , _wlj_elevation :: Double
-}
}
| WeightedLogSim
{ _wlj_sensibility :: Double
{-
-- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
-} -}
} }
| Hamming | Hamming
......
...@@ -261,11 +261,12 @@ data LouvainParams = LouvainParams ...@@ -261,11 +261,12 @@ data LouvainParams = LouvainParams
-- | Proximity constructors -- | Proximity constructors
data Proximity = WeightedLogJaccard WLJParams data Proximity = WeightedLogJaccard WLJParams
| WeightedLogSim WLJParams
| Hamming HammingParams | Hamming HammingParams
| Filiation | Filiation
deriving (Generic, Show, Eq, Read) deriving (Generic, Show, Eq, Read)
-- | Parameters for WeightedLogJaccard proximity -- | Parameters for WeightedLogJaccard and WeightedLogSim proximity
data WLJParams = WLJParams data WLJParams = WLJParams
{ _wlj_threshold :: !Double { _wlj_threshold :: !Double
, _wlj_sensibility :: !Double , _wlj_sensibility :: !Double
......
...@@ -295,12 +295,14 @@ filterProximity :: Proximity -> Double -> Double -> Bool ...@@ -295,12 +295,14 @@ filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity proximity thr local = filterProximity proximity thr local =
case proximity of case proximity of
WeightedLogJaccard _ -> local >= thr WeightedLogJaccard _ -> local >= thr
WeightedLogSim _ -> local >= thr
Hamming -> undefined Hamming -> undefined
getProximityName :: Proximity -> String getProximityName :: Proximity -> String
getProximityName proximity = getProximityName proximity =
case proximity of case proximity of
WeightedLogJaccard _ -> "WLJaccard" WeightedLogJaccard _ -> "WLJaccard"
WeightedLogSim _ -> "WeightedLogSim"
Hamming -> "Hamming" Hamming -> "Hamming"
--------------- ---------------
...@@ -484,6 +486,7 @@ traceSynchronyStart phylo = ...@@ -484,6 +486,7 @@ traceSynchronyStart phylo =
getSensibility :: Proximity -> Double getSensibility :: Proximity -> Double
getSensibility proxi = case proxi of getSensibility proxi = case proxi of
WeightedLogJaccard s -> s WeightedLogJaccard s -> s
WeightedLogSim s -> s
Hamming -> undefined Hamming -> undefined
---------------- ----------------
......
...@@ -73,9 +73,35 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams' ...@@ -73,9 +73,35 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams'
diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion) diagoUnion = elems $ restrictKeys diago (Set.fromList ngramsUnion)
-------------------------------------- --------------------------------------
-- | Weighted similarity between two clusters of ngrams.
--
-- Adapted from Wang, X., Cheng, Q., Lu, W., 2014. Analyzing evolution of
-- research topics with NEViewer: a new method based on dynamic co-word
-- networks. Scientometrics 101, 1253–1271.
-- https://doi.org/10.1007/s11192-014-1347-y
-- (log added in the formula + pair comparison).
--
-- Result, by case (checked in this order):
--
--   * no shared ngram: 0
--   * intersection equal to the union: 1
--   * @sens == 0@: plain 'jaccard' of the intersection and the union
--   * @sens > 0@: 'sumInvLog'' of the shared weights divided by the smaller
--     of the two clusters' own 'sumInvLog'' values
--   * otherwise (negative @sens@): same ratio computed with 'sumLog''
--
-- The weights fed to the sums are the values of @diago@ restricted to the
-- relevant ngram ids.
--
-- NOTE: tests not conclusive
weightedLogSim' :: Double -> Double -> Map Int Double -> [Int] -> [Int] -> Double
weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
  | null shared      = 0
  | shared == merged = 1
  | sens == 0        = jaccard shared merged
  | sens > 0         = overlapRatio (sumInvLog' sens nbDocs)
  | otherwise        = overlapRatio (sumLog' sens nbDocs)
  where
    -- ngrams present in both clusters
    shared :: [Int]
    shared = intersect ego_ngrams target_ngrams

    -- ngrams present in at least one cluster
    merged :: [Int]
    merged = union ego_ngrams target_ngrams

    -- values of @diago@ whose key belongs to the given ngram ids
    weightsOf :: [Int] -> [Double]
    weightsOf ngrams = elems $ restrictKeys diago (Set.fromList ngrams)

    -- weight of the shared ngrams relative to the lighter of the two clusters
    overlapRatio :: ([Double] -> Double) -> Double
    overlapRatio weigh =
      (weigh (weightsOf shared))
        / minimum [weigh (weightsOf ego_ngrams), weigh (weightsOf target_ngrams)]
--------------------------------------
-- | To process the proximity between a current group and a pair of targets group
toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double toProximity :: Double -> Map Int Double -> Proximity -> [Int] -> [Int] -> [Int] -> Double
-- | To process the proximity between a current group and a pair of target groups, using the similarity adapted from Wang et al.
toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' = toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
case proximity of case proximity of
WeightedLogJaccard sens -> WeightedLogJaccard sens ->
...@@ -83,9 +109,13 @@ toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' = ...@@ -83,9 +109,13 @@ toProximity nbDocs diago proximity egoNgrams targetNgrams targetNgrams' =
then targetNgrams then targetNgrams
else union targetNgrams targetNgrams' else union targetNgrams targetNgrams'
in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams in weightedLogJaccard' sens nbDocs diago egoNgrams pairNgrams
WeightedLogSim sens ->
let pairNgrams = if targetNgrams == targetNgrams'
then targetNgrams
else union targetNgrams targetNgrams'
in weightedLogSim' sens nbDocs diago egoNgrams pairNgrams
Hamming -> undefined Hamming -> undefined
------------------------ ------------------------
-- | Local Matching | -- -- | Local Matching | --
------------------------ ------------------------
......
...@@ -796,6 +796,7 @@ getPeriodSteps q = q ^. q_periodSteps ...@@ -796,6 +796,7 @@ getPeriodSteps q = q ^. q_periodSteps
getThreshold :: Proximity -> Double getThreshold :: Proximity -> Double
getThreshold prox = case prox of getThreshold prox = case prox of
WeightedLogJaccard (WLJParams thr _) -> thr WeightedLogJaccard (WLJParams thr _) -> thr
WeightedLogSim (WLJParams thr _) -> thr
Hamming (HammingParams thr) -> thr Hamming (HammingParams thr) -> thr
Filiation -> panic "[ERR][Viz.Phylo.Tools.getThreshold] Filiation" Filiation -> panic "[ERR][Viz.Phylo.Tools.getThreshold] Filiation"
...@@ -834,6 +835,8 @@ initRelatedComponents (def defaultWeightedLogJaccard -> proxi) = RCParams proxi ...@@ -834,6 +835,8 @@ initRelatedComponents (def defaultWeightedLogJaccard -> proxi) = RCParams proxi
initWeightedLogJaccard :: Maybe Double -> Maybe Double -> WLJParams initWeightedLogJaccard :: Maybe Double -> Maybe Double -> WLJParams
initWeightedLogJaccard (def 0.3 -> thr) (def 20.0 -> sens) = WLJParams thr sens initWeightedLogJaccard (def 0.3 -> thr) (def 20.0 -> sens) = WLJParams thr sens
-- | Build the 'WLJParams' used by the WeightedLogSim proximity.
-- Each optional argument is passed through @def@ together with its default
-- (0.3 for the threshold, 20.0 for the sensibility).
initWeightedLogSim :: Maybe Double -> Maybe Double -> WLJParams
initWeightedLogSim mThreshold mSensibility =
  WLJParams (def 0.3 mThreshold) (def 20.0 mSensibility)
-- | To initialize a PhyloQueryBuild from given and default parameters -- | To initialize a PhyloQueryBuild from given and default parameters
initPhyloQueryBuild :: Text -> Text -> Maybe Int initPhyloQueryBuild :: Text -> Text -> Maybe Int
...@@ -896,6 +899,9 @@ defaultHamming = Hamming (initHamming Nothing) ...@@ -896,6 +899,9 @@ defaultHamming = Hamming (initHamming Nothing)
defaultWeightedLogJaccard :: Proximity defaultWeightedLogJaccard :: Proximity
defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing Nothing) defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing Nothing)
-- | 'WeightedLogSim' proximity carrying the parameters that
-- 'initWeightedLogSim' produces when no value is supplied.
defaultWeightedLogSim :: Proximity
defaultWeightedLogSim = WeightedLogSim params
  where
    params = initWeightedLogSim Nothing Nothing
-- Queries -- Queries
type Title = Text type Title = Text
type Desc = Text type Desc = Text
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment