Commit 38d3a9cd authored by qlobbe's avatar qlobbe

remove bad tracers

parent 5e332ef8
Pipeline #403 failed with stage
...@@ -115,7 +115,9 @@ filterTerms patterns (y,d) = (y,termsInText patterns d) ...@@ -115,7 +115,9 @@ filterTerms patterns (y,d) = (y,termsInText patterns d)
-- | To transform a Csv nfile into a readable corpus -- | To transform a Csv nfile into a readable corpus
csvToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)]) csvToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList csvToCorpus limit csv = DV.toList
-- . DV.reverse
. DV.take limit . DV.take limit
-- . DV.reverse
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n))) . DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> CSV.readFile csv . snd <$> CSV.readFile csv
......
...@@ -49,10 +49,12 @@ graphToClusters clust (nodes,edges) = case clust of ...@@ -49,10 +49,12 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity -- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> Phylo -> ([GroupNode],[GroupEdge]) groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs cooc p = case prox of groupsToGraph prox gs cooc = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p -- WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
$ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc))) -- $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
-- $ getCandidates gs)
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard' sens (getGroupNgrams x) (getGroupNgrams y) cooc))
$ getCandidates gs) $ getCandidates gs)
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc))) Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ getCandidates gs) $ getCandidates gs)
...@@ -82,7 +84,7 @@ phyloToClusters lvl clus p = Map.fromList ...@@ -82,7 +84,7 @@ phyloToClusters lvl clus p = Map.fromList
-------------------------------------- --------------------------------------
graphs :: [([GroupNode],[GroupEdge])] graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox) graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p) p) periods $ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p)) periods
-------------------------------------- --------------------------------------
prox :: Proximity prox :: Proximity
prox = getProximity clus prox = getProximity clus
...@@ -100,7 +102,6 @@ phyloToClusters lvl clus p = Map.fromList ...@@ -100,7 +102,6 @@ phyloToClusters lvl clus p = Map.fromList
traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])] traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n" traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n" <> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
<> show (lst) <> "\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) " <> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) " <> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) " <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
...@@ -120,7 +121,5 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> ...@@ -120,7 +121,5 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
lst = sort $ map snd $ concat $ map snd g lst = sort $ map snd $ concat $ map snd g
traceSim :: PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> Map (Int, Int) Double -> Phylo -> Double -> Double -- traceSim :: PhyloGroup -> PhyloGroup -> Phylo -> Double -> Double
traceSim g g' _ _ p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n" -- traceSim g g' p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n") sim
-- <> show (c) <> " [vs] " <> show (c') <> " = " <> show (sim)
) sim
...@@ -55,7 +55,7 @@ import qualified Data.List as List ...@@ -55,7 +55,7 @@ import qualified Data.List as List
------------------------------------------------------ ------------------------------------------------------
export :: IO () export :: IO ()
export = dotToFile "/home/qlobbe/data/epique/output/cesar_cleopatre.dot" phyloDot export = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView phyloDot = viewToDot phyloView
...@@ -77,7 +77,7 @@ queryViewEx = "level=3" ...@@ -77,7 +77,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 1 Merge False 1 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
-------------------------------------------------- --------------------------------------------------
...@@ -104,7 +104,7 @@ queryEx = "title=Cesar et Cleôpatre" ...@@ -104,7 +104,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.13 0) 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.3 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.5 0)
......
...@@ -89,7 +89,8 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve ...@@ -89,7 +89,8 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve
-- | To apply the corresponding proximity function based on a given Proximity -- | To apply the corresponding proximity function based on a given Proximity
applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> (PhyloGroupId, Double) applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> (PhyloGroupId, Double)
applyProximity prox g1 g2 cooc = case prox of applyProximity prox g1 g2 cooc = case prox of
WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc)) -- WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard' s (getGroupNgrams g1) (getGroupNgrams g2) cooc)
Hamming (HammingParams _) -> ((getGroupId g2), hamming (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc)) Hamming (HammingParams _) -> ((getGroupId g2), hamming (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
_ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined") _ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined")
...@@ -163,7 +164,52 @@ updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if (getGroupLevel ...@@ -163,7 +164,52 @@ updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if (getGroupLevel
-- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target -- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
filterCandidates :: PhyloGroup -> [PhyloGroup] -> [PhyloGroup] filterCandidates :: PhyloGroup -> [PhyloGroup] -> [PhyloGroup]
filterCandidates g gs = filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g')) filterCandidates g gs = filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ delete g gs $ delete g gs
-- | a init avec la [[head groups]] et la tail groups
toBranches :: [[PhyloGroup]] -> [PhyloGroup] -> [[PhyloGroup]]
toBranches mem gs
| null gs = mem
| otherwise = toBranches mem' $ tail gs
where
--------------------------------------
mem' :: [[PhyloGroup]]
mem' = if (null withHead)
then mem ++ [[head' "toBranches" gs]]
else (filter (\gs' -> not $ elem gs' withHead) mem)
++
[(concat withHead) ++ [head' "toBranches" gs]]
--------------------------------------
withHead :: [[PhyloGroup]]
withHead = filter (\gs' -> (not . null)
$ intersect (concat $ map getGroupNgrams gs')
(getGroupNgrams $ (head' "toBranches" gs))
) mem
--------------------------------------
-- | a init avec la [[head groups]] et la tail groups
toBranches' :: [[[Int]]] -> [[Int]] -> [[[Int]]]
toBranches' mem gs
| null gs = mem
| otherwise = toBranches' mem' $ tail gs
where
--------------------------------------
mem' :: [[[Int]]]
mem' = if (null withHead)
then mem ++ [[head' "toBranches" gs]]
else (filter (\gs' -> not $ elem gs' withHead) mem)
++
[(concat withHead) ++ [head' "toBranches" gs]]
--------------------------------------
withHead :: [[[Int]]]
withHead = filter (\gs' -> (not . null)
$ intersect (concat gs')
(head' "toBranches" gs)
) mem
--------------------------------------
...@@ -179,12 +225,18 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) sc ...@@ -179,12 +225,18 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) sc
scores :: [Double] scores :: [Double]
scores = sort $ concat $ map (snd . snd) candidates scores = sort $ concat $ map (snd . snd) candidates
-------------------------------------- --------------------------------------
-- candidates' :: [(PhyloGroupId,([Pointer],[Double]))]
-- candidates' = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
--------------------------------------
candidates :: [(PhyloGroupId,([Pointer],[Double]))] candidates :: [(PhyloGroupId,([Pointer],[Double]))]
candidates = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs candidates = concat $ map (\b -> map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) b) bs
-------------------------------------- --------------------------------------
gs :: [PhyloGroup] gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p gs = getGroupsWithLevel lvl p
-------------------------------------- --------------------------------------
bs :: [[PhyloGroup]]
bs = tracePreBranches $ toBranches [[head' "interTempoMatching" gs]] $ tail gs
--------------------------------------
prds :: [PhyloPeriodId] prds :: [PhyloPeriodId]
prds = getPhyloPeriods p prds = getPhyloPeriods p
-------------------------------------- --------------------------------------
...@@ -230,3 +282,7 @@ traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered ...@@ -230,3 +282,7 @@ traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered
<> show (percentile 75 (VS.fromList lst)) <> " (75%) " <> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p <> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
tracePreBranches :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePreBranches bs = trace (show (length bs) <> " pre-branches" <> "\n"
<> "with sizes : " <> show (map length bs) <> "\n") bs
...@@ -17,18 +17,19 @@ Portability : POSIX ...@@ -17,18 +17,19 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Metrics.Proximity module Gargantext.Viz.Phylo.Metrics.Proximity
where where
import Data.List (null) import Data.List (null,intersect,union)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size) import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size)
import Gargantext.Prelude import Gargantext.Prelude
import Debug.Trace (trace) import Gargantext.Viz.Phylo.Aggregates.Cooc
-- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields -- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
weightedLogJaccard s f1 f2 weightedLogJaccard s f1 f2
| null wUnion = 0 | null wUnion = 0
| wUnion == wInter = 1 | wUnion == wInter = 1
| s == 0 = trace ("==0") $ (fromIntegral $ length wInter)/(fromIntegral $ length wUnion) | s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
| s > 0 = trace (">0") $ (sumInvLog wInter)/(sumInvLog wUnion) | s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion) | otherwise = (sumLog wInter)/(sumLog wUnion)
where where
-------------------------------------- --------------------------------------
...@@ -46,6 +47,37 @@ weightedLogJaccard s f1 f2 ...@@ -46,6 +47,37 @@ weightedLogJaccard s f1 f2
-------------------------------------- --------------------------------------
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard' :: Double -> [Int] -> [Int] -> Map (Int, Int) Double -> Double
weightedLogJaccard' s idx idx' cooc
| null idxUnion = 0
| idxUnion == idxInter = 1
| s == 0 = (fromIntegral $ length idxInter)/(fromIntegral $ length idxUnion)
| s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion)
where
--------------------------------------
wInter :: [Double]
wInter = elems $ getSubCooc idxInter cooc
--------------------------------------
wUnion :: [Double]
wUnion = elems $ getSubCooc idxUnion cooc
--------------------------------------
idxInter :: [Int]
idxInter = intersect idx idx'
--------------------------------------
idxUnion :: [Int]
idxUnion = union idx idx'
--------------------------------------
sumInvLog :: [Double] -> Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
--------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields -- | To process the Hamming distance between two PhyloGroup fields
hamming :: Map (Int, Int) Double -> Map (Int, Int) Double -> Double hamming :: Map (Int, Int) Double -> Map (Int, Int) Double -> Double
hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (size f2)) hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (size f2))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment