Commit 38d3a9cd authored by qlobbe's avatar qlobbe

remove bad tracers

parent 5e332ef8
Pipeline #403 failed with stage
......@@ -115,7 +115,9 @@ filterTerms patterns (y,d) = (y,termsInText patterns d)
-- | To transform a Csv nfile into a readable corpus
csvToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList
-- . DV.reverse
. DV.take limit
-- . DV.reverse
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> CSV.readFile csv
......
......@@ -49,10 +49,12 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> Phylo -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs cooc p = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
$ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
groupsToGraph :: Proximity -> [PhyloGroup] -> Map (Int, Int) Double -> ([GroupNode],[GroupEdge])
groupsToGraph prox gs cooc = case prox of
-- WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
-- $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
-- $ getCandidates gs)
WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), weightedLogJaccard' sens (getGroupNgrams x) (getGroupNgrams y) cooc))
$ getCandidates gs)
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
$ getCandidates gs)
......@@ -82,7 +84,7 @@ phyloToClusters lvl clus p = Map.fromList
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p) p) periods
$ map (\prd -> groupsToGraph prox (getGroupsWithFilters lvl prd p) (getCooc [prd] p)) periods
--------------------------------------
prox :: Proximity
prox = getProximity clus
......@@ -100,7 +102,6 @@ phyloToClusters lvl clus p = Map.fromList
traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
<> show (lst) <> "\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
......@@ -120,7 +121,5 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
lst = sort $ map snd $ concat $ map snd g
traceSim :: PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> Map (Int, Int) Double -> Phylo -> Double -> Double
traceSim g g' _ _ p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n"
-- <> show (c) <> " [vs] " <> show (c') <> " = " <> show (sim)
) sim
-- traceSim :: PhyloGroup -> PhyloGroup -> Phylo -> Double -> Double
-- traceSim g g' p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n") sim
......@@ -55,7 +55,7 @@ import qualified Data.List as List
------------------------------------------------------
export :: IO ()
export = dotToFile "/home/qlobbe/data/epique/output/cesar_cleopatre.dot" phyloDot
export = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView
......@@ -77,7 +77,7 @@ queryViewEx = "level=3"
phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 1 Merge False 1 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
--------------------------------------------------
......@@ -104,7 +104,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.1 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.13 0)
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.3 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.5 0)
......
......@@ -89,7 +89,8 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve
-- | To apply the corresponding proximity function based on a given Proximity
applyProximity :: Proximity -> PhyloGroup -> PhyloGroup -> Map (Int, Int) Double -> (PhyloGroupId, Double)
applyProximity prox g1 g2 cooc = case prox of
WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
-- WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard' s (getGroupNgrams g1) (getGroupNgrams g2) cooc)
Hamming (HammingParams _) -> ((getGroupId g2), hamming (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
_ -> panic ("[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined")
......@@ -163,7 +164,52 @@ updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if (getGroupLevel
-- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
filterCandidates :: PhyloGroup -> [PhyloGroup] -> [PhyloGroup]
filterCandidates g gs = filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ delete g gs
$ delete g gs
-- | a init avec la [[head groups]] et la tail groups
toBranches :: [[PhyloGroup]] -> [PhyloGroup] -> [[PhyloGroup]]
toBranches mem gs
| null gs = mem
| otherwise = toBranches mem' $ tail gs
where
--------------------------------------
mem' :: [[PhyloGroup]]
mem' = if (null withHead)
then mem ++ [[head' "toBranches" gs]]
else (filter (\gs' -> not $ elem gs' withHead) mem)
++
[(concat withHead) ++ [head' "toBranches" gs]]
--------------------------------------
withHead :: [[PhyloGroup]]
withHead = filter (\gs' -> (not . null)
$ intersect (concat $ map getGroupNgrams gs')
(getGroupNgrams $ (head' "toBranches" gs))
) mem
--------------------------------------
-- | a init avec la [[head groups]] et la tail groups
toBranches' :: [[[Int]]] -> [[Int]] -> [[[Int]]]
toBranches' mem gs
| null gs = mem
| otherwise = toBranches' mem' $ tail gs
where
--------------------------------------
mem' :: [[[Int]]]
mem' = if (null withHead)
then mem ++ [[head' "toBranches" gs]]
else (filter (\gs' -> not $ elem gs' withHead) mem)
++
[(concat withHead) ++ [head' "toBranches" gs]]
--------------------------------------
withHead :: [[[Int]]]
withHead = filter (\gs' -> (not . null)
$ intersect (concat gs')
(head' "toBranches" gs)
) mem
--------------------------------------
......@@ -179,12 +225,18 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) sc
scores :: [Double]
scores = sort $ concat $ map (snd . snd) candidates
--------------------------------------
-- candidates' :: [(PhyloGroupId,([Pointer],[Double]))]
-- candidates' = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
--------------------------------------
candidates :: [(PhyloGroupId,([Pointer],[Double]))]
candidates = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
candidates = concat $ map (\b -> map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) b) bs
--------------------------------------
gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p
--------------------------------------
bs :: [[PhyloGroup]]
bs = tracePreBranches $ toBranches [[head' "interTempoMatching" gs]] $ tail gs
--------------------------------------
prds :: [PhyloPeriodId]
prds = getPhyloPeriods p
--------------------------------------
......@@ -230,3 +282,7 @@ traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
tracePreBranches :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePreBranches bs = trace (show (length bs) <> " pre-branches" <> "\n"
<> "with sizes : " <> show (map length bs) <> "\n") bs
......@@ -17,18 +17,19 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Metrics.Proximity
where
import Data.List (null)
import Data.List (null,intersect,union)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size)
import Gargantext.Prelude
import Debug.Trace (trace)
import Gargantext.Viz.Phylo.Aggregates.Cooc
-- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
weightedLogJaccard s f1 f2
| null wUnion = 0
| wUnion == wInter = 1
| s == 0 = trace ("==0") $ (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
| s > 0 = trace (">0") $ (sumInvLog wInter)/(sumInvLog wUnion)
| s == 0 = (fromIntegral $ length wInter)/(fromIntegral $ length wUnion)
| s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion)
where
--------------------------------------
......@@ -46,6 +47,37 @@ weightedLogJaccard s f1 f2
--------------------------------------
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard' :: Double -> [Int] -> [Int] -> Map (Int, Int) Double -> Double
weightedLogJaccard' s idx idx' cooc
| null idxUnion = 0
| idxUnion == idxInter = 1
| s == 0 = (fromIntegral $ length idxInter)/(fromIntegral $ length idxUnion)
| s > 0 = (sumInvLog wInter)/(sumInvLog wUnion)
| otherwise = (sumLog wInter)/(sumLog wUnion)
where
--------------------------------------
wInter :: [Double]
wInter = elems $ getSubCooc idxInter cooc
--------------------------------------
wUnion :: [Double]
wUnion = elems $ getSubCooc idxUnion cooc
--------------------------------------
idxInter :: [Int]
idxInter = intersect idx idx'
--------------------------------------
idxUnion :: [Int]
idxUnion = union idx idx'
--------------------------------------
sumInvLog :: [Double] -> Double
sumInvLog l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
--------------------------------------
sumLog :: [Double] -> Double
sumLog l = foldl (\mem x -> mem + log (s + x)) 0 l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
hamming :: Map (Int, Int) Double -> Map (Int, Int) Double -> Double
hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (size f2))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment