Commit 19e0bdc1 authored by Quentin Lobbé's avatar Quentin Lobbé

tune the threshold and sensibility of the WeightedLogJaccard

parent 237a8f2b
...@@ -30,6 +30,7 @@ import Gargantext.Prelude ...@@ -30,6 +30,7 @@ import Gargantext.Prelude
import Gargantext.Text.List.CSV (csvGraphTermList) import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year) import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.Terms.WithList import Gargantext.Text.Terms.WithList
import System.Environment import System.Environment
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
...@@ -64,10 +65,8 @@ filterTerms patterns (year', doc) = (year',termsInText patterns doc) ...@@ -64,10 +65,8 @@ filterTerms patterns (year', doc) = (year',termsInText patterns doc)
termsInText pats txt = DL.nub $ DL.concat $ map (map unwords) $ extractTermsWithList pats txt termsInText pats txt = DL.nub $ DL.concat $ map (map unwords) $ extractTermsWithList pats txt
-- csvToCorpus :: Int -> FilePath -> IO (DM.Map Int [Text])
csvToCorpus :: Int -> FilePath -> IO ([(Int,Text)]) csvToCorpus :: Int -> FilePath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList csvToCorpus limit csv = DV.toList
-- DM.fromListWith (<>)
. DV.take limit . DV.take limit
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n))) . DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> readCsv csv . snd <$> readCsv csv
...@@ -87,20 +86,31 @@ parse limit corpus liste = do ...@@ -87,20 +86,31 @@ parse limit corpus liste = do
main :: IO () main :: IO ()
main = do main = do
-- [corpusFile, termListFile, outputFile] <- getArgs
-- [corpusPath, termListPath, outputPath] <- getArgs
let corpusPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/texts/fullCorpus.csv" let corpusPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/texts/fullCorpus.csv"
let termListPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/termList.csv" let termListPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/termList.csv"
let outputPath = "/home/qlobbe/data/epique/output/cultural_evolution.dot" let outputPath = "/home/qlobbe/data/epique/output/cultural_evolution.dot"
let query = PhyloQueryBuild "cultural_evolution" "Test" 5 3 defaultFis [] [] defaultWeightedLogJaccard 3 defaultRelatedComponents let query = PhyloQueryBuild "cultural_evolution" "" 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0 0)
2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.1 10)
let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
corpus <- parse 5000 corpusPath termListPath putStrLn $ show "-- Start parsing the corpus"
corpus <- parse 500 corpusPath termListPath
let foundations = DL.nub $ DL.concat $ map text corpus let foundations = DL.nub $ DL.concat $ map text corpus
-- putStrLn $ show $ csvGraphTermList termListPath -- putStrLn $ show (map text corpus)
-- foundations <- DL.concat <$> DL.concat <$> map snd <$> csvGraphTermList termListPath
-- putStrLn $ show foundations
-- a <- map snd <$> csvGraphTermList liste
let phylo = toPhylo query corpus foundations [] let phylo = toPhylo query corpus foundations []
......
...@@ -36,11 +36,11 @@ graphToClusters clust (nodes,edges) = case clust of ...@@ -36,11 +36,11 @@ graphToClusters clust (nodes,edges) = case clust of
_ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented" _ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a Phylo into Clusters of PhyloGroups at a given level -- | To transform a Phylo into Clusters of PhyloGroups at a given level
phyloToClusters :: Level -> Proximity -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster] phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
phyloToClusters lvl prox clus p = Map.fromList phyloToClusters lvl clus p = Map.fromList
$ zip (getPhyloPeriods p) $ zip (getPhyloPeriods p)
(map (\prd -> let graph = groupsToGraph prox (getGroupsWithFilters lvl prd p) p (map (\prd -> let graph = groupsToGraph (getProximity clus) (getGroupsWithFilters lvl prd p) p
in if null (fst graph) in if null (fst graph)
then [] then []
else graphToClusters clus graph) else graphToClusters clus graph)
(getPhyloPeriods p)) (getPhyloPeriods p))
...@@ -20,11 +20,13 @@ module Gargantext.Viz.Phylo.BranchMaker ...@@ -20,11 +20,13 @@ module Gargantext.Viz.Phylo.BranchMaker
import Control.Lens hiding (both, Level) import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),tail) import Data.List (concat,nub,(++),tail)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Data.Map (Map,fromList,toList)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Metrics.Clustering import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Metrics.Proximity import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
-- import Debug.Trace (trace)
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering -- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
...@@ -35,6 +37,10 @@ graphToBranches _lvl (nodes,edges) _p = concat ...@@ -35,6 +37,10 @@ graphToBranches _lvl (nodes,edges) _p = concat
$ relatedComp 0 (head' "branchMaker" nodes) (tail nodes,edges) [] [] $ relatedComp 0 (head' "branchMaker" nodes) (tail nodes,edges) [] []
mirror :: Ord a => Map (a,a) b -> Map (a,a) b
mirror m = fromList $ concat $ map (\((k,k'),v) -> [((k,k'),v),((k',k),v)]) $ toList m
-- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure -- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
groupsToGraph :: Proximity -> [PhyloGroup] -> Phylo -> GroupGraph groupsToGraph :: Proximity -> [PhyloGroup] -> Phylo -> GroupGraph
groupsToGraph prox groups p = (groups,edges) groupsToGraph prox groups p = (groups,edges)
...@@ -44,8 +50,8 @@ groupsToGraph prox groups p = (groups,edges) ...@@ -44,8 +50,8 @@ groupsToGraph prox groups p = (groups,edges)
Filiation -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p) Filiation -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
++ ++
(map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups (map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups
WeightedLogJaccard (WLJParams thr sens) -> filter (\edge -> snd edge >= thr) WeightedLogJaccard (WLJParams thr sens) -> filter (\(_,v) -> v >= thr)
$ map (\(x,y) -> ((x,y), weightedLogJaccard sens (getGroupCooc x) (unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ map (\(x,y) -> ((x,y), weightedLogJaccard sens (mirror $ getGroupCooc x) (mirror $ getGroupCooc y)))
$ listToDirectedCombi groups $ listToDirectedCombi groups
Hamming (HammingParams thr) -> filter (\edge -> snd edge <= thr) Hamming (HammingParams thr) -> filter (\edge -> snd edge <= thr)
$ map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (unifySharedKeys (getGroupCooc x) (getGroupCooc y)))) $ map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (unifySharedKeys (getGroupCooc x) (getGroupCooc y))))
......
...@@ -54,7 +54,7 @@ import qualified Data.List as List ...@@ -54,7 +54,7 @@ import qualified Data.List as List
------------------------------------------------------ ------------------------------------------------------
export :: IO () export :: IO ()
export = dotToFile "./export_test" "cesar_cleopatre.dot" phyloDot export = dotToFile "/home/qlobbe/data/epique/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView phyloDot = viewToDot phyloView
...@@ -103,7 +103,7 @@ queryEx = "title=Cesar et Cleôpatre" ...@@ -103,7 +103,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)" phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] defaultWeightedLogJaccard 3 defaultRelatedComponents 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.0001 10) 3 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10)
...@@ -113,7 +113,7 @@ phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy ...@@ -113,7 +113,7 @@ phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy
phylo6 :: Phylo phylo6 :: Phylo
phylo6 = toNthLevel 6 defaultWeightedLogJaccard defaultRelatedComponents phylo3 phylo6 = toNthLevel 6 defaultWeightedLogJaccard (RelatedComponents (initRelatedComponents (Just defaultWeightedLogJaccard))) phylo3
phylo3 :: Phylo phylo3 :: Phylo
...@@ -122,7 +122,7 @@ phylo3 = setPhyloBranches 3 ...@@ -122,7 +122,7 @@ phylo3 = setPhyloBranches 3
$ interTempoMatching Ascendant 3 defaultWeightedLogJaccard $ interTempoMatching Ascendant 3 defaultWeightedLogJaccard
$ setLevelLinks (2,3) $ setLevelLinks (2,3)
$ addPhyloLevel 3 $ addPhyloLevel 3
(phyloToClusters 2 defaultWeightedLogJaccard defaultRelatedComponents phyloBranch2) (phyloToClusters 2 (RelatedComponents (initRelatedComponents (Just defaultWeightedLogJaccard))) phyloBranch2)
phyloBranch2 phyloBranch2
...@@ -153,7 +153,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1 ...@@ -153,7 +153,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster] phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 1 defaultWeightedLogJaccard defaultRelatedComponents phyloBranch1 phyloCluster = phyloToClusters 1 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
---------------------------------- ----------------------------------
......
...@@ -149,7 +149,7 @@ toNthLevel lvlMax prox clus p ...@@ -149,7 +149,7 @@ toNthLevel lvlMax prox clus p
$ interTempoMatching Ascendant (lvl + 1) prox $ interTempoMatching Ascendant (lvl + 1) prox
$ setLevelLinks (lvl, lvl + 1) $ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1) $ addPhyloLevel (lvl + 1)
(phyloToClusters lvl (getProximity clus) clus p) p (phyloToClusters lvl clus p) p
where where
-------------------------------------- --------------------------------------
lvl :: Level lvl :: Level
......
...@@ -43,7 +43,7 @@ relatedComp idx curr (nodes,edges) next memo ...@@ -43,7 +43,7 @@ relatedComp idx curr (nodes,edges) next memo
| otherwise = memo ++ [[curr]] | otherwise = memo ++ [[curr]]
-------------------------------------- --------------------------------------
next' :: [PhyloGroup] next' :: [PhyloGroup]
next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours True curr edges) next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
-------------------------------------- --------------------------------------
nodes' :: [PhyloGroup] nodes' :: [PhyloGroup]
nodes' = filter (\x -> not $ elem x next') nodes nodes' = filter (\x -> not $ elem x next') nodes
......
...@@ -20,6 +20,7 @@ module Gargantext.Viz.Phylo.Metrics.Proximity ...@@ -20,6 +20,7 @@ module Gargantext.Viz.Phylo.Metrics.Proximity
import Data.List (null) import Data.List (null)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size) import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size)
import Gargantext.Prelude import Gargantext.Prelude
-- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields -- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
......
...@@ -24,7 +24,7 @@ import Data.List (filter, intersect, (++), sort, null, tail, last, ta ...@@ -24,7 +24,7 @@ import Data.List (filter, intersect, (++), sort, null, tail, last, ta
import Data.Maybe (mapMaybe,fromMaybe) import Data.Maybe (mapMaybe,fromMaybe)
import Data.Map (Map, mapKeys, member, (!)) import Data.Map (Map, mapKeys, member, (!))
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text, toLower) import Data.Text (Text,toLower,unwords)
import Data.Tuple.Extra import Data.Tuple.Extra
import Data.Vector (Vector,elemIndex) import Data.Vector (Vector,elemIndex)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -125,6 +125,16 @@ listToUnDirectedCombiWith :: forall a b. (a -> b) -> [a] -> [(b,b)] ...@@ -125,6 +125,16 @@ listToUnDirectedCombiWith :: forall a b. (a -> b) -> [a] -> [(b,b)]
listToUnDirectedCombiWith f l = [ (f x, f y) | (x:rest) <- tails l, y <- rest ] listToUnDirectedCombiWith f l = [ (f x, f y) | (x:rest) <- tails l, y <- rest ]
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = unwords $ ngramsToText ngrams l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
-- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x) -- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x)
unifySharedKeys :: Eq a => Ord a => Map (a,a) b -> Map (a,a) b -> Map (a,a) b unifySharedKeys :: Eq a => Ord a => Map (a,a) b -> Map (a,a) b -> Map (a,a) b
unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2 unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2
...@@ -136,7 +146,6 @@ unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2 ...@@ -136,7 +146,6 @@ unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2
-- | Phylo | -- -- | Phylo | --
--------------- ---------------
-- | An analyzer ingests a Ngrams and generates a modified version of it -- | An analyzer ingests a Ngrams and generates a modified version of it
phyloAnalyzer :: Ngrams -> Ngrams phyloAnalyzer :: Ngrams -> Ngrams
phyloAnalyzer n = toLower n phyloAnalyzer n = toLower n
...@@ -342,6 +351,10 @@ getGroupPeriodParents = _phylo_groupPeriodParents ...@@ -342,6 +351,10 @@ getGroupPeriodParents = _phylo_groupPeriodParents
getGroupPeriodParentsId :: PhyloGroup -> [PhyloGroupId] getGroupPeriodParentsId :: PhyloGroup -> [PhyloGroupId]
getGroupPeriodParentsId g = map fst $ getGroupPeriodParents g getGroupPeriodParentsId g = map fst $ getGroupPeriodParents g
-- | To get the roots labels of a list of group ngrams
getGroupText :: PhyloGroup -> Phylo -> [Text]
getGroupText g p = ngramsToText (getRootsLabels p) (getGroupNgrams g)
-- | To get all the PhyloGroup of a Phylo -- | To get all the PhyloGroup of a Phylo
getGroups :: Phylo -> [PhyloGroup] getGroups :: Phylo -> [PhyloGroup]
......
...@@ -47,8 +47,8 @@ type DotId = T'.Text ...@@ -47,8 +47,8 @@ type DotId = T'.Text
-- | Dot to File | -- -- | Dot to File | --
--------------------- ---------------------
dotToFile :: FilePath -> FilePath -> DotGraph DotId -> IO () dotToFile :: FilePath -> DotGraph DotId -> IO ()
dotToFile filePath fileName dotG = writeFile (combine filePath fileName) $ dotToString dotG dotToFile filePath dotG = writeFile filePath $ dotToString dotG
dotToString :: DotGraph DotId -> [Char] dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG) dotToString dotG = unpack (printDotGraph dotG)
......
...@@ -19,24 +19,13 @@ module Gargantext.Viz.Phylo.View.Taggers ...@@ -19,24 +19,13 @@ module Gargantext.Viz.Phylo.View.Taggers
import Control.Lens hiding (makeLenses, both, Level) import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,groupBy,sortOn,sort) import Data.List (concat,nub,groupBy,sortOn,sort)
import Data.Text (Text,unwords) import Data.Text (Text)
import Data.Tuple (fst, snd) import Data.Tuple (fst, snd)
import Data.Vector (Vector) import Data.Vector (Vector)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Viz.Phylo import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools import Gargantext.Viz.Phylo.Tools
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Vector as Vector
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = unwords $ ngramsToText ngrams l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
-- | To get the nth most frequent Ngrams in a list of PhyloGroups -- | To get the nth most frequent Ngrams in a list of PhyloGroups
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment