Commit 19e0bdc1 authored by Quentin Lobbé's avatar Quentin Lobbé

tune the threshold and sensibility of the WeightedLogJaccard

parent 237a8f2b
......@@ -30,6 +30,7 @@ import Gargantext.Prelude
import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.Terms.WithList
import System.Environment
import Gargantext.Viz.Phylo
......@@ -64,10 +65,8 @@ filterTerms patterns (year', doc) = (year',termsInText patterns doc)
termsInText pats txt = DL.nub $ DL.concat $ map (map unwords) $ extractTermsWithList pats txt
-- csvToCorpus :: Int -> FilePath -> IO (DM.Map Int [Text])
csvToCorpus :: Int -> FilePath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList
-- DM.fromListWith (<>)
. DV.take limit
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> readCsv csv
......@@ -87,20 +86,31 @@ parse limit corpus liste = do
main :: IO ()
main = do
-- [corpusFile, termListFile, outputFile] <- getArgs
-- [corpusPath, termListPath, outputPath] <- getArgs
let corpusPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/texts/fullCorpus.csv"
let termListPath = "/home/qlobbe/data/epique/corpus/cultural_evolution/termList.csv"
let outputPath = "/home/qlobbe/data/epique/output/cultural_evolution.dot"
let query = PhyloQueryBuild "cultural_evolution" "Test" 5 3 defaultFis [] [] defaultWeightedLogJaccard 3 defaultRelatedComponents
let query = PhyloQueryBuild "cultural_evolution" "" 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0 0)
2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.1 10)
let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
corpus <- parse 5000 corpusPath termListPath
putStrLn $ show "-- Start parsing the corpus"
corpus <- parse 500 corpusPath termListPath
let foundations = DL.nub $ DL.concat $ map text corpus
-- putStrLn $ show $ csvGraphTermList termListPath
-- putStrLn $ show (map text corpus)
-- foundations <- DL.concat <$> DL.concat <$> map snd <$> csvGraphTermList termListPath
-- putStrLn $ show foundations
-- a <- map snd <$> csvGraphTermList liste
let phylo = toPhylo query corpus foundations []
......
......@@ -36,11 +36,11 @@ graphToClusters clust (nodes,edges) = case clust of
_ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a Phylo into Clusters of PhyloGroups at a given level
phyloToClusters :: Level -> Proximity -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
phyloToClusters lvl prox clus p = Map.fromList
$ zip (getPhyloPeriods p)
(map (\prd -> let graph = groupsToGraph prox (getGroupsWithFilters lvl prd p) p
in if null (fst graph)
then []
else graphToClusters clus graph)
(getPhyloPeriods p))
phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
phyloToClusters lvl clus p = Map.fromList
$ zip (getPhyloPeriods p)
(map (\prd -> let graph = groupsToGraph (getProximity clus) (getGroupsWithFilters lvl prd p) p
in if null (fst graph)
then []
else graphToClusters clus graph)
(getPhyloPeriods p))
......@@ -20,11 +20,13 @@ module Gargantext.Viz.Phylo.BranchMaker
import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),tail)
import Data.Tuple (fst, snd)
import Data.Map (Map,fromList,toList)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Metrics.Clustering
import Gargantext.Viz.Phylo.Metrics.Proximity
import Gargantext.Viz.Phylo.Tools
-- import Debug.Trace (trace)
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
......@@ -35,6 +37,10 @@ graphToBranches _lvl (nodes,edges) _p = concat
$ relatedComp 0 (head' "branchMaker" nodes) (tail nodes,edges) [] []
mirror :: Ord a => Map (a,a) b -> Map (a,a) b
mirror m = fromList $ concat $ map (\((k,k'),v) -> [((k,k'),v),((k',k),v)]) $ toList m
-- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
groupsToGraph :: Proximity -> [PhyloGroup] -> Phylo -> GroupGraph
groupsToGraph prox groups p = (groups,edges)
......@@ -44,8 +50,8 @@ groupsToGraph prox groups p = (groups,edges)
Filiation -> (nub . concat) $ map (\g -> (map (\g' -> ((g',g),1)) $ getGroupParents g p)
++
(map (\g' -> ((g,g'),1)) $ getGroupChilds g p)) groups
WeightedLogJaccard (WLJParams thr sens) -> filter (\edge -> snd edge >= thr)
$ map (\(x,y) -> ((x,y), weightedLogJaccard sens (getGroupCooc x) (unifySharedKeys (getGroupCooc x) (getGroupCooc y))))
WeightedLogJaccard (WLJParams thr sens) -> filter (\(_,v) -> v >= thr)
$ map (\(x,y) -> ((x,y), weightedLogJaccard sens (mirror $ getGroupCooc x) (mirror $ getGroupCooc y)))
$ listToDirectedCombi groups
Hamming (HammingParams thr) -> filter (\edge -> snd edge <= thr)
$ map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (unifySharedKeys (getGroupCooc x) (getGroupCooc y))))
......
......@@ -54,7 +54,7 @@ import qualified Data.List as List
------------------------------------------------------
export :: IO ()
export = dotToFile "./export_test" "cesar_cleopatre.dot" phyloDot
export = dotToFile "/home/qlobbe/data/epique/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView
......@@ -103,7 +103,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
5 3 defaultFis [] [] defaultWeightedLogJaccard 3 defaultRelatedComponents
5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.0001 10) 3 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10)
......@@ -113,7 +113,7 @@ phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy
phylo6 :: Phylo
phylo6 = toNthLevel 6 defaultWeightedLogJaccard defaultRelatedComponents phylo3
phylo6 = toNthLevel 6 defaultWeightedLogJaccard (RelatedComponents (initRelatedComponents (Just defaultWeightedLogJaccard))) phylo3
phylo3 :: Phylo
......@@ -122,7 +122,7 @@ phylo3 = setPhyloBranches 3
$ interTempoMatching Ascendant 3 defaultWeightedLogJaccard
$ setLevelLinks (2,3)
$ addPhyloLevel 3
(phyloToClusters 2 defaultWeightedLogJaccard defaultRelatedComponents phyloBranch2)
(phyloToClusters 2 (RelatedComponents (initRelatedComponents (Just defaultWeightedLogJaccard))) phyloBranch2)
phyloBranch2
......@@ -153,7 +153,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 1 defaultWeightedLogJaccard defaultRelatedComponents phyloBranch1
phyloCluster = phyloToClusters 1 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
----------------------------------
......
......@@ -149,7 +149,7 @@ toNthLevel lvlMax prox clus p
$ interTempoMatching Ascendant (lvl + 1) prox
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1)
(phyloToClusters lvl (getProximity clus) clus p) p
(phyloToClusters lvl clus p) p
where
--------------------------------------
lvl :: Level
......
......@@ -43,7 +43,7 @@ relatedComp idx curr (nodes,edges) next memo
| otherwise = memo ++ [[curr]]
--------------------------------------
next' :: [PhyloGroup]
next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours True curr edges)
next' = filter (\x -> not $ elem x $ concat memo) $ nub $ next ++ (getNeighbours False curr edges)
--------------------------------------
nodes' :: [PhyloGroup]
nodes' = filter (\x -> not $ elem x next') nodes
......
......@@ -20,6 +20,7 @@ module Gargantext.Viz.Phylo.Metrics.Proximity
import Data.List (null)
import Data.Map (Map,elems,unionWith,intersectionWith,intersection,size)
import Gargantext.Prelude
-- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double
......
......@@ -24,7 +24,7 @@ import Data.List (filter, intersect, (++), sort, null, tail, last, ta
import Data.Maybe (mapMaybe,fromMaybe)
import Data.Map (Map, mapKeys, member, (!))
import Data.Set (Set)
import Data.Text (Text, toLower)
import Data.Text (Text,toLower,unwords)
import Data.Tuple.Extra
import Data.Vector (Vector,elemIndex)
import Gargantext.Prelude
......@@ -125,6 +125,16 @@ listToUnDirectedCombiWith :: forall a b. (a -> b) -> [a] -> [(b,b)]
listToUnDirectedCombiWith f l = [ (f x, f y) | (x:rest) <- tails l, y <- rest ]
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = unwords $ ngramsToText ngrams l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
-- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x)
unifySharedKeys :: Eq a => Ord a => Map (a,a) b -> Map (a,a) b -> Map (a,a) b
unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2
......@@ -136,7 +146,6 @@ unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2
-- | Phylo | --
---------------
-- | An analyzer ingests a Ngrams and generates a modified version of it
phyloAnalyzer :: Ngrams -> Ngrams
phyloAnalyzer n = toLower n
......@@ -342,6 +351,10 @@ getGroupPeriodParents = _phylo_groupPeriodParents
getGroupPeriodParentsId :: PhyloGroup -> [PhyloGroupId]
getGroupPeriodParentsId g = map fst $ getGroupPeriodParents g
-- | To get the roots labels of a list of group ngrams
getGroupText :: PhyloGroup -> Phylo -> [Text]
getGroupText g p = ngramsToText (getRootsLabels p) (getGroupNgrams g)
-- | To get all the PhyloGroup of a Phylo
getGroups :: Phylo -> [PhyloGroup]
......
......@@ -47,8 +47,8 @@ type DotId = T'.Text
-- | Dot to File | --
---------------------
dotToFile :: FilePath -> FilePath -> DotGraph DotId -> IO ()
dotToFile filePath fileName dotG = writeFile (combine filePath fileName) $ dotToString dotG
dotToFile :: FilePath -> DotGraph DotId -> IO ()
dotToFile filePath dotG = writeFile filePath $ dotToString dotG
dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG)
......
......@@ -19,24 +19,13 @@ module Gargantext.Viz.Phylo.View.Taggers
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,groupBy,sortOn,sort)
import Data.Text (Text,unwords)
import Data.Text (Text)
import Data.Tuple (fst, snd)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Tools
import qualified Data.Map as Map
import qualified Data.Vector as Vector
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = unwords $ ngramsToText ngrams l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
-- | To get the nth most frequent Ngrams in a list of PhyloGroups
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment