Commit e0f3433b authored by Alexandre Delanoë's avatar Alexandre Delanoë

[GRAPH] Working on terms selection to complete balanced graph.

parent bb2042f3
...@@ -36,7 +36,7 @@ import Gargantext.Core.Types (CorpusId) ...@@ -36,7 +36,7 @@ import Gargantext.Core.Types (CorpusId)
--import Gargantext.Database.Types.Node --import Gargantext.Database.Types.Node
import Gargantext.Prelude import Gargantext.Prelude
--import Gargantext.Text.Context (splitBy, SplitContext(Sentences)) --import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..)) --import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..))
--import Gargantext.Text.Metrics.Count (coocOn) --import Gargantext.Text.Metrics.Count (coocOn)
--import Gargantext.Text.Parsers.CSV --import Gargantext.Text.Parsers.CSV
--import Gargantext.Text.Terms (TermType, extractTerms) --import Gargantext.Text.Terms (TermType, extractTerms)
...@@ -120,21 +120,21 @@ cooc2graph :: (Map (Text, Text) Int) -> IO Graph ...@@ -120,21 +120,21 @@ cooc2graph :: (Map (Text, Text) Int) -> IO Graph
cooc2graph myCooc = do cooc2graph myCooc = do
--printDebug "myCooc" myCooc --printDebug "myCooc" myCooc
-- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores -- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores
{-
let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 ) let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 )
(InclusionSize 500 ) (InclusionSize 500 )
(SampleBins 10 ) (SampleBins 10 )
(Clusters 3 ) (Clusters 3 )
(DefaultValue 0 ) (DefaultValue 0 )
) myCooc ) myCooc
--printDebug "myCooc3 size" $ M.size myCooc3 --} --printDebug "myCooc3 size" $ M.size myCooc3
--printDebug "myCooc3" myCooc3
-- Cooc -> Matrix -- Cooc -> Matrix
let (ti, _) = createIndices myCooc3 let (ti, _) = createIndices myCooc
--printDebug "ti size" $ M.size ti --printDebug "ti size" $ M.size ti
--printDebug "ti" ti --printDebug "ti" ti
let myCooc4 = toIndex ti myCooc3 let myCooc4 = toIndex ti myCooc
--printDebug "myCooc4 size" $ M.size myCooc4 --printDebug "myCooc4 size" $ M.size myCooc4
--printDebug "myCooc4" myCooc4 --printDebug "myCooc4" myCooc4
...@@ -153,7 +153,7 @@ cooc2graph myCooc = do ...@@ -153,7 +153,7 @@ cooc2graph myCooc = do
--printDebug "distanceMap size" $ M.size distanceMap --printDebug "distanceMap size" $ M.size distanceMap
--printDebug "distanceMap" distanceMap --printDebug "distanceMap" distanceMap
-- let distance = fromIndex fi distanceMap --let distance = fromIndex fi distanceMap
--printDebug "distance" $ M.size distance --printDebug "distance" $ M.size distance
partitions <- case Map.size distanceMap > 0 of partitions <- case Map.size distanceMap > 0 of
......
...@@ -106,8 +106,8 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs ...@@ -106,8 +106,8 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
ys = take b $ drop a ns ys = take b $ drop a ns
zs = drop b $ drop a ns zs = drop b $ drop a ns
a = 100 a = 1
b = 1000 b = 10000
isStopTerm :: Text -> Bool isStopTerm :: Text -> Bool
isStopTerm x = Text.length x < 3 isStopTerm x = Text.length x < 3
......
...@@ -38,28 +38,25 @@ data SampleBins = SampleBins Double ...@@ -38,28 +38,25 @@ data SampleBins = SampleBins Double
data Clusters = Clusters Int data Clusters = Clusters Int
data DefaultValue = DefaultValue Int data DefaultValue = DefaultValue Int
data FilterConfig = FilterConfig { fc_mapListSize :: MapListSize data FilterConfig = FilterConfig
, fc_inclusionSize :: InclusionSize { fc_mapListSize :: MapListSize
, fc_sampleBins :: SampleBins , fc_inclusionSize :: InclusionSize
, fc_clusters :: Clusters , fc_sampleBins :: SampleBins
, fc_defaultValue :: DefaultValue , fc_clusters :: Clusters
} , fc_defaultValue :: DefaultValue
}
filterCooc :: (Show t, Ord t) => FilterConfig -> Map (t, t) Int -> Map (t, t) Int filterCooc :: (Show t, Ord t) => FilterConfig -> Map (t, t) Int -> Map (t, t) Int
filterCooc fc cc = (filterCooc' fc) ts cc filterCooc fc cc = (filterCooc' fc) ts cc
where where
ts = map _scored_terms $ takeSome fc $ coocScored cc ts = map _scored_terms $ takeSome fc $ coocScored cc
filterCooc' :: (Show t, Ord t) => FilterConfig -> [t] -> Map (t, t) Int -> Map (t, t) Int filterCooc' :: (Show t, Ord t) => FilterConfig -> [t] -> Map (t, t) Int -> Map (t, t) Int
filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m =
-- trace ("coocScored " <> show ts) $ -- trace ("coocScored " <> show ts) $
foldl' (\m' k -> M.insert k (maybe dv identity $ M.lookup k m) m') foldl' (\m' k -> M.insert k (maybe dv identity $ M.lookup k m) m')
M.empty selection M.empty (listToCombi identity ts)
where
selection = [(x,y) | x <- ts
, y <- ts
, x > y
]
-- | Map list creation -- | Map list creation
...@@ -70,7 +67,8 @@ filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = ...@@ -70,7 +67,8 @@ filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m =
takeSome :: Ord t => FilterConfig -> [Scored t] -> [Scored t] takeSome :: Ord t => FilterConfig -> [Scored t] -> [Scored t]
takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters _) _) scores = L.take l takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters _) _) scores = L.take l
$ takeSample n m $ takeSample n m
$ L.take l' $ reverse $ sortWith (Down . _scored_incExc) scores $ L.take l'
$ reverse $ sortWith (Down . _scored_incExc) scores
-- splitKmeans k scores -- splitKmeans k scores
where where
-- TODO: benchmark with accelerate-example kmeans version -- TODO: benchmark with accelerate-example kmeans version
...@@ -90,16 +88,17 @@ takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Cluste ...@@ -90,16 +88,17 @@ takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Cluste
$ sortWith (Down . _scored_speGen) xs $ sortWith (Down . _scored_speGen) xs
data Scored ts = Scored { _scored_terms :: !ts data Scored ts = Scored
, _scored_incExc :: !InclusionExclusion { _scored_terms :: !ts
, _scored_speGen :: !SpecificityGenericity , _scored_incExc :: !InclusionExclusion
} deriving (Show) , _scored_speGen :: !SpecificityGenericity
} deriving (Show)
-- TODO in the textflow we end up needing these indices, it might be better -- TODO in the textflow we end up needing these indices, it might be better
-- to compute them earlier and pass them around. -- to compute them earlier and pass them around.
coocScored :: Ord t => Map (t,t) Int -> [Scored t] coocScored :: Ord t => Map (t,t) Int -> [Scored t]
coocScored m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (M.toList fi) scores coocScored m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (M.toList fi) scores
where where
(ti,fi) = createIndices m (ti, fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat ti m (is, ss) = incExcSpeGen $ cooc2mat ti m
scores = DAA.toList $ DAA.run $ DAA.zip (DAA.use is) (DAA.use ss) scores = DAA.toList $ DAA.run $ DAA.zip (DAA.use is) (DAA.use ss)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment