Commit 4a8e3c73 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[REBASE]

parent a6aaf952
......@@ -70,7 +70,7 @@ library:
- Gargantext.API.Prelude
- Gargantext.Core
- Gargantext.Core.NodeStory
- Gargantext.Core.Methods.Distances
- Gargantext.Core.Methods.Similarities
- Gargantext.Core.Types
- Gargantext.Core.Types.Individu
- Gargantext.Core.Types.Main
......
......@@ -187,11 +187,12 @@ getCoocByNgrams' f (Diagonal diag) m =
<$> (fmap f $ HM.lookup t1 m)
<*> (fmap f $ HM.lookup t2 m)
)
| (t1,t2) <- if diag then
[ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
-- more efficient to enumerate all the y <= x.
else
listToCombi identity ks
| (t1,t2) <- if diag
then [ (x,y) | x <- ks, y <- ks, x <= y]
-- TODO if we keep a Data.Map here it might be
-- more efficient to enumerate all the y <= x.
else
listToCombi identity ks
]
where ks = HM.keys m
......
......@@ -27,7 +27,7 @@ import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Ngrams.List (reIndexWith)
--import Gargantext.API.Ngrams.Types (TabType(..))
import Gargantext.API.Prelude (GargM, GargError, simuLogs)
import Gargantext.Core.Methods.Distances (GraphMetric(..))
import Gargantext.Core.Methods.Similarities (GraphMetric(..))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Graph (Strength)
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
......
......@@ -62,7 +62,7 @@ import Data.Set (fromList, toList, isSubsetOf)
import Data.Graph.Inductive hiding (Graph, neighbors, subgraph, (&))
import Gargantext.Core.Viz.Graph.FGL (Graph_Undirected, degree, neighbors, mkGraphUfromEdges)
import Gargantext.Core.Viz.Graph.Tools (cooc2graph',cooc2graph'', Threshold)
import Gargantext.Core.Methods.Distances (Distance)
import Gargantext.Core.Methods.Similarities (Similarity)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex)
import Gargantext.Core.Viz.Phylo
-- import Debug.Trace (trace)
......@@ -70,9 +70,9 @@ type Graph = Graph_Undirected
type Neighbor = Node
-- | getMaxCliques
-- TODO chose distance order
-- TODO chose similarity order
getMaxCliques :: Ord a => MaxCliqueFilter -> Distance -> Threshold -> Map (a, a) Int -> [[a]]
getMaxCliques :: Ord a => MaxCliqueFilter -> Similarity -> Threshold -> Map (a, a) Int -> [[a]]
getMaxCliques f d t m = map fromIndices $ getMaxCliques' t m'
where
m' = toIndex to m
......
{-|
Module : Gargantext.Graph.Distances
Description : Distance management tools
Module : Gargantext.Graph.Similarities
Description : Similarity management tools
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
......@@ -11,32 +11,32 @@ Portability : POSIX
{-# LANGUAGE Strict #-}
module Gargantext.Core.Methods.Distances
module Gargantext.Core.Methods.Similarities
where
import Data.Aeson
import Data.Array.Accelerate (Matrix)
import Data.Swagger
import GHC.Generics (Generic)
import Gargantext.Core.Methods.Distances.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Methods.Distances.Accelerate.Distributional (logDistributional)
import Gargantext.Core.Methods.Similarities.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Methods.Similarities.Accelerate.Distributional (logDistributional)
import Gargantext.Prelude (Ord, Eq, Int, Double, Show)
import Prelude (Enum, Bounded, minBound, maxBound)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
------------------------------------------------------------------------
data Distance = Conditional | Distributional
data Similarity = Conditional | Distributional
deriving (Show, Eq)
measure :: Distance -> Matrix Int -> Matrix Double
measure :: Similarity -> Matrix Int -> Matrix Double
measure Conditional x = measureConditional x
measure Distributional x = y
where
y = logDistributional x
------------------------------------------------------------------------
withMetric :: GraphMetric -> Distance
withMetric :: GraphMetric -> Similarity
withMetric Order1 = Conditional
withMetric Order2 = Distributional
......
{-|
Module : Gargantext.Core.Methods.Distances.Accelerate.Conditional
Module : Gargantext.Core.Methods.Similarities.Accelerate.Conditional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -20,7 +20,7 @@ See Gargantext.Core.Methods.Graph.Accelerate)
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Methods.Distances.Accelerate.Conditional
module Gargantext.Core.Methods.Similarities.Accelerate.Conditional
where
-- import qualified Data.Foldable as P (foldl1)
......@@ -28,7 +28,7 @@ module Gargantext.Core.Methods.Distances.Accelerate.Conditional
import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
import Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
import qualified Gargantext.Prelude as P
......
{-|
Module : Gargantext.Core.Methods.Distances.Accelerate.Distributional
Module : Gargantext.Core.Methods.Similarities.Accelerate.Distributional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -8,7 +8,7 @@ Stability : experimental
Portability : POSIX
* Distributional Distance metric
* Distributional Similarity metric
__Definition :__ Distributional metric is a relative metric which depends on the
selected list, it represents structural equivalence of mutual information.
......@@ -41,7 +41,7 @@ where $n_{ij}$ is the cooccurrence between term $i$ and term $j$
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE GADTs #-}
module Gargantext.Core.Methods.Distances.Accelerate.Distributional
module Gargantext.Core.Methods.Similarities.Accelerate.Distributional
where
-- import qualified Data.Foldable as P (foldl1)
......
{-|
Module : Gargantext.Core.Methods.Distances.Accelerate.SpeGen
Module : Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -20,7 +20,7 @@ See Gargantext.Core.Methods.Graph.Accelerate)
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Methods.Distances.Accelerate.SpeGen
module Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
where
-- import qualified Data.Foldable as P (foldl1)
......
{-|
Module : Gargantext.Core.Methods.Distances
Module : Gargantext.Core.Methods.Similarities
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -12,7 +12,7 @@ Motivation and definition of the @Conditional@ distance.
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Strict #-}
module Gargantext.Core.Methods.Distances.Conditional
module Gargantext.Core.Methods.Similarities.Conditional
where
import Control.DeepSeq (NFData)
......
{-|
Module : Gargantext.Core.Methods.Distances.Distributional
Module : Gargantext.Core.Methods.Similarities.Distributional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -14,7 +14,7 @@ Motivation and definition of the @Distributional@ distance.
{-# LANGUAGE Strict #-}
module Gargantext.Core.Methods.Distances.Distributional
module Gargantext.Core.Methods.Similarities.Distributional
where
import Data.Matrix hiding (identity)
......
......@@ -23,7 +23,7 @@ import Data.Map (Map)
import Data.Monoid (Monoid, mempty)
import Data.HashMap.Strict (HashMap)
import Data.Semigroup (Semigroup)
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
import Gargantext.Core.Methods.Similarities.Accelerate.SpeGen
import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
import Gargantext.Core.Viz.Graph.Index
import Gargantext.Prelude
......
......@@ -39,7 +39,7 @@ import Gargantext.Prelude
-- starting as an API rexporting main functions of the great lib
-- text-metrics of Mark Karpov
-- | Levenshtein Distance
-- | Levenshtein Similarity
-- In information theory, Linguistics and computer science,
-- the Levenshtein distance is a string metric for measuring
-- the difference between two sequences.
......@@ -86,7 +86,7 @@ overlap = DTM.overlap
jaccard :: Text -> Text -> Ratio Int
jaccard = DTM.jaccard
-- | Hamming Distance
-- | Hamming Similarity
-- In information theory, the Hamming distance between two strings of
-- equal length is the number of positions at which the corresponding
-- symbols are different. In other words, it measures the minimum number of
......
......@@ -25,7 +25,7 @@ import qualified Data.Text as T
import qualified Text.Read as T
import Gargantext.API.Ngrams.Types (NgramsTerm(..), NgramsRepoElement(..), mSetToList)
import Gargantext.Core.Methods.Distances (GraphMetric)
import Gargantext.Core.Methods.Similarities (GraphMetric)
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Database.Admin.Types.Node (NodeId)
......
......@@ -27,7 +27,7 @@ import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Prelude
import Gargantext.Core.Methods.Distances (Distance(..), GraphMetric(..), withMetric)
import Gargantext.Core.Methods.Similarities (Similarity(..), GraphMetric(..), withMetric)
import Gargantext.Core.NodeStory
import Gargantext.Core.Types.Main
import Gargantext.Core.Viz.Graph
......@@ -101,7 +101,7 @@ getGraph _uId nId = do
listId <- defaultList cId
repo <- getRepo [listId]
-- TODO Distance in Graph params
-- TODO Similarity in Graph params
case graph of
Nothing -> do
let defaultMetric = Order1
......@@ -129,16 +129,16 @@ recomputeGraph :: FlowCmdM env err m
-> Maybe Strength
-> Bool
-> m Graph
recomputeGraph _uId nId method maybeDistance maybeStrength force = do
recomputeGraph _uId nId method maybeSimilarity maybeStrength force = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let
graph = nodeGraph ^. node_hyperdata . hyperdataGraph
camera = nodeGraph ^. node_hyperdata . hyperdataCamera
graphMetadata = graph ^? _Just . graph_metadata . _Just
listVersion = graph ^? _Just . graph_metadata . _Just . gm_list . lfg_version
graphMetric = case maybeDistance of
graphMetric = case maybeSimilarity of
Nothing -> graph ^? _Just . graph_metadata . _Just . gm_metric
_ -> maybeDistance
_ -> maybeSimilarity
similarity = case graphMetric of
Nothing -> withMetric Order1
Just m -> withMetric m
......@@ -164,7 +164,7 @@ recomputeGraph _uId nId method maybeDistance maybeStrength force = do
case graph of
Nothing -> do
mt <- defaultGraphMetadata cId "Title" repo (fromMaybe Order1 maybeDistance) strength
mt <- defaultGraphMetadata cId "Title" repo (fromMaybe Order1 maybeSimilarity) strength
g <- computeG $ Just mt
pure $ trace "[G.V.G.API.recomputeGraph] Graph empty, computed" g
Just graph' -> if (listVersion == Just v) && (not force)
......@@ -177,27 +177,24 @@ recomputeGraph _uId nId method maybeDistance maybeStrength force = do
computeGraph :: FlowCmdM env err m
=> CorpusId
-> PartitionMethod
-> Distance
-> Similarity
-> Strength
-> NgramsType
-> NodeListStory
-> m Graph
computeGraph cId method d strength nt repo = do
lId <- defaultList cId
computeGraph corpusId method similarity strength nt repo = do
lId <- defaultList corpusId
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot [MapTerm]
$ mapTermListRoot [lId] nt repo
!myCooc <- HashMap.filter (>1) -- Removing the hapax (ngrams with 1 cooc)
-- Removing the hapax (ngrams with 1 cooc)
!myCooc <- HashMap.filter (>1)
<$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
graph <- liftBase $ cooc2graphWith method d 0 strength myCooc
<$> getContextsByNgramsOnlyUser corpusId (lIds <> [lId]) nt (HashMap.keys ngs)
--listNgrams <- getListNgrams [lId] nt
--let graph' = mergeGraphNgrams graph (Just listNgrams)
-- saveAsFileDebug "/tmp/graphWithNodes" graph'
graph <- liftBase $ cooc2graphWith method similarity 0 strength myCooc
pure graph
......
{-|
Module : Gargantext.Graph.Distances.Utils
Module : Gargantext.Graph.Similarities.Utils
Description : Tools to compute distances from Cooccurrences
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......
......@@ -22,8 +22,8 @@ import Data.Swagger hiding (items)
import GHC.Float (sin, cos)
import GHC.Generics (Generic)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Distances (Distance(..), measure)
import Gargantext.Core.Methods.Distances.Conditional (conditional)
import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
import Gargantext.Core.Methods.Similarities.Conditional (conditional)
import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
......@@ -65,7 +65,7 @@ defaultClustering x = spinglass 1 x
type Threshold = Double
cooc2graph' :: Ord t => Distance
cooc2graph' :: Ord t => Similarity
-> Double
-> Map (t, t) Int
-> Map (Index, Index) Double
......@@ -87,7 +87,7 @@ cooc2graph' distance threshold myCooc
-- coocurrences graph computation
cooc2graphWith :: PartitionMethod
-> Distance
-> Similarity
-> Threshold
-> Strength
-> HashMap (NgramsTerm, NgramsTerm) Int
......@@ -100,13 +100,13 @@ cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
cooc2graphWith' :: ToComId a
=> Partitions a
-> Distance
-> Similarity
-> Threshold
-> Strength
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graphWith' doPartitions distance threshold strength myCooc = do
let (distanceMap, diag, ti) = doDistanceMap distance threshold strength myCooc
cooc2graphWith' doPartitions similarity threshold strength myCooc = do
let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
distanceMap `seq` diag `seq` ti `seq` return ()
--{- -- Debug
......@@ -134,7 +134,7 @@ cooc2graphWith' doPartitions distance threshold strength myCooc = do
type Reverse = Bool
doDistanceMap :: Distance
doSimilarityMap :: Similarity
-> Threshold
-> Strength
-> HashMap (NgramsTerm, NgramsTerm) Int
......@@ -142,7 +142,7 @@ doDistanceMap :: Distance
, Map (Index, Index) Int
, Map NgramsTerm Index
)
doDistanceMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
doSimilarityMap Distributional threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
where
-- TODO remove below
(diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
......@@ -168,7 +168,7 @@ doDistanceMap Distributional threshold strength myCooc = (distanceMap, toIndex t
$ (\m -> m `seq` Map.filter (> threshold) m)
$ similarities `seq` mat2map similarities
doDistanceMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti)
where
myCooc' = Map.fromList $ HashMap.toList myCooc
(ti, _it) = createIndices myCooc'
......@@ -302,7 +302,7 @@ layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
-----------------------------------------------------------------------------
-- MISC Tools
cooc2graph'' :: Ord t => Distance
cooc2graph'' :: Ord t => Similarity
-> Double
-> Map (t, t) Int
-> Map (Index, Index) Double
......
......@@ -21,7 +21,7 @@ import Data.Vector (Vector)
import Debug.Trace (trace)
import Prelude (floor)
import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Methods.Similarities (Similarity(Conditional))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
......
......@@ -210,8 +210,8 @@ synchronicClustering phylo =
in toNextScale phylo $ levelUpAncestors $ concat newBranches'
-- synchronicDistance :: Phylo -> Level -> String
-- synchronicDistance phylo lvl =
-- synchronicSimilarity :: Phylo -> Level -> String
-- synchronicSimilarity phylo lvl =
-- foldl' (\acc branch ->
-- acc <> (foldl' (\acc' period ->
-- acc' <> let prox = phyloProximity $ getConfig phylo
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment