Commit 151b54d0 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] Graph Multipartite connected, need to change the node shape in Graph and tests

parent 4a8e3c73
This diff is collapsed.
This diff is collapsed.
......@@ -195,7 +195,36 @@ getCoocByNgrams' f (Diagonal diag) m =
listToCombi identity ks
]
where ks = HM.keys m
where
ks = HM.keys m
-- TODO k could be either k1 or k2 here
getCoocByNgrams'' :: (Hashable k, Ord k, Ord contexts)
=> Diagonal
-> (contextA -> Set contexts, contextB -> Set contexts)
-> (HashMap k contextA, HashMap k contextB)
-> HashMap (k, k) Int
getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) =
HM.fromList [( (t1,t2)
, maybe 0 Set.size $ Set.intersection
<$> (fmap f1 $ HM.lookup t1 m1)
<*> (fmap f2 $ HM.lookup t2 m2)
)
| (t1,t2) <- if diag
then
[ (x,y) | x <- ks1, y <- ks2, x <= y]
-- TODO if we keep a Data.Map here it might be
-- more efficient to enumerate all the y <= x.
else
[ (x,y) | x <- ks1, y <- ks2, x < y]
-- TODO check optim
-- listToCombi identity ks1
]
where
ks1 = HM.keys m1
ks2 = HM.keys m2
------------------------------------------
......
......@@ -64,6 +64,8 @@ data UpdateNodeParams = UpdateNodeParamsList { methodList :: !Method }
| UpdateNodeParamsGraph { methodGraphMetric :: !GraphMetric
, methodGraphClustering :: !PartitionMethod
, methodGraphEdgesStrength :: !Strength
, methodGraphNodeType1 :: !NgramsType
, methodGraphNodeType2 :: !NgramsType
}
| UpdateNodeParamsTexts { methodTexts :: !Granularity }
......@@ -104,7 +106,7 @@ updateNode :: (HasSettings env, FlowCmdM env err m)
-> UpdateNodeParams
-> (JobLog -> m ())
-> m JobLog
updateNode uId nId (UpdateNodeParamsGraph metric method strength) logStatus = do
updateNode uId nId (UpdateNodeParamsGraph metric method strength nt1 nt2) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
......@@ -112,7 +114,7 @@ updateNode uId nId (UpdateNodeParamsGraph metric method strength) logStatus = do
, _scst_events = Just []
}
printDebug "Computing graph: " method
_ <- recomputeGraph uId nId method (Just metric) (Just strength) True
_ <- recomputeGraph uId nId method (Just metric) (Just strength) nt1 nt2 True
printDebug "Graph computed: " method
pure JobLog { _scst_succeeded = Just 2
......@@ -273,7 +275,7 @@ instance ToSchema UpdateNodeParams
instance Arbitrary UpdateNodeParams where
arbitrary = do
l <- UpdateNodeParamsList <$> arbitrary
g <- UpdateNodeParamsGraph <$> arbitrary <*> arbitrary <*> arbitrary
g <- UpdateNodeParamsGraph <$> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary
t <- UpdateNodeParamsTexts <$> arbitrary
b <- UpdateNodeParamsBoard <$> arbitrary
elements [l,g,t,b]
......
......@@ -17,20 +17,20 @@ module Gargantext.Core.Viz.Graph
import Data.ByteString.Lazy as DBL (readFile, writeFile)
import Data.HashMap.Strict (HashMap, lookup)
import Data.HashSet (HashSet)
import Data.Text (pack)
import GHC.IO (FilePath)
import qualified Data.Aeson as DA
import qualified Data.Text as T
import qualified Text.Read as T
import Gargantext.API.Ngrams.Types (NgramsTerm(..), NgramsRepoElement(..), mSetToList)
import Gargantext.Core.Methods.Similarities (GraphMetric)
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.Aeson as DA
import qualified Data.HashSet as HashSet
import qualified Data.Text as Text
import qualified Text.Read as Text
data TypeNode = Terms | Unknown
deriving (Show, Generic)
......@@ -84,7 +84,34 @@ instance ToSchema LegendField where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lf_")
makeLenses ''LegendField
---------------------------------------------------------------
data Partite = Partite { _partite_nodes :: HashSet NgramsTerm
, _partite_type :: NgramsType
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "_partite_") ''Partite)
instance ToSchema Partite where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_partite_")
makeLenses ''Partite
data MultiPartite = MultiPartite { _multipartite_data1 :: Partite
, _multipartite_data2 :: Partite
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "_multipartite_") ''MultiPartite)
instance ToSchema MultiPartite where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_multipartite_")
makeLenses ''MultiPartite
defaultMultipartite :: MultiPartite
defaultMultipartite = MultiPartite a a
where
a = Partite HashSet.empty NgramsTerms
---------------------------------------------------------------
type Version = Int
data ListForGraph =
ListForGraph { _lfg_listId :: ListId
......@@ -117,6 +144,7 @@ data GraphMetadata =
, _gm_legend :: [LegendField] -- legend of the Graph
, _gm_list :: ListForGraph
, _gm_startForceAtlas :: Bool
-- , _gm_nodesTypes :: Maybe (NgramsType, NgramsType)
-- , _gm_version :: Int
}
deriving (Show, Generic)
......@@ -248,7 +276,7 @@ graphV3ToGraph (GraphV3 links nodes) = Graph { _graph_nodes = map nodeV32node no
linkV32edge n (EdgeV3 eo_s' eo_t' eo_w') =
Edge { edge_source = cs $ show eo_s'
, edge_target = cs $ show eo_t'
, edge_weight = (T.read $ T.unpack eo_w') :: Double
, edge_weight = (Text.read $ Text.unpack eo_w') :: Double
, edge_confluence = 0.5
, edge_id = cs $ show n }
......@@ -258,7 +286,7 @@ graphV3ToGraphWithFiles g1 g2 = do
-- GraphV3 <- IO Fichier
graph <- DBL.readFile g1
let newGraph = case DA.decode graph :: Maybe GraphV3 of
Nothing -> panic (T.pack "no graph")
Nothing -> panic (Text.pack "no graph")
Just new -> new
DBL.writeFile g2 (DA.encode $ graphV3ToGraph newGraph)
......
......@@ -107,7 +107,7 @@ getGraph _uId nId = do
let defaultMetric = Order1
let defaultPartitionMethod = Spinglass
let defaultEdgesStrength = Strong
graph' <- computeGraph cId defaultPartitionMethod (withMetric defaultMetric) defaultEdgesStrength NgramsTerms repo
graph' <- computeGraph cId defaultPartitionMethod (withMetric defaultMetric) defaultEdgesStrength (NgramsTerms, NgramsTerms) repo
mt <- defaultGraphMetadata cId "Title" repo defaultMetric defaultEdgesStrength
let
graph'' = set graph_metadata (Just mt) graph'
......@@ -127,9 +127,11 @@ recomputeGraph :: FlowCmdM env err m
-> PartitionMethod
-> Maybe GraphMetric
-> Maybe Strength
-> NgramsType
-> NgramsType
-> Bool
-> m Graph
recomputeGraph _uId nId method maybeSimilarity maybeStrength force = do
recomputeGraph _uId nId method maybeSimilarity maybeStrength nt1 nt2 force = do
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let
graph = nodeGraph ^. node_hyperdata . hyperdataGraph
......@@ -157,7 +159,7 @@ recomputeGraph _uId nId method maybeSimilarity maybeStrength force = do
let v = repo ^. unNodeStory . at listId . _Just . a_version
let computeG mt = do
!g <- computeGraph cId method similarity strength NgramsTerms repo
!g <- computeGraph cId method similarity strength (nt1,nt2) repo
let g' = set graph_metadata mt g
_nentries <- updateHyperdata nId (HyperdataGraph (Just g') camera)
pure g'
......@@ -174,31 +176,53 @@ recomputeGraph _uId nId method maybeSimilarity maybeStrength force = do
pure $ trace "[G.V.G.API] Graph exists, recomputing" g
-- TODO remove repo
computeGraph :: FlowCmdM env err m
=> CorpusId
-> PartitionMethod
-> Similarity
-> Strength
-> NgramsType
-> (NgramsType, NgramsType)
-> NodeListStory
-> m Graph
computeGraph corpusId method similarity strength nt repo = do
computeGraph corpusId method similarity strength (nt1,nt2) repo = do
-- Getting the Node parameters
lId <- defaultList corpusId
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot [MapTerm]
$ mapTermListRoot [lId] nt repo
-- Removing the hapax (ngrams with 1 cooc)
!myCooc <- HashMap.filter (>1)
<$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getContextsByNgramsOnlyUser corpusId (lIds <> [lId]) nt (HashMap.keys ngs)
graph <- liftBase $ cooc2graphWith method similarity 0 strength myCooc
-- Getting the Ngrams to compute with and grouping it according to the lists
let
groupedContextsByNgrams nt corpusId' (lists_master, lists_user) = do
let
ngs = filterListWithRoot [MapTerm] $ mapTermListRoot lists_user nt repo
groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser corpusId'
(lists_user <> lists_master) nt (HashMap.keys ngs)
-- Optim if nt1 == nt2 : do not compute twice
(m1,m2) <- do
m1 <- groupedContextsByNgrams nt1 corpusId (lIds, [lId])
if nt1 == nt2
then
pure (m1,m1)
else do
m2 <- groupedContextsByNgrams nt2 corpusId (lIds, [lId])
pure (m1,m2)
-- Removing the hapax (ngrams with 1 cooc)
let !myCooc = HashMap.filter (>1)
$ getCoocByNgrams'' (Diagonal True) (identity, identity) (m1,m2)
-- TODO MultiPartite Here
graph <- liftBase
$ cooc2graphWith method (MultiPartite (Partite (HashMap.keysSet m1) nt1)
(Partite (HashMap.keysSet m2) nt2)
)
similarity 0 strength myCooc
pure graph
defaultGraphMetadata :: HasNodeError err
=> CorpusId
-> Text
......@@ -240,6 +264,7 @@ graphAsync u n =
-- -> NodeId
-- -> (JobLog -> GargNoServer ())
-- -> GargNoServer JobLog
-- TODO get Graph Metadata to recompute
graphRecompute :: FlowCmdM env err m
=> UserId
-> NodeId
......@@ -251,7 +276,7 @@ graphRecompute u n logStatus = do
, _scst_remaining = Just 1
, _scst_events = Just []
}
_g <- recomputeGraph u n Spinglass Nothing Nothing False
_g <- recomputeGraph u n Spinglass Nothing Nothing NgramsTerms NgramsTerms False
pure JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 0
......@@ -306,7 +331,7 @@ recomputeVersions :: FlowCmdM env err m
=> UserId
-> NodeId
-> m Graph
recomputeVersions uId nId = recomputeGraph uId nId Spinglass Nothing Nothing False
recomputeVersions uId nId = recomputeGraph uId nId Spinglass Nothing Nothing NgramsTerms NgramsTerms False
------------------------------------------------------------
graphClone :: UserId
......
......@@ -30,6 +30,7 @@ import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
import Gargantext.Prelude
import Graph.Types (ClusterNode)
......@@ -40,6 +41,7 @@ import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.HashSet as HashSet
import qualified Data.Text as Text
import qualified Data.Vector.Storable as Vec
import qualified Graph.BAC.ProxemyOptim as BAC
......@@ -87,6 +89,7 @@ cooc2graph' distance threshold myCooc
-- coocurrences graph computation
cooc2graphWith :: PartitionMethod
-> MultiPartite
-> Similarity
-> Threshold
-> Strength
......@@ -100,12 +103,13 @@ cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
cooc2graphWith' :: ToComId a
=> Partitions a
-> MultiPartite
-> Similarity
-> Threshold
-> Strength
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graphWith' doPartitions similarity threshold strength myCooc = do
cooc2graphWith' doPartitions multi similarity threshold strength myCooc = do
let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
distanceMap `seq` diag `seq` ti `seq` return ()
......@@ -130,7 +134,7 @@ cooc2graphWith' doPartitions similarity threshold strength myCooc = do
n' = Set.size $ Set.fromList $ as <> bs
!bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
!confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
pure $ data2graph ti diag bridgeness' confluence' partitions
pure $ data2graph multi ti diag bridgeness' confluence' partitions
type Reverse = Bool
......@@ -187,14 +191,26 @@ doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti
type Occurrences = Int
data2graph :: ToComId a
=> Map NgramsTerm Int
multiPartiteWith :: MultiPartite -> NgramsTerm -> TypeNode
multiPartiteWith (MultiPartite (Partite s1 t1) (Partite _s2 t2)) t =
if HashSet.member t s1
then typeNode t1
else typeNode t2
typeNode :: NgramsType -> TypeNode
typeNode NgramsTerms = Terms
typeNode _ = Unknown
data2graph :: ToComId a
=> MultiPartite
-> Map NgramsTerm Int
-> Map (Int, Int) Occurrences
-> Map (Int, Int) Double
-> Map (Int, Int) Double
-> [a]
-> Graph
data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
data2graph multi labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
, _graph_edges = edges
, _graph_metadata = Nothing
}
......@@ -202,7 +218,7 @@ data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = no
nodes = map (setCoord ForceAtlas labels bridge)
[ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
, node_type = Terms -- or Unknown
, node_type = multiPartiteWith multi l
, node_id = cs (show n)
, node_label = unNgramsTerm l
, node_x_coord = 0
......
......@@ -23,20 +23,21 @@ module Gargantext.Database.Schema.Ngrams
import Codec.Serialise (Serialise())
import Control.Lens (over)
import Control.Monad (mzero)
import Data.Maybe (fromMaybe)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Aeson
import Data.Aeson.Types (toJSONKeyText)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Map (fromList, lookup)
import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
import Data.Maybe (fromMaybe)
import Data.Text (Text, splitOn, pack, strip)
import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
import Gargantext.Core (HasDBid(..))
import Gargantext.Core.Types (TODO(..), Typed(..))
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Types
import Gargantext.Prelude
import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
import Gargantext.Core (HasDBid(..))
import Gargantext.Database.Types
import Gargantext.Database.Schema.Prelude
import Test.QuickCheck (elements)
import Text.Read (read)
import qualified Data.ByteString.Char8 as B
import qualified Data.HashMap.Strict as HashMap
......@@ -84,10 +85,19 @@ ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTable
-- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
data NgramsType = Authors | Institutes | Sources | NgramsTerms
deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
instance Serialise NgramsType
instance FromJSON NgramsType
where
parseJSON (String "Authors") = pure Authors
parseJSON (String "Institutes") = pure Institutes
parseJSON (String "Sources") = pure Sources
parseJSON (String "Terms") = pure NgramsTerms
parseJSON _ = mzero
instance FromJSONKey NgramsType where
fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
instance ToJSON NgramsType
instance ToJSONKey NgramsType where
toJSONKey = toJSONKeyText (pack . show)
......@@ -97,6 +107,9 @@ instance ToHttpApiData NgramsType where
toUrlPiece = pack . show
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance Arbitrary NgramsType where
arbitrary = elements [ minBound .. maxBound ]
-- map NgramsType to its assigned id
instance FromField NgramsType where
fromField fld mdata =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment