Verified Commit d1151583 authored by Przemyslaw Kaminski

Merge branch 'dev' into issue-436-dockerfile-and-corenlp

parents 69ad9f99 7f759ab4
## Version 0.0.7.4.5
* [BACK][FIX][Error when uploading a specific TSV file (#433)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/433)
## Version 0.0.7.4.4
* [BACK][FIX][Order 1 advanced distance (#445)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/445)
* [FRONT][FIX][Frontend for bridgeness method choice (#730)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/730)
* [FRONT][FIX][Unify CSS files to a single syntax format (#712)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/712)
* [FRONT][FIX][Upgrade sigma.js (#705)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/705)
* [FRONT][FIX][Subcorpus frontend (#718)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/718)
* [FRONT][FIX][[Corpus upload] Fix an error on form select "NoList" option (#729)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/729)
* [FRONT][FIX][Basic feature flag hook (#721)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/721)
## Version 0.0.7.4.3
* [BACK][UPGRADE][Remove obsolete GHC option (#388)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/388)
......
@@ -75,6 +75,7 @@ import_p = fmap CCMD_import $ ImportArgs
     <*> ( option str ( long "user") )
     <*> ( option str ( long "name") )
     <*> settings_p
+    -- <*> (fmap Limit ( option auto ( long "limit" <> metavar "INT" <> help "The limit for the query") ))
     <*> ( option str ( long "corpus-path" <> help "Path to corpus file") )
 
 function_p :: String -> Either String ImportFunction
......
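The new commented-out line above sketches a possible future `--limit` flag for the import command. As a rough, standalone illustration of how such an option could be wired up with optparse-applicative (the `Limit` newtype and the `Args` record below are stand-ins for this example, not the real gargantext-cli types):

```haskell
import Control.Applicative (optional)
import Options.Applicative

-- Stand-in types for the sketch only.
newtype Limit = Limit Int
  deriving Show

data Args = Args
  { argUser  :: String
  , argLimit :: Maybe Limit
  } deriving Show

-- An optional --limit INT flag next to a mandatory --user flag.
args_p :: Parser Args
args_p = Args
  <$> option str ( long "user" <> help "User name" )
  <*> optional (fmap Limit ( option auto ( long "limit" <> metavar "INT" <> help "The limit for the query") ))

main :: IO ()
main = print =<< execParser (info (args_p <**> helper) fullDesc)
```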
@@ -5,7 +5,7 @@ cabal-version: 3.4
 -- see: https://github.com/sol/hpack
 
 name: gargantext
-version: 0.0.7.4.3
+version: 0.0.7.4.5
 synopsis: Search, map, share
 description: Please see README.md
 category: Data
@@ -32,6 +32,7 @@ data-files:
     ekg-assets/bootstrap-1.4.0.min.css
     ekg-assets/chart_line_add.png
     ekg-assets/cross.png
+    test-data/ngrams/433-utf-encoding-issue.tsv
     test-data/ngrams/GarganText_DocsList-nodeId-177.json
     test-data/ngrams/GarganText_NgramsTerms-nodeId-177.json
    test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
......
@@ -178,27 +178,28 @@ ngramsListFromTSVData tsvData = case decodeTsv of
     binaryData = BSL.fromStrict $ P.encodeUtf8 tsvData
 
     decodeTsv :: Either Prelude.String (Vector NgramsTableMap)
-    decodeTsv = Tsv.decodeWithP tsvToNgramsTableMap
+    decodeTsv = Vec.catMaybes <$>
+                Tsv.decodeWithP tsvToNgramsTableMap
                   (Tsv.defaultDecodeOptions { Tsv.decDelimiter = fromIntegral (P.ord '\t') })
                   Tsv.HasHeader
                   binaryData
 
 -- | Converts a plain TSV 'Record' into an NgramsTableMap
-tsvToNgramsTableMap :: Tsv.Record -> Tsv.Parser NgramsTableMap
+tsvToNgramsTableMap :: Tsv.Record -> Tsv.Parser (Maybe NgramsTableMap)
 tsvToNgramsTableMap record = case Vec.toList record of
   (map P.decodeUtf8 -> [status, label, forms])
-    -> pure $ conv status label forms
-  _ -> Prelude.fail "tsvToNgramsTableMap failed"
+    -> pure $ Just $ conv status label forms
+  -- WARNING: This silently ignores errors (#433)
+  _ -> pure Nothing
   where
     conv :: Text -> Text -> Text -> NgramsTableMap
     conv status label forms = Map.singleton (NgramsTerm label)
       $ NgramsRepoElement { _nre_size = 1
-                          , _nre_list = case status == "map" of
-                                          True  -> MapTerm
-                                          False -> case status == "main" of
-                                                     True  -> CandidateTerm
-                                                     False -> StopTerm
+                          , _nre_list = case status of
+                                          "map"  -> MapTerm
+                                          "main" -> CandidateTerm
+                                          _      -> StopTerm
                           , _nre_root = Nothing
                           , _nre_parent = Nothing
                           , _nre_children = MSet
......
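In short, the TSV importer no longer aborts on a malformed row: `tsvToNgramsTableMap` now returns `Maybe NgramsTableMap`, `Vec.catMaybes` drops the `Nothing`s, and the status column is matched directly against `"map"` / `"main"`. A minimal, standalone sketch of that row handling, using plain `[Text]` rows and a stand-in `TermList` type instead of the real cassava records and `NgramsRepoElement`:

```haskell
{-# LANGUAGE OverloadedStrings #-}
import Data.Maybe (mapMaybe)
import Data.Text (Text)

-- Stand-in for the real list types (MapTerm / CandidateTerm / StopTerm).
data TermList = MapTerm | CandidateTerm | StopTerm
  deriving Show

-- Keep rows with exactly three columns; skip anything else instead of
-- failing the whole upload (the behaviour introduced for #433).
parseRow :: [Text] -> Maybe (Text, TermList)
parseRow [status, label, _forms] = Just (label, toTermList status)
parseRow _                       = Nothing

-- Same status mapping as the new `case status of` in the diff.
toTermList :: Text -> TermList
toTermList "map"  = MapTerm
toTermList "main" = CandidateTerm
toTermList _      = StopTerm

main :: IO ()
main = mapM_ print $ mapMaybe parseRow
  [ ["map", "graph theory", "graph theories"]
  , ["only one column"]                        -- silently skipped
  , ["main", "bridgeness", "bridgenesses"]
  ]
```

Running it prints the two well-formed rows and silently skips the one-column line, which is exactly the trade-off the in-code WARNING about #433 points out.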
@@ -63,12 +63,12 @@ updateNode :: (HasNodeStory env err m
            -> JobHandle m
            -> m ()
 updateNode nId (UpdateNodeParamsGraph
-                 (UpdateNodeConfigGraph metric partitionMethod bridgeMethod strength nt1 nt2)) jobHandle = do
+                 (UpdateNodeConfigGraph metric bridgeMethod strength nt1 nt2)) jobHandle = do
   markStarted 2 jobHandle
   markProgress 1 jobHandle
   -- printDebug "Computing graph: " method
-  _ <- recomputeGraph nId partitionMethod bridgeMethod (Just metric) (Just strength) nt1 nt2 True
+  _ <- recomputeGraph nId bridgeMethod (Just metric) (Just strength) nt1 nt2 True
   -- printDebug "Graph computed: " method
   markComplete jobHandle
......
@@ -5,8 +5,7 @@ import Data.Aeson
 import Data.Swagger ( ToSchema )
 import Gargantext.Core.Methods.Similarities (GraphMetric(..))
 import Gargantext.Core.Text.Ngrams (NgramsType)
-import Gargantext.Core.Viz.Graph.Tools (PartitionMethod(..), BridgenessMethod(..))
-import Gargantext.Core.Viz.Graph.Types (Strength)
+import Gargantext.Core.Viz.Graph.Types (BridgenessMethod, Strength)
 import Gargantext.Core.Viz.Phylo (PhyloSubConfigAPI(..))
 import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType )
 import Gargantext.Prelude
@@ -46,7 +45,6 @@ data Charts = Sources | Authors | Institutes | Ngrams | All
 ------------------------------------------------------------------------
 data UpdateNodeConfigGraph = UpdateNodeConfigGraph { methodGraphMetric        :: !GraphMetric
-                                                   , methodGraphClustering    :: !PartitionMethod
                                                    , methodGraphBridgeness    :: !BridgenessMethod
                                                    , methodGraphEdgesStrength :: !Strength
                                                    , methodGraphNodeType1     :: !NgramsType
......
@@ -71,10 +71,9 @@ getGraph nId = do
   case graph of
     Nothing -> do
       let defaultMetric = Order1
-      let defaultPartitionMethod = Spinglass
       let defaultEdgesStrength = Strong
-      let defaultBridgenessMethod = BridgenessMethod_Basic
-      graph' <- computeGraph cId defaultPartitionMethod defaultBridgenessMethod (withMetric defaultMetric) defaultEdgesStrength (NgramsTerms, NgramsTerms) repo
+      let defaultBridgenessMethod = BridgenessBasic
+      graph' <- computeGraph cId defaultBridgenessMethod (withMetric defaultMetric) defaultEdgesStrength (NgramsTerms, NgramsTerms) repo
       mt <- defaultGraphMetadata cId listId "Title" repo defaultMetric defaultEdgesStrength
       let mt' = set gm_legend (generateLegend graph') mt
       let
@@ -91,7 +90,6 @@ getGraph nId = do
 --recomputeGraph :: UserId -> NodeId -> Maybe GraphMetric -> GargNoServer Graph
 recomputeGraph :: HasNodeStory env err m
                => NodeId
-               -> PartitionMethod
                -> BridgenessMethod
                -> Maybe GraphMetric
                -> Maybe Strength
@@ -99,7 +97,7 @@ recomputeGraph :: HasNodeStory env err m
                -> NgramsType
                -> Bool
                -> m Graph
-recomputeGraph nId partitionMethod bridgeMethod maybeSimilarity maybeStrength nt1 nt2 force' = do
+recomputeGraph nId bridgeMethod maybeSimilarity maybeStrength nt1 nt2 force' = do
   nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
   let
     graph = nodeGraph ^. node_hyperdata . hyperdataGraph
@@ -127,7 +125,7 @@ recomputeGraph nId partitionMethod bridgeMethod maybeSimilarity maybeStrength nt
   let v = repo ^. unNodeStory . at listId . _Just . a_version
   let computeG mt = do
-        !g <- computeGraph cId partitionMethod bridgeMethod similarity strength (nt1,nt2) repo
+        !g <- computeGraph cId bridgeMethod similarity strength (nt1,nt2) repo
         let mt' = set gm_legend (generateLegend g) mt
         let g' = set graph_metadata (Just mt') g
        _nentries <- updateHyperdata nId (HyperdataGraph (Just g') camera)
@@ -154,14 +152,13 @@ recomputeGraph nId partitionMethod bridgeMethod maybeSimilarity maybeStrength nt
 -- TODO remove repo
 computeGraph :: HasNodeError err
              => CorpusId
-             -> PartitionMethod
             -> BridgenessMethod
             -> Similarity
             -> Strength
             -> (NgramsType, NgramsType)
             -> NodeListStory
             -> DBCmd err Graph
-computeGraph corpusId partitionMethod bridgeMethod similarity strength (nt1,nt2) repo = do
+computeGraph corpusId bridgeMethod similarity strength (nt1,nt2) repo = do
   -- Getting the Node parameters
   lId <- defaultList corpusId
   lIds <- selectNodesWithUsername NodeList userMaster
@@ -190,7 +187,7 @@ computeGraph corpusId partitionMethod bridgeMethod similarity strength (nt1,nt2)
       -- TODO MultiPartite Here
       liftBase
-        $ cooc2graphWith partitionMethod bridgeMethod (MultiPartite (Partite (HashMap.keysSet m1) nt1)
+        $ cooc2graphWith bridgeMethod (MultiPartite (Partite (HashMap.keysSet m1) nt1)
                                                       (Partite (HashMap.keysSet m2) nt2)
                                        )
                          similarity 0 strength myCooc
@@ -239,7 +236,7 @@ graphRecompute :: (HasNodeStory env err m, MonadJobStatus m)
                -> m ()
 graphRecompute n jobHandle = do
   markStarted 1 jobHandle
-  _g <- recomputeGraph n Spinglass BridgenessMethod_Basic Nothing Nothing NgramsTerms NgramsTerms False
+  _g <- recomputeGraph n BridgenessBasic Nothing Nothing NgramsTerms NgramsTerms False
   markComplete jobHandle
 
 graphVersions :: (HasNodeStory env err m)
@@ -274,7 +271,7 @@ graphVersions u nId = do
 recomputeVersions :: HasNodeStory env err m
                   => NodeId
                   -> m Graph
-recomputeVersions nId = recomputeGraph nId Spinglass BridgenessMethod_Basic Nothing Nothing NgramsTerms NgramsTerms False
+recomputeVersions nId = recomputeGraph nId BridgenessBasic Nothing Nothing NgramsTerms NgramsTerms False
 ------------------------------------------------------------
 graphClone :: (HasNodeError err)
......
@@ -20,8 +20,6 @@ TODO use Map LouvainNodeId (Map LouvainNodeId)
 {-# OPTIONS_GHC -fno-warn-deprecations #-}
-{-# LANGUAGE BangPatterns #-}
-
 module Gargantext.Core.Viz.Graph.Bridgeness -- (bridgeness)
   where
@@ -31,8 +29,8 @@ import Data.Map.Strict (fromListWith, lookup, toList, mapWithKey, elems)
 import Data.Map.Strict qualified as Map
 import Data.Set qualified as Set
 import Data.Tuple.Extra qualified as Tuple
-import Gargantext.Core.Methods.Similarities (Similarity(..))
-import Gargantext.Prelude hiding (toList)
+import Gargantext.Core.Viz.Graph.Types (BridgenessMethod(..))
+import Gargantext.Prelude hiding (toList, filter)
 import Graph.Types (ClusterNode(..))
 
 ----------------------------------------------------------------------
@@ -66,51 +64,21 @@ clusterNodes2sets = Dico.elems
                   . Dico.fromListWith (<>)
                   . (map ((Tuple.second Set.singleton) . swap . nodeId2comId))
 
-----------------------------------------------------------------------
-data Bridgeness = Bridgeness_Basic { bridgeness_partitions :: [ClusterNode]
-                                   , bridgeness_filter     :: Double
-                                   }
-                | Bridgeness_Advanced { bridgeness_similarity :: Similarity
-                                      , bridgness_confluence  :: Confluence
-                                      }
-                | Bridgeness_Recursive { br_partitions :: [[Set NodeId]]
-                                       , br_filter     :: Double
-                                       , br_similarity :: Similarity
-                                       }
-
-type Confluence = Map (NodeId, NodeId) Double
-
--- Filter Links between the Clusters
--- Links: Map (NodeId, NodeId) Double
--- List of Clusters: [Set NodeId]
-bridgeness :: Bridgeness
-           -> Map (NodeId, NodeId) Double
-           -> Map (NodeId, NodeId) Double
-bridgeness (Bridgeness_Recursive sn f sim) m =
-  Map.unions $ [linksBetween] <> map (\s -> bridgeness (Bridgeness_Basic (setNodes2clusterNodes s) (if sim == Conditional then pi*f else f)) m') sn
-  where
-    (linksBetween, m') = Map.partitionWithKey (\(n1,n2) _v -> Map.lookup n1 mapNodeIdClusterId
-                                                           /= Map.lookup n2 mapNodeIdClusterId
-                                              ) $ bridgeness (Bridgeness_Basic clusters f) m
-    clusters = setNodes2clusterNodes (map Set.unions sn)
-    mapNodeIdClusterId = clusterNodes2map clusters
-
-bridgeness (Bridgeness_Advanced sim c) m = Map.fromList
-  $ List.filter (\x -> if sim == Conditional then snd x > 0.2 else snd x > 0.02)
-  $ map (\(ks, (v1,_v2)) -> (ks,v1))
-  $ Map.toList
-  $ Map.intersectionWithKey
-      (\k v1 v2 -> trace ("intersectionWithKey " <> (show (k, v1, v2)) :: Text) (v1, v2)) m c
-
-bridgeness (Bridgeness_Basic ns b) m = Map.fromList
-  $ List.concat
-  $ Map.elems
-  $ filterComs (round b)
-  $ groupEdges (Map.fromList $ map nodeId2comId ns) m
+-- | Filter the edges of a graph based on the computed clustering
+bridgeness :: [ClusterNode]               -- ^ Clustering
+           -> BridgenessMethod            -- ^ basic/advanced flag
+           -> Double                      -- ^ Bridgeness threshold
+           -> Map (NodeId, NodeId) Double -- ^ Input graph
+           -> Map (NodeId, NodeId) Double -- ^ Output graph
+bridgeness partitions method filterThreshold graph =
+  Map.fromList $
+  List.concat $
+  Map.elems $
+  (case method of
+     BridgenessBasic    -> filterComs (round filterThreshold)
+     BridgenessAdvanced -> filterComsAdvanced
+  ) $
+  groupEdges (Map.fromList $ map nodeId2comId partitions) graph
 
 groupEdges :: (Ord comId, Ord nodeId)
            => Map nodeId comId
@@ -130,7 +98,7 @@ filterComs :: (Ord n1, Eq n2)
            => Int
            -> Map (n2, n2) [(a3, n1)]
            -> Map (n2, n2) [(a3, n1)]
-filterComs b m = Map.filter (\n -> length n > 0) $ mapWithKey filter' m
+filterComs b m = Map.filter (not . null) $ mapWithKey filter' m
   where
     filter' (c1,c2) a
       | c1 == c2 = a
@@ -143,40 +111,14 @@ filterComs b m = Map.filter (not . null) $ mapWithKey filter' m
         t :: Double
         t = fromIntegral $ length $ List.concat $ elems m
 
---------------------------------------------------------------
--- Utils
-{--
-map2intMap :: Map (Int, Int) a -> IntMap (IntMap a)
-map2intMap m = IntMap.fromListWith (<>)
-             $ map (\((k1,k2), v) -> if k1 < k2
-                                       then (k1, IntMap.singleton k2 v)
-                                       else (k2, IntMap.singleton k1 v)
-                   )
-             $ Map.toList m
-
-look :: (Int,Int) -> IntMap (IntMap a) -> Maybe a
-look (k1,k2) m = if k1 < k2
-                   then case (IntMap.lookup k1 m) of
-                          Just m' -> IntMap.lookup k2 m'
-                          _       -> Nothing
-                   else look (k2,k1) m
-
-{-
-Compute the median of a list
-From: https://hackage.haskell.org/package/dsp-0.2.5.1/docs/src/Numeric.Statistics.Median.html
-Compute the center of the list in a more lazy manner
-and thus halves memory requirement.
--}
-median :: (Ord a, Fractional a) => [a] -> a
-median [] = panic "medianFast: empty list has no median"
-median zs =
-  let recurse (x0:_)    (_:[])   = x0
-      recurse (x0:x1:_) (_:_:[]) = (x0+x1)/2
-      recurse (_:xs)    (_:_:ys) = recurse xs ys
-      recurse _ _ =
-        panic "median: this error cannot occur in the way 'recurse' is called"
-  in recurse zs zs
--}
+-- Weak links are often due to noise in the data and decrease the readability of the graph.
+-- This function prunes the links between the clusters when their weight is under a given 'threshold'.
+filterComsAdvanced :: (Ord a1, Fractional a1, Eq a2)
+                   => Map (a2, a2) [(a3, a1)]
+                   -> Map (a2, a2) [(a3, a1)]
+filterComsAdvanced m = Map.filter (not . null) $ mapWithKey filter' m
  where
    threshold = 0.03 -- TODO make this threshold configurable
    filter' (c1,c2) xs
      | c1 == c2  = xs
      | otherwise = List.filter (\(_nn,v) -> v >= threshold) xs
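The three-constructor `Bridgeness` type (basic / advanced / recursive) is gone: `bridgeness` now takes the clustering, a `BridgenessMethod`, and a threshold, and dispatches either to `filterComs` or to the new `filterComsAdvanced`, which drops inter-cluster links below a fixed 0.03 weight. A toy, standalone sketch of the two filtering styles on a small edge list; the "basic" branch here is a deliberately simplified stand-in (keep the n strongest links), not the real `filterComs` rule:

```haskell
import Data.List (sortOn)
import Data.Ord (Down(..))

-- One inter-cluster edge: the node pair and its weight.
type WeightedEdge = ((Int, Int), Double)

-- Simplified stand-in for the "basic" branch: keep only the n strongest links.
filterBasic :: Int -> [WeightedEdge] -> [WeightedEdge]
filterBasic n = take n . sortOn (Down . snd)

-- Mirrors filterComsAdvanced: drop links whose weight is under the threshold.
filterAdvanced :: Double -> [WeightedEdge] -> [WeightedEdge]
filterAdvanced threshold = filter ((>= threshold) . snd)

main :: IO ()
main = do
  let links = [((1,4), 0.10), ((2,5), 0.02), ((3,6), 0.05)]
  print $ filterBasic 2       links   -- keeps the 0.10 and 0.05 links
  print $ filterAdvanced 0.03 links   -- drops the 0.02 link
```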
@@ -23,18 +23,16 @@ import Data.HashSet qualified as HashSet
 import Data.List qualified as List
 import Data.Map.Strict qualified as Map
 import Data.Set qualified as Set
-import Data.Swagger ( ToSchema )
 import Data.Text qualified as Text
 import Data.Vector.Storable qualified as Vec
 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
 import Gargantext.Core.Methods.Similarities (Similarity(..), measure)
 import Gargantext.Core.Statistics ( pcaReduceTo, Dimension(Dimension) )
 import Gargantext.Core.Text.Ngrams (NgramsType(..))
-import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Bridgeness(..), Partitions, nodeId2comId, setNodes2clusterNodes)
+import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, nodeId2comId, setNodes2clusterNodes)
 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass, spinglass')
-import Gargantext.Core.Viz.Graph.Tools.Infomap (infomap)
-import Gargantext.Core.Viz.Graph.Types (Attributes(..), Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..), LegendField(..))
+import Gargantext.Core.Viz.Graph.Types (Attributes(..), BridgenessMethod, Edge(..), Graph(..), MultiPartite(..), Node(..), Partite(..), Strength(..), LegendField(..))
 import Gargantext.Core.Viz.Graph.Utils (edgesFilter, nodesFilter)
 import Gargantext.Prelude
 import Graph.BAC.ProxemyOptim qualified as BAC
@@ -42,24 +40,6 @@ import Graph.Types (ClusterNode(..))
 import IGraph qualified as Igraph
 import IGraph.Algorithms.Layout qualified as Layout
 import IGraph.Random ( Gen ) -- (Gen(..))
-import Test.QuickCheck (elements)
-import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
-
-data PartitionMethod = Spinglass | Confluence | Infomap
-  deriving (Generic, Eq, Ord, Enum, Bounded, Show)
-instance FromJSON PartitionMethod
-instance ToJSON PartitionMethod
-instance ToSchema PartitionMethod
-instance Arbitrary PartitionMethod where
-  arbitrary = elements [ minBound .. maxBound ]
-
-data BridgenessMethod = BridgenessMethod_Basic | BridgenessMethod_Advanced
-  deriving (Generic, Eq, Ord, Enum, Bounded, Show)
-instance FromJSON BridgenessMethod
-instance ToJSON BridgenessMethod
-instance ToSchema BridgenessMethod
-instance Arbitrary BridgenessMethod where
-  arbitrary = elements [ minBound .. maxBound ]
 
 -------------------------------------------------------------
@@ -90,33 +70,18 @@ cooc2graph' distance threshold myCooc
 -- coocurrences graph computation
-cooc2graphWith :: PartitionMethod
-               -> BridgenessMethod
+cooc2graphWith :: BridgenessMethod
                -> MultiPartite
               -> Similarity
               -> Threshold
               -> Strength
               -> HashMap (NgramsTerm, NgramsTerm) Int
               -> IO Graph
-cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
-cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
-cooc2graphWith Infomap = cooc2graphWith' (infomap "-v -N2")
---cooc2graphWith Infomap = cooc2graphWith' (infomap "--silent --two-level -N2")
--- TODO: change these options, or make them configurable in UI?
-
-cooc2graphWith' :: Partitions
-                -> BridgenessMethod
-                -> MultiPartite
-                -> Similarity
-                -> Threshold
-                -> Strength
-                -> HashMap (NgramsTerm, NgramsTerm) Int
-                -> IO Graph
-cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold strength myCooc = do
+cooc2graphWith bridgenessMethod multi similarity threshold strength myCooc = do
   let (distanceMap, diag, ti) = doSimilarityMap similarity threshold strength myCooc
   distanceMap `seq` diag `seq` ti `seq` pure ()
 
-  partitions <- if (Map.size distanceMap > 0)
+  partitions <- if Map.size distanceMap > 0
      then spinglass' 1 distanceMap
     else panic $ Text.unwords [ "I can not compute the graph you request"
                               , "because either the quantity of documents"
@@ -130,13 +95,13 @@ cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold stren
   let
     !confluence' = BAC.computeConfluences 3 (Map.keys distanceMap) True
-    !bridgeness' = bridgeness
-                     (Bridgeness_Basic (partitionsToClusterNodes partitions) 1.0)
+    !bridgeness' = bridgeness (partitionsToClusterNodes partitions)
+                              bridgenessMethod
+                              1.0
                               distanceMap
 
   pure $ data2graph multi ti diag bridgeness' confluence' (setNodes2clusterNodes partitions)
 
 -- | A converter from the partition type returned by `spinglass'`
 -- to the partition type required by `bridgeness`
 partitionsToClusterNodes :: [Set Int] -> [ClusterNode]
@@ -154,7 +119,6 @@ partitionsToClusterNodes setlist =
   -- Turn pairs into `ClusterNode`s
   fmap (\(clusterId, nodeId) -> ClusterNode nodeId clusterId)
 
 type Reverse = Bool
 
 doSimilarityMap :: Similarity
......
@@ -40,6 +40,16 @@ instance ToJSON TypeNode
 instance FromJSON TypeNode
 instance ToSchema TypeNode
 
+data BridgenessMethod = BridgenessBasic | BridgenessAdvanced
+  deriving (Generic, Eq, Ord, Enum, Bounded, Show)
+
+instance FromJSON BridgenessMethod
+instance ToJSON BridgenessMethod
+instance ToSchema BridgenessMethod
+instance Arbitrary BridgenessMethod where
+  arbitrary = elements [ minBound .. maxBound ]
+
 data Attributes = Attributes { clust_default :: Int }
   deriving (Show, Generic)
 $(deriveJSON (unPrefix "") ''Attributes)
......
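`BridgenessMethod` now lives in `Gargantext.Core.Viz.Graph.Types`, with shorter constructor names and generically derived JSON instances. A standalone sketch (aeson only, the swagger and QuickCheck instances omitted) of the wire format those derived instances give, assuming default aeson options:

```haskell
{-# LANGUAGE DeriveGeneric     #-}
{-# LANGUAGE OverloadedStrings #-}
import Data.Aeson (FromJSON, ToJSON, decode, encode)
import GHC.Generics (Generic)

-- Same shape as the type added to Gargantext.Core.Viz.Graph.Types.
data BridgenessMethod = BridgenessBasic | BridgenessAdvanced
  deriving (Generic, Eq, Show)

instance FromJSON BridgenessMethod
instance ToJSON BridgenessMethod

main :: IO ()
main = do
  -- Nullary constructors serialise as plain strings with default options.
  print (encode BridgenessAdvanced)
  print (decode "\"BridgenessBasic\"" :: Maybe BridgenessMethod)
```

So, unless custom options are layered on elsewhere, the API exchanges the bare strings "BridgenessBasic" / "BridgenessAdvanced" for this field.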
@@ -418,6 +418,7 @@ insertMasterDocs ncs c lang hs = do
   -- add documents to the corpus (create node_node link)
   -- this will enable global database monitoring
 
+  -- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
   mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
     <- mapNodeIdNgrams
       <$> documentIdWithNgrams
......
@@ -23,8 +23,8 @@ import Control.Concurrent.STM.TChan
 import Control.Concurrent.STM.TSem (newTSem, signalTSem, TSem)
 import Control.Concurrent (threadDelay)
 import Control.Lens ((^.))
-import Control.Monad.STM (atomically)
 import Control.Monad (void)
+import Control.Monad.STM (atomically)
 import Data.Aeson qualified as Aeson
 import Data.ByteString qualified as BS
 import Data.Text.Encoding qualified as TE
@@ -47,12 +47,14 @@ import Test.Database.Types (test_config)
 import Test.Hspec
 import Test.Hspec.Wai.Internal (withApplication)
 import Test.Instances ()
+import Test.Utils (waitForTChanValue, waitForTSem)
 import Test.Utils.Notifications (withAsyncWSConnection)
-import Test.Utils (protected, waitForTChanValue, waitForTSem, withValidLoginA)
+import Test.Utils (protected, withValidLoginA)
 import Text.RawString.QQ (r)
 
 tests :: Spec
 tests = sequential $ around withTestDBAndPort $ do
   describe "Notifications" $ do
......
@@ -213,6 +213,11 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
                              , (NgramsTerm "brazorf", NgramsRepoElement 1 StopTerm Nothing Nothing (MSet mempty))
                              ])])
 
+    it "parses TSV with UTF-8 issues" $ \(SpecContext _testEnv _port _app _) -> do
+      simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/433-utf-encoding-issue.tsv")
+      -- we don't care about the output, only that the file was parsed without errors (this file is garbage)
+      ngramsListFromTSVData simpleNgrams `shouldSatisfy` isRight
+
     it "allows uploading a CSV ngrams file" $ \(SpecContext testEnv port app _) -> do
       cId <- newCorpusForUser testEnv "alice"
       let log_cfg = (test_config testEnv) ^. gc_logging
......
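The new regression test only asserts that the garbled fixture decodes to a `Right` value. A stripped-down, standalone version of that assertion style, using only hspec and `Data.Either.isRight` with a dummy parser result in place of `ngramsListFromTSVData`:

```haskell
import Data.Either (isRight)
import Test.Hspec

main :: IO ()
main = hspec $
  describe "ngramsListFromTSVData (stand-in)" $
    it "only checks that parsing succeeded" $
      -- Any Right value passes, regardless of its content.
      (Right ["graph theory"] :: Either String [String]) `shouldSatisfy` isRight
```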