Commit 6bfa8794 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] Phylo

parent 7ce4aac8
Pipeline #4265 failed with stages
in 42 minutes and 37 seconds
...@@ -5,7 +5,7 @@ cabal-version: 1.12 ...@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.6.9.9.6.9 version: 0.0.6.9.9.6.9
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
......
...@@ -32,7 +32,7 @@ import Gargantext.Core.Types.Main (ListType(..)) ...@@ -32,7 +32,7 @@ import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph) import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Core.Viz.Graph.Tools (PartitionMethod(..), BridgenessMethod(..)) import Gargantext.Core.Viz.Graph.Tools (PartitionMethod(..), BridgenessMethod(..))
import Gargantext.Core.Viz.Graph.Types (Strength) import Gargantext.Core.Viz.Graph.Types (Strength)
import Gargantext.Core.Viz.Phylo (PhyloSubConfig(..), subConfig2config) import Gargantext.Core.Viz.Phylo (PhyloSubConfigAPI(..), subConfigAPI2config)
import Gargantext.Core.Viz.Phylo.API.Tools (flowPhyloAPI) import Gargantext.Core.Viz.Phylo.API.Tools (flowPhyloAPI)
import Gargantext.Database.Action.Flow.Pairing (pairing) import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
...@@ -43,7 +43,7 @@ import Gargantext.Database.Query.Table.Node (defaultList, getNode) ...@@ -43,7 +43,7 @@ import Gargantext.Database.Query.Table.Node (defaultList, getNode)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata) import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms)) import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Database.Schema.Node (node_parent_id) import Gargantext.Database.Schema.Node (node_parent_id)
import Gargantext.Prelude (Bool(..), Ord, Eq, (<$>), ($), printDebug, pure, show, cs, (<>), panic, (<*>)) import Gargantext.Prelude (Bool(..), Ord, Eq, (<$>), ($), {-printDebug,-} pure, show, cs, (<>), panic, (<*>))
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..)) import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Prelude (Enum, Bounded, minBound, maxBound) import Prelude (Enum, Bounded, minBound, maxBound)
import Servant import Servant
...@@ -76,7 +76,7 @@ data UpdateNodeParams = UpdateNodeParamsList { methodList :: !Method } ...@@ -76,7 +76,7 @@ data UpdateNodeParams = UpdateNodeParamsList { methodList :: !Method }
| LinkNodeReq { nodeType :: !NodeType | LinkNodeReq { nodeType :: !NodeType
, id :: !NodeId } , id :: !NodeId }
| UpdateNodePhylo { config :: !PhyloSubConfig } | UpdateNodePhylo { config :: !PhyloSubConfigAPI }
deriving (Generic) deriving (Generic)
---------------------------------------------------------------------- ----------------------------------------------------------------------
...@@ -156,11 +156,18 @@ updateNode _uId lId (UpdateNodeParamsList _mode) jobHandle = do ...@@ -156,11 +156,18 @@ updateNode _uId lId (UpdateNodeParamsList _mode) jobHandle = do
updateNode _userId phyloId (UpdateNodePhylo config) jobHandle = do updateNode _userId phyloId (UpdateNodePhylo config) jobHandle = do
markStarted 3 jobHandle markStarted 3 jobHandle
corpusId' <- view node_parent_id <$> getNode phyloId corpusId' <- view node_parent_id <$> getNode phyloId
let corpusId = fromMaybe (panic "UpdateNodePhylo: no corpusId") corpusId'
let config' = subConfig2config config let corpusId = fromMaybe (panic "") corpusId'
printDebug "UpdateNodePhylo" config'
phy <- flowPhyloAPI config' corpusId phy <- flowPhyloAPI (subConfigAPI2config config) corpusId
markProgress 1 jobHandle
{-
logStatus JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
-}
_ <- updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy)) _ <- updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy))
markComplete jobHandle markComplete jobHandle
......
...@@ -41,9 +41,9 @@ import Gargantext.Core.Utils.Prefix (unPrefixSwagger) ...@@ -41,9 +41,9 @@ import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Text.Lazy as TextLazy import qualified Data.Text.Lazy as TextLazy
---------------- ---------------------
-- | PhyloConfig | -- -- | PhyloConfig | --
---------------- ---------------------
data CorpusParser = data CorpusParser =
Wos {_wos_limit :: Int} Wos {_wos_limit :: Int}
...@@ -66,7 +66,7 @@ data SeaElevation = ...@@ -66,7 +66,7 @@ data SeaElevation =
| Adaptative | Adaptative
{ _adap_steps :: Double } { _adap_steps :: Double }
| Evolving | Evolving
{ _evol_neighborhood :: Bool } { _evol_neighborhood :: Bool }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
instance ToSchema SeaElevation instance ToSchema SeaElevation
...@@ -78,8 +78,8 @@ data PhyloSimilarity = ...@@ -78,8 +78,8 @@ data PhyloSimilarity =
| WeightedLogSim | WeightedLogSim
{ _wls_sensibility :: Double { _wls_sensibility :: Double
, _wls_minSharedNgrams :: Int } , _wls_minSharedNgrams :: Int }
| Hamming | Hamming
{ _hmg_sensibility :: Double { _hmg_sensibility :: Double
, _hmg_minSharedNgrams :: Int} , _hmg_minSharedNgrams :: Int}
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
...@@ -193,47 +193,51 @@ data PhyloConfig = ...@@ -193,47 +193,51 @@ data PhyloConfig =
} deriving (Show,Generic,Eq) } deriving (Show,Generic,Eq)
------------------------------------------------------------------------ --------------------------------
data PhyloSubConfig = -- | SubConfig API & 1Click | --
PhyloSubConfig { _sc_phyloProximity :: Double --------------------------------
, _sc_phyloSynchrony :: Double
, _sc_phyloQuality :: Double
, _sc_timeUnit :: TimeUnit
, _sc_clique :: Cluster
, _sc_exportFilter :: Double
, _sc_defaultMode :: Bool
}
deriving (Show,Generic,Eq)
data PhyloSubConfigAPI =
PhyloSubConfigAPI { _sc_phyloProximity :: Double
, _sc_phyloSynchrony :: Double
, _sc_phyloQuality :: Double
, _sc_timeUnit :: TimeUnit
, _sc_clique :: Cluster
, _sc_exportFilter :: Double
} deriving (Show,Generic,Eq)
subConfig2config :: PhyloSubConfig -> PhyloConfig
subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1 subConfigAPI2config :: PhyloSubConfigAPI -> PhyloConfig
subConfigAPI2config subConfig = defaultConfig
{ similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 2
, phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = Quality (_sc_phyloQuality subConfig) 1 , phyloQuality = Quality (_sc_phyloQuality subConfig) 3
, timeUnit = _sc_timeUnit subConfig , timeUnit = _sc_timeUnit subConfig
, clique = _sc_clique subConfig , clique = _sc_clique subConfig
, defaultMode = _sc_defaultMode subConfig
, exportFilter = [ByBranchSize $ _sc_exportFilter subConfig] , exportFilter = [ByBranchSize $ _sc_exportFilter subConfig]
} }
------------------------------------------------------------------------ --------------------------
-- | SubConfig 1Click | --
--------------------------
defaultConfig :: PhyloConfig defaultConfig :: PhyloConfig
defaultConfig = defaultConfig =
PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
, listPath = "list.csv" -- useful for commandline only , listPath = "list.csv" -- useful for commandline only
, outputPath = "data/" , outputPath = "data/"
, corpusParser = Csv 100000 , corpusParser = Csv 150000
, listParser = V4 , listParser = V4
, phyloName = pack "Phylo Name" , phyloName = pack "Phylo Name"
, phyloScale = 2 , phyloScale = 2
, similarity = WeightedLogJaccard 0.5 1 , similarity = WeightedLogJaccard 0.5 2
, seaElevation = Constante 0.1 0.1 , seaElevation = Constante 0.1 0.1
, defaultMode = False , defaultMode = False
, findAncestors = False , findAncestors = True
, phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold 0.6 0 AllBranches MergeAllGroups
, phyloQuality = Quality 0.3 1 , phyloQuality = Quality 0.5 3
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, clique = Fis 3 1 -- MaxClique 5 0.0001 ByThreshold , clique = Fis 2 3
, exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy Desc , exportSort = ByHierarchy Desc
, exportFilter = [ByBranchSize 3] , exportFilter = [ByBranchSize 3]
...@@ -241,13 +245,13 @@ defaultConfig = ...@@ -241,13 +245,13 @@ defaultConfig =
-- Main Instances -- Main Instances
instance ToSchema PhyloConfig instance ToSchema PhyloConfig
instance ToSchema PhyloSubConfig instance ToSchema PhyloSubConfigAPI
instance FromJSON PhyloConfig instance FromJSON PhyloConfig
instance ToJSON PhyloConfig instance ToJSON PhyloConfig
instance FromJSON PhyloSubConfig instance FromJSON PhyloSubConfigAPI
instance ToJSON PhyloSubConfig instance ToJSON PhyloSubConfigAPI
instance FromJSON CorpusParser instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
...@@ -433,6 +437,7 @@ data Phylo = ...@@ -433,6 +437,7 @@ data Phylo =
instance ToSchema Phylo where instance ToSchema Phylo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
---------------- ----------------
-- | Period | -- -- | Period | --
---------------- ----------------
...@@ -605,7 +610,7 @@ instance ToSchema PhyloExport where ...@@ -605,7 +610,7 @@ instance ToSchema PhyloExport where
---------------- ----------------
makeLenses ''PhyloConfig makeLenses ''PhyloConfig
makeLenses ''PhyloSubConfig makeLenses ''PhyloSubConfigAPI
makeLenses ''PhyloSimilarity makeLenses ''PhyloSimilarity
makeLenses ''SeaElevation makeLenses ''SeaElevation
makeLenses ''Quality makeLenses ''Quality
......
...@@ -740,7 +740,7 @@ tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]] ...@@ -740,7 +740,7 @@ tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
tracePhyloInfo :: Phylo -> Phylo tracePhyloInfo :: Phylo -> Phylo
tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = " tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
<> show(getLevel phylo) <> " applied to " <> show(getLevel phylo) <> " applied to "
<> show(length $ Vector.toList $ getRoots phylo) <> " foundations" <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
) phylo ) phylo
......
...@@ -193,7 +193,7 @@ findSeaLadder phylo = case getSeaElevation phylo of ...@@ -193,7 +193,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
) [] $ keys $ phylo ^. phylo_periods ) [] $ keys $ phylo ^. phylo_periods
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to scale " <> show (lvl) <> "\n")
$ over ( phylo_periods $ over ( phylo_periods
. traverse . traverse
. phylo_periodScales . phylo_periodScales
...@@ -488,19 +488,29 @@ initPhyloScales lvlMax pId = ...@@ -488,19 +488,29 @@ initPhyloScales lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax] fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax]
setDefault :: PhyloConfig -> TimeUnit -> PhyloConfig
setDefault conf timeScale = conf { setDefault :: PhyloConfig -> TimeUnit -> Int -> PhyloConfig
phyloScale = 2, setDefault conf timeScale nbDocs = defaultConfig
similarity = WeightedLogJaccard 0.5 2, { corpusPath = (corpusPath conf)
findAncestors = True, , listPath = (listPath conf)
phyloSynchrony = ByProximityThreshold 0.6 0 SiblingBranches MergeAllGroups, , outputPath = (outputPath conf)
phyloQuality = Quality 0.5 3, , corpusParser = (corpusParser conf)
timeUnit = timeScale, , listParser = (listParser conf)
clique = Fis 3 5, , phyloName = (phyloName conf)
exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2], , defaultMode = True
exportSort = ByHierarchy Desc, , timeUnit = timeScale
exportFilter = [ByBranchSize 3] , clique = Fis (toSupport nbDocs) 3}
} where
--------------------------------------
toSupport :: Int -> Support
toSupport n
| n < 500 = 1
| n < 1000 = 2
| n < 2000 = 3
| n < 3000 = 4
| n < 5000 = 5
| otherwise = 6
--------------------------------------
-- Init the basic elements of a Phylo -- Init the basic elements of a Phylo
...@@ -518,7 +528,7 @@ initPhylo docs conf = ...@@ -518,7 +528,7 @@ initPhylo docs conf =
(docsToTermFreq docs (foundations ^. foundations_roots)) (docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod timeScale) docs (foundations ^. foundations_roots)) (docsToLastTermFreq (getTimePeriod timeScale) docs (foundations ^. foundations_roots))
params = if (defaultMode conf) params = if (defaultMode conf)
then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale } then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) }
else defaultPhyloParam { _phyloParam_config = conf } else defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale)
in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n") in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n")
......
...@@ -633,7 +633,7 @@ updateLevel level phylo = phylo { _phylo_level = level } ...@@ -633,7 +633,7 @@ updateLevel level phylo = phylo { _phylo_level = level }
traceToPhylo :: Scale -> Phylo -> Phylo traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo = traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with " trace ("\n" <> "-- | End of phylo making at scale " <> show (lvl) <> " with "
<> show (length $ getGroupsFromScale lvl phylo) <> " groups and " <> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo
...@@ -697,14 +697,14 @@ toRelatedComponents nodes edges = ...@@ -697,14 +697,14 @@ toRelatedComponents nodes edges =
traceSynchronyEnd :: Phylo -> Phylo traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo = traceSynchronyEnd phylo =
trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo) trace ( "-- | End synchronic clustering at scale " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
traceSynchronyStart :: Phylo -> Phylo traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo = traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo) trace ( "\n" <> "-- | Start synchronic clustering at scale " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment