Commit e7c244d8 authored by qlobbe's avatar qlobbe

maybe fix the phylo issue

parent e93416f8
...@@ -32,7 +32,7 @@ import Gargantext.Core.Types.Main (ListType(..)) ...@@ -32,7 +32,7 @@ import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph) import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Core.Viz.Graph.Tools (PartitionMethod(..), BridgenessMethod(..)) import Gargantext.Core.Viz.Graph.Tools (PartitionMethod(..), BridgenessMethod(..))
import Gargantext.Core.Viz.Graph.Types (Strength) import Gargantext.Core.Viz.Graph.Types (Strength)
import Gargantext.Core.Viz.Phylo (PhyloSubConfig(..), subConfig2config) import Gargantext.Core.Viz.Phylo (PhyloSubConfigAPI(..), subConfigAPI2config)
import Gargantext.Core.Viz.Phylo.API.Tools (flowPhyloAPI) import Gargantext.Core.Viz.Phylo.API.Tools (flowPhyloAPI)
import Gargantext.Database.Action.Flow.Pairing (pairing) import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
...@@ -76,7 +76,7 @@ data UpdateNodeParams = UpdateNodeParamsList { methodList :: !Method } ...@@ -76,7 +76,7 @@ data UpdateNodeParams = UpdateNodeParamsList { methodList :: !Method }
| LinkNodeReq { nodeType :: !NodeType | LinkNodeReq { nodeType :: !NodeType
, id :: !NodeId } , id :: !NodeId }
| UpdateNodePhylo { config :: !PhyloSubConfig } | UpdateNodePhylo { config :: !PhyloSubConfigAPI }
deriving (Generic) deriving (Generic)
---------------------------------------------------------------------- ----------------------------------------------------------------------
...@@ -209,7 +209,7 @@ updateNode _userId phyloId (UpdateNodePhylo config) logStatus = do ...@@ -209,7 +209,7 @@ updateNode _userId phyloId (UpdateNodePhylo config) logStatus = do
let corpusId = fromMaybe (panic "") corpusId' let corpusId = fromMaybe (panic "") corpusId'
phy <- flowPhyloAPI (subConfig2config config) corpusId phy <- flowPhyloAPI (subConfigAPI2config config) corpusId
logStatus JobLog { _scst_succeeded = Just 2 logStatus JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0 , _scst_failed = Just 0
......
...@@ -41,9 +41,9 @@ import Gargantext.Core.Utils.Prefix (unPrefixSwagger) ...@@ -41,9 +41,9 @@ import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Text.Lazy as TextLazy import qualified Data.Text.Lazy as TextLazy
---------------- ---------------------
-- | PhyloConfig | -- -- | PhyloConfig | --
---------------- ---------------------
data CorpusParser = data CorpusParser =
Wos {_wos_limit :: Int} Wos {_wos_limit :: Int}
...@@ -193,45 +193,51 @@ data PhyloConfig = ...@@ -193,45 +193,51 @@ data PhyloConfig =
} deriving (Show,Generic,Eq) } deriving (Show,Generic,Eq)
------------------------------------------------------------------------ --------------------------------
data PhyloSubConfig = -- | SubConfig API & 1Click | --
PhyloSubConfig { _sc_phyloProximity :: Double --------------------------------
, _sc_phyloSynchrony :: Double
, _sc_phyloQuality :: Double
, _sc_timeUnit :: TimeUnit
, _sc_clique :: Cluster
, _sc_exportFilter :: Double
}
deriving (Show,Generic,Eq)
data PhyloSubConfigAPI =
PhyloSubConfigAPI { _sc_phyloProximity :: Double
, _sc_phyloSynchrony :: Double
, _sc_phyloQuality :: Double
, _sc_timeUnit :: TimeUnit
, _sc_clique :: Cluster
, _sc_exportFilter :: Double
} deriving (Show,Generic,Eq)
subConfig2config :: PhyloSubConfig -> PhyloConfig
subConfig2config subConfig = defaultConfig { similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 1 subConfigAPI2config :: PhyloSubConfigAPI -> PhyloConfig
subConfigAPI2config subConfig = defaultConfig
{ similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 2
, phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = Quality (_sc_phyloQuality subConfig) 1 , phyloQuality = Quality (_sc_phyloQuality subConfig) 3
, timeUnit = _sc_timeUnit subConfig , timeUnit = _sc_timeUnit subConfig
, clique = _sc_clique subConfig , clique = _sc_clique subConfig
, exportFilter = [ByBranchSize $ _sc_exportFilter subConfig] , exportFilter = [ByBranchSize $ _sc_exportFilter subConfig]
} }
------------------------------------------------------------------------ --------------------------
-- | SubConfig 1Click | --
--------------------------
defaultConfig :: PhyloConfig defaultConfig :: PhyloConfig
defaultConfig = defaultConfig =
PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
, listPath = "list.csv" -- useful for commandline only , listPath = "list.csv" -- useful for commandline only
, outputPath = "data/" , outputPath = "data/"
, corpusParser = Csv 100000 , corpusParser = Csv 150000
, listParser = V4 , listParser = V4
, phyloName = pack "Phylo Name" , phyloName = pack "Phylo Name"
, phyloScale = 2 , phyloScale = 2
, similarity = WeightedLogJaccard 0.5 1 , similarity = WeightedLogJaccard 0.5 2
, seaElevation = Constante 0.1 0.1 , seaElevation = Constante 0.1 0.1
, defaultMode = True , defaultMode = False
, findAncestors = False , findAncestors = True
, phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups , phyloSynchrony = ByProximityThreshold 0.6 0 AllBranches MergeAllGroups
, phyloQuality = Quality 0.5 1 , phyloQuality = Quality 0.5 3
, timeUnit = Year 3 1 5 , timeUnit = Year 3 1 5
, clique = MaxClique 5 0.0001 ByThreshold , clique = Fis 2 3
, exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy Desc , exportSort = ByHierarchy Desc
, exportFilter = [ByBranchSize 3] , exportFilter = [ByBranchSize 3]
...@@ -239,13 +245,13 @@ defaultConfig = ...@@ -239,13 +245,13 @@ defaultConfig =
-- Main Instances -- Main Instances
instance ToSchema PhyloConfig instance ToSchema PhyloConfig
instance ToSchema PhyloSubConfig instance ToSchema PhyloSubConfigAPI
instance FromJSON PhyloConfig instance FromJSON PhyloConfig
instance ToJSON PhyloConfig instance ToJSON PhyloConfig
instance FromJSON PhyloSubConfig instance FromJSON PhyloSubConfigAPI
instance ToJSON PhyloSubConfig instance ToJSON PhyloSubConfigAPI
instance FromJSON CorpusParser instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
...@@ -604,7 +610,7 @@ instance ToSchema PhyloExport where ...@@ -604,7 +610,7 @@ instance ToSchema PhyloExport where
---------------- ----------------
makeLenses ''PhyloConfig makeLenses ''PhyloConfig
makeLenses ''PhyloSubConfig makeLenses ''PhyloSubConfigAPI
makeLenses ''PhyloSimilarity makeLenses ''PhyloSimilarity
makeLenses ''SeaElevation makeLenses ''SeaElevation
makeLenses ''Quality makeLenses ''Quality
......
...@@ -716,7 +716,7 @@ tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]] ...@@ -716,7 +716,7 @@ tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
tracePhyloInfo :: Phylo -> Phylo tracePhyloInfo :: Phylo -> Phylo
tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with λ = " tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
<> show(getLevel phylo) <> " applied to " <> show(getLevel phylo) <> " applied to "
<> show(length $ Vector.toList $ getRoots phylo) <> " foundations" <> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
) phylo ) phylo
......
...@@ -193,7 +193,7 @@ findSeaLadder phylo = case getSeaElevation phylo of ...@@ -193,7 +193,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
) [] $ keys $ phylo ^. phylo_periods ) [] $ keys $ phylo ^. phylo_periods
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to Level " <> show (lvl) <> "\n") appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ concat $ elems m) <> " groups to scale " <> show (lvl) <> "\n")
$ over ( phylo_periods $ over ( phylo_periods
. traverse . traverse
. phylo_periodScales . phylo_periodScales
...@@ -489,19 +489,29 @@ initPhyloScales lvlMax pId = ...@@ -489,19 +489,29 @@ initPhyloScales lvlMax pId =
fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax] fromList $ map (\lvl -> ((pId,lvl),PhyloScale pId ("","") lvl empty)) [1..lvlMax]
setDefault :: PhyloConfig -> TimeUnit -> PhyloConfig
setDefault conf timeScale = conf { setDefault :: PhyloConfig -> TimeUnit -> Int -> PhyloConfig
phyloScale = 2, setDefault conf timeScale nbDocs = defaultConfig
similarity = WeightedLogJaccard 0.5 2, { corpusPath = (corpusPath conf)
findAncestors = True, , listPath = (listPath conf)
phyloSynchrony = ByProximityThreshold 0.6 0 SiblingBranches MergeAllGroups, , outputPath = (outputPath conf)
phyloQuality = Quality 0.5 3, , corpusParser = (corpusParser conf)
timeUnit = timeScale, , listParser = (listParser conf)
clique = Fis 3 5, , phyloName = (phyloName conf)
exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2], , defaultMode = True
exportSort = ByHierarchy Desc, , timeUnit = timeScale
exportFilter = [ByBranchSize 3] , clique = Fis (toSupport nbDocs) 3}
} where
--------------------------------------
toSupport :: Int -> Support
toSupport n
| n < 500 = 1
| n < 1000 = 2
| n < 2000 = 3
| n < 3000 = 4
| n < 5000 = 5
| otherwise = 6
--------------------------------------
-- Init the basic elements of a Phylo -- Init the basic elements of a Phylo
...@@ -519,7 +529,7 @@ initPhylo docs conf = ...@@ -519,7 +529,7 @@ initPhylo docs conf =
(docsToTermFreq docs (foundations ^. foundations_roots)) (docsToTermFreq docs (foundations ^. foundations_roots))
(docsToLastTermFreq (getTimePeriod timeScale) docs (foundations ^. foundations_roots)) (docsToLastTermFreq (getTimePeriod timeScale) docs (foundations ^. foundations_roots))
params = if (defaultMode conf) params = if (defaultMode conf)
then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale } then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) }
else defaultPhyloParam { _phyloParam_config = conf } else defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale) periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale)
in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n") in trace ("\n" <> "-- | Init a phylo out of " <> show(length docs) <> " docs \n")
......
...@@ -633,7 +633,7 @@ updateLevel level phylo = phylo { _phylo_level = level } ...@@ -633,7 +633,7 @@ updateLevel level phylo = phylo { _phylo_level = level }
traceToPhylo :: Scale -> Phylo -> Phylo traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo = traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at level " <> show (lvl) <> " with " trace ("\n" <> "-- | End of phylo making at scale " <> show (lvl) <> " with "
<> show (length $ getGroupsFromScale lvl phylo) <> " groups and " <> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo) <> " branches" <> "\n") phylo
...@@ -697,14 +697,14 @@ toRelatedComponents nodes edges = ...@@ -697,14 +697,14 @@ toRelatedComponents nodes edges =
traceSynchronyEnd :: Phylo -> Phylo traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo = traceSynchronyEnd phylo =
trace ( "-- | End synchronic clustering at level " <> show (getLastLevel phylo) trace ( "-- | End synchronic clustering at scale " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
traceSynchronyStart :: Phylo -> Phylo traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo = traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start synchronic clustering at level " <> show (getLastLevel phylo) trace ( "\n" <> "-- | Start synchronic clustering at scale " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups" <> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches" <> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo) <> " branches"
<> "\n" ) phylo <> "\n" ) phylo
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment