Verified Commit e2745b4a authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 224-dev-uniform-ngrams-creation

parents c9c21779 030f7dad
## Version 0.0.6.9.9.9.4.3
## Version 0.0.6.9.9.9.4.4
* [FRONT][FIX][trim / deblank missing in invitations modal box (#618)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/618)
* [FRONT][FIX][[Breadcrumb] Remove unnecessary informations (#616)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/616)
## Version 0.0.6.9.9.9.4.3
* [BACK][LOGS][A consistent error format for Gargantext (#267)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/267)
* [BACK][ADM] Build Deps / test / cabal
......
This diff is collapsed.
This diff is collapsed.
......@@ -2,18 +2,54 @@
{-# LANGUAGE TypeApplications #-}
module Main where
import Control.DeepSeq
import Gargantext.Core.Types.Individu
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools (readPhylo)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Prelude.Crypto.Auth (createPasswordHash)
import Test.Tasty.Bench
import Paths_gargantext
phyloConfig :: PhyloConfig
phyloConfig = PhyloConfig {
corpusPath = "corpus.csv"
, listPath = "list.csv"
, outputPath = "data/"
, corpusParser = Csv {_csv_limit = 150000}
, listParser = V4
, phyloName = "Phylo Name"
, phyloScale = 2
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = True
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
, clique = MaxClique {_mcl_size = 5, _mcl_threshold = 1.0e-4, _mcl_filter = ByThreshold}
, exportLabel = [ BranchLabel {_branch_labelTagger = MostEmergentTfIdf, _branch_labelSize = 2}
, GroupLabel {_group_labelTagger = MostEmergentInclusive, _group_labelSize = 2}
]
, exportSort = ByHierarchy {_sort_order = Desc}
, exportFilter = [ByBranchSize {_branch_size = 3.0}]
}
main :: IO ()
main = defaultMain
[ bgroup "Benchmarks"
[ bgroup "User creation" [
bench "createPasswordHash" $ whnfIO (createPasswordHash "rabbit")
, bench "toUserHash" $
whnfIO (toUserHash $ NewUser "alfredo" "alfredo@well-typed.com" (GargPassword "rabbit"))
]
main = do
_issue290Phylo <- force . setConfig phyloConfig <$> (readPhylo =<< getDataFileName "bench-data/phylo/issue-290.json")
issue290PhyloSmall <- force . setConfig phyloConfig <$> (readPhylo =<< getDataFileName "bench-data/phylo/issue-290-small.json")
defaultMain
[ bgroup "Benchmarks"
[ bgroup "User creation" [
bench "createPasswordHash" $ whnfIO (createPasswordHash "rabbit")
, bench "toUserHash" $
whnfIO (toUserHash $ NewUser "alfredo" "alfredo@well-typed.com" (GargPassword "rabbit"))
]
, bgroup "Phylo" [
bench "toPhylo (small)" $ nf toPhylo issue290PhyloSmall
]
]
]
]
......@@ -5,7 +5,7 @@ cabal-version: 2.0
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.9.9.9.4.3
version: 0.0.6.9.9.9.4.4
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -17,6 +17,8 @@ license: AGPL-3
license-file: LICENSE
build-type: Simple
data-files:
bench-data/phylo/issue-290.json
bench-data/phylo/issue-290-small.json
devops/postgres/extensions.sql
devops/postgres/schema.sql
ekg-assets/index.html
......@@ -32,6 +34,7 @@ data-files:
test-data/ngrams/simple.csv
test-data/phylo/bpa_phylo_test.json
test-data/phylo/open_science.json
test-data/phylo/issue-290-small.golden.json
test-data/test_config.ini
.clippy.dhall
......@@ -957,6 +960,7 @@ test-suite garg-test-tasty
Test.Ngrams.Query.PaginationCorpus
Test.Offline.Errors
Test.Offline.JSON
Test.Offline.Phylo
Test.Parsers.Date
Test.Parsers.Types
Test.Parsers.WOS
......@@ -1161,11 +1165,14 @@ benchmark garg-bench
main-is: Main.hs
hs-source-dirs: bench
type: exitcode-stdio-1.0
other-modules:
Paths_gargantext
build-depends: base
, bytestring
, deepseq
, gargantext
, gargantext-prelude
, tasty-bench
ghc-options: "-with-rtsopts=-A32m"
ghc-options: "-with-rtsopts=-T -A32m"
if impl(ghc >= 8.6)
ghc-options: "-with-rtsopts=--nonmoving-gc"
......@@ -15,6 +15,15 @@ rec {
];
})
else pkgs.haskell.compiler.ghc8107;
graphviz = pkgs.graphviz.overrideAttrs (finalAttrs: previousAttrs: {
# Increase the YY_BUF_SIZE, see https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/290#note_9015
patches = [
(pkgs.fetchpatch {
url = "https://gist.githubusercontent.com/adinapoli/e93ca7b1d714d27f4af537716b03e3bb/raw/b9cc297c3465878da2d18ee92a3f9b8273923493/graphviz-yy-buf-size.patch";
sha256 = "sha256-8Q3tf37iYaPV50P+Vf/n263ordECiu5eKwONCy3ynV8=";
})
];
});
haskell1 = pkgs.haskell // {
packages = pkgs.haskell.packages // {
ghc8107 = pkgs.haskell.packages.ghc8107.override {
......
......@@ -50,6 +50,8 @@ import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.Utils.UTCTime (timeMeasured)
import Gargantext.System.Logging
------------------------------------------------------------------------
type API = Summary " Update node according to NodeType params"
......@@ -94,7 +96,11 @@ api nId =
serveJobsAPI UpdateNodeJob $ \jHandle p ->
updateNode nId p jHandle
updateNode :: (HasNodeStory env err m, HasSettings env, MonadJobStatus m)
updateNode :: (HasNodeStory env err m
, HasSettings env
, MonadJobStatus m
, MonadLogger m
)
=> NodeId
-> UpdateNodeParams
-> JobHandle m
......@@ -156,7 +162,7 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do
let corpusId = fromMaybe (panicTrace "no corpus id") corpusId'
phy <- flowPhyloAPI (subConfigAPI2config config) corpusId
phy <- timeMeasured "updateNode.flowPhyloAPI" $ flowPhyloAPI (subConfigAPI2config config) corpusId
markProgress 2 jobHandle
{-
......@@ -166,7 +172,7 @@ updateNode phyloId (UpdateNodePhylo config) jobHandle = do
, _scst_events = Just []
}
-}
_ <- updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy))
_ <- timeMeasured "updateNode.updateHyperdataPhylo" $ updateHyperdata phyloId (HyperdataPhylo Nothing (Just phy))
-- TODO: catch the error of sendMail if userId is not found, then debug
-- sendMail (UserDBId userId)
......
......@@ -26,6 +26,7 @@ Phylomemy was first described in Chavalarias, D., Cointet, J.-P., 2013. Phylomem
module Gargantext.Core.Types.Phylo where
import Control.DeepSeq
import Control.Lens (makeLenses)
import Control.Monad.Fail (fail)
import Data.Aeson
......@@ -51,6 +52,8 @@ data Phylo = Phylo { _phylo_Duration :: (Start, End)
, _phylo_Periods :: [PhyloPeriod]
} deriving (Generic)
instance NFData Phylo
-- | UTCTime in seconds since UNIX epoch
type Start = POSIXTime
type End = POSIXTime
......@@ -66,6 +69,8 @@ data PhyloPeriod = PhyloPeriod { _phylo_PeriodId :: PhyloPeriodId
, _phylo_PeriodLevels :: [PhyloLevel]
} deriving (Generic)
instance NFData PhyloPeriod
type PhyloPeriodId = (Start, End)
-- | PhyloLevel : levels of phylomemy on level axis
......@@ -78,6 +83,8 @@ data PhyloLevel = PhyloLevel { _phylo_LevelId :: PhyloLevelId
, _phylo_LevelGroups :: [PhyloGroup]
} deriving (Generic)
instance NFData PhyloLevel
type PhyloLevelId = (PhyloPeriodId, Int)
-- | PhyloGroup : group of ngrams at each level and step
......@@ -88,14 +95,16 @@ type PhyloLevelId = (PhyloPeriodId, Int)
data PhyloGroup = PhyloGroup { _phylo_GroupId :: PhyloGroupId
, _phylo_GroupLabel :: Maybe Text
, _phylo_GroupNgrams :: [NgramId]
, _phylo_GroupPeriodParents :: [Edge]
, _phylo_GroupPeriodChilds :: [Edge]
, _phylo_GroupLevelParents :: [Edge]
, _phylo_GroupLevelChilds :: [Edge]
} deriving (Generic)
instance NFData PhyloGroup
type PhyloGroupId = (PhyloLevelId, Int)
type Edge = (PhyloGroupId, Weight)
type Weight = Double
......
......@@ -26,6 +26,7 @@ one 8, e54847.
module Gargantext.Core.Viz.Phylo where
import Control.DeepSeq
import Control.Lens (makeLenses)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
......@@ -62,7 +63,7 @@ data SeaElevation =
| Adaptative
{ _adap_steps :: Double }
| Evolving
{ _evol_neighborhood :: Bool }
{ _evol_neighborhood :: Bool }
deriving (Show,Generic,Eq)
instance ToSchema SeaElevation
......@@ -74,8 +75,8 @@ data PhyloSimilarity =
| WeightedLogSim
{ _wls_sensibility :: Double
, _wls_minSharedNgrams :: Int }
| Hamming
{ _hmg_sensibility :: Double
| Hamming
{ _hmg_sensibility :: Double
, _hmg_minSharedNgrams :: Int}
deriving (Show,Generic,Eq)
......@@ -188,7 +189,6 @@ data PhyloConfig =
, exportFilter :: [Filter]
} deriving (Show,Generic,Eq)
--------------------------------
-- | SubConfig API & 1Click | --
--------------------------------
......@@ -205,8 +205,8 @@ data PhyloSubConfigAPI =
subConfigAPI2config :: PhyloSubConfigAPI -> PhyloConfig
subConfigAPI2config subConfig = defaultConfig
{ similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 2
subConfigAPI2config subConfig = defaultConfig
{ similarity = WeightedLogJaccard (_sc_phyloProximity subConfig) 2
, phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
, phyloQuality = Quality (_sc_phyloQuality subConfig) 3
, timeUnit = _sc_timeUnit subConfig
......@@ -217,7 +217,7 @@ subConfigAPI2config subConfig = defaultConfig
--------------------------
-- | SubConfig 1Click | --
--------------------------
--------------------------
defaultConfig :: PhyloConfig
defaultConfig =
......@@ -474,7 +474,6 @@ data PhyloScale =
instance ToSchema PhyloScale where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
type PhyloGroupId = (PhyloScaleId, Int)
-- | BranchId : (a scale, a sequence of branch index)
......@@ -552,19 +551,16 @@ data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
instance ToSchema Filter where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
data Order = Asc | Desc deriving (Show,Generic,Eq, ToSchema)
data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy {_sort_order :: Order } deriving (Show,Generic,Eq)
instance ToSchema Sort where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sort_")
data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
instance ToSchema Tagger where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
data PhyloLabel =
BranchLabel
{ _branch_labelTagger :: Tagger
......@@ -654,3 +650,30 @@ instance FromJSON PhyloGroup
instance ToJSON PhyloGroup
$(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)
-- NFData instances
instance NFData CorpusParser
instance NFData ListParser
instance NFData SeaElevation
instance NFData PhyloSimilarity
instance NFData SynchronyScope
instance NFData SynchronyStrategy
instance NFData Synchrony
instance NFData MaxCliqueFilter
instance NFData Cluster
instance NFData Quality
instance NFData PhyloConfig
instance NFData Software
instance NFData PhyloParam
instance NFData PhyloFoundations
instance NFData PhyloCounts
instance NFData PhyloSources
instance NFData Phylo
instance NFData PhyloPeriod
instance NFData PhyloScale
instance NFData Filter
instance NFData Order
instance NFData Sort
instance NFData Tagger
instance NFData PhyloLabel
......@@ -12,12 +12,14 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE BangPatterns #-}
module Gargantext.Core.Viz.Phylo.API.Tools
where
import Control.Lens hiding (Context)
import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
import Data.Aeson (Value, decodeFileStrict, encode, eitherDecodeFileStrict')
import Data.ByteString.Lazy qualified as Lazy
import Data.Map.Strict qualified as Map
import Data.Proxy
......@@ -47,10 +49,12 @@ import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging
import Prelude qualified
import System.FilePath ((</>))
import System.IO.Temp (withTempDirectory)
import System.Process qualified as Shell
import Gargantext.Utils.UTCTime (timeMeasured)
--------------------------------------------------------------------
getPhyloData :: HasNodeError err
......@@ -88,15 +92,20 @@ phylo2dot2json phylo = do
Just v -> pure v
flowPhyloAPI :: (HasNodeStory env err m, HasNodeError err)
flowPhyloAPI :: (HasNodeStory env err m, HasNodeError err, MonadLogger m)
=> PhyloConfig -> CorpusId -> m Phylo
flowPhyloAPI config cId = do
corpus <- corpusIdtoDocuments (timeUnit config) cId
let phyloWithCliques = toPhyloWithoutLink corpus config
let !phyloWithCliques = toPhyloWithoutLink corpus config
-- writePhylo phyloWithCliquesFile phyloWithCliques
printDebug "PhyloConfig old: " config
$(logLocM) DEBUG $ "PhyloConfig old: " <> show config
pure $ toPhylo $ setConfig config phyloWithCliques
_ <- timeMeasured "flowPhyloAPI.phyloWithCliques" (pure $! phyloWithCliques)
let !phyloConfigured = setConfig config phyloWithCliques
_ <- timeMeasured "flowPhyloAPI.phyloConfigured" (pure $! phyloConfigured)
pure $! toPhylo phyloConfigured
--------------------------------------------------------------------
corpusIdtoDocuments :: (HasNodeStory env err m, HasNodeError err)
......@@ -192,13 +201,8 @@ writePhylo path phylo = Lazy.writeFile path $ encode phylo
readPhylo :: [Char] -> IO Phylo
readPhylo path = do
phyloJson <- (eitherDecode <$> readJson path) :: IO (Either Prelude.String Phylo)
case phyloJson of
Left err -> do
putStrLn err
undefined
Right phylo -> pure phylo
phyloJson <- eitherDecodeFileStrict' @Phylo path
either errorTrace pure phyloJson
-- | To read and decode a Json file
readJson :: FilePath -> IO Lazy.ByteString
......
......@@ -11,6 +11,7 @@ Portability : POSIX
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Utils.UTCTime where
......@@ -19,10 +20,14 @@ import Data.Aeson (FromJSON, ToJSON)
import Data.Morpheus.Kind (SCALAR)
import Data.Morpheus.Types (GQLType(..), DecodeScalar(..), EncodeScalar(..))
import Data.Morpheus.Types qualified as DMT
import Data.String (fromString)
import Data.Swagger (ToSchema)
import Data.Text qualified as T
import Data.Time (UTCTime)
import Data.Time.Clock.POSIX (getPOSIXTime)
import Gargantext.Prelude
import Gargantext.System.Logging
import Prelude (String)
newtype NUTCTime = NUTCTime UTCTime
......@@ -39,3 +44,27 @@ instance GQLType NUTCTime where
instance FromJSON NUTCTime
instance ToJSON NUTCTime
instance ToSchema NUTCTime
timeMeasured :: (MonadLogger m, MonadBase IO m, HasCallStack)
=> String
-- ^ A label
-> m a
-- ^ The action to run
-> m a
timeMeasured = withFrozenCallStack $ timeMeasured' DEBUG
timeMeasured' :: (MonadLogger m, MonadBase IO m, HasCallStack)
=> LogLevel
-- ^ The severity of the log
-> String
-- ^ A label to identify the action.
-> m a
-- ^ The action to run
-> m a
timeMeasured' severity label action = withFrozenCallStack $ do
startTime <- liftBase getPOSIXTime
res <- action
endTime <- liftBase getPOSIXTime
let msg = label <> " took " <> (show $ endTime - startTime) <> " seconds to execute."
$(logLocM) severity (fromString msg)
return res
This diff is collapsed.
......@@ -7,6 +7,9 @@
module Test.API.UpdateList (
tests
, newCorpusForUser
, JobPollHandle(..)
, pollUntilFinished
) where
import Data.Aeson qualified as JSON
......
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE QuasiQuotes #-}
module Test.Offline.Phylo (tests) where
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools (readPhylo, writePhylo)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Prelude
import Test.Tasty
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck
import Paths_gargantext
phyloConfig :: PhyloConfig
phyloConfig = PhyloConfig {
corpusPath = "corpus.csv"
, listPath = "list.csv"
, outputPath = "data/"
, corpusParser = Csv {_csv_limit = 150000}
, listParser = V4
, phyloName = "Phylo Name"
, phyloScale = 2
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = True
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
, clique = MaxClique {_mcl_size = 5, _mcl_threshold = 1.0e-4, _mcl_filter = ByThreshold}
, exportLabel = [ BranchLabel {_branch_labelTagger = MostEmergentTfIdf, _branch_labelSize = 2}
, GroupLabel {_group_labelTagger = MostEmergentInclusive, _group_labelSize = 2}
]
, exportSort = ByHierarchy {_sort_order = Desc}
, exportFilter = [ByBranchSize {_branch_size = 3.0}]
}
tests :: TestTree
tests = testGroup "Phylo" [
testCase "returns expected data" testSmallPhyloExpectedOutput
]
testSmallPhyloExpectedOutput :: Assertion
testSmallPhyloExpectedOutput = do
issue290PhyloSmall <- setConfig phyloConfig <$> (readPhylo =<< getDataFileName "bench-data/phylo/issue-290-small.json")
expected <- readPhylo =<< getDataFileName "test-data/phylo/issue-290-small.golden.json"
let actual = toPhylo issue290PhyloSmall
expected @?= actual
{-# LANGUAGE TypeApplications #-}
module Main where
import Gargantext.Prelude
import Control.Monad
import Data.Text (isInfixOf)
import Shelly hiding (FilePath)
import System.IO
import System.Process
import Test.Hspec
import qualified Data.Text as T
import qualified Test.API as API
import qualified Test.Database.Operations as DB
import Test.Hspec
startCoreNLPServer :: IO ProcessHandle
startCoreNLPServer = do
devNull <- openFile "/dev/null" WriteMode
let p = proc "./startServer.sh" []
(_, _, _, hdl) <- createProcess $ p { cwd = Just "devops/coreNLP/stanford-corenlp-current"
(_, _, _, hdl) <- (createProcess $ p { cwd = Just "devops/coreNLP/stanford-corenlp-current"
, delegate_ctlc = True
, create_group = True
, std_out = UseHandle devNull
, std_err = UseHandle devNull
}
}) `catch` \e -> case e of
_ | True <- "does not exist" `isInfixOf` (T.pack . show @SomeException $ e)
-> fail $ "Cannot execute the 'startServer.sh' script. If this is the " <>
"first time you are running the tests, you have to run " <>
"cd devops/coreNLP && ./build.sh first. You have to run it only once, " <>
"and then you are good to go for the time being."
| otherwise -> throwIO e
pure hdl
stopCoreNLPServer :: ProcessHandle -> IO ()
......
......@@ -20,6 +20,7 @@ import qualified Test.Ngrams.NLP as NLP
import qualified Test.Ngrams.Query as NgramsQuery
import qualified Test.Offline.JSON as JSON
import qualified Test.Offline.Errors as Errors
import qualified Test.Offline.Phylo as Phylo
import qualified Test.Parsers.Date as PD
import qualified Test.Utils.Crypto as Crypto
import qualified Test.Utils.Jobs as Jobs
......@@ -51,4 +52,5 @@ main = do
, CorpusQuery.tests
, JSON.tests
, Errors.tests
, Phylo.tests
]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment