[phylo] work on adding day/hour/minute/second

Too many refactorings along the way unfortunately
parent cd0aa9ca
Pipeline #7809 failed with stages
in 18 minutes and 20 seconds
...@@ -168,6 +168,7 @@ source-repository-package ...@@ -168,6 +168,7 @@ source-repository-package
allow-newer: MissingH:base allow-newer: MissingH:base
, *:base , *:base
, *:unordered-containers
, crawlerHAL:* , crawlerHAL:*
, epo-api-client:http-client-tls , epo-api-client:http-client-tls
, openalex:http-client-tls , openalex:http-client-tls
......
...@@ -646,7 +646,7 @@ library ...@@ -646,7 +646,7 @@ library
, tree-diff , tree-diff
, tuple ^>= 0.3.0.2 , tuple ^>= 0.3.0.2
, unicode-collation >= 0.1.3.5 , unicode-collation >= 0.1.3.5
, unordered-containers ^>= 0.2.16.0 , unordered-containers ^>= 0.2.20
-- needed for Worker / System.Posix.Signals -- needed for Worker / System.Posix.Signals
, unix >= 2.7.3 && < 2.9 , unix >= 2.7.3 && < 2.9
, uri-encode ^>= 1.5.0.7 , uri-encode ^>= 1.5.0.7
...@@ -798,7 +798,7 @@ common commonTestDependencies ...@@ -798,7 +798,7 @@ common commonTestDependencies
, unicode-collation >= 0.1.3.5 , unicode-collation >= 0.1.3.5
, unix >= 2.7.3 && < 2.9 , unix >= 2.7.3 && < 2.9
, unliftio , unliftio
, unordered-containers ^>= 0.2.16.0 , unordered-containers ^>= 0.2.20
, utf8-string ^>= 1.0.2 , utf8-string ^>= 1.0.2
, validity ^>= 0.12.0.2 , validity ^>= 0.12.0.2
, vector >= 0.12.3.0 && <= 0.13.1.0 , vector >= 0.12.3.0 && <= 0.13.1.0
......
...@@ -15,7 +15,7 @@ import Data.Text qualified as T ...@@ -15,7 +15,7 @@ import Data.Text qualified as T
import Data.Time (UTCTime(..), fromGregorian, toGregorian, utctDay) import Data.Time (UTCTime(..), fromGregorian, toGregorian, utctDay)
import Data.Time.Format (parseTimeM, defaultTimeLocale) import Data.Time.Format (parseTimeM, defaultTimeLocale)
import Data.Time.Format.ISO8601 (iso8601ParseM) import Data.Time.Format.ISO8601 (iso8601ParseM)
import Data.Time.LocalTime (TimeOfDay(..), timeOfDayToTime) import Data.Time.LocalTime (TimeOfDay(..), timeOfDayToTime, timeToTimeOfDay)
import Gargantext.Defaults qualified as Def import Gargantext.Defaults qualified as Def
import Gargantext.Prelude import Gargantext.Prelude
import Text.Printf (printf) import Text.Printf (printf)
...@@ -45,18 +45,45 @@ averageLength l = fromIntegral (sum (map length l)) / fromIntegral (length l) ...@@ -45,18 +45,45 @@ averageLength l = fromIntegral (sum (map length l)) / fromIntegral (length l)
-- print c -- $ toYear $ toGregorian $ utctDay c -- print c -- $ toYear $ toGregorian $ utctDay c
makeUTCTime :: Int -> Int -> Int -> Int -> Int -> Int -> UTCTime -- | A simplified UTCTime record for our purposes
makeUTCTime year month day hour minute second' = data UTCTimeR =
UTCTime dayPart timePart UTCTimeR { year :: Int
, month :: Int
, day :: Int
, hour :: Int
, minute :: Int
, sec :: Int }
deriving (Show, Eq, Generic)
defUTCTimeR :: UTCTimeR
defUTCTimeR = UTCTimeR { year = fromIntegral Def.year
, month = Def.month
, day = Def.day
, hour = Def.hour
, minute = Def.minute
, sec = Def.second }
toUTCTime :: UTCTimeR -> UTCTime
toUTCTime (UTCTimeR { .. }) = UTCTime dayPart timePart
where where
dayPart = fromGregorian (fromIntegral year) month day dayPart = fromGregorian (fromIntegral year) month day
timePart = timeOfDayToTime (TimeOfDay hour minute (fromIntegral second')) timePart = timeOfDayToTime (TimeOfDay hour minute (fromIntegral sec))
toUTCTimeR :: UTCTime -> UTCTimeR
toUTCTimeR (UTCTime { .. }) = UTCTimeR { year = fromIntegral year
, sec = round (realToFrac sec :: Float)
, .. }
where
(year, month, day) = toGregorian utctDay
TimeOfDay hour minute sec = timeToTimeOfDay utctDayTime
-- | Parse with multiple format attempts -- | Parse with multiple format attempts
parseFlexibleTime :: Text -> Maybe UTCTime parseFlexibleTime :: Text -> Maybe UTCTime
parseFlexibleTime t = msum parseFlexibleTime t = msum
[ iso8601ParseM s [ iso8601ParseM s
, parseTimeM True defaultTimeLocale "%Y-%m-%dT%H:%M:%S" s
, parseTimeM True defaultTimeLocale "%Y-%m-%d %H:%M:%S" s
, parseTimeM True defaultTimeLocale "%Y-%m-%d %H:%M:%S UTC" s , parseTimeM True defaultTimeLocale "%Y-%m-%d %H:%M:%S UTC" s
, parseTimeM True defaultTimeLocale "%Y-%m-%d" s , parseTimeM True defaultTimeLocale "%Y-%m-%d" s
] ]
...@@ -68,6 +95,6 @@ dateParts :: Text -> [Text] ...@@ -68,6 +95,6 @@ dateParts :: Text -> [Text]
dateParts t = [T.pack $ printf "%04d" y, T.pack $ printf "%02d" m, T.pack $ printf "%02d" d] dateParts t = [T.pack $ printf "%04d" y, T.pack $ printf "%02d" m, T.pack $ printf "%02d" d]
where where
mParsed = parseFlexibleTime t mParsed = parseFlexibleTime t
parsed = fromMaybe (makeUTCTime (fromIntegral Def.year) Def.month Def.day parsed = fromMaybe (toUTCTime defUTCTimeR) mParsed
Def.hour Def.minute Def.second) mParsed
(y, m, d) = toGregorian $ utctDay parsed (y, m, d) = toGregorian $ utctDay parsed
...@@ -31,6 +31,8 @@ module Gargantext.Core.Viz.Phylo where ...@@ -31,6 +31,8 @@ module Gargantext.Core.Viz.Phylo where
import Control.Lens (over) import Control.Lens (over)
import Data.Aeson.Types qualified as JS import Data.Aeson.Types qualified as JS
import Data.Discrimination qualified as D
import Data.HashMap.Strict qualified as HM
import Data.List.NonEmpty qualified as NE import Data.List.NonEmpty qualified as NE
import Data.Swagger (ToSchema(..), genericDeclareNamedSchema) import Data.Swagger (ToSchema(..), genericDeclareNamedSchema)
import Data.Text (pack) import Data.Text (pack)
...@@ -354,7 +356,10 @@ defaultPhyloParam = ...@@ -354,7 +356,10 @@ defaultPhyloParam =
------------------ ------------------
-- | Date : a simple Integer (maxInt on 64 bit is good enough for storing number of seconds since year 0) -- | Date : a simple Integer (maxInt on 64 bit is good enough for storing number of seconds since year 0)
type Date = Int newtype Date = Date { unDate :: Int }
deriving (Generic)
deriving newtype (NFData, Show, Eq, ToExpr, Ord, ToSchema, ToJSON, JS.ToJSONKey, FromJSON, JS.FromJSONKey, Hashable, Num, Enum, Integral, Real)
deriving anyclass (D.Grouping, D.Sorting)
-- | DateStr : the string version of a Date -- | DateStr : the string version of a Date
type DateStr = Text type DateStr = Text
...@@ -388,7 +393,8 @@ data PhyloFoundations = PhyloFoundations ...@@ -388,7 +393,8 @@ data PhyloFoundations = PhyloFoundations
data PhyloCounts = PhyloCounts data PhyloCounts = PhyloCounts
{ coocByDate :: !(Map Date Cooc) { coocByDate :: !(Map Date Cooc)
, docsByDate :: !(Map Date Double) -- | For many docs, HashMap seems more performant than Map
, docsByDate :: !(HM.HashMap Date Double)
, rootsCountByDate :: !(Map Date (Map Int Double)) , rootsCountByDate :: !(Map Date (Map Int Double))
, rootsCount :: !(Map Int Double) , rootsCount :: !(Map Int Double)
, rootsFreq :: !(Map Int Double) , rootsFreq :: !(Map Int Double)
......
{-| {-|
Module : Gargantext.Core.Viz.Phylo.API Module : Gargantext.Core.Viz.Phylo.API.Tools
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -24,7 +24,7 @@ import Data.ByteString.Lazy qualified as Lazy ...@@ -24,7 +24,7 @@ import Data.ByteString.Lazy qualified as Lazy
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Set qualified as Set import Data.Set qualified as Set
import Data.Text (pack) import Data.Text (pack)
import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian) import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, {-diffDays,-} showGregorian, Day)
import Data.Time.Clock (UTCTime(..)) import Data.Time.Clock (UTCTime(..))
import Data.Time.Clock.POSIX (posixSecondsToUTCTime, utcTimeToPOSIXSeconds) import Data.Time.Clock.POSIX (posixSecondsToUTCTime, utcTimeToPOSIXSeconds)
import Data.Time.Format (defaultTimeLocale, formatTime) import Data.Time.Format (defaultTimeLocale, formatTime)
...@@ -35,7 +35,7 @@ import Gargantext.Core.NodeStory.Types (HasNodeStory, NodeStoryEnv, HasNodeStory ...@@ -35,7 +35,7 @@ import Gargantext.Core.NodeStory.Types (HasNodeStory, NodeStoryEnv, HasNodeStory
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText) import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(MapTerm)) import Gargantext.Core.Types.Main (ListType(MapTerm))
import Gargantext.Core.Utils.DateUtils (makeUTCTime) import Gargantext.Core.Utils.DateUtils (UTCTimeR(..), toUTCTime)
import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo (_phylo_computeTime), trackComputeTime, ComputeTimeHistory) import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo (_phylo_computeTime), trackComputeTime, ComputeTimeHistory)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile) import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink) import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
...@@ -53,10 +53,15 @@ import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperda ...@@ -53,10 +53,15 @@ import Gargantext.Database.Schema.Node ( NodePoly(_node_hyperdata), node_hyperda
import Gargantext.Prelude hiding (to) import Gargantext.Prelude hiding (to)
import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM ) import Gargantext.System.Logging ( MonadLogger, LogLevel(DEBUG), logLocM )
import Gargantext.Utils.UTCTime (timeMeasured, timeMeasured'') import Gargantext.Utils.UTCTime (timeMeasured, timeMeasured'')
import System.Directory (copyFile)
import System.FilePath ((</>)) import System.FilePath ((</>))
import System.IO.Temp (withTempDirectory) import System.IO.Temp (withTempDirectory)
import System.Process qualified as Shell import System.Process qualified as Shell
year0 :: Day
year0 = fromGregorian 0000 1 1
-------------------------------------------------------------------- --------------------------------------------------------------------
getPhyloData :: HasNodeError err getPhyloData :: HasNodeError err
=> PhyloId -> DBQuery err x (Maybe Phylo) => PhyloId -> DBQuery err x (Maybe Phylo)
...@@ -107,6 +112,8 @@ phylo2dot phylo = do ...@@ -107,6 +112,8 @@ phylo2dot phylo = do
value <- readFile fileFrom value <- readFile fileFrom
copyFile fileFrom ("/home/przemek/phylo/phyloFrom.dot")
case value of case value of
"" -> panic "[G.C.V.Phylo.API.phylo2dot] Error no file" "" -> panic "[G.C.V.Phylo.API.phylo2dot] Error no file"
_ -> pure value _ -> pure value
...@@ -193,6 +200,7 @@ context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do ...@@ -193,6 +200,7 @@ context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
-- TODO better default date and log the errors to improve data quality -- TODO better default date and log the errors to improve data quality
-- TODO Context already has a date field
context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text) context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
context2date context timeUnit = do context2date context timeUnit = do
let hyperdata = _context_hyperdata context let hyperdata = _context_hyperdata context
...@@ -213,39 +221,44 @@ context2date context timeUnit = do ...@@ -213,39 +221,44 @@ context2date context timeUnit = do
toMonths :: Int -> Int -> Int -> Date toMonths :: Int -> Int -> Int -> Date
toMonths y m d = fromIntegral $ cdMonths toMonths y m d = fromIntegral $ cdMonths
$ diffGregorianDurationClip (fromGregorian (fromIntegral y) m d) $ diffGregorianDurationClip (fromGregorian (fromIntegral y) m d)
(fromGregorian 0000 0 0) year0
toDays :: Int -> Int -> Int -> Date toDays :: Int -> Int -> Int -> Date
toDays y m d = fromIntegral toDays y m d = toSeconds y m d 0 0 0 `div` (24*60*60)
$ diffDays (fromGregorian (fromIntegral y) m d) (fromGregorian 0000 0 0) -- toDays y m d = fromIntegral
-- $ diffDays (fromGregorian (fromIntegral y) m d) year0
-- | Convert UTCTime to seconds since year 0
-- utcTimeToSecondsFromYear0 :: UTCTime -> Integer
-- utcTimeToSecondsFromYear0 utcTime =
-- let posixSeconds = utcTimeToPOSIXSeconds utcTime
-- -- POSIX epoch is 1970-01-01, need to add seconds from year 0 to 1970
-- year0To1970Seconds = 1970 * 365.25 * 24 * 3600 -- Approximate
-- in floor (posixSeconds + year0To1970Seconds)
-- Convert UTCTime to seconds since year 0 -- | Convert UTCTime to seconds since year 1970
utcTimeToSecondsFromYear0 :: UTCTime -> Integer utcTimeToSecondsFromYear1970 :: UTCTime -> Integer
utcTimeToSecondsFromYear0 utcTime = utcTimeToSecondsFromYear1970 utcTime = floor $ utcTimeToPOSIXSeconds utcTime
let posixSeconds = utcTimeToPOSIXSeconds utcTime
-- POSIX epoch is 1970-01-01, need to add seconds from year 0 to 1970
year0To1970Seconds = 1970 * 365.25 * 24 * 3600 -- Approximate
in floor (posixSeconds + year0To1970Seconds)
toHours :: Int -> Int -> Int -> Int -> Date toHours :: Int -> Int -> Int -> Int -> Date
toHours y m d hh = fromIntegral $ toHours y m d hh = toSeconds y m d hh 0 0 `div` (60*60)
(utcTimeToSecondsFromYear0 $ makeUTCTime y m d hh 0 0) `div` (60*60)
toMinutes :: Int -> Int -> Int -> Int -> Int -> Date toMinutes :: Int -> Int -> Int -> Int -> Int -> Date
toMinutes y m d hh mm = fromIntegral $ toMinutes y m d hh mm = toSeconds y m d hh mm 0 `div` 60
(utcTimeToSecondsFromYear0 $ makeUTCTime y m d hh mm 0) `div` 60
toSeconds :: Int -> Int -> Int -> Int -> Int -> Int -> Date toSeconds :: Int -> Int -> Int -> Int -> Int -> Int -> Date
toSeconds y m d hh mm ss = fromIntegral $ toSeconds y m d hh mm ss = fromIntegral $
utcTimeToSecondsFromYear0 $ makeUTCTime y m d hh mm ss utcTimeToSecondsFromYear1970 $ toUTCTime utcTimeR
where
utcTimeR = UTCTimeR { year = y, month = m, day = d, hour = hh, minute = mm, sec = ss }
-- | This is kinda like a hashing function that assigns different -- | This is kinda like a hashing function that assigns different
-- 'Date' (in fact an 'Int') to different period, depending on the -- 'Date' (in fact an 'Int') to different period, depending on the
-- 'TimeUnit' -- 'TimeUnit'
toPhyloDate :: Int -> Int -> Int -> Int -> Int -> Int -> TimeUnit -> Date toPhyloDate :: Int -> Int -> Int -> Int -> Int -> Int -> TimeUnit -> Date
toPhyloDate y m d hh mm ss tu = case tu of toPhyloDate y m d hh mm ss tu = case tu of
Year {} -> y Year {} -> fromIntegral y
Month {} -> toMonths y m d Month {} -> toMonths y m d
Week {} -> div (toDays y m d) 7 Week {} -> div (toDays y m d) 7
Day {} -> toDays y m d Day {} -> toDays y m d
...@@ -261,11 +274,17 @@ toPhyloDate' y m d _hh _mm _ss (Month {}) = pack $ showGregorian $ fromGregor ...@@ -261,11 +274,17 @@ toPhyloDate' y m d _hh _mm _ss (Month {}) = pack $ showGregorian $ fromGregor
toPhyloDate' y m d _hh _mm _ss (Week {}) = pack $ showGregorian $ fromGregorian (toInteger y) m d toPhyloDate' y m d _hh _mm _ss (Week {}) = pack $ showGregorian $ fromGregorian (toInteger y) m d
toPhyloDate' y m d _hh _mm _ss (Day {}) = pack $ showGregorian $ fromGregorian (toInteger y) m d toPhyloDate' y m d _hh _mm _ss (Day {}) = pack $ showGregorian $ fromGregorian (toInteger y) m d
toPhyloDate' y m d hh _mm _ss (Hour {}) = toPhyloDate' y m d hh _mm _ss (Hour {}) =
pack $ formatTime defaultTimeLocale "%Y-%m-%d %H" $ makeUTCTime y m d hh 0 0 pack $ formatTime defaultTimeLocale "%Y-%m-%d %H" $ toUTCTime utcTimeR
where
utcTimeR = UTCTimeR { year = y, month = m, day = d, hour = hh, minute = 0, sec = 0 }
toPhyloDate' y m d hh mm _ss (Minute {}) = toPhyloDate' y m d hh mm _ss (Minute {}) =
pack $ formatTime defaultTimeLocale "%Y-%m-%d %H:%M" $ makeUTCTime y m d hh mm 0 pack $ formatTime defaultTimeLocale "%Y-%m-%d %H:%M" $ toUTCTime utcTimeR
where
utcTimeR = UTCTimeR { year = y, month = m, day = d, hour = hh, minute = mm, sec = 0 }
toPhyloDate' y m d hh mm ss (Second {}) = toPhyloDate' y m d hh mm ss (Second {}) =
pack $ formatTime defaultTimeLocale "%Y-%m-%d %H:%M:%S" $ makeUTCTime y m d hh mm ss pack $ formatTime defaultTimeLocale "%Y-%m-%d %H:%M:%S" $ toUTCTime utcTimeR
where
utcTimeR = UTCTimeR { year = y, month = m, day = d, hour = hh, minute = mm, sec = ss }
-- Utils -- Utils
......
...@@ -17,6 +17,7 @@ module Gargantext.Core.Viz.Phylo.Example where ...@@ -17,6 +17,7 @@ module Gargantext.Core.Viz.Phylo.Example where
import Control.Lens import Control.Lens
import Data.GraphViz.Types.Generalised (DotGraph) import Data.GraphViz.Types.Generalised (DotGraph)
import Data.HashMap.Strict qualified as HM
import Data.List (nub) import Data.List (nub)
import Data.Map qualified as Map import Data.Map qualified as Map
import Data.Text (toLower) import Data.Text (toLower)
...@@ -90,7 +91,7 @@ periods = toPeriods (sort $ nub $ map date docs) ...@@ -90,7 +91,7 @@ periods = toPeriods (sort $ nub $ map date docs)
(getTimeStep $ timeUnit config) (getTimeStep $ timeUnit config)
nbDocsByYear :: Map Date Double nbDocsByYear :: HM.HashMap Date Double
nbDocsByYear = docsToTimeScaleNb docs nbDocsByYear = docsToTimeScaleNb docs
......
...@@ -16,6 +16,7 @@ module Gargantext.Core.Viz.Phylo.PhyloExport where ...@@ -16,6 +16,7 @@ module Gargantext.Core.Viz.Phylo.PhyloExport where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.HashMap.Strict qualified as HM
import Data.GraphViz hiding (DotGraph, Order) import Data.GraphViz hiding (DotGraph, Order)
import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order) import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Attributes.HTML qualified as H import Data.GraphViz.Attributes.HTML qualified as H
...@@ -195,7 +196,7 @@ exportToDot phylo export = ...@@ -195,7 +196,7 @@ exportToDot phylo export =
{-- home made attributes -} {-- home made attributes -}
<> [ toAttr (fromStrict "phyloFoundations") $ show (length $ Vector.toList $ getRoots phylo) <> [ toAttr (fromStrict "phyloFoundations") $ show (length $ Vector.toList $ getRoots phylo)
, toAttr (fromStrict "phyloTerms") $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups) , toAttr (fromStrict "phyloTerms") $ show (length $ nub $ concat $ map (^. phylo_groupNgrams) $ export ^. export_groups)
, toAttr (fromStrict "phyloDocs") $ show (sum $ elems $ getDocsByDate phylo) , toAttr (fromStrict "phyloDocs") $ show (sum $ HM.elems $ getDocsByDate phylo)
, toAttr (fromStrict "phyloPeriods") $ show (length $ elems $ phylo ^. phylo_periods) , toAttr (fromStrict "phyloPeriods") $ show (length $ elems $ phylo ^. phylo_periods)
, toAttr (fromStrict "phyloBranches") $ show (length $ export ^. export_branches) , toAttr (fromStrict "phyloBranches") $ show (length $ export ^. export_branches)
, toAttr (fromStrict "phyloGroups") $ show (length $ export ^. export_groups) , toAttr (fromStrict "phyloGroups") $ show (length $ export ^. export_groups)
...@@ -639,7 +640,7 @@ toHorizon phylo = ...@@ -639,7 +640,7 @@ toHorizon phylo =
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds)) heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
$ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
noHeads = groups \\ heads noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (getDocsByDate phylo) [prd] nbDocs = sum $ HM.elems $ filterDocs (getDocsByDate phylo) [prd]
diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd] diago = reduceDiagos $ filterDiago (getCoocByDate phylo) [prd]
sim = (similarity $ getConfig phylo) sim = (similarity $ getConfig phylo)
step = case getSeaElevation phylo of step = case getSeaElevation phylo of
......
...@@ -19,6 +19,7 @@ import Control.Lens hiding (Level) ...@@ -19,6 +19,7 @@ import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parMap, rpar) import Control.Parallel.Strategies (parMap, rpar)
import Data.Containers.ListUtils (nubOrd) import Data.Containers.ListUtils (nubOrd)
import Data.Discrimination qualified as D import Data.Discrimination qualified as D
import Data.HashMap.Strict qualified as HM
import Data.List (partition, intersect, tail) import Data.List (partition, intersect, tail)
import Data.List qualified as List import Data.List qualified as List
import Data.Map (fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, insert) import Data.Map (fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, insert)
...@@ -196,7 +197,7 @@ findSeaLadder phylo = case getSeaElevation phylo of ...@@ -196,7 +197,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
let candidates = filter (\target -> (> 2) $ length let candidates = filter (\target -> (> 2) $ length
$ intersect (getGroupNgrams source) (getGroupNgrams target)) targets $ intersect (getGroupNgrams source) (getGroupNgrams target)) targets
in map (\target -> in map (\target ->
let nbDocs = (sum . elems) let nbDocs = (sum . HM.elems)
$ filterDocs docs ([idToPrd (getGroupId source), idToPrd (getGroupId target)]) $ filterDocs docs ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
diago = reduceDiagos diago = reduceDiagos
$ filterDiago diagos ([idToPrd (getGroupId source), idToPrd (getGroupId target)]) $ filterDiago diagos ([idToPrd (getGroupId source), idToPrd (getGroupId target)])
...@@ -208,10 +209,12 @@ findSeaLadder phylo = case getSeaElevation phylo of ...@@ -208,10 +209,12 @@ findSeaLadder phylo = case getSeaElevation phylo of
appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo appendGroups :: (a -> Period -> (Text,Text) -> Scale -> Int -> [Cooc] -> Map Int Double -> PhyloGroup) -> Scale -> Map (Date,Date) [a] -> Phylo -> Phylo
appendGroups f lvl m phylo = appendGroups f lvl m phylo =
tracePhylo ("\n" <> "-- | Append " tracePhylo (mconcat [ "\n" :: Text
<> show (length $ concat $ elems m) , "-- | Append "
<> " groups to scale " , show $ length $ concat $ elems m
<> show (lvl) <> "\n" :: Text) , " groups to scale "
, show lvl
, "\n" ])
$ over ( phylo_periods $ over ( phylo_periods
. traverse . traverse
. phylo_periodScales . phylo_periodScales
...@@ -397,9 +400,11 @@ docsToTimeScaleCooc docs fdt = ...@@ -397,9 +400,11 @@ docsToTimeScaleCooc docs fdt =
mCooc' = fromList mCooc' = fromList
$ map (\t -> (t,empty)) $ map (\t -> (t,empty))
$ toTimeScale (map date docs) 1 $ toTimeScale (map date docs) 1
in tracePhylo ("\n" <> "-- | Build the coocurency matrix for " in tracePhylo (mconcat [ "\n" :: Text
<> show (length $ keys mCooc') , "-- | Build the coocurency matrix for "
<> " unit of time" <> "\n" :: Text) , show $ length $ keys mCooc'
, " unit of time"
, "\n" ])
$ unionWith sumCooc mCooc mCooc' $ unionWith sumCooc mCooc mCooc'
...@@ -423,10 +428,13 @@ groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(da ...@@ -423,10 +428,13 @@ groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(da
groupDocsByPeriod' f pds docs = groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = parMap rpar (inPeriode f docs') pds periods = parMap rpar (inPeriode f docs') pds
in tracePhylo ("\n" <> "-- | Group " in tracePhylo (mconcat [ "\n" :: Text
<> show (length docs) , "-- | Group "
<> " docs by " , show $ length docs
<> show (length pds) <> " periods" <> "\n" :: Text) , " docs by "
, show $ length pds
, " periods"
,"\n" ])
$ fromList $ zip pds periods $ fromList $ zip pds periods
where where
-------------------------------------- --------------------------------------
...@@ -442,9 +450,13 @@ groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents ...@@ -442,9 +450,13 @@ groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents
groupDocsByPeriod f pds es = groupDocsByPeriod f pds es =
let periods = parMap rpar (inPeriode f es) pds let periods = parMap rpar (inPeriode f es) pds
in tracePhylo ("\n" <> "-- | Group " in tracePhylo (mconcat [ "\n" :: Text
<> show (length es) <> " docs by " , "-- | Group "
<> show (length pds) <> " periods" <> "\n" :: Text) , show $ length es
, " docs by "
, show $ length pds
, " periods"
, "\n" ])
$ fromList $ zip pds periods $ fromList $ zip pds periods
where where
-------------------------------------- --------------------------------------
...@@ -462,7 +474,13 @@ docsToTermFreq docs fdt = ...@@ -462,7 +474,13 @@ docsToTermFreq docs fdt =
$ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToTermFreq" lst, fromIntegral $ length lst))
$ group $ D.sort $ concat $ map (\d -> D.nub $ ngramsToIdx (text d) fdt) docs $ group $ D.sort $ concat $ map (\d -> D.nub $ ngramsToIdx (text d) fdt) docs
sumFreqs = sum $ elems freqs sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs in tracePhylo (mconcat [ "\n" :: Text
, "-- | Docs "
, show nbDocs
, " to term freq "
, show $ length freqs
, "\n" ])
$ map (/sumFreqs) freqs
docsToTermCount :: [Document] -> Vector Ngrams -> Map Int Double docsToTermCount :: [Document] -> Vector Ngrams -> Map Int Double
...@@ -479,7 +497,13 @@ docsToTimeTermCount docs roots = ...@@ -479,7 +497,13 @@ docsToTimeTermCount docs roots =
$ fromListWith (++) $ fromListWith (++)
$ map (\d -> (date d, D.nub $ ngramsToIdx (text d) roots)) docs $ map (\d -> (date d, D.nub $ ngramsToIdx (text d) roots)) docs
time = fromList $ map (\t -> (t,Map.empty)) $ toTimeScale (keys docs') 1 time = fromList $ map (\t -> (t,Map.empty)) $ toTimeScale (keys docs') 1
in unionWith (Map.union) time docs' in tracePhylo (mconcat [ "\n" :: Text
, "-- | Docs "
, show $ length docs'
, " to time term count "
, show $ length time
, "\n" ])
$ unionWith (Map.union) time docs'
docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double docsToLastTermFreq :: Int -> [Document] -> Vector Ngrams -> Map Int Double
...@@ -491,20 +515,30 @@ docsToLastTermFreq n docs fdt = ...@@ -491,20 +515,30 @@ docsToLastTermFreq n docs fdt =
$ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst)) $ map (\lst -> (head' "docsToLastTermFreq" lst, fromIntegral $ length lst))
$ group $ D.sort $ concat $ map (\d -> D.nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs $ group $ D.sort $ concat $ map (\d -> D.nub $ ngramsToIdx (text d) fdt) $ filter (\d -> elem (date d) last) docs
sumFreqs = sum $ elems freqs sumFreqs = sum $ elems freqs
in map (/sumFreqs) freqs in tracePhylo (mconcat [ "\n" :: Text
, "-- | Docs "
, show nbDocs
, " to last term freq "
, show $ length freqs
, "\n" ])
$ map (/sumFreqs) freqs
-- To count the number of docs by unit of time -- To count the number of docs by unit of time
docsToTimeScaleNb :: [Document] -> Map Date Double docsToTimeScaleNb :: [Document] -> HM.HashMap Date Double
docsToTimeScaleNb docs = docsToTimeScaleNb docs =
let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs let docs' = HM.fromListWith (+) $ map (\d -> (date d,1)) docs
time = fromList $ map (\t -> (t,0)) $ toTimeScale (keys docs') 1 time = HM.fromList $ map (\t -> (t,0)) $ toTimeScale (HM.keys docs') 1
in tracePhylo ("\n" <> "-- | Group " in tracePhylo (mconcat [ "\n" :: Text
<> show (length docs) , "-- | Group "
<> " docs by " , show $ length docs
<> show (length time) , " docs by "
<> " unit of time" <> "\n" :: Text) , show $ length time
$ unionWith (+) time docs' , " unit of time"
, ", docs keys: "
, show $ HM.keys docs'
, "\n" ])
$ HM.unionWith (+) time docs'
initPhyloScales :: Int -> Period -> Map PhyloScaleId PhyloScale initPhyloScales :: Int -> Period -> Map PhyloScaleId PhyloScale
...@@ -555,16 +589,19 @@ initPhylo docs conf = ...@@ -555,16 +589,19 @@ initPhylo docs conf =
then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) } then defaultPhyloParam { _phyloParam_config = setDefault conf timeScale (length docs) }
else defaultPhyloParam { _phyloParam_config = conf } else defaultPhyloParam { _phyloParam_config = conf }
periods = toPeriods (D.sort $ D.nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale) periods = toPeriods (D.sort $ D.nub $ map date docs) (getTimePeriod timeScale) (getTimeStep timeScale)
in tracePhylo ("\n" <> "-- | Init a phylo out of " in tracePhylo (mconcat [ "\n" :: Text
<> show (length docs) <> " docs \n" :: Text) , "-- | Init a phylo out of "
$ tracePhylo ("\n" <> "-- | lambda " , show $ length docs
<> show (_qua_granularity $ phyloQuality $ _phyloParam_config params) :: Text) , " docs \n" ])
$ Phylo foundations $ tracePhylo (mconcat [ "\n" :: Text
docsSources , "-- | lambda "
docsCounts , show $ _qua_granularity $ phyloQuality $ _phyloParam_config params ])
[] $ Phylo { _phylo_foundations = foundations
params , _phylo_sources = docsSources
(fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods) , _phylo_counts = docsCounts
0 , _phylo_seaLadder = []
(_qua_granularity $ phyloQuality $ _phyloParam_config params) , _phylo_param = params
Nothing , _phylo_periods = fromList $ map (\prd -> (prd, PhyloPeriod prd ("","") (initPhyloScales 1 prd))) periods
, _phylo_quality = 0
, _phylo_level = _qua_granularity $ phyloQuality $ _phyloParam_config params
, _phylo_computeTime = Nothing }
...@@ -15,6 +15,7 @@ Portability : POSIX ...@@ -15,6 +15,7 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloTools where module Gargantext.Core.Viz.Phylo.PhyloTools where
import Control.Lens ( over, filtered, view, (%~) ) import Control.Lens ( over, filtered, view, (%~) )
import Data.HashMap.Strict qualified as HM
import Data.List (union, nub, init, tail, partition, nubBy, (!!)) import Data.List (union, nub, init, tail, partition, nubBy, (!!))
import Data.List qualified as List import Data.List qualified as List
import Data.Map (elems, empty, fromList, findWithDefault, unionWith, keys, member, (!), filterWithKey, fromListWith, restrictKeys) import Data.Map (elems, empty, fromList, findWithDefault, unionWith, keys, member, (!), filterWithKey, fromListWith, restrictKeys)
...@@ -184,7 +185,7 @@ getTimeScale p = case timeUnit $ getConfig p of ...@@ -184,7 +185,7 @@ getTimeScale p = case timeUnit $ getConfig p of
toTimeScale :: [Date] -> Int -> [Date] toTimeScale :: [Date] -> Int -> [Date]
toTimeScale dates step = toTimeScale dates step =
let (start,end) = findBounds dates let (start,end) = findBounds dates
in [start, (start + step) .. end] in [start, (start + fromIntegral step) .. end]
getTimeStep :: TimeUnit -> Int getTimeStep :: TimeUnit -> Int
...@@ -550,7 +551,7 @@ getCoocByDate phylo = coocByDate (phylo ^. phylo_counts) ...@@ -550,7 +551,7 @@ getCoocByDate phylo = coocByDate (phylo ^. phylo_counts)
getRootsCountByDate :: Phylo -> Map Date (Map Int Double) getRootsCountByDate :: Phylo -> Map Date (Map Int Double)
getRootsCountByDate phylo = rootsCountByDate (phylo ^. phylo_counts) getRootsCountByDate phylo = rootsCountByDate (phylo ^. phylo_counts)
getDocsByDate :: Phylo -> Map Date Double getDocsByDate :: Phylo -> HM.HashMap Date Double
getDocsByDate phylo = docsByDate (phylo ^. phylo_counts) getDocsByDate phylo = docsByDate (phylo ^. phylo_counts)
getRootsCount :: Phylo -> Map Int Double getRootsCount :: Phylo -> Map Int Double
......
...@@ -14,6 +14,7 @@ module Gargantext.Core.Viz.Phylo.SynchronicClustering where ...@@ -14,6 +14,7 @@ module Gargantext.Core.Viz.Phylo.SynchronicClustering where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.HashMap.Strict qualified as HM
import Data.List (intersect, nub) import Data.List (intersect, nub)
import Data.Map (fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member, unionWith) import Data.Map (fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member, unionWith)
import Data.Map qualified as Map import Data.Map qualified as Map
...@@ -21,6 +22,7 @@ import Gargantext.Core.Viz.Phylo ...@@ -21,6 +22,7 @@ import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics) import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos) import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Data.HashMap.Strict.Utils qualified as HMU
import Gargantext.Prelude hiding (empty) import Gargantext.Prelude hiding (empty)
...@@ -157,7 +159,12 @@ toParentId :: PhyloGroup -> PhyloGroupId ...@@ -157,7 +159,12 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex) toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupScale + 1), child ^. phylo_groupIndex)
reduceGroups :: PhyloSimilarity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] reduceGroups :: PhyloSimilarity
-> Synchrony
-> HM.HashMap Date Double
-> Map Date Cooc
-> [PhyloGroup]
-> [PhyloGroup]
reduceGroups prox sync docs diagos branch = reduceGroups prox sync docs diagos branch =
-- 1) reduce a branch as a set of periods & groups -- 1) reduce a branch as a set of periods & groups
let periods = fromListWith (++) let periods = fromListWith (++)
...@@ -167,9 +174,9 @@ reduceGroups prox sync docs diagos branch = ...@@ -167,9 +174,9 @@ reduceGroups prox sync docs diagos branch =
$ mapWithKey (\prd groups -> $ mapWithKey (\prd groups ->
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold -- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let diago = reduceDiagos $ filterDiago diagos [prd] let diago = reduceDiagos $ filterDiago diagos [prd]
edgesLeft = fromList $ groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups edgesLeft = fromList $ groupsToEdges prox sync ((sum . HM.elems) $ HMU.restrictKeys docs $ periodsToYears [prd]) diago groups
edgesRight = fromList $ map (\((k1,k2),v) -> ((k2,k1),v)) edgesRight = fromList $ map (\((k1,k2),v) -> ((k2,k1),v))
$ groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago (reverse groups) $ groupsToEdges prox sync ((sum . HM.elems) $ HMU.restrictKeys docs $ periodsToYears [prd]) diago (reverse groups)
mergedEdges = Map.toList mergedEdges = Map.toList
$ unionWith (\v1 v2 -> if v1 >= v2 $ unionWith (\v1 v2 -> if v1 >= v2
then v1 then v1
......
...@@ -15,6 +15,7 @@ module Gargantext.Core.Viz.Phylo.TemporalMatching where ...@@ -15,6 +15,7 @@ module Gargantext.Core.Viz.Phylo.TemporalMatching where
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.HashMap.Strict qualified as HM
import Data.List (tail, intersect, nub, nubBy, union, partition) import Data.List (tail, intersect, nub, nubBy, union, partition)
import Data.List qualified as List import Data.List qualified as List
import Data.Map (fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), empty, mapKeys, adjust, filterWithKey) import Data.Map (fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), empty, mapKeys, adjust, filterWithKey)
...@@ -25,6 +26,7 @@ import Data.Text qualified as T ...@@ -25,6 +26,7 @@ import Data.Text qualified as T
import Data.Vector qualified as Vector import Data.Vector qualified as Vector
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Data.HashMap.Strict.Utils qualified as HMU
import Gargantext.Prelude hiding (empty) import Gargantext.Prelude hiding (empty)
import Text.Printf import Text.Printf
...@@ -192,8 +194,8 @@ filterPointersByPeriod fil pts = ...@@ -192,8 +194,8 @@ filterPointersByPeriod fil pts =
ToChildsMemory -> undefined ToChildsMemory -> undefined
ToParentsMemory -> undefined ToParentsMemory -> undefined
filterDocs :: Map Date Double -> [Period] -> Map Date Double filterDocs :: HM.HashMap Date Double -> [Period] -> HM.HashMap Date Double
filterDocs d pds = restrictKeys d $ periodsToYears pds filterDocs d pds = HMU.restrictKeys d $ periodsToYears pds
filterDiago :: Map Date Cooc -> [Period] -> Map Date Cooc filterDiago :: Map Date Cooc -> [Period] -> Map Date Cooc
filterDiago diago pds = restrictKeys diago $ periodsToYears pds filterDiago diago pds = restrictKeys diago $ periodsToYears pds
...@@ -232,8 +234,16 @@ groupsToBranches groups = ...@@ -232,8 +234,16 @@ groupsToBranches groups =
{- {-
-- find the best pair/singleton of parents/childs for a given group -- find the best pair/singleton of parents/childs for a given group
-} -}
makePairs :: (PhyloGroupId,[Int]) -> [(PhyloGroupId,[Int])] -> [Period] -> [Pointer] -> Filiation -> Double -> PhyloSimilarity makePairs :: (PhyloGroupId,[Int])
-> Map Date Double -> Map Date Cooc -> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))] -> [(PhyloGroupId,[Int])]
-> [Period]
-> [Pointer]
-> Filiation
-> Double
-> PhyloSimilarity
-> HM.HashMap Date Double
-> Map Date Cooc
-> [((PhyloGroupId,[Int]),(PhyloGroupId,[Int]))]
makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos = makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs diagos =
if (null periods) if (null periods)
then [] then []
...@@ -247,7 +257,7 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di ...@@ -247,7 +257,7 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di
inPairs :: [PhyloGroupId] inPairs :: [PhyloGroupId]
inPairs = map fst inPairs = map fst
$ filter (\(id,ngrams) -> $ filter (\(id,ngrams) ->
let nbDocs = (sum . elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id]) let nbDocs = (sum . HM.elems) $ filterDocs docs ([(fst . fst) egoId, (fst . fst) id])
diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id]) diago = reduceDiagos $ filterDiago diagos ([(fst . fst) egoId, (fst . fst) id])
in (toSimilarity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr in (toSimilarity nbDocs diago prox egoNgrams egoNgrams ngrams) >= thr
) candidates ) candidates
...@@ -259,8 +269,15 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di ...@@ -259,8 +269,15 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di
{- {-
-- find the best temporal links between a given group and its parents/childs -- find the best temporal links between a given group and its parents/childs
-} -}
phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> PhyloSimilarity -> Map Date Double -> Map Date Cooc phyloGroupMatching :: [[(PhyloGroupId,[Int])]]
-> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer] -> Filiation
-> PhyloSimilarity
-> HM.HashMap Date Double
-> Map Date Cooc
-> Double
-> [Pointer]
-> (PhyloGroupId,[Int])
-> [Pointer]
phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ngrams) = phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ngrams) =
if (null $ filterPointers proxi thr oldPointers) if (null $ filterPointers proxi thr oldPointers)
-- if no previous pointers satisfy the current threshold then let's find new pointers -- if no previous pointers satisfy the current threshold then let's find new pointers
...@@ -282,7 +299,7 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng ...@@ -282,7 +299,7 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
$ scanl (\acc targets -> $ scanl (\acc targets ->
let periods = nub $ map (fst . fst . fst) targets let periods = nub $ map (fst . fst . fst) targets
lastPrd = findLastPeriod filiation periods lastPrd = findLastPeriod filiation periods
nbdocs = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods)) nbdocs = sum $ HM.elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
diago = reduceDiagos diago = reduceDiagos
$ filterDiago diagos ([(fst . fst) id] ++ periods) $ filterDiago diagos ([(fst . fst) id] ++ periods)
singletons = processSimilarity nbdocs diago $ map (\g -> (g,g)) $ filter (\g -> (fst . fst . fst) g == lastPrd) targets singletons = processSimilarity nbdocs diago $ map (\g -> (g,g)) $ filter (\g -> (fst . fst . fst) g == lastPrd) targets
...@@ -330,7 +347,14 @@ getCandidates minNgrams ego targets = ...@@ -330,7 +347,14 @@ getCandidates minNgrams ego targets =
{- {-
-- set up and start performing the upstream/downstream inter‐temporal matching period by period -- set up and start performing the upstream/downstream inter‐temporal matching period by period
-} -}
reconstructTemporalLinks :: Int -> [Period] -> PhyloSimilarity -> Double -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup] reconstructTemporalLinks :: Int
-> [Period]
-> PhyloSimilarity
-> Double
-> HM.HashMap Date Double
-> Map Date Cooc
-> [PhyloGroup]
-> [PhyloGroup]
reconstructTemporalLinks frame periods similarity thr docs coocs groups = reconstructTemporalLinks frame periods similarity thr docs coocs groups =
let groups' = groupByField _phylo_groupPeriod groups let groups' = groupByField _phylo_groupPeriod groups
in foldl' (\acc prd -> in foldl' (\acc prd ->
...@@ -397,7 +421,15 @@ filterByNgrams inf ngrams groups = ...@@ -397,7 +421,15 @@ filterByNgrams inf ngrams groups =
{- {-
-- perform the upstream/downstream inter‐temporal matching process group by group -- perform the upstream/downstream inter‐temporal matching process group by group
-} -}
reconstructTemporalLinks' :: Int -> [Period] -> PhyloSimilarity -> Double -> Map Date Double -> Map Date Cooc -> Map Int [PhyloGroupId] -> [PhyloGroup] -> [PhyloGroup] reconstructTemporalLinks' :: Int
-> [Period]
-> PhyloSimilarity
-> Double
-> HM.HashMap Date Double
-> Map Date Cooc
-> Map Int [PhyloGroupId]
-> [PhyloGroup]
-> [PhyloGroup]
reconstructTemporalLinks' frame periods similarity thr docs coocs roots groups = reconstructTemporalLinks' frame periods similarity thr docs coocs roots groups =
let egos = map (\ego -> let egos = map (\ego ->
let -- 1) find the parents/childs matching periods let -- 1) find the parents/childs matching periods
...@@ -433,7 +465,15 @@ reconstructTemporalLinks' frame periods similarity thr docs coocs roots groups = ...@@ -433,7 +465,15 @@ reconstructTemporalLinks' frame periods similarity thr docs coocs roots groups =
{- {-
-- reconstruct a phylomemetic network from a list of groups and from a given threshold -- reconstruct a phylomemetic network from a list of groups and from a given threshold
-} -}
toPhylomemeticNetwork :: Int -> [Period] -> PhyloSimilarity -> Double -> Map Date Double -> Map Date Cooc -> Map Int [PhyloGroupId] -> [PhyloGroup] -> [Branch] toPhylomemeticNetwork :: Int
-> [Period]
-> PhyloSimilarity
-> Double
-> HM.HashMap Date Double
-> Map Date Cooc
-> Map Int [PhyloGroupId]
-> [PhyloGroup]
-> [Branch]
toPhylomemeticNetwork timescale periods similarity thr docs coocs roots groups = toPhylomemeticNetwork timescale periods similarity thr docs coocs roots groups =
groupsToBranches $ fromList $ map (\g -> (getGroupId g, g)) groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
$ reconstructTemporalLinks' timescale periods similarity thr docs coocs roots groups $ reconstructTemporalLinks' timescale periods similarity thr docs coocs roots groups
...@@ -593,8 +633,18 @@ thrToMeta thr branches = ...@@ -593,8 +633,18 @@ thrToMeta thr branches =
-- done = all the already separated branches -- done = all the already separated branches
-- rest = all the branches we still have to separate -- rest = all the branches we still have to separate
-} -}
separateBranches :: Double -> PhyloSimilarity -> Double -> Map Int Double -> Int -> Double -> Double separateBranches :: Double
-> Int -> Map Date Double -> Map Date Cooc -> Map Int [PhyloGroupId] -> [Period] -> PhyloSimilarity
-> Double
-> Map Int Double
-> Int
-> Double
-> Double
-> Int
-> HM.HashMap Date Double
-> Map Date Cooc
-> Map Int [PhyloGroupId]
-> [Period]
-> [(Branch,ShouldTry)] -> (Branch,ShouldTry) -> [(Branch,ShouldTry)] -> [(Branch,ShouldTry)] -> (Branch,ShouldTry) -> [(Branch,ShouldTry)]
-> [(Branch,ShouldTry)] -> [(Branch,ShouldTry)]
separateBranches fdt similarity lambda frequency minBranch thr rise timescale docs coocs roots periods done currentBranch rest = separateBranches fdt similarity lambda frequency minBranch thr rise timescale docs coocs roots periods done currentBranch rest =
...@@ -651,7 +701,7 @@ separateBranches fdt similarity lambda frequency minBranch thr rise timescale do ...@@ -651,7 +701,7 @@ separateBranches fdt similarity lambda frequency minBranch thr rise timescale do
seaLevelRise :: Double -> PhyloSimilarity -> Double -> Int -> Map Int Double seaLevelRise :: Double -> PhyloSimilarity -> Double -> Int -> Map Int Double
-> [Double] -> Double -> [Double] -> Double
-> Int -> [Period] -> Int -> [Period]
-> Map Date Double -> Map Date Cooc -> HM.HashMap Date Double -> Map Date Cooc
-> Map Int [PhyloGroupId] -> Map Int [PhyloGroupId]
-> [(Branch,ShouldTry)] -> [(Branch,ShouldTry)]
-> ([(Branch,ShouldTry)],FinalQuality) -> ([(Branch,ShouldTry)],FinalQuality)
......
...@@ -13,6 +13,7 @@ module Gargantext.Data.HashMap.Strict.Utils where ...@@ -13,6 +13,7 @@ module Gargantext.Data.HashMap.Strict.Utils where
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap import Data.HashMap.Strict qualified as HashMap
import Data.Set qualified as Set
import Gargantext.Prelude import Gargantext.Prelude
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -45,3 +46,7 @@ getKeysOrderedByValueMaxFirst m = go [] Nothing (HashMap.toList m) ...@@ -45,3 +46,7 @@ getKeysOrderedByValueMaxFirst m = go [] Nothing (HashMap.toList m)
| v > u = go [k] (Just v) rest | v > u = go [k] (Just v) rest
| otherwise = go (k:ks) (Just v) rest | otherwise = go (k:ks) (Just v) rest
-- | Similar to 'Data.Map.restrictKeys'
restrictKeys :: (Hashable k, Eq k, Ord k) => HashMap k v -> Set.Set k -> HashMap k v
restrictKeys hm keysToKeep = HashMap.filterWithKey (\k _ -> Set.member k keysToKeep) hm
...@@ -29,15 +29,17 @@ data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe T ...@@ -29,15 +29,17 @@ data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe T
, _hd_institutes :: !(Maybe Text) , _hd_institutes :: !(Maybe Text)
, _hd_source :: !(Maybe Text) , _hd_source :: !(Maybe Text)
, _hd_abstract :: !(Maybe Text) , _hd_abstract :: !(Maybe Text)
, _hd_language_iso2 :: !(Maybe Text)
, _hd_institutes_tree :: !(Maybe (Map Text [Text]))
, _hd_publication_date :: !(Maybe Text) , _hd_publication_date :: !(Maybe Text)
, _hd_publication_year :: !(Maybe Int) , _hd_publication_year :: !(Maybe Int)
, _hd_publication_month :: !(Maybe Int) , _hd_publication_month :: !(Maybe Int)
, _hd_publication_day :: !(Maybe Int) , _hd_publication_day :: !(Maybe Int)
, _hd_publication_hour :: !(Maybe Int) , _hd_publication_hour :: !(Maybe Int)
, _hd_publication_minute :: !(Maybe Int) , _hd_publication_minute :: !(Maybe Int)
, _hd_publication_second :: !(Maybe Int) , _hd_publication_second :: !(Maybe Int)
, _hd_language_iso2 :: !(Maybe Text)
, _hd_institutes_tree :: !(Maybe (Map Text [Text]))
} }
deriving (Show, Generic) deriving (Show, Generic)
...@@ -73,23 +75,25 @@ data StatusV3 = StatusV3 { statusV3_error :: !(Maybe Text) ...@@ -73,23 +75,25 @@ data StatusV3 = StatusV3 { statusV3_error :: !(Maybe Text)
} deriving (Show, Generic) } deriving (Show, Generic)
------------------------------------------------------------------------ ------------------------------------------------------------------------
data HyperdataDocumentV3 = HyperdataDocumentV3 { _hdv3_publication_day :: !(Maybe Int) data HyperdataDocumentV3 = HyperdataDocumentV3 { _hdv3_language_iso2 :: !(Maybe Text)
, _hdv3_language_iso2 :: !(Maybe Text)
, _hdv3_publication_second :: !(Maybe Int)
, _hdv3_publication_minute :: !(Maybe Int)
, _hdv3_publication_month :: !(Maybe Int)
, _hdv3_publication_hour :: !(Maybe Int)
, _hdv3_error :: !(Maybe Text) , _hdv3_error :: !(Maybe Text)
, _hdv3_language_iso3 :: !(Maybe Text) , _hdv3_language_iso3 :: !(Maybe Text)
, _hdv3_authors :: !(Maybe Text) , _hdv3_authors :: !(Maybe Text)
, _hdv3_publication_year :: !(Maybe Int)
, _hdv3_publication_date :: !(Maybe Text)
, _hdv3_language_name :: !(Maybe Text) , _hdv3_language_name :: !(Maybe Text)
, _hdv3_statuses :: !(Maybe [StatusV3]) , _hdv3_statuses :: !(Maybe [StatusV3])
, _hdv3_realdate_full_ :: !(Maybe Text) , _hdv3_realdate_full_ :: !(Maybe Text)
, _hdv3_source :: !(Maybe Text) , _hdv3_source :: !(Maybe Text)
, _hdv3_abstract :: !(Maybe Text) , _hdv3_abstract :: !(Maybe Text)
, _hdv3_title :: !(Maybe Text) , _hdv3_title :: !(Maybe Text)
, _hdv3_publication_date :: !(Maybe Text)
, _hdv3_publication_second :: !(Maybe Int)
, _hdv3_publication_minute :: !(Maybe Int)
, _hdv3_publication_hour :: !(Maybe Int)
, _hdv3_publication_day :: !(Maybe Int)
, _hdv3_publication_month :: !(Maybe Int)
, _hdv3_publication_year :: !(Maybe Int)
} deriving (Show, Generic) } deriving (Show, Generic)
......
...@@ -14,9 +14,12 @@ module Test.Core.Utils where ...@@ -14,9 +14,12 @@ module Test.Core.Utils where
import Data.Time (toGregorian, utctDay, utctDayTime) import Data.Time (toGregorian, utctDay, utctDayTime)
import Data.Time.LocalTime (TimeOfDay(..), timeOfDayToTime) import Data.Time.LocalTime (TimeOfDay(..), timeOfDayToTime)
import Gargantext.Core.Utils import Gargantext.Core.Utils
import Gargantext.Core.Utils.DateUtils (dateParts, parseFlexibleTime) import Gargantext.Core.Utils.DateUtils (dateParts, parseFlexibleTime, toUTCTime, toUTCTimeR)
import Gargantext.Prelude import Gargantext.Prelude
import Test.Hspec import Test.Hspec
import Test.Hspec.QuickCheck
import Test.Instances ()
-- | Core.Utils tests -- | Core.Utils tests
test :: Spec test :: Spec
...@@ -40,7 +43,11 @@ test = do ...@@ -40,7 +43,11 @@ test = do
it "simple list" $ nonemptyIntercalate "," ["x"] `shouldBe` "x" it "simple list" $ nonemptyIntercalate "," ["x"] `shouldBe` "x"
it "two-element list" $ nonemptyIntercalate "," ["x", "y"] `shouldBe` "x,y" it "two-element list" $ nonemptyIntercalate "," ["x", "y"] `shouldBe` "x,y"
it "with empty strings" $ nonemptyIntercalate "," ["a", "", "b", "", "c", ""] `shouldBe` "a,b,c" it "with empty strings" $ nonemptyIntercalate "," ["a", "", "b", "", "c", ""] `shouldBe` "a,b,c"
describe "DateUtils works" $ do
describe "DateUtils" $ do
describe "UTCTimeR works" $ do
prop "can convert to/from" $
\utcTimeR -> toUTCTimeR (toUTCTime utcTimeR) == utcTimeR
describe "parseFlexibleTime works" $ do describe "parseFlexibleTime works" $ do
it "ISO8601 format works 1" $ do it "ISO8601 format works 1" $ do
let parsed = parseFlexibleTime "2025-05-04T12:05:01.000Z" let parsed = parseFlexibleTime "2025-05-04T12:05:01.000Z"
...@@ -50,6 +57,14 @@ test = do ...@@ -50,6 +57,14 @@ test = do
let parsed = parseFlexibleTime "2025-05-04T12:05:01Z" let parsed = parseFlexibleTime "2025-05-04T12:05:01Z"
(toGregorian . utctDay) <$> parsed `shouldBe` (Just (2025, 5, 4)) (toGregorian . utctDay) <$> parsed `shouldBe` (Just (2025, 5, 4))
utctDayTime <$> parsed `shouldBe` (Just $ timeOfDayToTime $ TimeOfDay 12 5 1) utctDayTime <$> parsed `shouldBe` (Just $ timeOfDayToTime $ TimeOfDay 12 5 1)
it "'2025-07-20T01:00:13' format works" $ do
let parsed = parseFlexibleTime "2025-07-20T01:00:13"
(toGregorian . utctDay) <$> parsed `shouldBe` (Just (2025, 7, 20))
utctDayTime <$> parsed `shouldBe` (Just $ timeOfDayToTime $ TimeOfDay 1 0 13)
it "'2025-07-20 01:00:13' format works" $ do
let parsed = parseFlexibleTime "2025-07-20 01:00:13"
(toGregorian . utctDay) <$> parsed `shouldBe` (Just (2025, 7, 20))
utctDayTime <$> parsed `shouldBe` (Just $ timeOfDayToTime $ TimeOfDay 1 0 13)
it "'2025-07-20 01:00:13 UTC' format works" $ do it "'2025-07-20 01:00:13 UTC' format works" $ do
let parsed = parseFlexibleTime "2025-07-20 01:00:13 UTC" let parsed = parseFlexibleTime "2025-07-20 01:00:13 UTC"
(toGregorian . utctDay) <$> parsed `shouldBe` (Just (2025, 7, 20)) (toGregorian . utctDay) <$> parsed `shouldBe` (Just (2025, 7, 20))
......
...@@ -54,6 +54,7 @@ import Gargantext.Core.Text.Corpus.Query (RawQuery(..)) ...@@ -54,6 +54,7 @@ import Gargantext.Core.Text.Corpus.Query (RawQuery(..))
import Gargantext.Core.Types (TableResult) import Gargantext.Core.Types (TableResult)
import Gargantext.Core.Types.Individu qualified as Individu import Gargantext.Core.Types.Individu qualified as Individu
import Gargantext.Core.Types.Main (ListType(CandidateTerm, StopTerm, MapTerm)) import Gargantext.Core.Types.Main (ListType(CandidateTerm, StopTerm, MapTerm))
import Gargantext.Core.Utils.DateUtils (UTCTimeR(..))
import Gargantext.Core.Viz.Phylo qualified as Phylo import Gargantext.Core.Viz.Phylo qualified as Phylo
import Gargantext.Core.Worker.Jobs.Types (Job(..)) import Gargantext.Core.Worker.Jobs.Types (Job(..))
import Gargantext.Database.Admin.Types.Hyperdata qualified as Hyperdata import Gargantext.Database.Admin.Types.Hyperdata qualified as Hyperdata
...@@ -212,6 +213,22 @@ instance Arbitrary TableQuery where ...@@ -212,6 +213,22 @@ instance Arbitrary TableQuery where
instance Arbitrary UTCTimeR where
arbitrary = do
year <- arbitrary
month <- chooseInt (1, 12)
day <- if month `elem` [1, 3, 5, 7, 8, 10, 12]
then chooseInt (1, 31)
else if month == 2
then chooseInt (1, 28)
else chooseInt (1, 30)
hour <- chooseInt (0, 23)
minute <- chooseInt (0, 59)
sec <- chooseInt (0, 59)
pure $ UTCTimeR { .. }
-- phylo -- phylo
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment