Commit f3cb9626 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev-lts-16.26-upgrade' into dev-tree-reload

parents ba3cd903 8404a553
Pipeline #1313 failed with stage
from fpco/stack-build:lts-14.27
from fpco/stack-build:lts-16.26
RUN apt-get update && \
apt-get install -y git libigraph0-dev && \
......
name: gargantext
version: '0.0.2.2.1'
version: '0.0.2.3'
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -150,6 +150,7 @@ library:
- full-text-search
- fullstop
- graphviz
- hashable
- haskell-igraph
- hlcm
- hsparql
......@@ -188,6 +189,7 @@ library:
- product-profunctors
- profunctors
- protolude
- pretty-simple
- pureMD5
- quickcheck-instances
- rake
......
......@@ -19,16 +19,17 @@ module Gargantext.API.Metrics
where
import Control.Lens
import qualified Data.Map as Map
import Data.Text (Text)
import Data.Time (UTCTime)
import Servant
import Data.Vector (Vector)
import Gargantext.API.HashedResponse
import Gargantext.API.Ngrams.NgramsTree
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.Text.Metrics (Scored(..), {-normalizeGlobal,-} normalizeLocal)
import Gargantext.Core.Types (CorpusId, Limit, ListId, ListType(..))
import Gargantext.Core.Viz.Chart
import Gargantext.Core.Viz.Types
import Gargantext.Database.Action.Flow
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataList(..), hl_chart, hl_pie, hl_scatter, hl_tree)
import Gargantext.Database.Admin.Types.Metrics (ChartMetrics(..), Metric(..), Metrics(..))
......@@ -39,9 +40,8 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Core.Text.Metrics (Scored(..), {-normalizeGlobal,-} normalizeLocal)
import Gargantext.Core.Viz.Chart
import Gargantext.Core.Viz.Types
import Servant
import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Database.Action.Metrics as Metrics
-------------------------------------------------------------
......@@ -78,7 +78,7 @@ getScatter cId maybeListId tabType _maybeLimit = do
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let HyperdataList { _hl_scatter = scatterMap } = node ^. node_hyperdata
mChart = Map.lookup tabType scatterMap
mChart = HashMap.lookup tabType scatterMap
chart <- case mChart of
Just chart -> pure chart
......@@ -111,9 +111,9 @@ updateScatter' cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics cId maybeListId tabType maybeLimit
let
metrics = map (\(Scored t s1 s2) -> Metric t s1 s2 (listType t ngs'))
$ map normalizeLocal scores
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
metrics = fmap (\(Scored t s1 s2) -> Metric (unNgramsTerm t) s1 s2 (listType t ngs'))
$ fmap normalizeLocal scores
listType t m = maybe (panic errorMsg) fst $ HashMap.lookup t m
errorMsg = "API.Node.metrics: key absent"
listId <- case maybeListId of
......@@ -122,7 +122,7 @@ updateScatter' cId maybeListId tabType maybeLimit = do
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let hl = node ^. node_hyperdata
scatterMap = hl ^. hl_scatter
_ <- updateHyperdata listId $ hl { _hl_scatter = Map.insert tabType (Metrics metrics) scatterMap }
_ <- updateHyperdata listId $ hl { _hl_scatter = HashMap.insert tabType (Metrics metrics) scatterMap }
pure $ Metrics metrics
......@@ -172,7 +172,7 @@ getChart cId _start _end maybeListId tabType = do
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let chartMap = node ^. node_hyperdata ^. hl_chart
mChart = Map.lookup tabType chartMap
mChart = HashMap.lookup tabType chartMap
chart <- case mChart of
Just chart -> pure chart
......@@ -209,7 +209,7 @@ updateChart' cId maybeListId tabType _maybeLimit = do
let hl = node ^. node_hyperdata
chartMap = hl ^. hl_chart
h <- histoData cId
_ <- updateHyperdata listId $ hl { _hl_chart = Map.insert tabType (ChartMetrics h) chartMap }
_ <- updateHyperdata listId $ hl { _hl_chart = HashMap.insert tabType (ChartMetrics h) chartMap }
pure $ ChartMetrics h
......@@ -258,7 +258,7 @@ getPie cId _start _end maybeListId tabType = do
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let pieMap = node ^. node_hyperdata ^. hl_pie
mChart = Map.lookup tabType pieMap
mChart = HashMap.lookup tabType pieMap
chart <- case mChart of
Just chart -> pure chart
......@@ -296,7 +296,7 @@ updatePie' cId maybeListId tabType _maybeLimit = do
pieMap = hl ^. hl_pie
p <- chartData cId (ngramsTypeFromTabType tabType) MapTerm
_ <- updateHyperdata listId $ hl { _hl_pie = Map.insert tabType (ChartMetrics p) pieMap }
_ <- updateHyperdata listId $ hl { _hl_pie = HashMap.insert tabType (ChartMetrics p) pieMap }
pure $ ChartMetrics p
......@@ -317,7 +317,7 @@ type TreeApi = Summary " Tree API"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParamR "listType" ListType
:> Get '[JSON] (HashedResponse (ChartMetrics [NgramsTree]))
:> Get '[JSON] (HashedResponse (ChartMetrics (Vector NgramsTree)))
:<|> Summary "Tree Chart update"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
......@@ -341,7 +341,7 @@ getTree :: FlowCmdM env err m
-> Maybe ListId
-> TabType
-> ListType
-> m (HashedResponse (ChartMetrics [NgramsTree]))
-> m (HashedResponse (ChartMetrics (Vector NgramsTree)))
getTree cId _start _end maybeListId tabType listType = do
listId <- case maybeListId of
Just lid -> pure lid
......@@ -349,7 +349,7 @@ getTree cId _start _end maybeListId tabType listType = do
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let treeMap = node ^. node_hyperdata ^. hl_tree
mChart = Map.lookup tabType treeMap
mChart = HashMap.lookup tabType treeMap
chart <- case mChart of
Just chart -> pure chart
......@@ -377,17 +377,17 @@ updateTree' :: FlowCmdM env err m =>
-> Maybe ListId
-> TabType
-> ListType
-> m (ChartMetrics [NgramsTree])
-> m (ChartMetrics (Vector NgramsTree))
updateTree' cId maybeListId tabType listType = do
listId <- case maybeListId of
Just lid -> pure lid
Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let hl = node ^. node_hyperdata
treeMap = hl ^. hl_tree
let hl = node ^. node_hyperdata
treeMap = hl ^. hl_tree
t <- treeData cId (ngramsTypeFromTabType tabType) listType
_ <- updateHyperdata listId $ hl { _hl_tree = Map.insert tabType (ChartMetrics t) treeMap }
_ <- updateHyperdata listId $ hl { _hl_tree = HashMap.insert tabType (ChartMetrics t) treeMap }
pure $ ChartMetrics t
......
......@@ -535,7 +535,7 @@ getTableNgrams _nType nId tabType listId limit_ offset
setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
setScores False table = pure table
setScores True table = do
let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
let ngrams_terms = table ^.. each . ne_ngrams
t1 <- getTime'
occurrences <- getOccByNgramsOnlyFast' nId
listId
......@@ -552,7 +552,7 @@ getTableNgrams _nType nId tabType listId limit_ offset
ngrams_terms
-}
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
pure $ table & each %~ setOcc
---------------------------------------
......@@ -594,13 +594,13 @@ scoresRecomputeTableNgrams nId tabType listId = do
setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
setScores table = do
let ngrams_terms = unNgramsTerm <$> (table ^.. each . ne_ngrams)
let ngrams_terms = table ^.. each . ne_ngrams
occurrences <- getOccByNgramsOnlyFast' nId
listId
ngramsType
ngrams_terms
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (unNgramsTerm (ne ^. ne_ngrams)) . _Just) occurrences
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
pure $ table & each %~ setOcc
......
......@@ -15,23 +15,21 @@ module Gargantext.API.Ngrams.NgramsTree
where
import Data.Aeson.TH (deriveJSON)
import Data.Text (Text)
import Data.Tree
import Data.HashMap.Strict (HashMap)
import Data.Maybe (catMaybes)
import Data.Map (Map)
import Data.Set (Set)
import Data.Swagger
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
import Data.Text (Text)
import Data.Tree
import GHC.Generics (Generic)
import Test.QuickCheck
import Gargantext.Prelude
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (ListType(..), NodeId)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Prelude
import Test.QuickCheck
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Set as Set
type Children = Text
type Root = Text
......@@ -42,8 +40,8 @@ data NgramsTree = NgramsTree { mt_label :: Text
}
deriving (Generic, Show)
toNgramsTree :: Tree (Text,Double) -> NgramsTree
toNgramsTree (Node (l,v) xs) = NgramsTree l v (map toNgramsTree xs)
toNgramsTree :: Tree (NgramsTerm,Double) -> NgramsTree
toNgramsTree (Node (NgramsTerm l,v) xs) = NgramsTree l v (map toNgramsTree xs)
deriveJSON (unPrefix "mt_") ''NgramsTree
......@@ -53,24 +51,27 @@ instance Arbitrary NgramsTree
where
arbitrary = NgramsTree <$> arbitrary <*> arbitrary <*> arbitrary
toTree :: ListType -> Map Text (Set NodeId) -> Map Text NgramsRepoElement -> [NgramsTree]
toTree :: ListType
-> HashMap NgramsTerm (Set NodeId)
-> HashMap NgramsTerm NgramsRepoElement
-> [NgramsTree]
toTree lt vs m = map toNgramsTree $ unfoldForest buildNode roots
where
buildNode r = maybe ((r, value r),[])
(\x -> ((r, value r), unNgramsTerm <$> (mSetToList $ _nre_children x)))
(Map.lookup r m)
(\x -> ((r, value r), mSetToList $ _nre_children x))
(HashMap.lookup r m)
value l = maybe 0 (fromIntegral . Set.size) $ Map.lookup l vs
value l = maybe 0 (fromIntegral . Set.size) $ HashMap.lookup l vs
rootsCandidates :: [NgramsTerm]
rootsCandidates = catMaybes
$ List.nub
$ map (\(c, c') -> case _nre_root c' of
Nothing -> Just $ NgramsTerm c
_ -> _nre_root c') (Map.toList m)
Nothing -> Just c
_ -> _nre_root c') (HashMap.toList m)
roots = map fst
$ filter (\(_,l) -> l == lt)
$ catMaybes
$ map (\c -> (,) <$> Just c <*> (_nre_list <$> Map.lookup c m))
$ (unNgramsTerm <$> rootsCandidates)
$ map (\c -> (,) <$> Just c <*> (_nre_list <$> HashMap.lookup c m))
$ rootsCandidates
......@@ -9,29 +9,30 @@ Portability : POSIX
-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.API.Ngrams.Tools
where
import Control.Concurrent
import Control.Lens (_Just, (^.), at, view)
import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
import Control.Monad.Reader
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Set (Set)
import Data.Text (Text)
import Data.Validity
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (ListType(..), NodeId, ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
mergeNgramsElement _neOld neNew = neNew
type RootTerm = Text
type RootTerm = NgramsTerm
getRepo :: RepoCmdM env err m => m NgramsRepo
getRepo = do
......@@ -39,87 +40,105 @@ getRepo = do
liftBase $ readMVar v
listNgramsFromRepo :: [ListId] -> NgramsType
-> NgramsRepo -> Map Text NgramsRepoElement
listNgramsFromRepo nodeIds ngramsType repo = Map.mapKeys unNgramsTerm ngrams
-> NgramsRepo -> HashMap NgramsTerm NgramsRepoElement
listNgramsFromRepo nodeIds ngramsType repo = ngrams
where
ngramsMap = repo ^. r_state . at ngramsType . _Just
ngrams = Map.unionsWith mergeNgramsElement
-- TODO HashMap linked
ngrams = HM.fromList $ Map.toList $ Map.unionsWith mergeNgramsElement
[ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
-- TODO-ACCESS: We want to do the security check before entering here.
-- Add a static capability parameter would be nice.
-- Ideally this is the access to `repoVar` which needs to
-- be properly guarded.
getListNgrams :: RepoCmdM env err m
=> [ListId] -> NgramsType
-> m (Map Text NgramsRepoElement)
-> m (HashMap NgramsTerm NgramsRepoElement)
getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
getTermsWith :: (RepoCmdM env err m, Ord a)
=> (Text -> a ) -> [ListId]
getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
=> (NgramsTerm -> a) -> [ListId]
-> NgramsType -> ListType
-> m (Map a [a])
getTermsWith f ls ngt lt = Map.fromListWith (<>)
<$> map (toTreeWith f)
<$> Map.toList
<$> Map.filter (\f' -> (fst f') == lt)
-> m (HashMap a [a])
getTermsWith f ls ngt lt = HM.fromListWith (<>)
<$> map toTreeWith
<$> HM.toList
<$> HM.filter (\f' -> fst f' == lt)
<$> mapTermListRoot ls ngt
<$> getRepo
where
toTreeWith f'' (t, (_lt, maybeRoot)) = case maybeRoot of
Nothing -> (f'' t, [])
Just r -> (f'' r, map f'' [t])
toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
Nothing -> (f t, [])
Just r -> (f r, [f t])
mapTermListRoot :: [ListId]
-> NgramsType
-> NgramsRepo
-> Map Text (ListType, (Maybe Text))
-> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
mapTermListRoot nodeIds ngramsType repo =
Map.fromList [ (t, (_nre_list nre, unNgramsTerm <$> _nre_root nre))
| (t, nre) <- Map.toList ngrams
]
where ngrams = listNgramsFromRepo nodeIds ngramsType repo
(\nre -> (_nre_list nre, _nre_root nre))
<$> listNgramsFromRepo nodeIds ngramsType repo
filterListWithRootHashMap :: ListType
-> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
-> HashMap NgramsTerm (Maybe RootTerm)
filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
where
isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> l == lt
Just r -> case HM.lookup r m of
Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> unNgramsTerm r
Just (l',_) -> l' == lt
filterListWithRoot :: ListType
-> Map Text (ListType, Maybe Text)
-> Map Text (Maybe RootTerm)
filterListWithRoot lt m = Map.fromList
$ map (\(t,(_,r)) -> (t,r))
$ filter isMapTerm (Map.toList m)
-> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
-> HashMap NgramsTerm (Maybe RootTerm)
filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
where
isMapTerm (_t,(l, maybeRoot)) = case maybeRoot of
isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> l == lt
Just r -> case Map.lookup r m of
Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r
Just r -> case HM.lookup r m of
Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> unNgramsTerm r
Just (l',_) -> l' == lt
groupNodesByNgrams :: Map Text (Maybe RootTerm)
-> Map Text (Set NodeId)
-> Map Text (Set NodeId)
groupNodesByNgrams syn occs = Map.fromListWith (<>) occs'
groupNodesByNgrams :: ( At root_map
, Index root_map ~ NgramsTerm
, IxValue root_map ~ Maybe RootTerm
)
=> root_map
-> HashMap NgramsTerm (Set NodeId)
-> HashMap NgramsTerm (Set NodeId)
groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
where
occs' = map toSyn (Map.toList occs)
toSyn (t,ns) = case Map.lookup t syn of
Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> t
occs' = map toSyn (HM.toList occs)
toSyn (t,ns) = case syn ^. at t of
Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
Just r -> case r of
Nothing -> (t, ns)
Just r' -> (r',ns)
data Diagonal = Diagonal Bool
getCoocByNgrams :: Diagonal -> Map Text (Set NodeId) -> Map (Text, Text) Int
getCoocByNgrams :: Diagonal -> HashMap NgramsTerm (Set NodeId) -> HashMap (NgramsTerm, NgramsTerm) Int
getCoocByNgrams = getCoocByNgrams' identity
getCoocByNgrams' :: (Ord a, Ord c) => (b -> Set c) -> Diagonal -> Map a b -> Map (a, a) Int
getCoocByNgrams' :: (Hashable a, Ord a, Ord c) => (b -> Set c) -> Diagonal -> HashMap a b -> HashMap (a, a) Int
getCoocByNgrams' f (Diagonal diag) m =
Map.fromList [( (t1,t2)
, maybe 0 Set.size $ Set.intersection
<$> (fmap f $ Map.lookup t1 m)
<*> (fmap f $ Map.lookup t2 m)
) | (t1,t2) <- case diag of
True -> [ (x,y) | x <- Map.keys m, y <- Map.keys m, x <= y]
False -> listToCombi identity (Map.keys m)
]
HM.fromList [( (t1,t2)
, maybe 0 Set.size $ Set.intersection
<$> (fmap f $ HM.lookup t1 m)
<*> (fmap f $ HM.lookup t2 m)
)
| (t1,t2) <- if diag then
[ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
-- more efficient to enumerate all the y <= x.
else
listToCombi identity ks
]
where ks = HM.keys m
......@@ -19,6 +19,7 @@ import Data.Aeson hiding ((.=))
import Data.Aeson.TH (deriveJSON)
import Data.Either (Either(..))
import Data.Foldable
import Data.Hashable (Hashable)
import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
import qualified Data.List as List
import Data.Map.Strict (Map)
......@@ -46,6 +47,7 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Protolude (maybeToEither)
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..), ListId, NodeId)
import Gargantext.Core.Types (TODO)
......@@ -60,6 +62,9 @@ data TabType = Docs | Trash | MoreFav | MoreTrash
| Contacts
deriving (Bounded, Enum, Eq, Generic, Ord, Show)
instance Hashable TabType
instance FromHttpApiData TabType
where
parseUrlPiece "Docs" = pure Docs
......@@ -120,7 +125,13 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema)
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable)
instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t
instance Monoid NgramsTerm where
mempty = NgramsTerm ""
instance FromJSONKey NgramsTerm where
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
......@@ -342,11 +353,13 @@ isRem = (== remPatch)
type PatchMap = PM.PatchMap
newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
deriving (Eq, Show, Generic, Validity, Semigroup, Monoid, Group,
Transformable, Composable)
unPatchMSet :: PatchMSet a -> PatchMap a AddRem
unPatchMSet (PatchMSet a) = a
type ConflictResolutionPatchMSet a = a -> ConflictResolutionReplace (Maybe ())
type instance ConflictResolution (PatchMSet a) = ConflictResolutionPatchMSet a
......@@ -644,7 +657,7 @@ data Repo s p = Repo
, _r_history :: ![p]
-- first patch in the list is the most recent
}
deriving (Generic)
deriving (Generic, Show)
instance (FromJSON s, FromJSON p) => FromJSON (Repo s p) where
parseJSON = genericParseJSON $ unPrefix "_r_"
......
......@@ -16,11 +16,9 @@ Main exports of Gargantext:
module Gargantext.API.Node.Corpus.Export
where
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Data.Set (Set)
import Data.Text (Text)
import Gargantext.API.Node.Corpus.Export.Types
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
......@@ -41,6 +39,7 @@ import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.HashMap.Strict as HashMap
--------------------------------------------------
-- | Hashes are ordered by Set
......@@ -62,13 +61,13 @@ getCorpus cId lId nt' = do
ngs <- getNodeNgrams cId lId nt repo
let -- uniqId is hash computed already for each document imported in database
r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (hash b)) (d_hash a b)
) ns ngs
) ns (Map.map (Set.map unNgramsTerm) ngs)
where
d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
, hash b
]
, hash b
]
pure $ Corpus (Map.elems r) (hash $ List.map _d_hash
$ Map.elems r
$ Map.elems r
)
getNodeNgrams :: HasNodeError err
......@@ -76,7 +75,7 @@ getNodeNgrams :: HasNodeError err
-> Maybe ListId
-> NgramsType
-> NgramsRepo
-> Cmd err (Map NodeId (Set Text))
-> Cmd err (Map NodeId (Set NgramsTerm))
getNodeNgrams cId lId' nt repo = do
lId <- case lId' of
Nothing -> defaultList cId
......@@ -84,9 +83,10 @@ getNodeNgrams cId lId' nt repo = do
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
-- TODO HashMap
r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
pure r
-- TODO
-- Exports List
-- Version number of the list
\ No newline at end of file
-- Version number of the list
......@@ -20,7 +20,7 @@ import qualified Data.List as DL
import qualified Data.Vector as DV
import qualified Data.Map as M
import Gargantext.Core.Text.Metrics.Freq as F
import Gargantext.Core.Text.Metrics.Utils as Utils
import Gargantext.Core.Text.Corpus.Parsers.CSV as CSV
data School = School { school_shortName :: Text
......@@ -115,7 +115,7 @@ publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSc
$ DL.reverse
$ DL.sortOn snd
$ M.toList
$ F.freq
$ Utils.freq
$ DL.concat
$ DV.toList
$ DV.map (\n -> splitOn ( ", ") (csvHal_instStructId_i n) )
......
This diff is collapsed.
......@@ -19,61 +19,49 @@ module Gargantext.Core.Text.List.Group
where
import Control.Lens (view)
import Data.Map (Map)
import Data.HashMap.Strict (HashMap)
import Data.Maybe (fromMaybe)
import Data.Monoid (Monoid, mempty)
import Data.Text (Text)
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Group.WithStem
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.HashMap.Strict as HashMap
------------------------------------------------------------------------
-- | TODO add group with stemming
toGroupedTree :: (Ord a, Monoid a, GroupWithStem a)
=> GroupParams
-> FlowCont Text FlowListScores
-> Map Text a
-- -> Map Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores a)
toGroupedTree groupParams flc scores = {-view flc_scores-} flow2
toGroupedTree :: (Ord a, Monoid a)
=> FlowCont NgramsTerm FlowListScores
-> HashMap NgramsTerm a
-> FlowCont NgramsTerm (GroupedTreeScores a)
toGroupedTree flc scores =
groupWithScores' flc scoring
where
flow1 = groupWithScores' flc scoring
scoring t = fromMaybe mempty $ Map.lookup t scores
flow2 = case (view flc_cont flow1) == Map.empty of
True -> flow1
False -> groupWithStem' groupParams flow1
scoring t = fromMaybe mempty $ HashMap.lookup t scores
------------------------------------------------------------------------
setScoresWithMap :: (Ord a, Ord b, Monoid b) => Map Text b
-> Map Text (GroupedTreeScores a)
-> Map Text (GroupedTreeScores b)
setScoresWithMap :: (Ord a, Ord b, Monoid b) => HashMap NgramsTerm b
-> HashMap NgramsTerm (GroupedTreeScores a)
-> HashMap NgramsTerm (GroupedTreeScores b)
setScoresWithMap m = setScoresWith (score m)
where
score m' t = case Map.lookup t m' of
score m' t = case HashMap.lookup t m' of
Nothing -> mempty
Just r -> r
setScoresWith :: (Ord a, Ord b)
=> (Text -> b)
-> Map Text (GroupedTreeScores a)
-> Map Text (GroupedTreeScores b)
=> (NgramsTerm -> b)
-> HashMap NgramsTerm (GroupedTreeScores a)
-> HashMap NgramsTerm (GroupedTreeScores b)
{-
-- | This Type level lenses solution does not work
setScoresWith f = Map.mapWithKey (\k v -> over gts'_children (setScoresWith f)
$ set gts'_score (f k) v
)
-}
setScoresWith f = Map.mapWithKey (\k v -> v { _gts'_score = f k
setScoresWith f = HashMap.mapWithKey (\k v -> v { _gts'_score = f k
, _gts'_children = setScoresWith f
$ view gts'_children v
}
)
------------------------------------------------------------------------
......@@ -17,29 +17,28 @@ module Gargantext.Core.Text.List.Group.Prelude
where
import Control.Lens (makeLenses, view, set, over)
import Data.HashMap.Strict (HashMap)
import Data.Maybe (fromMaybe)
import Data.Monoid
import Data.Semigroup
import Data.Set (Set)
import Data.Text (Text)
import Data.Maybe (fromMaybe)
import Data.Map (Map)
import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList)
import Gargantext.Core.Text.Metrics (Scored(..), scored_genInc)
import Gargantext.Core.Types (ListType(..))
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.Metrics (Scored(..), scored_genInc)
import Gargantext.API.Ngrams.Types (NgramsElement, mkNgramsElement, NgramsTerm(..), RootParent(..), mSetFromList)
import Gargantext.Prelude
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Set as Set
type Stem = Text
type Stem = NgramsTerm
------------------------------------------------------------------------
-- | Main Types to group With Scores but preserving Tree dependencies
-- Therefore there is a need of Tree of GroupedTextScores
-- to target continuation type for the flow (FlowCont Text GroupedTreeScores)
data GroupedTreeScores score =
GroupedTreeScores { _gts'_listType :: !(Maybe ListType)
, _gts'_children :: !(Map Text (GroupedTreeScores score))
, _gts'_children :: !(HashMap NgramsTerm (GroupedTreeScores score))
, _gts'_score :: !score
} deriving (Show, Ord, Eq)
......@@ -76,7 +75,7 @@ class ToNgramsElement a where
toNgramsElement :: a -> [NgramsElement]
class HasTerms a where
hasTerms :: a -> Set Text
hasTerms :: a -> Set NgramsTerm
------------------------------------------------------------------------
-- | Instances declartion for (GroupedTreeScores a)
......@@ -87,8 +86,8 @@ instance SetListType (GroupedTreeScores a) where
setListType lt g = over gts'_children (setListType lt)
$ set gts'_listType lt g
instance SetListType (Map Text (GroupedTreeScores a)) where
setListType lt = Map.map (set gts'_listType lt)
instance SetListType (HashMap NgramsTerm (GroupedTreeScores a)) where
setListType lt = HashMap.map (set gts'_listType lt)
------
......@@ -99,7 +98,7 @@ instance ViewScores (GroupedTreeScores Double) Double where
viewScores g = sum $ parent : children
where
parent = view gts'_score g
children = map viewScores $ Map.elems $ view gts'_children g
children = map viewScores $ HashMap.elems $ view gts'_children g
instance ViewScore (GroupedTreeScores (Set NodeId)) Int where
......@@ -109,57 +108,55 @@ instance ViewScores (GroupedTreeScores (Set NodeId)) (Set NodeId) where
viewScores g = Set.unions $ parent : children
where
parent = view gts'_score g
children = map viewScores $ Map.elems $ view gts'_children g
children = map viewScores $ HashMap.elems $ view gts'_children g
instance ViewScore (GroupedTreeScores (Scored Text)) Double where
instance ViewScore (GroupedTreeScores (Scored NgramsTerm)) Double where
viewScore = view (gts'_score . scored_genInc)
------
instance HasTerms (Map Text (GroupedTreeScores a)) where
hasTerms = Set.unions . (map hasTerms) . Map.toList
instance HasTerms (HashMap NgramsTerm (GroupedTreeScores a)) where
hasTerms = Set.unions . (map hasTerms) . HashMap.toList
instance HasTerms (Text, GroupedTreeScores a) where
instance HasTerms (NgramsTerm, GroupedTreeScores a) where
hasTerms (t, g) = Set.singleton t <> children
where
children = Set.unions
$ map hasTerms
$ Map.toList
$ HashMap.toList
$ view gts'_children g
------
instance ToNgramsElement (Map Text (GroupedTreeScores a)) where
toNgramsElement = List.concat . (map toNgramsElement) . Map.toList
instance ToNgramsElement (HashMap NgramsTerm (GroupedTreeScores a)) where
toNgramsElement = List.concat . (map toNgramsElement) . HashMap.toList
instance ToNgramsElement (Text, GroupedTreeScores a) where
instance ToNgramsElement (NgramsTerm, GroupedTreeScores a) where
toNgramsElement (t, gts) = parent : children
where
parent = mkNgramsElement (NgramsTerm t)
parent = mkNgramsElement t
(fromMaybe CandidateTerm $ viewListType gts)
Nothing
(mSetFromList $ map NgramsTerm
$ Map.keys
(mSetFromList $ HashMap.keys
$ view gts'_children gts
)
children = List.concat
$ map (childrenWith (NgramsTerm t) (NgramsTerm t) )
$ Map.toList
$ map (childrenWith t t)
$ HashMap.toList
$ view gts'_children gts
childrenWith root parent' (t', gts') = parent'' : children'
where
parent'' = mkNgramsElement (NgramsTerm t')
parent'' = mkNgramsElement t'
(fromMaybe CandidateTerm $ viewListType gts')
(Just $ RootParent root parent')
(mSetFromList $ map NgramsTerm
$ Map.keys
(mSetFromList $ HashMap.keys
$ view gts'_children gts'
)
children' = List.concat
$ map (childrenWith root (NgramsTerm t') )
$ Map.toList
$ map (childrenWith root t' )
$ HashMap.toList
$ view gts'_children gts'
......@@ -16,49 +16,48 @@ module Gargantext.Core.Text.List.Group.WithScores
where
import Control.Lens (view, set, over)
import Data.Semigroup
import Data.Map (Map)
import Data.Monoid (Monoid, mempty)
import Data.HashMap.Strict (HashMap)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Gargantext.Core.Text.List.Social.Prelude
import Data.Monoid (Monoid, mempty)
import Data.Semigroup
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.HashMap.Strict as HashMap
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Main function
groupWithScores' :: (Eq a, Ord a, Monoid a)
=> FlowCont Text FlowListScores
-> (Text -> a) -- Map Text (a)
-> FlowCont Text (GroupedTreeScores (a))
=> FlowCont NgramsTerm FlowListScores
-> (NgramsTerm -> a)
-> FlowCont NgramsTerm (GroupedTreeScores a)
groupWithScores' flc scores = FlowCont groups orphans
where
-- parent/child relation is inherited from social lists
groups = toGroupedTree
$ toMapMaybeParent scores
$ view flc_scores flc
$ (view flc_scores flc <> view flc_cont flc)
-- orphans should be filtered already then becomes empty
orphans = mempty
-- orphans should be filtered already
orphans = toGroupedTree
$ toMapMaybeParent scores
$ view flc_cont flc
------------------------------------------------------------------------
toMapMaybeParent :: (Eq a, Ord a, Monoid a)
=> (Text -> a)
-> Map Text FlowListScores
-> Map (Maybe Parent) (Map Text (GroupedTreeScores (a)))
toMapMaybeParent f = Map.fromListWith (<>)
=> (NgramsTerm -> a)
-> HashMap NgramsTerm FlowListScores
-> HashMap (Maybe Parent) (HashMap NgramsTerm (GroupedTreeScores a))
toMapMaybeParent f = HashMap.fromListWith (<>)
. (map (fromScores'' f))
. Map.toList
. HashMap.toList
fromScores'' :: (Eq a, Ord a, Monoid a)
=> (Text -> a)
-> (Text, FlowListScores)
-> (Maybe Parent, Map Text (GroupedTreeScores (a)))
=> (NgramsTerm -> a)
-> (NgramsTerm, FlowListScores)
-> (Maybe Parent, HashMap NgramsTerm (GroupedTreeScores a))
fromScores'' f' (t, fs) = ( maybeParent
, Map.fromList [( t, set gts'_score (f' t)
, HashMap.fromList [( t, set gts'_score (f' t)
$ set gts'_listType maybeList mempty
)]
)
......@@ -66,32 +65,27 @@ fromScores'' f' (t, fs) = ( maybeParent
maybeParent = keyWithMaxValue $ view fls_parents fs
maybeList = keyWithMaxValue $ view fls_listType fs
------------------------------------------------------------------------
toGroupedTree :: Eq a
=> Map (Maybe Parent) (Map Text (GroupedTreeScores (a)))
-> Map Parent (GroupedTreeScores (a))
toGroupedTree m = case Map.lookup Nothing m of
=> HashMap (Maybe Parent) (HashMap NgramsTerm (GroupedTreeScores a))
-> HashMap Parent (GroupedTreeScores a)
toGroupedTree m = case HashMap.lookup Nothing m of
Nothing -> mempty
Just m' -> toGroupedTree' m m'
toGroupedTree' :: Eq a => Map (Maybe Parent) (Map Text (GroupedTreeScores (a)))
-> (Map Text (GroupedTreeScores (a)))
-> Map Parent (GroupedTreeScores (a))
toGroupedTree' :: Eq a => HashMap (Maybe Parent) (HashMap NgramsTerm (GroupedTreeScores a))
-> (HashMap NgramsTerm (GroupedTreeScores a))
-> HashMap Parent (GroupedTreeScores a)
toGroupedTree' m notEmpty
| notEmpty == mempty = mempty
| otherwise = Map.mapWithKey (addGroup m) notEmpty
| otherwise = HashMap.mapWithKey (addGroup m) notEmpty
where
addGroup m' k v = over gts'_children ( (toGroupedTree' m')
. (Map.union ( fromMaybe mempty
$ Map.lookup (Just k) m'
. (HashMap.union ( fromMaybe mempty
$ HashMap.lookup (Just k) m'
)
)
)
v
......@@ -17,20 +17,30 @@ Portability : POSIX
module Gargantext.Core.Text.List.Group.WithStem
where
import Control.Lens (view, over)
import Data.Set (Set)
import Data.HashSet (HashSet)
import Data.Map (Map)
import Data.Monoid (mempty)
import Data.Text (Text)
import Data.Maybe (catMaybes)
import Gargantext.API.Ngrams.Types
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Patch
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Text as Text
import qualified Data.HashSet as Set
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Map.Strict.Patch as PatchMap
import qualified Data.Patch.Class as Patch (Replace(..))
import qualified Data.Text as Text
------------------------------------------------------------------------
addScoreStem :: GroupParams
-> HashSet NgramsTerm
-> FlowCont NgramsTerm FlowListScores
-> FlowCont NgramsTerm FlowListScores
addScoreStem groupParams ngrams fl = foldl' addScorePatch fl
$ stemPatches groupParams ngrams
------------------------------------------------------------------------
-- | Main Types
......@@ -49,177 +59,55 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
| GroupIdentity
deriving (Eq)
------------------------------------------------------------------------
class GroupWithStem a where
groupWithStem' :: GroupParams
-> FlowCont Text (GroupedTreeScores a)
-> FlowCont Text (GroupedTreeScores a)
-- TODO factorize groupWithStem_*
instance GroupWithStem (Set NodeId) where
groupWithStem' = groupWithStem_SetNodeId
instance GroupWithStem Double where
groupWithStem' = groupWithStem_Double
------------------------------------------------------------------------
groupWith :: GroupParams
-> Text
-> Text
-> NgramsTerm
-> NgramsTerm
groupWith GroupIdentity = identity
groupWith (GroupParams l _m _n _) =
Text.intercalate " "
NgramsTerm
. Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
-- . (List.filter (\t -> Text.length t > m))
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------
groupWithStem_SetNodeId :: GroupParams
-> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId))
groupWithStem_SetNodeId g flc
| g == GroupIdentity = FlowCont ( (<>)
(view flc_scores flc)
(view flc_cont flc)
) mempty
| otherwise = mergeWith (groupWith g) flc
groupWithStem_Double :: GroupParams
-> FlowCont Text (GroupedTreeScores Double)
-> FlowCont Text (GroupedTreeScores Double)
groupWithStem_Double g flc
| g == GroupIdentity = FlowCont ( (<>)
(view flc_scores flc)
(view flc_cont flc)
) mempty
| otherwise = mergeWith_Double (groupWith g) flc
-- | MergeWith : with stem, we always have an answer
-- if Maybe lems then we should add it to continuation
mergeWith :: (Text -> Text)
-> FlowCont Text (GroupedTreeScores (Set NodeId))
-> FlowCont Text (GroupedTreeScores (Set NodeId))
mergeWith fun flc = FlowCont scores mempty
. unNgramsTerm
--------------------------------------------------------------------
stemPatches :: GroupParams
-> HashSet NgramsTerm
-> [(NgramsTerm, NgramsPatch)]
stemPatches groupParams = patches
. Map.fromListWith (<>)
. map (\ng -> ( groupWith groupParams ng
, Set.singleton ng)
)
. Set.toList
-- | For now all NgramsTerm which have same stem
-- are grouped together
-- Parent is taken arbitrarly for now (TODO use a score like occ)
patches :: Map Stem (HashSet NgramsTerm)
-> [(NgramsTerm, NgramsPatch)]
patches = catMaybes . map patch . Map.elems
patch :: HashSet NgramsTerm
-> Maybe (NgramsTerm, NgramsPatch)
patch s = case Set.size s > 1 of
False -> Nothing
True -> do
let ngrams = Set.toList s
parent <- headMay ngrams
let children = List.tail ngrams
pure (parent, toNgramsPatch children)
toNgramsPatch :: [NgramsTerm] -> NgramsPatch
toNgramsPatch children = NgramsPatch children' Patch.Keep
where
scores :: Map Text (GroupedTreeScores (Set NodeId))
scores = foldl' (alter (mapStems scores')) scores' cont'
where
scores' = view flc_scores flc
cont' = Map.toList $ view flc_cont flc
-- TODO insert at the right place in group hierarchy
-- adding as child of the parent for now
alter :: Map Stem Text
-> Map Text (GroupedTreeScores (Set NodeId))
-> (Text, GroupedTreeScores (Set NodeId))
-> Map Text (GroupedTreeScores (Set NodeId))
alter st target (t,g) = case Map.lookup t st of
Nothing -> Map.alter (alter' (t,g)) t target
Just t' -> Map.alter (alter' (t,g)) t' target
alter' (_t,g) Nothing = Just g
alter' ( t,g) (Just g') = Just $ over gts'_children
( Map.union (Map.singleton t g))
g'
mapStems :: Map Text (GroupedTreeScores (Set NodeId))
-> Map Stem Text
mapStems = (Map.fromListWith (<>)) . List.concat . (map mapStem) . Map.toList
mapStem :: (Text, GroupedTreeScores (Set NodeId))
-> [(Stem, Text)]
mapStem (s,g) = parent : children
where
parent = (fun s, s)
children = List.concat $ map mapStem (Map.toList $ view gts'_children g)
-- | MergeWith : with stem, we always have an answer
-- if Maybe lems then we should add it to continuation
mergeWith_Double :: (Text -> Text)
-> FlowCont Text (GroupedTreeScores Double)
-> FlowCont Text (GroupedTreeScores Double)
mergeWith_Double fun flc = FlowCont scores mempty
where
scores :: Map Text (GroupedTreeScores Double)
scores = foldl' (alter (mapStems scores')) scores' cont'
where
scores' = view flc_scores flc
cont' = Map.toList $ view flc_cont flc
-- TODO insert at the right place in group hierarchy
-- adding as child of the parent for now
alter :: Map Stem Text
-> Map Text (GroupedTreeScores Double)
-> (Text, GroupedTreeScores Double)
-> Map Text (GroupedTreeScores Double)
alter st target (t,g) = case Map.lookup t st of
Nothing -> Map.alter (alter' (t,g)) t target
Just t' -> Map.alter (alter' (t,g)) t' target
alter' (_t,g) Nothing = Just g
alter' ( t,g) (Just g') = Just $ over gts'_children
( Map.union (Map.singleton t g))
g'
mapStems :: Map Text (GroupedTreeScores Double)
-> Map Stem Text
mapStems = (Map.fromListWith (<>)) . List.concat . (map mapStem) . Map.toList
mapStem :: (Text, GroupedTreeScores Double)
-> [(Stem, Text)]
mapStem (s,g) = parent : children
where
parent = (fun s, s)
children = List.concat $ map mapStem (Map.toList $ view gts'_children g)
{-
-- | TODO fixme
mergeWith_a :: (Text -> Text)
-> FlowCont Text (GroupedTreeScores a)
-> FlowCont Text (GroupedTreeScores a)
mergeWith_a fun flc = FlowCont scores mempty
where
scores :: Map Text (GroupedTreeScores a)
scores = foldl' (alter (mapStems scores')) scores' cont'
where
scores' = view flc_scores flc
cont' = Map.toList $ _flc_cont flc
-- TODO insert at the right place in group hierarchy
-- adding as child of the parent for now
alter :: Map Stem Text
-> Map Text (GroupedTreeScores a)
-> (Text, GroupedTreeScores a)
-> Map Text (GroupedTreeScores a)
alter st target (t,g) = case Map.lookup t st of
Nothing -> Map.alter (alter' (t,g)) t target
Just t' -> Map.alter (alter' (t,g)) t' target
alter' (_t,g) Nothing = Just g
alter' ( t,g) (Just g') = Just $ over gts'_children
( Map.union (Map.singleton t g))
g'
mapStems :: Map Text (GroupedTreeScores a)
-> Map Stem Text
mapStems = (Map.fromListWith (<>)) . List.concat . (map mapStem) . Map.toList
mapStem :: (Text, GroupedTreeScores a)
-> [(Stem, Text)]
mapStem (s,g) = parent : children
where
parent = (fun s, s)
children = List.concat $ map mapStem (Map.toList $ view gts'_children g)
-}
children' :: PatchMSet NgramsTerm
children' = PatchMSet
$ fst
$ PatchMap.fromList
$ List.zip children (List.cycle [addPatch])
{-|
Module : Gargantext.Core.Text.List.Merge
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Merge
where
import Control.Lens (view)
import Data.Map (Map)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.Prelude
import Data.Map.Strict.Patch hiding (PatchMap)
type List = Map NgramsTerm NgramsRepoElement
type Patch = PatchMap NgramsTerm (Replace (Maybe NgramsRepoElement))
-- Question: which version of Patching increment is using the FrontEnd ?
diffList :: Versioned List -> Versioned List -> Versioned Patch
diffList l1 l2 = Versioned (1 + view v_version l1)
(diff (view v_data l1) (view v_data l2))
-- | TODO
{-
commit :: ListId -> NgramsType -> Versioned Patch -> List -> List
commit = undefined
-}
......@@ -11,13 +11,15 @@ Portability : POSIX
module Gargantext.Core.Text.List.Social
where
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Monoid (mconcat)
import Data.Text (Text)
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Text.List.Social.Find
import Gargantext.Core.Text.List.Social.History
import Gargantext.Core.Text.List.Social.Patch
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.List.Social.Scores
import Gargantext.Core.Types.Individu
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
......@@ -39,11 +41,12 @@ flowSocialListPriority :: FlowSocialListPriority -> [NodeMode]
flowSocialListPriority MySelfFirst = [Private{-, Shared, Public -}]
flowSocialListPriority OthersFirst = reverse $ flowSocialListPriority MySelfFirst
{-
-- | We keep the parents for all ngrams but terms
keepAllParents :: NgramsType -> KeepAllParents
keepAllParents NgramsTerms = KeepAllParents False
keepAllParents _ = KeepAllParents True
-}
------------------------------------------------------------------------
flowSocialList :: ( RepoCmdM env err m
......@@ -53,8 +56,8 @@ flowSocialList :: ( RepoCmdM env err m
)
=> FlowSocialListPriority
-> User -> NgramsType
-> FlowCont Text FlowListScores
-> m (FlowCont Text FlowListScores)
-> FlowCont NgramsTerm FlowListScores
-> m (FlowCont NgramsTerm FlowListScores)
flowSocialList flowPriority user nt flc =
mconcat <$> mapM (flowSocialListByMode' user nt flc)
(flowSocialListPriority flowPriority)
......@@ -66,9 +69,9 @@ flowSocialList flowPriority user nt flc =
, HasTreeError err
)
=> User -> NgramsType
-> FlowCont Text FlowListScores
-> FlowCont NgramsTerm FlowListScores
-> NodeMode
-> m (FlowCont Text FlowListScores)
-> m (FlowCont NgramsTerm FlowListScores)
flowSocialListByMode' user' nt' flc' mode =
findListsId user' mode
>>= flowSocialListByModeWith nt' flc'
......@@ -80,10 +83,39 @@ flowSocialList flowPriority user nt flc =
, HasTreeError err
)
=> NgramsType
-> FlowCont Text FlowListScores
-> [NodeId]
-> m (FlowCont Text FlowListScores)
flowSocialListByModeWith nt'' flc'' ns =
mapM (\l -> getListNgrams [l] nt'') ns
-> FlowCont NgramsTerm FlowListScores
-> [ListId]
-> m (FlowCont NgramsTerm FlowListScores)
flowSocialListByModeWith nt'' flc'' listes =
getHistoryScores History_User nt'' flc'' listes
{-
mapM (\l -> getListNgrams [l] nt'') listes
>>= pure
. toFlowListScores (keepAllParents nt'') flc''
-}
-----------------------------------------------------------------
getHistoryScores :: ( RepoCmdM env err m
, CmdM env err m
, HasNodeError err
, HasTreeError err
)
=> History
-> NgramsType
-> FlowCont NgramsTerm FlowListScores
-> [ListId]
-> m (FlowCont NgramsTerm FlowListScores)
getHistoryScores hist nt fl listes =
addScorePatches nt listes fl <$> getHistory hist nt listes
getHistory :: ( RepoCmdM env err m
, CmdM env err m
, HasNodeError err
, HasTreeError err
)
=> History
-> NgramsType
-> [ListId]
-> m (Map NgramsType (Map ListId [HashMap NgramsTerm NgramsPatch]))
getHistory hist nt listes =
history hist [nt] listes <$> getRepo
{-|
Module : Gargantext.Core.Text.List.Social.History
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.List.Social.History
where
import Control.Lens hiding (cons)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Map.Strict as Map
-- TODO put this in Prelude
cons :: a -> [a]
cons a = [a]
------------------------------------------------------------------------
-- | History control
data History = History_User
| History_NotUser
| History_All
------------------------------------------------------------------------
-- | Main Function
history :: History
-> [NgramsType]
-> [ListId]
-> Repo s NgramsStatePatch
-> Map NgramsType (Map ListId [HashMap NgramsTerm NgramsPatch])
history History_User t l = clean . (history' t l)
where
clean = Map.map (Map.map List.init)
history History_NotUser t l = clean . (history' t l)
where
clean = Map.map (Map.map last)
last = (maybe [] cons) . lastMay
history _ t l = history' t l
------------------------------------------------------------------------
history' :: [NgramsType]
-> [ListId]
-> Repo s NgramsStatePatch
-> Map NgramsType (Map ListId [HashMap NgramsTerm NgramsPatch])
history' types lists = merge
. map (Map.map ( Map.map cons))
. map (Map.map ((Map.filterWithKey (\k _ -> List.elem k lists))))
. map (Map.filterWithKey (\k _ -> List.elem k types))
. map toMap
. view r_history
merge :: [Map NgramsType (Map ListId [HashMap NgramsTerm NgramsPatch])]
-> Map NgramsType (Map ListId [HashMap NgramsTerm NgramsPatch])
merge = Map.unionsWith merge'
where
merge' :: Map ListId [HashMap NgramsTerm NgramsPatch]
-> Map ListId [HashMap NgramsTerm NgramsPatch]
-> Map ListId [HashMap NgramsTerm NgramsPatch]
merge' = Map.unionWith (<>)
toMap :: PatchMap NgramsType
(PatchMap ListId
(NgramsTablePatch
)
)
-> Map NgramsType
(Map ListId
(HashMap NgramsTerm NgramsPatch
)
)
toMap = Map.map (Map.map unNgramsTablePatch) . (Map.map unPatchMapToMap) . unPatchMapToMap
{-|
Module : Gargantext.Core.Text.List.Social.Patch
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.List.Social.Patch
where
import Control.Lens hiding (cons)
import Data.Map (Map)
import Data.Hashable (Hashable)
import Data.HashMap.Strict (HashMap)
import Data.Monoid
import Data.Semigroup
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Patch.Class as Patch (Replace(..))
addScorePatches :: NgramsType -> [ListId]
-> FlowCont NgramsTerm FlowListScores
-> Map NgramsType (Map ListId [HashMap NgramsTerm NgramsPatch])
-> FlowCont NgramsTerm FlowListScores
addScorePatches nt listes fl repo = foldl' (addScorePatchesList nt repo) fl listes
addScorePatchesList :: NgramsType
-> Map NgramsType (Map ListId [HashMap NgramsTerm NgramsPatch])
-> FlowCont NgramsTerm FlowListScores
-> ListId
-> FlowCont NgramsTerm FlowListScores
addScorePatchesList nt repo fl lid = foldl' addScorePatch fl patches
where
patches = maybe [] (List.concat . (map HashMap.toList)) patches'
patches' = do
lists <- Map.lookup nt repo
mapPatches <- Map.lookup lid lists
pure mapPatches
addScorePatch :: FlowCont NgramsTerm FlowListScores
-> (NgramsTerm , NgramsPatch)
-> FlowCont NgramsTerm FlowListScores
{- | Case of changing listType only. Patches look like:
This patch move "problem" from MapTerm to CandidateTerm
,fromList [(NgramsTerm {unNgramsTerm = "problem"},NgramsPatch {_patch_children = PatchMSet (PatchMap (fromList [])), _patch_list = Replace {_old = MapTerm, _new = CandidateTerm}})]
This patch move "paper" from MapTerm to StopTerm
,fromList [(NgramsTerm {unNgramsTerm = "paper"},NgramsPatch {_patch_children = PatchMSet (PatchMap (fromList [])), _patch_list = Replace {_old = MapTerm, _new = StopTerm}})]])])]
Children are not modified in this specific case.
-}
-- | Old list get -1 score
-- New list get +1 score
-- Hence others lists lay around 0 score
addScorePatch fl (t, (NgramsPatch children' (Patch.Replace old_list new_list))) =
-- | Adding New Children score
addScorePatch fl' (t, NgramsPatch children' Patch.Keep)
where
-- | Adding New ListType score
fl' = fl & flc_scores . at t %~ (score fls_listType old_list (-1))
& flc_scores . at t %~ (score fls_listType new_list ( 1))
& flc_cont %~ (HashMap.delete t)
-- | Patching existing Ngrams with children
addScorePatch fl (p, NgramsPatch children' Patch.Keep) =
foldl' addChild fl $ patchMSet_toList children'
where
-- | Adding a child
addChild fl' (t, Patch.Replace Nothing (Just _)) = doLink ( 1) p t fl'
-- | Removing a child
addChild fl' (t, Patch.Replace (Just _) Nothing) = doLink (-1) p t fl'
-- | This case should not happen: does Nothing
addChild fl' _ = fl'
-- | Inserting a new Ngrams
addScorePatch fl (t, NgramsReplace Nothing (Just nre)) =
childrenScore 1 t (nre ^. nre_children)
$ fl & flc_scores . at t %~ (score fls_listType $ nre ^. nre_list) 1
& flc_cont %~ (HashMap.delete t)
addScorePatch fl (t, NgramsReplace (Just old_nre) maybe_new_nre) =
let fl' = childrenScore (-1) t (old_nre ^. nre_children)
$ fl & flc_scores . at t %~ (score fls_listType $ old_nre ^. nre_list) (-1)
& flc_cont %~ (HashMap.delete t)
in case maybe_new_nre of
Nothing -> fl'
Just new_nre -> addScorePatch fl' (t, NgramsReplace Nothing (Just new_nre))
addScorePatch fl (_, NgramsReplace Nothing Nothing) = fl
-------------------------------------------------------------------------------
-- | Utils
childrenScore :: Int
-> NgramsTerm
-> MSet NgramsTerm
-> FlowCont NgramsTerm FlowListScores
-> FlowCont NgramsTerm FlowListScores
childrenScore n parent children' fl =
foldl' add' fl $ unMSet children'
where
add' fl' t = doLink n parent t fl'
------------------------------------------------------------------------
doLink :: (Ord a, Hashable a)
=> Int
-> NgramsTerm
-> a
-> FlowCont a FlowListScores
-> FlowCont a FlowListScores
doLink n parent child fl' = fl' & flc_scores . at child %~ (score fls_parents parent n)
score :: (Monoid a, At m, Semigroup (IxValue m))
=> ((m -> Identity m) -> a -> Identity b)
-> Index m -> IxValue m -> Maybe a -> Maybe b
score field list n m = (Just mempty <> m)
& _Just
. field
. at list
%~ (<> Just n)
------------------------------------------------------------------------
patchMSet_toList :: (Ord a, Hashable a) => PatchMSet a -> [(a,AddRem)]
patchMSet_toList = HashMap.toList . unPatchMapToHashMap . unPatchMSet
unMSet :: MSet a -> [a]
unMSet (MSet a) = Map.keys a
......@@ -19,29 +19,34 @@ module Gargantext.Core.Text.List.Social.Prelude
where
import Control.Lens
import Data.Semigroup (Semigroup(..))
import Data.Monoid
import Data.Map (Map)
import Data.Text (Text)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Monoid
import Data.Semigroup (Semigroup(..))
import GHC.Generics (Generic)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Prelude
import GHC.Generics (Generic)
import qualified Data.Map as Map
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
import qualified Data.Map.Strict as Map
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Map.Strict.Patch as PatchMap
------------------------------------------------------------------------
type Parent = Text
type Parent = NgramsTerm
------------------------------------------------------------------------
-- | DataType inspired by continuation Monad (but simpler)
data FlowCont a b =
FlowCont { _flc_scores :: Map a b
, _flc_cont :: Map a b
FlowCont { _flc_scores :: HashMap a b
, _flc_cont :: HashMap a b
}
deriving (Show)
instance (Ord a, Eq b) => Monoid (FlowCont a b) where
instance (Ord a, Eq b, Hashable a) => Monoid (FlowCont a b) where
mempty = FlowCont mempty mempty
instance (Eq a, Ord a, Eq b) => Semigroup (FlowCont a b) where
instance (Eq a, Ord a, Eq b, Hashable a) => Semigroup (FlowCont a b) where
(<>) (FlowCont m1 s1)
(FlowCont m2 s2)
= FlowCont (m1 <> m2)
......@@ -51,10 +56,10 @@ makeLenses ''FlowCont
-- | Datatype definition
data FlowListScores =
FlowListScores { _fls_listType :: Map ListType Int
, _fls_parents :: Map Parent Int
FlowListScores { _fls_listType :: HashMap ListType Int
, _fls_parents :: HashMap Parent Int
-- You can add any score by incrementing this type
-- , _flc_score :: Map Score Int
-- , _flc_score :: HashMap Score Int
}
deriving (Show, Generic, Eq)
......@@ -72,16 +77,16 @@ instance Semigroup FlowListScores where
(l1 <> l2)
instance Monoid FlowListScores where
mempty = FlowListScores Map.empty Map.empty
mempty = FlowListScores HashMap.empty HashMap.empty
------------------------------------------------------------------------
-- | Tools to inherit groupings
------------------------------------------------------------------------
-- | Tools
parentUnionsMerge :: (Ord a, Ord b, Num c)
=> [Map a (Map b c)]
-> Map a (Map b c)
parentUnionsMerge = Map.unionsWith (Map.unionWith (+))
parentUnionsMerge :: (Ord a, Ord b, Num c, Hashable a, Hashable b)
=> [HashMap a (HashMap b c)]
-> HashMap a (HashMap b c)
parentUnionsMerge = HashMap.unionsWith (HashMap.unionWith (+))
-- This Parent union is specific
-- [Private, Shared, Public]
......@@ -89,22 +94,38 @@ parentUnionsMerge = Map.unionsWith (Map.unionWith (+))
-- Private > Shared > Public
-- if data have not been tagged privately, then use others tags
-- This unions behavior takes first key only and ignore others
parentUnionsExcl :: Ord a
=> [Map a b]
-> Map a b
parentUnionsExcl = Map.unions
parentUnionsExcl :: (Ord a, Hashable a)
=> [HashMap a b]
-> HashMap a b
parentUnionsExcl = HashMap.unions
------------------------------------------------------------------------
-- | Takes key with max value if and only if value > 0
-- If value <= 0 alors key is not taken at all
-- It can happens since some score are non positive (i.e. removing a child)
-- >>> keyWithMaxValue $ DM.fromList $ zip (['a'..'z'] :: [Char]) ([1,2..]::[Int])
-- Just 'z'
-- >>> keyWithMaxValue $ DM.fromList $ zip (['a'..'z'] :: [Char]) ([-1,-2..]::[Int])
-- Nothing
-- TODO duplicate with getMaxFromMap and improve it (lookup value should not be needed)
-- TODO put in custom Prelude
keyWithMaxValue :: (Ord a, Ord b, Num b, Hashable a)
=> HashMap a b -> Maybe a
keyWithMaxValue m = do
maxKey <- headMay $ HashMap.getKeysOrderedByValueMaxFirst m
maxValue <- HashMap.lookup maxKey m
if maxValue > 0
then pure maxKey
else Nothing
hasParent :: Text
-> Map Text (Map Parent Int)
-> Maybe Parent
hasParent t m = case Map.lookup t m of
Nothing -> Nothing
Just m' -> keyWithMaxValue m'
------------------------------------------------------------------------
keyWithMaxValue :: Map a b -> Maybe a
keyWithMaxValue m = (fst . fst) <$> Map.maxViewWithKey m
unPatchMapToHashMap :: (Ord a, Hashable a) => PatchMap a b -> HashMap a b
unPatchMapToHashMap = HashMap.fromList . PatchMap.toList
unPatchMapToMap :: Ord a => PatchMap a b -> Map a b
unPatchMapToMap = Map.fromList . PatchMap.toList
unNgramsTablePatch :: NgramsTablePatch -> HashMap NgramsTerm NgramsPatch
unNgramsTablePatch (NgramsTablePatch p) = unPatchMapToHashMap p
{-|
Module : Gargantext.Core.Text.List.Social.Scores
Description :
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Core.Text.List.Social.Scores
where
import Control.Lens
import Data.Map (Map)
import Data.Monoid (mempty)
import Data.Set (Set)
import Data.Text (Text)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Prelude
import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------------------------------------------------------
-- | Generates Score from list of Map Text NgramsRepoElement
toFlowListScores :: KeepAllParents
-> FlowCont Text FlowListScores
-> [Map Text NgramsRepoElement]
-> FlowCont Text FlowListScores
toFlowListScores k flc_origin = foldl' (toFlowListScores_Level1 k flc_origin) mempty
where
toFlowListScores_Level1 :: KeepAllParents
-> FlowCont Text FlowListScores
-> FlowCont Text FlowListScores
-> Map Text NgramsRepoElement
-> FlowCont Text FlowListScores
toFlowListScores_Level1 k' flc_origin' flc_dest ngramsRepo =
Set.foldl' (toFlowListScores_Level2 k' ngramsRepo flc_origin')
flc_dest
(Set.fromList $ Map.keys $ view flc_cont flc_origin')
toFlowListScores_Level2 :: KeepAllParents
-> Map Text NgramsRepoElement
-> FlowCont Text FlowListScores
-> FlowCont Text FlowListScores
-> Text
-> FlowCont Text FlowListScores
toFlowListScores_Level2 k'' ngramsRepo flc_origin'' flc_dest' t =
case Map.lookup t ngramsRepo of
Nothing -> over flc_cont (Map.union $ Map.singleton t mempty) flc_dest'
Just nre -> updateScoresParent k'' ngramsRepo nre flc_origin''
$ updateScores k'' t nre setText flc_dest'
where
setText = Set.fromList
$ Map.keys
$ view flc_cont flc_origin''
updateScoresParent :: KeepAllParents -> Map Text NgramsRepoElement -> NgramsRepoElement
-> FlowCont Text FlowListScores
-> FlowCont Text FlowListScores
-> FlowCont Text FlowListScores
updateScoresParent keep@(KeepAllParents k''') ngramsRepo nre flc_origin'' flc_dest'' = case k''' of
False -> flc_dest''
True -> case view nre_parent nre of
Nothing -> flc_dest''
Just (NgramsTerm parent) -> toFlowListScores_Level2 keep ngramsRepo flc_origin'' flc_dest'' parent
------------------------------------------------------------------------
updateScores :: KeepAllParents
-> Text -> NgramsRepoElement -> Set Text
-> FlowCont Text FlowListScores
-> FlowCont Text FlowListScores
updateScores k t nre setText mtf =
over flc_cont ( Map.delete t)
$ over flc_scores ((Map.alter (addParent k nre setText ) t)
.(Map.alter (addList $ view nre_list nre) t)
) mtf
------------------------------------------------------------------------
-- | Main addFunctions to groupResolution the FlowListScores
-- Use patch-map library here
-- diff, transformWith patches simplifies functions below
addList :: ListType
-> Maybe FlowListScores
-> Maybe FlowListScores
addList l Nothing =
Just $ set fls_listType (addListScore l mempty) mempty
addList l (Just fls) =
Just $ over fls_listType (addListScore l) fls
-- * Unseful but nice comment:
-- "the addList function looks like an ASCII bird"
-- | Concrete function to pass to PatchMap
addListScore :: ListType -> Map ListType Int -> Map ListType Int
addListScore l m = Map.alter (plus l) l m
where
plus CandidateTerm Nothing = Just 1
plus CandidateTerm (Just x) = Just $ x + 1
plus MapTerm Nothing = Just 2
plus MapTerm (Just x) = Just $ x + 2
plus StopTerm Nothing = Just 3
plus StopTerm (Just x) = Just $ x + 3
------------------------------------------------------------------------
data KeepAllParents = KeepAllParents Bool
addParent :: KeepAllParents -> NgramsRepoElement -> Set Text
-> Maybe FlowListScores
-> Maybe FlowListScores
addParent k nre ss Nothing =
Just $ FlowListScores mempty mapParent
where
mapParent = addParentScore k (view nre_parent nre) ss mempty
addParent k nre ss (Just fls{-(FlowListScores mapList mapParent)-}) =
Just $ over fls_parents (addParentScore k (view nre_parent nre) ss) fls
addParentScore :: Num a
=> KeepAllParents
-> Maybe NgramsTerm
-> Set Text
-> Map Text a
-> Map Text a
addParentScore _ Nothing _ss mapParent = mapParent
addParentScore (KeepAllParents keep) (Just (NgramsTerm p')) ss mapParent =
case keep of
True -> Map.alter addCount p' mapParent
False -> case Set.member p' ss of
False -> mapParent
True -> Map.alter addCount p' mapParent
where
addCount Nothing = Just 1
addCount (Just n) = Just $ n + 1
------------------------------------------------------------------------
------------------------------------------------------------------------
......@@ -20,29 +20,32 @@ module Gargantext.Core.Text.Metrics
--import Math.KMeans (kmeans, euclidSq, elements)
import Control.Lens (makeLenses)
import Data.Map (Map)
import Data.Semigroup (Semigroup)
import Data.Monoid (Monoid, mempty)
import Gargantext.Prelude
import Data.HashMap.Strict (HashMap)
import Data.Semigroup (Semigroup)
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
import Gargantext.Core.Viz.Graph.Index
import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
import Gargantext.Core.Viz.Graph.Index
import Gargantext.Prelude
import qualified Data.Array.Accelerate as DAA
import qualified Data.Array.Accelerate.Interpreter as DAA
import qualified Data.Map as Map
import qualified Data.Vector as V
import qualified Data.Vector.Storable as Vec
import qualified Data.HashMap.Strict as HashMap
type MapListSize = Int
type InclusionSize = Int
scored :: Ord t => Map (t,t) Int -> [Scored t]
scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
scored :: Ord t => HashMap (t,t) Int -> V.Vector (Scored t)
scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map . Map.fromList . HashMap.toList
where
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
map2scored :: Ord t => Map t (Vec.Vector Double) -> V.Vector (Scored t)
map2scored = V.map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . V.fromList . Map.toList
-- TODO change type with (x,y)
data Scored ts = Scored
......
......@@ -25,20 +25,17 @@ module Gargantext.Core.Text.Metrics.FrequentItemSet
)
where
import Data.List (concat, null)
import Data.Map.Strict (Map)
import Data.Maybe (catMaybes)
import Data.Set (Set)
import Gargantext.Prelude
import HLCM
import Prelude (Functor(..)) -- TODO
import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)
import qualified Data.Set as Set
import Data.Set (Set)
import qualified Data.Vector as V
import Data.List (concat, null)
import Data.Maybe (catMaybes)
import HLCM
import Gargantext.Prelude
data Size = Point Int | Segment Int Int
------------------------------------------------------------------------
......
{-|
Module : Gargantext.Core.Text.Metrics.Freq
Module : Gargantext.Core.Text.Metrics.Utils
Description : Some functions to count.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -10,7 +10,7 @@ Portability : POSIX
-}
module Gargantext.Core.Text.Metrics.Freq where
module Gargantext.Core.Text.Metrics.Utils where
import Gargantext.Prelude
import Data.Map (empty, Map, insertWith, toList)
......
......@@ -107,8 +107,6 @@ $(deriveJSON (unPrefix "_") ''PosSentences)
-- },
--
corenlp' :: ( FromJSON a
, ConvertibleStrings p ByteString
)
......
......@@ -20,6 +20,7 @@ module Gargantext.Core.Types.Main where
import Data.Aeson (FromJSON, ToJSON)
import Data.Aeson.TH (deriveJSON)
import Data.Either (Either(..))
import Data.Hashable (Hashable)
import Data.Map (fromList, lookup)
import Data.Semigroup (Semigroup(..))
import Data.Swagger
......@@ -59,6 +60,7 @@ instance ToSchema ListType
instance ToParamSchema ListType
instance Arbitrary ListType where
arbitrary = elements [minBound..maxBound]
instance Hashable ListType
instance Semigroup ListType
where
......
......@@ -14,11 +14,11 @@ Portability : POSIX
module Gargantext.Core.Viz.Chart
where
import Data.List (unzip, sortOn)
import Data.List (sortOn)
import Data.Map (toList)
import qualified Data.List as List
import qualified Data.Map as Map
import Data.Maybe (catMaybes)
import qualified Data.Vector as V
import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Config
......@@ -33,17 +33,21 @@ import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
-- Pie Chart
import Gargantext.API.Ngrams.NgramsTree
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types
import Gargantext.Database.Action.Flow
import Gargantext.Database.Action.Metrics.NgramsByNode
import Gargantext.Database.Schema.Ngrams
import Gargantext.Core.Viz.Types
import qualified Data.HashMap.Strict as HashMap
histoData :: CorpusId -> Cmd err Histo
histoData cId = do
dates <- selectDocsDates cId
let (ls, css) = unzip
$ sortOn fst
let (ls, css) = V.unzip
$ V.fromList
$ sortOn fst -- TODO Vector.sortOn
$ toList
$ occurrencesWith identity dates
pure (Histo ls css)
......@@ -58,20 +62,20 @@ chartData cId nt lt = do
ts <- mapTermListRoot ls nt <$> getRepo
let
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ Map.toList dico
group dico' x = case Map.lookup x dico' of
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ HashMap.toList dico
group dico' x = case HashMap.lookup x dico' of
Nothing -> x
Just x' -> maybe x identity x'
(_total,mapTerms) <- countNodesByNgramsWith (group dico)
<$> getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
let (dates, count) = unzip $ map (\(t,(d,_)) -> (t, d)) $ Map.toList mapTerms
pure (Histo dates (map round count))
let (dates, count) = V.unzip $ fmap (\(NgramsTerm t,(d,_)) -> (t, d)) $ V.fromList $ HashMap.toList mapTerms
pure (Histo dates (round <$> count))
treeData :: FlowCmdM env err m
=> CorpusId -> NgramsType -> ListType
-> m [NgramsTree]
-> m (V.Vector NgramsTree)
treeData cId nt lt = do
ls' <- selectNodesWithUsername NodeList userMaster
ls <- map (_node_id) <$> getListsWithParentId cId
......@@ -79,10 +83,10 @@ treeData cId nt lt = do
let
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ Map.toList dico
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ HashMap.toList dico
cs' <- getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
m <- getListNgrams ls nt
pure $ toTree lt cs' m
pure $ V.fromList $ toTree lt cs' m
......@@ -18,38 +18,36 @@ module Gargantext.Core.Viz.Graph.API
import Control.Lens (set, (^.), _Just, (^?))
import Data.Aeson
import qualified Data.Map as Map
import Data.Swagger
import Data.Text
import Debug.Trace (trace)
import GHC.Generics (Generic)
import Servant
import Servant.Job.Async
import Servant.XML
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams.Types (NgramsRepo, r_version)
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Types (NgramsRepo, r_version)
import Gargantext.API.Prelude
import Gargantext.Core.Methods.Distances (Distance(..), GraphMetric(..))
import Gargantext.Core.Types.Main
import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.GEXF ()
import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph)
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.User (getNodeUser)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Query.Table.Node.User (getNodeUser)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node (node_parentId, node_hyperdata, node_name, node_userId)
import Gargantext.Prelude
import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.GEXF ()
import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph)
import Gargantext.Core.Methods.Distances (Distance(..), GraphMetric(..))
import Servant
import Servant.Job.Async
import Servant.XML
import qualified Data.HashMap.Strict as HashMap
------------------------------------------------------------------------
-- | There is no Delete specific API for Graph since it can be deleted
-- as simple Node.
......@@ -150,10 +148,10 @@ computeGraph cId d nt repo = do
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
-- TODO split diagonal
myCooc <- Map.filter (>1)
myCooc <- HashMap.filter (>1)
<$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
graph <- liftBase $ cooc2graph d 0 myCooc
......
......@@ -13,28 +13,31 @@ Portability : POSIX
module Gargantext.Core.Viz.Graph.Tools
where
import Debug.Trace (trace)
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
-- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
import Data.Map (Map)
import qualified Data.Set as Set
import Data.HashMap.Strict (HashMap)
import Data.Text (Text)
import Gargantext.Prelude
import Debug.Trace (trace)
import GHC.Float (sin, cos)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Methods.Distances (Distance(..), measure)
import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
import Gargantext.Core.Statistics
import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness)
import Gargantext.Core.Methods.Distances (Distance(..), measure)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index)
import Gargantext.Core.Viz.Graph.IGraph (mkGraphUfromEdges)
import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
import GHC.Float (sin, cos)
import qualified IGraph as Igraph
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index)
import Gargantext.Prelude
import IGraph.Random -- (Gen(..))
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector.Storable as Vec
import qualified IGraph as Igraph
import qualified IGraph.Algorithms.Layout as Layout
import qualified Data.Vector.Storable as Vec
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.HashMap.Strict as HashMap
type Threshold = Double
......@@ -54,13 +57,15 @@ cooc2graph' distance threshold myCooc = distanceMap
cooc2graph :: Distance
-> Threshold
-> (Map (Text, Text) Int)
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graph distance threshold myCooc = do
printDebug "cooc2graph" distance
let
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
-- TODO remove below
theMatrix = Map.fromList $ HashMap.toList myCooc
(ti, _) = createIndices theMatrix
myCooc' = toIndex ti theMatrix
matCooc = map2mat 0 (Map.size ti)
$ Map.filterWithKey (\(a,b) _ -> a /= b)
$ Map.filter (> 1) myCooc'
......@@ -87,7 +92,7 @@ cooc2graph distance threshold myCooc = do
$ bridgeness rivers partitions distanceMap
confluence' = confluence (Map.keys bridgeness') 3 True False
pure $ data2graph (Map.toList ti) myCooc' bridgeness' confluence' partitions
pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti) myCooc' bridgeness' confluence' partitions
......
......@@ -14,34 +14,32 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.Main
where
import Data.GraphViz
import qualified Data.ByteString as DB
import qualified Data.List as List
import qualified Data.Map as Map
import Data.Maybe
import qualified Data.Text as Text
import Data.Text (Text)
import Debug.Trace (trace)
import GHC.IO (FilePath)
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.Core.Types
import Gargantext.Database.Action.Flow
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Query.Table.Node(defaultList)
import Gargantext.Database.Query.Table.NodeNode (selectDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types
import Gargantext.Core.Viz.Phylo hiding (Svg, Dot)
import Gargantext.Core.Viz.Phylo.LevelMaker (toPhylo)
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.View.Export
import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
import Gargantext.Database.Action.Flow
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Query.Table.Node(defaultList)
import Gargantext.Database.Query.Table.NodeNode (selectDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.ByteString as DB
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text
import qualified Data.HashMap.Strict as HashMap
type MinSizeBranch = Int
......@@ -51,7 +49,7 @@ flowPhylo :: FlowCmdM env err m
flowPhylo cId = do
list <- defaultList cId
termList <- Map.toList <$> getTermsWith Text.words [list] NgramsTerms MapTerm
termList <- HashMap.toList <$> getTermsWith (Text.words . unNgramsTerm) [list] NgramsTerms MapTerm
docs' <- catMaybes
<$> map (\h -> (,) <$> _hd_publication_year h
......
......@@ -9,6 +9,8 @@ module Gargantext.Core.Viz.Types where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Vector (Vector)
import qualified Data.Vector as V
import Protolude
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
......@@ -23,8 +25,8 @@ data Chart = ChartHisto | ChartScatter | ChartPie
deriving (Generic)
-- TODO use UTCTime
data Histo = Histo { histo_dates :: ![Text]
, histo_count :: ![Int]
data Histo = Histo { histo_dates :: !(Vector Text)
, histo_count :: !(Vector Int)
}
deriving (Show, Generic)
......@@ -32,7 +34,7 @@ instance ToSchema Histo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "histo_")
instance Arbitrary Histo
where
arbitrary = elements [ Histo ["2012"] [1]
, Histo ["2013"] [1]
arbitrary = elements [ Histo (V.singleton "2012") (V.singleton 1)
, Histo (V.singleton "2013") (V.singleton 1)
]
deriveJSON (unPrefix "histo_") ''Histo
module Gargantext.Data.HashMap.Strict.Utils where
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
------------------------------------------------------------------------
unionsWith :: (Foldable f, Eq k, Hashable k) => (a->a->a) -> f (HashMap k a) -> HashMap k a
unionsWith f = foldl' (HashMap.unionWith f) HashMap.empty
------------------------------------------------------------------------
-- | Partition the map according to some predicate. The first map contains all
-- elements that satisfy the predicate, the second all elements that fail the
-- predicate.
partition :: (Ord k, Hashable k) => (a -> Bool) -> HashMap k a -> (HashMap k a, HashMap k a)
partition p m = (HashMap.filter p m, HashMap.filter (not . p) m)
-- | Partition the map according to some predicate. The first map contains all
-- elements that satisfy the predicate, the second all elements that fail the
-- predicate.
partitionWithKey :: (Ord a, Hashable k) => (k -> a -> Bool) -> HashMap k a -> (HashMap k a, HashMap k a)
partitionWithKey p m = (HashMap.filterWithKey p m, HashMap.filterWithKey (\k -> not . p k) m)
------------------------------------------------------------------------
-- getKeyWithMaxValue :: Hashable k => HashMap k a -> Maybe k
getKeysOrderedByValueMaxFirst :: (Ord k, Hashable k, Ord a) => HashMap k a -> [k]
getKeysOrderedByValueMaxFirst m = go [] Nothing (HashMap.toList m)
where
go ks _ [] = ks
go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
go ks (Just u) ((k,v):rest)
| v < u = go ks (Just u) rest
| v > u = go [k] (Just v) rest
| otherwise = go (k:ks) (Just v) rest
......@@ -17,13 +17,14 @@ module Gargantext.Database.Action.Flow.Pairing
where
import Control.Lens (_Just, (^.))
import Data.Map (Map, fromList, fromListWith)
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Data.Maybe (catMaybes, fromMaybe)
import Data.Set (Set)
import Data.Text (Text)
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core.Types (TableResult(..), Term)
import Gargantext.Core.Types (TableResult(..))
import Gargantext.Core.Types.Main
import Gargantext.Database
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
......@@ -39,11 +40,10 @@ import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (sum)
import Opaleye
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as DT
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Set as Set
import qualified Data.Text as DT
-- | isPairedWith
-- All NodeAnnuaire paired with a Corpus of NodeId nId:
......@@ -79,7 +79,7 @@ dataPairing :: AnnuaireId
-> (CorpusId, ListId, NgramsType)
-> (ContactName -> Projected)
-> (DocAuthor -> Projected)
-> GargNoServer (Map ContactId (Set DocId))
-> GargNoServer (HashMap ContactId (Set DocId))
dataPairing aId (cId, lId, ngt) fc fa = do
mc <- getNgramsContactId aId
md <- getNgramsDocId cId lId ngt
......@@ -87,14 +87,14 @@ dataPairing aId (cId, lId, ngt) fc fa = do
printDebug "ngramsContactId" mc
printDebug "ngramsDocId" md
let
from = projectionFrom (Set.fromList $ Map.keys mc) fc
to = projectionTo (Set.fromList $ Map.keys md) fa
from = projectionFrom (Set.fromList $ HM.keys mc) fc
to = projectionTo (Set.fromList $ HM.keys md) fa
pure $ fusion mc $ align from to md
prepareInsert :: Map ContactId (Set DocId) -> [NodeNode]
prepareInsert :: HashMap ContactId (Set DocId) -> [NodeNode]
prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
$ List.concat
$ map (\(contactId, setDocIds)
......@@ -102,21 +102,21 @@ prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
-> (contactId, setDocId)
) $ Set.toList setDocIds
)
$ Map.toList m
$ HM.toList m
------------------------------------------------------------------------
type ContactName = Text
type DocAuthor = Text
type Projected = Text
type ContactName = NgramsTerm
type DocAuthor = NgramsTerm
type Projected = NgramsTerm
projectionFrom :: Set ContactName -> (ContactName -> Projected) -> Map ContactName Projected
projectionFrom ss f = fromList $ map (\s -> (s, f s)) (Set.toList ss)
projectionFrom :: Set ContactName -> (ContactName -> Projected) -> HashMap ContactName Projected
projectionFrom ss f = HM.fromList $ map (\s -> (s, f s)) (Set.toList ss) -- use HS.toMap
projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> Map Projected (Set DocAuthor)
projectionTo ss f = fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss)
projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> HashMap Projected (Set DocAuthor)
projectionTo ss f = HM.fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss) -- use HS.toMap
------------------------------------------------------------------------
takeName :: Term -> Term
takeName texte = DT.toLower texte'
takeName :: NgramsTerm -> NgramsTerm
takeName (NgramsTerm texte) = NgramsTerm $ DT.toLower texte'
where
texte' = maybe texte (\x -> if DT.length x > 3 then x else texte)
(lastName' texte)
......@@ -124,51 +124,51 @@ takeName texte = DT.toLower texte'
------------------------------------------------------------------------
align :: Map ContactName Projected
-> Map Projected (Set DocAuthor)
-> Map DocAuthor (Set DocId)
-> Map ContactName (Set DocId)
align mc ma md = fromListWith (<>)
align :: HashMap ContactName Projected
-> HashMap Projected (Set DocAuthor)
-> HashMap DocAuthor (Set DocId)
-> HashMap ContactName (Set DocId)
align mc ma md = HM.fromListWith (<>)
$ map (\c -> (c, getProjection md $ testProjection c mc ma))
$ Map.keys mc
$ HM.keys mc
where
getProjection :: Map DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
getProjection :: HashMap DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
getProjection ma' sa' =
if Set.null sa'
then Set.empty
else Set.unions $ sets ma' sa'
where
sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
lookup s' ma''= fromMaybe Set.empty (Map.lookup s' ma'')
lookup s' ma''= fromMaybe Set.empty (HM.lookup s' ma'')
testProjection :: ContactName
-> Map ContactName Projected
-> Map Projected (Set DocAuthor)
-> HashMap ContactName Projected
-> HashMap Projected (Set DocAuthor)
-> Set DocAuthor
testProjection cn' mc' ma' = case Map.lookup cn' mc' of
testProjection cn' mc' ma' = case HM.lookup cn' mc' of
Nothing -> Set.empty
Just c -> case Map.lookup c ma' of
Just c -> case HM.lookup c ma' of
Nothing -> Set.empty
Just a -> a
fusion :: Map ContactName (Set ContactId)
-> Map ContactName (Set DocId)
-> Map ContactId (Set DocId)
fusion mc md = Map.fromListWith (<>)
fusion :: HashMap ContactName (Set ContactId)
-> HashMap ContactName (Set DocId)
-> HashMap ContactId (Set DocId)
fusion mc md = HM.fromListWith (<>)
$ catMaybes
$ [ (,) <$> Just cId <*> Map.lookup cn md
| (cn, setContactId) <- Map.toList mc
$ [ (,) <$> Just cId <*> HM.lookup cn md
| (cn, setContactId) <- HM.toList mc
, cId <- Set.toList setContactId
]
------------------------------------------------------------------------
getNgramsContactId :: AnnuaireId
-> Cmd err (Map ContactName (Set NodeId))
-> Cmd err (HashMap ContactName (Set NodeId))
getNgramsContactId aId = do
contacts <- getAllContacts aId
pure $ fromListWith (<>)
pure $ HM.fromListWith (<>)
$ catMaybes
$ map (\contact -> (,) <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName)
$ map (\contact -> (,) <$> (NgramsTerm <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName))
<*> Just ( Set.singleton (contact^.node_id))
) (tr_docs contacts)
......@@ -176,11 +176,11 @@ getNgramsContactId aId = do
getNgramsDocId :: CorpusId
-> ListId
-> NgramsType
-> GargNoServer (Map DocAuthor (Set NodeId))
-> GargNoServer (HashMap DocAuthor (Set NodeId))
getNgramsDocId cId lId nt = do
repo <- getRepo
lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
......@@ -10,17 +10,14 @@ Portability : POSIX
Node API
-}
module Gargantext.Database.Action.Metrics
where
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Text (Text)
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType)
import Data.HashMap.Strict (HashMap)
import Data.Vector (Vector)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo)
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm)
import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser{-, getTficfWith-})
......@@ -29,21 +26,22 @@ import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Prelude
import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import qualified Data.HashMap.Strict as HM
getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text])
-> m (HashMap NgramsTerm (ListType, Maybe NgramsTerm), Vector (Scored NgramsTerm))
getMetrics cId maybeListId tabType maybeLimit = do
(ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
-- TODO HashMap
pure (ngs, scored myCooc)
getNgramsCooc :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m ( Map Text (ListType, Maybe Text)
, Map Text (Maybe RootTerm)
, Map (Text, Text) Int
-> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
, HashMap NgramsTerm (Maybe RootTerm)
, HashMap (NgramsTerm, NgramsTerm) Int
)
getNgramsCooc cId maybeListId tabType maybeLimit = do
(ngs', ngs) <- getNgrams cId maybeListId tabType
......@@ -55,17 +53,19 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType)
(take' maybeLimit $ Map.keys ngs)
myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType)
(take' maybeLimit $ HM.keys ngs)
pure $ (ngs', ngs, myCooc)
getNgrams :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType
-> m (Map Text (ListType, Maybe Text), Map Text (Maybe RootTerm))
-> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
, HashMap NgramsTerm (Maybe RootTerm)
)
getNgrams cId maybeListId tabType = do
lId <- case maybeListId of
......@@ -73,7 +73,7 @@ getNgrams cId maybeListId tabType = do
Just lId' -> pure lId'
lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo
let maybeSyn = Map.unions $ map (\t -> filterListWithRoot t lists)
let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
[MapTerm, StopTerm, CandidateTerm]
pure (lists, maybeSyn)
......@@ -19,17 +19,16 @@ Portability : POSIX
module Gargantext.Database.Action.Metrics.Lists
where
import Prelude hiding (null, id, map, sum)
import qualified Data.Map as Map
import qualified Data.Vector as Vec
import qualified Gargantext.Database.Action.Metrics as Metrics
import Gargantext.API.Ngrams.Types (TabType(..))
import Gargantext.Core.Text.Metrics (Scored(..))
import Gargantext.Core.Types -- (NodePoly(..), NodeCorpus, ListId)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Prelude hiding (sum, head)
import Gargantext.Core.Text.Metrics (Scored(..))
import Prelude hiding (null, id, map, sum)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Map as Map
import qualified Data.Vector as Vec
import qualified Gargantext.Database.Action.Metrics as Metrics
{-
trainModel :: FlowCmdM env ServantErr m
=> Username -> m Score
......@@ -50,11 +49,11 @@ getMetrics' cId maybeListId tabType maybeLimit = do
let
metrics = map (\(Scored t s1 s2) -> (listType t ngs', [Vec.fromList [s1,s2]])) scores
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
listType t m = maybe (panic errorMsg) fst $ HashMap.lookup t m
errorMsg = "API.Node.metrics: key absent"
{-
_ <- Learn.grid 100 110 metrics' metrics'
--}
pure $ Map.fromListWith (<>) metrics
pure $ Map.fromListWith (<>) $ Vec.toList metrics
......@@ -16,40 +16,37 @@ module Gargantext.Database.Action.Metrics.TFICF
-- import Debug.Trace (trace)
-- import Gargantext.Core (Lang(..))
import Data.Map.Strict (Map, toList, fromList)
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Gargantext.Core.Text.Metrics.TFICF
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, getOccByNgramsOnlyFast)
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.NodeNode (selectCountDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.API.Ngrams.Types
import Gargantext.Prelude
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
getTficf :: UserCorpusId
-> MasterCorpusId
-> NgramsType
-> Cmd err (Map Text Double)
-> Cmd err (HashMap NgramsTerm Double)
getTficf cId mId nt = do
mapTextDoubleLocal <- Map.filter (> 1)
<$> Map.map (fromIntegral . Set.size)
mapTextDoubleLocal <- HM.filter (> 1)
<$> HM.map (fromIntegral . Set.size)
<$> getNodesByNgramsUser cId nt
mapTextDoubleGlobal <- Map.map fromIntegral
<$> getOccByNgramsOnlyFast mId nt (Map.keys mapTextDoubleLocal)
mapTextDoubleGlobal <- HM.map fromIntegral
<$> getOccByNgramsOnlyFast mId nt (HM.keys mapTextDoubleLocal)
countLocal <- selectCountDocs cId
countGlobal <- selectCountDocs mId
pure $ fromList [ ( t
, tficf (TficfInfra (Count n )
(Total $ fromIntegral countLocal ))
(TficfSupra (Count $ fromMaybe 0 $ Map.lookup t mapTextDoubleGlobal)
(Total $ fromIntegral countGlobal))
)
| (t, n) <- toList mapTextDoubleLocal
]
pure $ HM.mapWithKey (\t n ->
tficf (TficfInfra (Count n )
(Total $ fromIntegral countLocal))
(TficfSupra (Count $ fromMaybe 0 $ HM.lookup t mapTextDoubleGlobal)
(Total $ fromIntegral countGlobal))
) mapTextDoubleLocal
......@@ -21,8 +21,10 @@ Portability : POSIX
module Gargantext.Database.Admin.Types.Hyperdata.List
where
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Vector (Vector)
--import qualified Data.Vector as V
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Control.Applicative
import Gargantext.Prelude
......@@ -34,11 +36,11 @@ import Gargantext.Database.Admin.Types.Metrics (ChartMetrics(..), Metrics)
------------------------------------------------------------------------
data HyperdataList =
HyperdataList { _hl_chart :: !(Map TabType (ChartMetrics Histo))
HyperdataList { _hl_chart :: !(HashMap TabType (ChartMetrics Histo))
, _hl_list :: !(Maybe Text)
, _hl_pie :: !(Map TabType (ChartMetrics Histo))
, _hl_scatter :: !(Map TabType Metrics)
, _hl_tree :: !(Map TabType (ChartMetrics [NgramsTree]))
, _hl_pie :: !(HashMap TabType (ChartMetrics Histo))
, _hl_scatter :: !(HashMap TabType Metrics)
, _hl_tree :: !(HashMap TabType (ChartMetrics (Vector NgramsTree)))
} deriving (Show, Generic)
-- HyperdataList { _hl_chart :: !(Maybe (ChartMetrics Histo))
-- , _hl_list :: !(Maybe Text)
......@@ -49,11 +51,11 @@ data HyperdataList =
defaultHyperdataList :: HyperdataList
defaultHyperdataList =
HyperdataList { _hl_chart = Map.empty
HyperdataList { _hl_chart = HM.empty
, _hl_list = Nothing
, _hl_pie = Map.empty
, _hl_scatter = Map.empty
, _hl_tree = Map.empty
, _hl_pie = HM.empty
, _hl_scatter = HM.empty
, _hl_tree = HM.empty
}
------------------------------------------------------------------------
......
......@@ -5,6 +5,8 @@ module Gargantext.Database.Admin.Types.Metrics where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Vector (Vector)
import qualified Data.Vector as V
import Protolude
import Test.QuickCheck.Arbitrary
......@@ -13,15 +15,15 @@ import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
----------------------------------------------------------------------------
data Metrics = Metrics
{ metrics_data :: [Metric]}
newtype Metrics = Metrics
{ metrics_data :: Vector Metric}
deriving (Generic, Show)
instance ToSchema Metrics where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "metrics_")
instance Arbitrary Metrics
where
arbitrary = Metrics <$> arbitrary
arbitrary = (Metrics . V.fromList) <$> arbitrary
data Metric = Metric
{ m_label :: !Text
......@@ -43,7 +45,7 @@ deriveJSON (unPrefix "metrics_") ''Metrics
deriveJSON (unPrefix "m_") ''Metric
data ChartMetrics a = ChartMetrics { chartMetrics_data :: a }
newtype ChartMetrics a = ChartMetrics { chartMetrics_data :: a }
deriving (Generic, Show)
instance (Typeable a, ToSchema a) => ToSchema (ChartMetrics a) where
......
......@@ -24,6 +24,7 @@ import Control.Monad (mzero)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Either
import Data.Hashable (Hashable)
import Data.Swagger
import Data.Text (Text, unpack)
import Data.Time (UTCTime)
......@@ -130,7 +131,7 @@ pgNodeId = O.pgInt4 . id2int
------------------------------------------------------------------------
newtype NodeId = NodeId Int
deriving (Show, Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON)
deriving (Show, Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable)
unNodeId :: NodeId -> Int
unNodeId (NodeId n) = n
......
......@@ -19,6 +19,7 @@ Ngrams connection to the Database.
module Gargantext.Database.Schema.Ngrams
where
import Data.Hashable (Hashable)
import Codec.Serialise (Serialise())
import Control.Lens (over)
import Control.Monad (mzero)
......@@ -81,6 +82,7 @@ data NgramsType = Authors | Institutes | Sources | NgramsTerms
deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
instance Serialise NgramsType
instance Hashable NgramsType
ngramsTypes :: [NgramsType]
ngramsTypes = [minBound..]
......@@ -153,6 +155,7 @@ text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
where
txt' = strip txt
-------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
-- Named entity are typed ngrams of Terms Ngrams
......
......@@ -45,6 +45,7 @@ instance {-# OVERLAPPING #-} IsHashable String where
instance IsHashable Text where
hash = hash . Text.unpack
instance IsHashable (Set Hash) where
hash = hash . foldl (<>) "" . Set.toList
......
resolver: lts-16.14
resolver: lts-16.26
flags: {}
extra-package-dbs: []
packages:
......@@ -10,7 +10,7 @@ packages:
docker:
enable: false
repo: 'fpco/stack-build:lts-14.27-garg'
repo: 'fpco/stack-build:lts-16.26-garg'
run-args:
- '--publish=8008:8008'
......@@ -21,47 +21,57 @@ nix:
allow-newer: true
extra-deps:
# Data Mining Libs
- git: https://github.com/delanoe/data-time-segment.git
commit: 10a416b9f6c443866b36479c3441ebb3bcdeb7ef
- git: https://gitlab.iscpif.fr/gargantext/hlcm.git
commit: 6f0595d2421005837d59151a8b26eee83ebb67b5
- git: https://github.com/delanoe/servant-static-th.git
commit: 8cb8aaf2962ad44d319fcea48442e4397b3c49e8
- git: https://github.com/delanoe/hstatistics.git
commit: 90eef7604bb230644c2246eccd094d7bfefcb135
- git: https://github.com/paulrzcz/HSvm.git
commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9
# API libs
- git: https://github.com/delanoe/servant-static-th.git
commit: 8cb8aaf2962ad44d319fcea48442e4397b3c49e8
# Databases libs
- git: https://github.com/delanoe/haskell-opaleye.git #- opaleye-0.6.7002.0
commit: 63ee65d974e9d20eaaf17a2e83652175988cbb79
- git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- git: https://github.com/robstewart57/rdf4h.git
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
#
# External API connectin to get data
# External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: a9d8e08a7ef82f90e29dfaced4071704a3163394
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit: 95e8f01a5d3b404a14a7fc664996569a6fb41ec4
commit: 020f5f9b308f5c23c925aedf5fb11f8b4728fb19
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
#
- git: https://gitlab.com/npouillard/patches-class.git
commit: 4712bfb055888fae63cd2e88431972375f979b94
# NP libs
#- git: https://github.com/np/servant-job.git # waiting for PR
- git: https://github.com/delanoe/servant-job.git
commit: a9d8ec247b60906ae0ad76ea017cacd6ff36a7a1
- git: https://github.com/np/patches-map
commit: d42c37de5046ba22abcb5e21c121d1072126f3cc
- git: https://github.com/delanoe/haskell-opaleye.git #- opaleye-0.6.7002.0
commit: 63ee65d974e9d20eaaf17a2e83652175988cbb79
- git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
commit: 7d74f96dfea8e51fbab1793cc0429b2fe741f73d
#- git: https://github.com/np/patches-map
- git: https://github.com/delanoe/patches-map
commit: 76cae88f367976ff091e661ee69a5c3126b94694
#- git: https://gitlab.com/npouillard/patches-class.git
- git: https://gitlab.iscpif.fr/gargantext/patches-class.git
commit: d3e971d4e78d1dfcc853f2fb86bde1995faf22ae
# Graph libs
- git: https://github.com/kaizhang/haskell-igraph.git
commit: 34553acc4ebdcae7065311dcefb426e0fd58c5a0
- git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
commit: 7d74f96dfea8e51fbab1793cc0429b2fe741f73d
# Others dependencies (with stack resolver)
- KMP-0.2.0.0@sha256:6dfbac03ef00ebd9347234732cb86a40f62ab5a80c0cc6bedb8eb51766f7df28,2562
- Unique-0.4.7.7@sha256:2269d3528271e25d34542e7c24a4e541e27ec33460e1ea00845da95b82eec6fa,2777
- accelerate-1.2.0.1@sha256:bb1928efe602545df4043692916ed427c959110cbd678d03c3f9c3be25d1ae88,20112
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment