Commit 6fb2db8d authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] Charts Metrics Data (Histo, Bar/Pie, Tree)

[STACK] upgrade version
[ML] improved metrics of ngrams selection
parent 29750a15
#!/bin/bash
if git --version;
then
echo "git installed, ok"
......@@ -8,8 +11,13 @@ else
sudo apt update && sudo apt install git
fi
sudo apt update && sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev
sudo apt update
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql postgresql-server-dev-9.6
#echo "Which user?"
#read USER
#sudo adduser --disabled-password --gecos "" $USER
#sudo su $USER
curl -sSL https://get.haskellstack.org/ | sh
stack update
......@@ -36,6 +44,30 @@ stack build
stack install
# Specific to our servers
### Configure timezone and locale ###################################
echo "########### LOCALES & TZ #################"
echo "Europe/Paris" > /etc/timezone
dpkg-reconfigure --frontend=noninteractive tzdata
#ENV TZ "Europe/Paris"
sed -i -e 's/# en_GB.UTF-8 UTF-8/en_GB.UTF-8 UTF-8/' /etc/locale.gen && \
sed -i -e 's/# fr_FR.UTF-8 UTF-8/fr_FR.UTF-8 UTF-8/' /etc/locale.gen && \
locale-gen && \
update-locale LANG=fr_FR.UTF-8 && \
update-locale LANGUAGE=fr_FR.UTF-8 && \
update-locale LC_ALL=fr_FR.UTF-8
################################################################
# Database configuration
# CREATE USER gargantua WITH PASSWORD $(grep DB_PASS gargantext.ini)
# GRANT ALL PRIVILEGES ON DATABASE gargandbV4 to gargantua
#######################################################################
## POSTGRESQL DATA (as ROOT)
#######################################################################
sed -iP "s%^data_directory.*%data_directory = \'\/srv\/gargandata\'%" /etc/postgresql/9.6/main/postgresql.conf
echo "host all all 0.0.0.0/0 md5" >> /etc/postgresql/9.6/main/pg_hba.conf
echo "listen_addresses='*'" >> /etc/postgresql/9.6/main/postgresql.conf
......@@ -35,6 +35,9 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Utils
import Gargantext.Core.Types (CorpusId)
import Gargantext.Prelude
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.NTree
import Gargantext.Database.Flow
import Gargantext.Viz.Chart
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
......@@ -66,7 +69,6 @@ instance Arbitrary Metric
deriveJSON (unPrefix "metrics_") ''Metrics
deriveJSON (unPrefix "m_") ''Metric
-------------------------------------------------------------
data ChartMetrics a = ChartMetrics { chartMetrics_data :: a }
......@@ -88,6 +90,20 @@ instance Arbitrary Histo
]
deriveJSON (unPrefix "histo_") ''Histo
instance ToSchema (TreeChartMetrics)
instance Arbitrary (TreeChartMetrics)
where
arbitrary = TreeChartMetrics <$> arbitrary
instance ToSchema MyTree
instance Arbitrary MyTree
where
arbitrary = MyTree <$> arbitrary <*> arbitrary <*> arbitrary
-- TODO add start / end
getChart :: CorpusId -> Maybe UTCTime -> Maybe UTCTime -> Cmd err (ChartMetrics Histo)
......@@ -95,18 +111,15 @@ getChart cId _start _end = do
h <- histoData cId
pure (ChartMetrics h)
getPie :: FlowCmdM env err m => CorpusId -> Maybe UTCTime -> Maybe UTCTime -> TabType -> m (ChartMetrics Histo)
getPie cId _start _end tt = do
p <- pieData cId (ngramsTypeFromTabType tt) GraphTerm
pure (ChartMetrics p)
getTree :: FlowCmdM env err m => CorpusId -> Maybe UTCTime -> Maybe UTCTime -> TabType -> ListType -> m (ChartMetrics TreeChartMetrics)
getTree cId _start _end tt lt = do
p <- treeData cId (ngramsTypeFromTabType tt) lt
pure (ChartMetrics p)
{-
data FacetChart = FacetChart { facetChart_time :: UTCTime'
, facetChart_count :: Double
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "facetChart_") ''FacetChart)
instance ToSchema FacetChart
instance Arbitrary FacetChart where
arbitrary = FacetChart <$> arbitrary <*> arbitrary
-}
......@@ -131,6 +131,14 @@ mSetFromSet = MSet . Map.fromSet (const ())
mSetFromList :: Ord a => [a] -> MSet a
mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
-- mSetToSet :: Ord a => MSet a -> Set a
-- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
mSetToSet :: Ord a => MSet a -> Set a
mSetToSet = Set.fromList . mSetToList
mSetToList :: MSet a -> [a]
mSetToList (MSet a) = Map.keys a
instance Foldable MSet where
foldMap f (MSet m) = Map.foldMapWithKey (\k _ -> f k) m
......
{-|
Module : Gargantext.API.Ngrams.NTree
Description : Tree of Ngrams
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Ngrams.NTree
where
import Data.Text (Text)
import Gargantext.Prelude
import GHC.Generics (Generic)
import Data.Aeson.TH (deriveJSON)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Core.Types (ListType(..), NodeId)
import Gargantext.API.Ngrams
import Data.Tree
import Data.Maybe (catMaybes)
import Data.Map (Map)
import Data.Set (Set)
import qualified Data.Set as Set
import qualified Data.Map as Map
import qualified Data.List as List
type Children = Text
type Root = Text
data MyTree = MyTree { mt_label :: Text
, mt_value :: Double
, mt_children :: [MyTree]
} deriving (Generic, Show)
toMyTree :: Tree (Text,Double) -> MyTree
toMyTree (Node (l,v) xs) = MyTree l v (map toMyTree xs)
deriveJSON (unPrefix "mt_") ''MyTree
toTree :: ListType -> Map Text (Set NodeId) -> Map Text NgramsRepoElement -> [MyTree]
toTree lt vs m = map toMyTree $ unfoldForest buildNode roots
where
buildNode r = maybe ((r, value r),[]) (\x -> ((r, value r), mSetToList $ _nre_children x)) (Map.lookup r m)
value l = maybe 0 (fromIntegral . Set.size) $ Map.lookup l vs
rootsCandidates = catMaybes
$ List.nub
$ map (\(c,c') -> case _nre_root c' of
Nothing -> Just c
_ -> _nre_root c' ) (Map.toList m)
roots = map fst
$ filter (\(_,l) -> l == lt)
$ catMaybes
$ map (\c -> (,) <$> Just c <*> (_nre_list <$> Map.lookup c m)) rootsCandidates
......@@ -58,7 +58,6 @@ mapTermListRoot nodeIds ngramsType = do
| (t, nre) <- Map.toList ngrams
]
filterListWithRoot :: ListType -> Map Text (ListType, Maybe Text)
-> Map Text (Maybe RootTerm)
filterListWithRoot lt m = Map.fromList
......@@ -71,7 +70,6 @@ filterListWithRoot lt m = Map.fromList
Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r
Just (l',_) -> l' == lt
groupNodesByNgrams :: Map Text (Maybe RootTerm)
-> Map Text (Set NodeId)
-> Map Text (Set NodeId)
......@@ -97,4 +95,3 @@ getCoocByNgrams (Diagonal diag) m =
False -> listToCombi identity (Map.keys m)
]
......@@ -49,7 +49,7 @@ import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, TableNgramsApiGet, ta
import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery)
import Gargantext.API.Types
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Types.Main (Tree, NodeTree)
import Gargantext.Core.Types.Main (Tree, NodeTree, ListType)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),runViewAuthorsDoc)
import Gargantext.Database.Node.Children (getChildren)
import Gargantext.Database.Schema.Node ( getNodesWithParentId, getNode, deleteNode, deleteNodes, mkNodeWithParent, JSONB, NodeError(..), HasNodeError(..))
......@@ -138,6 +138,8 @@ type NodeAPI a = Get '[JSON] (Node a)
-- VIZ
:<|> "metrics" :> MetricsAPI
:<|> "chart" :> ChartApi
:<|> "pie" :> PieApi
:<|> "tree" :> TreeApi
:<|> "phylo" :> PhyloAPI
-- TODO-ACCESS: check userId CanRenameNode nodeId
......@@ -178,7 +180,10 @@ nodeAPI p uId id
:<|> getMetrics id
:<|> getChart id
:<|> getPie id
:<|> getTree id
:<|> phyloAPI id
-- Annuaire
-- :<|> upload
-- :<|> query
......@@ -264,6 +269,21 @@ type ChartApi = Summary " Chart API"
:> QueryParam "to" UTCTime
:> Get '[JSON] (ChartMetrics Histo)
type PieApi = Summary " Chart API"
:> QueryParam "from" UTCTime
:> QueryParam "to" UTCTime
:> QueryParamR "ngramsType" TabType
:> Get '[JSON] (ChartMetrics Histo)
type TreeApi = Summary " Tree API"
:> QueryParam "from" UTCTime
:> QueryParam "to" UTCTime
:> QueryParamR "ngramsType" TabType
:> QueryParamR "listType" ListType
:> Get '[JSON] (ChartMetrics TreeChartMetrics)
-- Depending on the Type of the Node, we could post
-- New documents for a corpus
-- New map list terms
......@@ -370,12 +390,11 @@ getMetrics cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics' cId maybeListId tabType maybeLimit
let
metrics = map (\(Scored t s1 s2) -> Metric t s1 s2 (listType t ngs')) scores
metrics = map (\(Scored t s1 s2) -> Metric t (log' 5 s1) (log' 2 s2) (listType t ngs')) scores
log' n x = 1 + (if x <= 0 then 0 else (log $ (10^(n::Int)) * x))
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent"
pure $ Metrics metrics
......@@ -12,6 +12,10 @@ Portability : POSIX
-- check userId CanFillUserCorpus userCorpusId
-- check masterUserId CanFillMasterCorpus masterCorpusId
-- TODO-ACCESS: check uId CanInsertDoc pId && checkDocType nodeType
-- TODO-EVENTS: InsertedNodes
-}
{-# LANGUAGE ConstraintKinds #-}
......@@ -24,15 +28,6 @@ Portability : POSIX
module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
where
--import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
--import Gargantext.Database.Metrics.TFICF (getTficf)
--import Gargantext.Database.Node.Contact (HyperdataContact(..))
--import Gargantext.Database.Schema.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
--import Gargantext.Database.Schema.NodeNgramsNgrams (NodeNgramsNgramsPoly(..), insertNodeNgramsNgramsNew)
--import Gargantext.Database.Schema.User (insertUsers, simpleUser, gargantuaUser)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
--import Gargantext.Text.Metrics.TFICF (Tficf(..))
--import Debug.Trace (trace)
import Control.Lens ((^.), view, Lens', _Just)
import Control.Monad (mapM_)
import Control.Monad.IO.Class (liftIO)
......@@ -48,30 +43,31 @@ import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (NodePoly(..), Terms(..))
import Gargantext.Core.Types.Individu (Username)
import Gargantext.Core.Types.Main
import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Config (userMaster, corpusMasterName)
import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Node.Contact -- (HyperdataContact(..), ContactWho(..))
import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Root (getRoot)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
import Gargantext.Database.Schema.User (getUser, UserLight(..))
import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Utils (Cmd, CmdM)
import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Prelude
import Gargantext.Text.List (buildNgramsLists,StopSize(..))
import Gargantext.Text.Parsers (parseDocs, FileFormat)
import Gargantext.Text.Terms (TermType(..), tt_lang)
import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
import qualified Gargantext.Text.Parsers.GrandDebat as GD
import Servant (ServantErr)
import System.FilePath (FilePath)
import qualified Data.Map as DM
import qualified Data.Text as Text
import qualified Gargantext.Database.Node.Document.Add as Doc (add)
import qualified Gargantext.Text.Parsers.GrandDebat as GD
type FlowCmdM env err m =
( CmdM env err m
......@@ -130,9 +126,6 @@ flowCorpusSearchInDatabase u la q = do
------------------------------------------------------------------------
-- TODO-ACCESS: check uId CanInsertDoc pId && checkDocType nodeType
-- TODO-EVENTS: InsertedNodes
flow :: (FlowCmdM env ServantErr m, FlowCorpus a, MkCorpus c)
=> Maybe c -> Username -> CorpusName -> TermType Lang -> [[a]] -> m CorpusId
......
......@@ -114,7 +114,7 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
zs = drop b $ drop a ns
a = 3
b = 5000
b = 500
isStopTerm :: StopSize -> Text -> Bool
isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x)
......
......@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-}
module Gargantext.Viz.Chart
where
......@@ -21,12 +22,29 @@ module Gargantext.Viz.Chart
import Data.Text (Text)
import Data.List (unzip, sortOn)
import Data.Map (toList)
import Data.Aeson.TH (deriveJSON)
import GHC.Generics (Generic)
import Gargantext.Prelude
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Schema.NodeNode (selectDocsDates)
import Gargantext.Database.Utils
import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Text.Metrics.Count (occurrencesWith)
import Gargantext.Core.Types.Main
-- Pie Chart
import Data.Maybe (catMaybes)
import qualified Data.Map as Map
import qualified Data.List as List
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.NTree
import Gargantext.Database.Metrics.NgramsByNode
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node
import Gargantext.Core.Types
import Gargantext.Database.Flow
import Servant
data Chart = ChartHisto | ChartScatter | ChartPie
......@@ -47,3 +65,65 @@ histoData cId = do
$ occurrencesWith identity dates
pure (Histo ls css)
pieData :: FlowCmdM env err m
=> CorpusId -> NgramsType -> ListType
-> m Histo
pieData cId nt lt = do
ls <- map (_node_id) <$> getListsWithParentId cId
ts <- mapTermListRoot ls nt
let
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ Map.toList dico
group dico' x = case Map.lookup x dico' of
Nothing -> x
Just x' -> maybe x identity x'
(_total,mapTerms) <- countNodesByNgramsWith (group dico)
<$> getNodesByNgramsOnlyUser cId nt terms
let (dates, count) = unzip $ map (\(t,(d,_)) -> (t, d)) $ Map.toList mapTerms
pure (Histo dates (map round count))
data TreeChartMetrics = TreeChartMetrics { _tcm_data :: [MyTree]
}
deriving (Generic, Show)
deriveJSON (unPrefix "_tcm_") ''TreeChartMetrics
treeData :: FlowCmdM env err m
=> CorpusId -> NgramsType -> ListType
-> m TreeChartMetrics
treeData cId nt lt = do
ls <- map (_node_id) <$> getListsWithParentId cId
ts <- mapTermListRoot ls nt
let
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ Map.toList dico
cs' <- getNodesByNgramsOnlyUser cId nt terms
m <- getListNgrams ls nt
pure $ TreeChartMetrics $ toTree lt cs' m
treeData' :: FlowCmdM env ServantErr m
=> CorpusId -> NgramsType -> ListType
-> m TreeChartMetrics
treeData' cId nt lt = do
ls <- map (_node_id) <$> getListsWithParentId cId
ts <- mapTermListRoot ls nt
let
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ Map.toList dico
cs' <- getNodesByNgramsOnlyUser cId nt terms
m <- getListNgrams ls nt
pure $ TreeChartMetrics $ toTree lt cs' m
resolver: lts-12.10
resolver: lts-12.26
flags: {}
extra-package-dbs: []
packages:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment