Commit 7cd80ff2 authored by qlobbe's avatar qlobbe

merge done

parents 58efcc61 c14f31a5
Pipeline #506 failed with stage
......@@ -45,8 +45,11 @@ main = do
let createUsers :: Cmd ServantErr Int64
createUsers = insertUsersDemo
let cmd :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) (Unsupervised EN 5 1 Nothing) CsvHalFormat corpusPath
let
--tt = (Unsupervised EN 5 1 Nothing)
tt = (Mono EN)
cmd :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) tt CsvHalFormat corpusPath
{-
let debatCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
debatCorpus = do
......
......@@ -10,5 +10,9 @@ fi
sudo apt update
sudo apt install liblzma-dev libpcre3-dev libblas-dev liblapack-dev pkg-config libgsl-dev libbz2-dev postgresql nginx libpq-dev libigraph0-dev
# Phylo management
sudo apt install graphviz
sudo apt install postgresql-server-dev-9.6
......@@ -122,6 +122,7 @@ library:
- http-client
- http-client-tls
- http-conduit
- http-media
- http-api-data
- http-types
- hsparql
......
......@@ -73,11 +73,10 @@ import Gargantext.API.Count ( CountAPI, count, Query)
import Gargantext.API.FrontEnd (FrontEndAPI, frontEndServer)
import Gargantext.API.Ngrams (HasRepo(..), HasRepoSaver(..), saveRepo, TableNgramsApi, apiNgramsTableDoc)
import Gargantext.API.Node
import Gargantext.API.Search ( SearchAPI, search, SearchQuery)
import Gargantext.API.Search (SearchPairsAPI, searchPairs)
import Gargantext.API.Types
import qualified Gargantext.API.Corpus.New as New
import Gargantext.Core.Types (HasInvalidError(..))
import Gargantext.Database.Facet
import Gargantext.Database.Schema.Node (HasNodeError(..), NodeError)
import Gargantext.Database.Tree (HasTreeError(..), TreeError)
import Gargantext.Database.Types.Node
......@@ -263,12 +262,7 @@ type GargAPI' =
:> ReqBody '[JSON] Query :> CountAPI
-- Corpus endpoint --> TODO rename s/search/filter/g
:<|> "search":> Summary "Search endpoint"
:> ReqBody '[JSON] SearchQuery
:> QueryParam "offset" Int
:> QueryParam "limit" Int
:> QueryParam "order" OrderBy
:> SearchAPI
:<|> "search":> Capture "corpus" NodeId :> SearchPairsAPI
-- TODO move to NodeAPI?
:<|> "graph" :> Summary "Graph endpoint"
......@@ -320,7 +314,7 @@ serverGargAPI -- orchestrator
:<|> apiNgramsTableDoc
:<|> nodesAPI
:<|> count -- TODO: undefined
:<|> search
:<|> searchPairs -- TODO: move elsewhere
:<|> graphAPI -- TODO: mock
:<|> treeAPI
:<|> New.api
......
......@@ -18,7 +18,7 @@ Loads all static file for the front-end.
---------------------------------------------------------------------
module Gargantext.API.FrontEnd where
import Servant.Static.TH (createApiAndServerDecs)
import Servant.Static.TH (createApiAndServerDecs)
---------------------------------------------------------------------
$(createApiAndServerDecs "FrontEndAPI" "frontEndServer" "purescript-gargantext/dist")
......
......@@ -33,14 +33,18 @@ import GHC.Generics (Generic)
import Gargantext.Core.Types (ListType(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Utils
import Gargantext.Core.Types (CorpusId)
import Gargantext.Core.Types (CorpusId, ListId, Limit)
import Gargantext.Prelude
import Gargantext.API.Ngrams
import Gargantext.Text.Metrics (Scored(..))
import Gargantext.API.Ngrams.NTree
import Gargantext.Database.Flow
import Gargantext.Viz.Chart
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Map as Map
import qualified Gargantext.Database.Metrics as Metrics
data Metrics = Metrics
{ metrics_data :: [Metric]}
......@@ -97,6 +101,30 @@ instance Arbitrary MyTree
arbitrary = MyTree <$> arbitrary <*> arbitrary <*> arbitrary
-------------------------------------------------------------
-- | Scatter metrics API
type ScatterAPI = Summary "SepGen IncExc metrics"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParam "limit" Int
:> Get '[JSON] Metrics
getScatter :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> Maybe Limit
-> m Metrics
getScatter cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics cId maybeListId tabType maybeLimit
let
metrics = map (\(Scored t s1 s2) -> Metric t (log' 5 s1) (log' 2 s2) (listType t ngs')) scores
log' n x = 1 + (if x <= 0 then 0 else (log $ (10^(n::Int)) * x))
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent"
pure $ Metrics metrics
......
......@@ -49,6 +49,20 @@ getListNgrams nodeIds ngramsType = do
pure ngrams
getTermsWith :: (RepoCmdM env err m, Ord a)
=> (Text -> a ) -> [ListId]
-> NgramsType -> ListType
-> m (Map a [a])
getTermsWith f ls ngt lt = Map.fromListWith (<>)
<$> map (toTreeWith f)
<$> Map.toList
<$> Map.filter (\f' -> (fst f') == lt)
<$> mapTermListRoot ls ngt
where
toTreeWith f'' (t, (_lt, maybeRoot)) = case maybeRoot of
Nothing -> (f'' t, [])
Just r -> (f'' r, map f'' [t])
mapTermListRoot :: RepoCmdM env err m
=> [ListId] -> NgramsType
-> m (Map Text (ListType, (Maybe Text)))
......@@ -85,13 +99,19 @@ groupNodesByNgrams syn occs = Map.fromListWith (<>) occs'
data Diagonal = Diagonal Bool
getCoocByNgrams :: Diagonal -> Map Text (Set NodeId) -> Map (Text, Text) Int
getCoocByNgrams (Diagonal diag) m =
getCoocByNgrams = getCoocByNgrams' identity
getCoocByNgrams' :: (Ord a, Ord c) => (b -> Set c) -> Diagonal -> Map a b -> Map (a, a) Int
getCoocByNgrams' f (Diagonal diag) m =
Map.fromList [((t1,t2)
,maybe 0 Set.size $ Set.intersection
<$> Map.lookup t1 m
<*> Map.lookup t2 m
<$> (fmap f $ Map.lookup t1 m)
<*> (fmap f $ Map.lookup t2 m)
) | (t1,t2) <- case diag of
True -> [ (x,y) | x <- Map.keys m, y <- Map.keys m, x <= y]
False -> listToCombi identity (Map.keys m)
]
......@@ -51,7 +51,7 @@ import GHC.Generics (Generic)
import Gargantext.API.Metrics
import Gargantext.API.Ngrams (TabType(..), TableNgramsApi, apiNgramsTableCorpus, QueryParamR, TODO)
import Gargantext.API.Ngrams.NTree (MyTree)
import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery)
import Gargantext.API.Search (SearchDocsAPI, searchDocs)
import Gargantext.API.Types
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Core.Types.Main (Tree, NodeTree, ListType)
......@@ -65,7 +65,6 @@ import Gargantext.Database.Types.Node
import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude
import Gargantext.Prelude.Utils (hash)
import Gargantext.Text.Metrics (Scored(..))
import Gargantext.Viz.Chart
import Gargantext.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Servant
......@@ -74,8 +73,6 @@ import Servant.Swagger (HasSwagger(toSwagger))
import Servant.Swagger.Internal
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Map as Map
import qualified Gargantext.Database.Metrics as Metrics
import qualified Gargantext.Database.Node.Update as U (update, Update(..))
{-
......@@ -133,18 +130,13 @@ type NodeAPI a = Get '[JSON] (Node a)
:<|> "table" :> TableApi
:<|> "ngrams" :> TableNgramsApi
:<|> "pairing" :> PairingApi
:<|> "favorites" :> FavApi
:<|> "documents" :> DocsApi
:<|> "search":> Summary "Node Search"
:> ReqBody '[JSON] SearchInQuery
:> QueryParam "offset" Int
:> QueryParam "limit" Int
:> QueryParam "order" OrderBy
:> SearchAPI
:<|> "search" :> SearchDocsAPI
-- VIZ
:<|> "metrics" :> MetricsAPI
:<|> "metrics" :> ScatterAPI
:<|> "chart" :> ChartApi
:<|> "pie" :> PieApi
:<|> "tree" :> TreeApi
......@@ -185,9 +177,8 @@ nodeAPI p uId id
:<|> favApi id
:<|> delDocs id
:<|> searchIn id
:<|> getMetrics id
:<|> searchDocs id
:<|> getScatter id
:<|> getChart id
:<|> getPie id
:<|> getTree id
......@@ -375,27 +366,6 @@ putNode = undefined -- TODO
query :: Monad m => Text -> m Text
query s = pure s
-------------------------------------------------------------
type MetricsAPI = Summary "SepGen IncExc metrics"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParam "limit" Int
:> Get '[JSON] Metrics
getMetrics :: NodeId -> GargServer MetricsAPI
getMetrics cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics' cId maybeListId tabType maybeLimit
let
metrics = map (\(Scored t s1 s2) -> Metric t (log' 5 s1) (log' 2 s2) (listType t ngs')) scores
log' n x = 1 + (if x <= 0 then 0 else (log $ (10^(n::Int)) * x))
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent"
pure $ Metrics metrics
-------------------------------------------------------------
type Hash = Text
data FileType = CSV | PresseRIS
......
......@@ -33,54 +33,50 @@ import Servant
import Test.QuickCheck.Arbitrary
import Test.QuickCheck (elements)
-- import Control.Applicative ((<*>))
import Gargantext.API.Types (GargServer)
import Gargantext.Prelude
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Core.Types.Main (Offset, Limit)
import Gargantext.Database.Types.Node
import Gargantext.Database.TextSearch
import Gargantext.Database.Facet
import Gargantext.Database.Utils (Cmd)
-----------------------------------------------------------------------
-- | SearchIn [NodesId] if empty then global search
-- TODO [Int]
data SearchQuery = SearchQuery { sq_query :: [Text]
, sq_corpus_id :: NodeId
} deriving (Generic)
data SearchQuery = SearchQuery
{ sq_query :: [Text]
} deriving (Generic)
$(deriveJSON (unPrefix "sq_") ''SearchQuery)
instance ToSchema SearchQuery where
declareNamedSchema =
genericDeclareNamedSchema
defaultSchemaOptions {fieldLabelModifier = \fieldLabel -> drop 3 fieldLabel}
defaultSchemaOptions {fieldLabelModifier = drop 3}
instance Arbitrary SearchQuery where
arbitrary = elements [SearchQuery ["electrodes"] 472764]
--
arbitrary = elements [SearchQuery ["electrodes"]]
data SearchInQuery = SearchInQuery { siq_query :: [Text]
} deriving (Generic)
$(deriveJSON (unPrefix "siq_") ''SearchInQuery)
instance ToSchema SearchInQuery where
declareNamedSchema =
genericDeclareNamedSchema
defaultSchemaOptions {fieldLabelModifier = \fieldLabel -> drop 4 fieldLabel}
-----------------------------------------------------------------------
instance Arbitrary SearchInQuery where
arbitrary = SearchInQuery <$> arbitrary
data SearchDocResults = SearchDocResults { sdr_results :: [FacetDoc]}
deriving (Generic)
$(deriveJSON (unPrefix "sdr_") ''SearchDocResults)
instance Arbitrary SearchDocResults where
arbitrary = SearchDocResults <$> arbitrary
-----------------------------------------------------------------------
instance ToSchema SearchDocResults where
declareNamedSchema =
genericDeclareNamedSchema
defaultSchemaOptions {fieldLabelModifier = drop 4}
data SearchResults = SearchResults' { srs_resultsP :: [FacetDoc]}
| SearchResults { srs_results :: [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]}
data SearchPairedResults = SearchPairedResults { spr_results :: [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]] }
deriving (Generic)
$(deriveJSON (unPrefix "srs_") ''SearchResults)
$(deriveJSON (unPrefix "spr_") ''SearchPairedResults)
instance Arbitrary SearchResults where
arbitrary = SearchResults <$> arbitrary
instance Arbitrary SearchPairedResults where
arbitrary = SearchPairedResults <$> arbitrary
instance ToSchema SearchResults where
instance ToSchema SearchPairedResults where
declareNamedSchema =
genericDeclareNamedSchema
defaultSchemaOptions {fieldLabelModifier = \fieldLabel -> drop 4 fieldLabel}
......@@ -88,16 +84,25 @@ instance ToSchema SearchResults where
-----------------------------------------------------------------------
-- TODO-ACCESS: CanSearch? or is it part of CanGetNode
-- TODO-EVENTS: No event, this is a read-only query.
type SearchAPI = Post '[JSON] SearchResults
type SearchAPI results
= Summary "Search endpoint"
:> ReqBody '[JSON] SearchQuery
:> QueryParam "offset" Int
:> QueryParam "limit" Int
:> QueryParam "order" OrderBy
:> Post '[JSON] results
type SearchDocsAPI = SearchAPI SearchDocResults
type SearchPairsAPI = SearchAPI SearchPairedResults
-----------------------------------------------------------------------
search :: SearchQuery -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err SearchResults
search (SearchQuery q pId) o l order =
SearchResults <$> searchInCorpusWithContacts pId q o l order
searchPairs :: NodeId -> GargServer SearchPairsAPI
searchPairs pId (SearchQuery q) o l order =
SearchPairedResults <$> searchInCorpusWithContacts pId q o l order
searchIn :: NodeId -> SearchInQuery -> Maybe Offset -> Maybe Limit -> Maybe OrderBy -> Cmd err SearchResults
searchIn nId (SearchInQuery q ) o l order =
SearchResults' <$> searchInCorpus nId q o l order
searchDocs :: NodeId -> GargServer SearchDocsAPI
searchDocs nId (SearchQuery q) o l order =
SearchDocResults <$> searchInCorpus nId q o l order
--SearchResults <$> searchInCorpusWithContacts nId q o l order
......@@ -57,6 +57,7 @@ nodeTypeId n =
---- Scores
-- NodeOccurrences -> 10
NodeGraph -> 9
NodePhylo -> 90
NodeDashboard -> 7
NodeChart -> 51
......
......@@ -68,6 +68,7 @@ import qualified Opaleye.Internal.Unpackspec()
type Favorite = Bool
type Title = Text
-- TODO remove Title
type FacetDoc = Facet NodeId UTCTime Title HyperdataDocument Favorite Int
type FacetSources = FacetDoc
type FacetAuthors = FacetDoc
......
......@@ -52,7 +52,7 @@ import Gargantext.Database.Node.Contact -- (HyperdataContact(..), ContactWho(..)
import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Root (getRoot)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, mkPhylo, mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
import Gargantext.Database.Schema.User (getUser, UserLight(..))
import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
......@@ -190,6 +190,7 @@ flowCorpusUser l userName corpusName ctype ids = do
printDebug "userListId" userListId
-- User Graph Flow
_ <- mkGraph userCorpusId userId
_ <- mkPhylo userCorpusId userId
--}
-- User Dashboard Flow
......@@ -217,8 +218,14 @@ insertMasterDocs c lang hs = do
fixLang (Unsupervised l n s m) = Unsupervised l n s m'
where
m' = case m of
Nothing -> trace ("buildTries here" :: String) $ Just $ buildTries n (fmap toToken $ uniText $ Text.intercalate " " $ List.concat $ map hasText documentsWithId)
m'' -> m''
Nothing -> trace ("buildTries here" :: String)
$ Just
$ buildTries n ( fmap toToken $ uniText
$ Text.intercalate " . "
$ List.concat
$ map hasText documentsWithId
)
just_m -> just_m
fixLang l = l
lang' = fixLang lang
......
......@@ -39,7 +39,7 @@ import qualified Gargantext.Database.Metrics as Metrics
{-
trainModel :: FlowCmdM env ServantErr m
=> Username -> m Score
trainMode u = do
trainModel u = do
rootId <- _node_id <$> getRoot u
(id:ids) <- getCorporaWithParentId rootId
(s,_model) <- case length ids >0 of
......@@ -48,11 +48,11 @@ trainMode u = do
--}
getMetrics :: FlowCmdM env err m
getMetrics' :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Int
-> m (Map.Map ListType [Vec.Vector Double])
getMetrics cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics' cId maybeListId tabType maybeLimit
getMetrics' cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics cId maybeListId tabType maybeLimit
let
metrics = map (\(Scored t s1 s2) -> (listType t ngs', [Vec.fromList [s1,s2]])) scores
......
......@@ -24,26 +24,27 @@ import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm)
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..))
import Gargantext.Database.Flow (FlowCmdM)
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser, getTficfWith)
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser{-, getTficfWith-})
import Gargantext.Database.Node.Select
import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Types.Node (ListId, CorpusId, HyperdataCorpus)
import Gargantext.Database.Flow (getOrMkRootWithCorpus)
import Gargantext.Database.Types.Node (ListId, CorpusId{-, HyperdataCorpus-})
--import Gargantext.Database.Flow (getOrMkRootWithCorpus)
import Gargantext.Database.Config (userMaster)
import Gargantext.Prelude
import Gargantext.Text.Metrics (scored, Scored(..), localMetrics, toScored)
import Gargantext.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import qualified Data.Map as Map
import qualified Data.Vector.Storable as Vec
--import qualified Data.Vector.Storable as Vec
getMetrics' :: FlowCmdM env err m
getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text])
getMetrics' cId maybeListId tabType maybeLimit = do
(ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
getMetrics cId maybeListId tabType maybeLimit = do
(ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
pure (ngs, scored myCooc)
{- | TODO remove unused function
getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text])
......@@ -59,7 +60,6 @@ getMetrics cId maybeListId tabType maybeLimit = do
pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics'])
getLocalMetrics :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m ( Map Text (ListType, Maybe Text)
......@@ -69,6 +69,7 @@ getLocalMetrics :: (FlowCmdM env err m)
getLocalMetrics cId maybeListId tabType maybeLimit = do
(ngs, ngs', myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
pure (ngs, ngs', localMetrics myCooc)
-}
getNgramsCooc :: (FlowCmdM env err m)
......
......@@ -103,6 +103,10 @@ instance FromField HyperdataGraph
where
fromField = fromField'
instance FromField HyperdataPhylo
where
fromField = fromField'
instance FromField HyperdataAnnuaire
where
fromField = fromField'
......@@ -143,6 +147,10 @@ instance QueryRunnerColumnDefault PGJsonb HyperdataGraph
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataPhylo
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataAnnuaire
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
......@@ -452,6 +460,17 @@ nodeGraphW maybeName maybeGraph pId = node NodeGraph name graph (Just pId)
name = maybe "Graph" identity maybeName
graph = maybe arbitraryGraph identity maybeGraph
------------------------------------------------------------------------
arbitraryPhylo :: HyperdataPhylo
arbitraryPhylo = HyperdataPhylo (Just "Preferences")
nodePhyloW :: Maybe Name -> Maybe HyperdataPhylo -> ParentId -> UserId -> NodeWrite
nodePhyloW maybeName maybePhylo pId = node NodePhylo name graph (Just pId)
where
name = maybe "Phylo" identity maybeName
graph = maybe arbitraryPhylo identity maybePhylo
------------------------------------------------------------------------
arbitraryDashboard :: HyperdataDashboard
......@@ -603,6 +622,9 @@ mkGraph p u = insertNodesR [nodeGraphW Nothing Nothing p u]
mkDashboard :: ParentId -> UserId -> Cmd err [NodeId]
mkDashboard p u = insertNodesR [nodeDashboardW Nothing Nothing p u]
mkPhylo :: ParentId -> UserId -> Cmd err [NodeId]
mkPhylo p u = insertNodesR [nodePhyloW Nothing Nothing p u]
-- | Default CorpusId Master and ListId Master
pgNodeId :: NodeId -> Column PGInt4
......
......@@ -149,6 +149,19 @@ queryDocs cId = proc () -> do
returnA -< view (node_hyperdata) n
selectDocNodes :: CorpusId -> Cmd err [NodeDocument]
selectDocNodes cId = runOpaQuery (queryDocNodes cId)
queryDocNodes :: CorpusId -> O.Query NodeRead
queryDocNodes cId = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< ( nn_node1_id nn) .== (toNullable $ pgNodeId cId)
restrict -< ( nn_delete nn) .== (toNullable $ pgBool False)
restrict -< (_node_typename n) .== (pgInt4 $ nodeTypeId NodeDocument)
returnA -< n
joinInCorpus :: O.Query (NodeRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeTable queryNodeNodeTable cond
where
......
......@@ -145,6 +145,7 @@ CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, dele
CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id);
CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type);
create INDEX on public.node_node_ngrams USING btree (node1_id, node2_id);
-- TRIGGERS
-- TODO user haskell-postgresql-simple to create this function
......
......@@ -98,7 +98,7 @@ dbTree rootId = map (\(nId, tId, pId, n) -> DbTreeNode nId tId pId n) <$> runPGS
FROM nodes AS c
INNER JOIN tree AS s ON c.parent_id = s.id
WHERE c.typename IN (2,3,30,31,7,9)
WHERE c.typename IN (2,3,30,31,7,9,90)
)
SELECT * from tree;
|] (Only rootId)
......
......@@ -379,6 +379,7 @@ data HyperdataGraph = HyperdataGraph { hyperdataGraph_preferences :: !(Maybe T
$(deriveJSON (unPrefix "hyperdataGraph_") ''HyperdataGraph)
instance Hyperdata HyperdataGraph
------------------------------------------------------------------------
-- TODO add the Graph Structure here
......@@ -429,7 +430,7 @@ data NodeType = NodeUser
| NodeFolder
| NodeCorpus | NodeCorpusV3 | NodeDocument
| NodeAnnuaire | NodeContact
| NodeGraph
| NodeGraph | NodePhylo
| NodeDashboard | NodeChart
| NodeList | NodeListModel deriving (Show, Read, Eq, Generic, Bounded, Enum)
......
......@@ -16,16 +16,20 @@ Portability : POSIX
module Gargantext.Text.List
where
import Data.Either (partitionEithers, Either(..))
import Debug.Trace (trace)
import Data.Map (Map)
import Data.Set (Set)
import Data.Text (Text)
import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, RootParent(..), mSetFromList)
import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId, NodeId)
import Gargantext.Database.Metrics.NgramsByNode (getTficf', sortTficf, ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Utils (Cmd)
import Gargantext.Text.List.Learn (Model(..))
import Gargantext.Text.Metrics (takeScored)
import Gargantext.Prelude
--import Gargantext.Text.Terms (TermType(..))
import qualified Data.Char as Char
......@@ -41,7 +45,13 @@ data NgramsListBuilder = BuilderStepO { stemSize :: Int
}
| BuilderStep1 { withModel :: Model }
| BuilderStepN { withModel :: Model }
| Tficf { nlb_lang :: Lang
, nlb_group1 :: Int
, nlb_group2 :: Int
, nlb_stopSize :: StopSize
, nlb_userCorpusId :: UserCorpusId
, nlb_masterCorpusId :: MasterCorpusId
}
data StopSize = StopSize {unStopSize :: Int}
......@@ -51,6 +61,7 @@ buildNgramsLists :: Lang -> Int -> Int -> StopSize -> UserCorpusId -> MasterCorp
-> Cmd err (Map NgramsType [NgramsElement])
buildNgramsLists l n m s uCid mCid = do
ngTerms <- buildNgramsTermsList l n m s uCid mCid
--ngTerms <- buildNgramsTermsList' uCid (ngramsGroup l n m) (isStopTerm s . fst) 550 300
othersTerms <- mapM (buildNgramsOthersList uCid identity) [Authors, Sources, Institutes]
pure $ Map.unions $ othersTerms <> [ngTerms]
......@@ -70,11 +81,54 @@ buildNgramsOthersList uCid groupIt nt = do
)
]
--{-
buildNgramsTermsList' :: UserCorpusId
-> (Text -> Text)
-> ((Text, (Set Text, Set NodeId)) -> Bool) -> Int -> Int
-> Cmd err (Map NgramsType [NgramsElement])
--}
buildNgramsTermsList' uCid groupIt stop gls is = do
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid NgramsTerms
let
(stops, candidates) = partitionEithers
$ map (\t -> if stop t then Left t else Right t)
$ Map.toList
$ Map.filter ((\s' -> Set.size s' > 1) . snd) ngs
(maps, candidates') = takeScored gls is
$ getCoocByNgrams' snd (Diagonal True)
$ Map.fromList candidates
toList' t = (fst t, (fromIntegral $ Set.size $ snd $ snd t, fst $ snd t))
(s,c,m) = (stops
, List.filter (\(k,_) -> List.elem k candidates') candidates
, List.filter (\(k,_) -> List.elem k maps) candidates
)
let ngs' = List.concat
$ map toNgramsElement
$ map (\t -> (StopTerm, toList' t)) s
<> map (\t -> (CandidateTerm, toList' t)) c
<> map (\t -> (GraphTerm, toList' t)) m
pure $ Map.fromList [(NgramsTerms, ngs')]
buildNgramsTermsList :: Lang -> Int -> Int -> StopSize -> UserCorpusId -> MasterCorpusId
-> Cmd err (Map NgramsType [NgramsElement])
buildNgramsTermsList l n m s uCid mCid = do
candidates <- sortTficf <$> getTficf' uCid mCid NgramsTerms (ngramsGroup l n m)
let termList = toTermList ((isStopTerm s) . fst) candidates
let
candidatesSize = 2000
a = 500
b = 500
candidatesHead = List.take candidatesSize candidates
candidatesTail = List.drop candidatesSize candidates
termList = (toTermList a b ((isStopTerm s) . fst) candidatesHead)
<> (map (toList ((isStopTerm s) .fst) CandidateTerm) candidatesTail)
let ngs = List.concat $ map toNgramsElement termList
pure $ Map.fromList [(NgramsTerms, ngs)]
......@@ -95,24 +149,26 @@ toNgramsElement (listType, (_stem, (_score, setNgrams))) =
(mSetFromList [])
) children
-- TODO remove hard coded parameters
toTermList :: (a -> Bool) -> [a] -> [(ListType, a)]
toTermList stop ns = map (toTermList' stop CandidateTerm) xs
<> map (toTermList' stop GraphTerm) ys
<> map (toTermList' stop CandidateTerm) zs
where
toTermList' stop' l n = case stop' n of
True -> (StopTerm, n)
False -> (l, n)
-- TODO use % of size of list
-- TODO user ML
toList :: (b -> Bool) -> ListType -> b -> (ListType, b)
toList stop l n = case stop n of
True -> (StopTerm, n)
False -> (l, n)
toTermList :: Int -> Int -> (a -> Bool) -> [a] -> [(ListType, a)]
toTermList _ _ _ [] = []
toTermList a b stop ns = trace ("computing toTermList") $
map (toList stop CandidateTerm) xs
<> map (toList stop GraphTerm) ys
<> toTermList a b stop zs
where
xs = take a ns
ys = take b $ drop a ns
zs = drop b $ drop a ns
ta = drop a ns
ys = take b ta
zs = drop b ta
a = 3
b = 500
isStopTerm :: StopSize -> Text -> Bool
isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x)
......
......@@ -22,7 +22,7 @@ module Gargantext.Text.Metrics
--import Math.KMeans (kmeans, euclidSq, elements)
--import GHC.Float (exp)
import Data.Tuple.Extra (both)
import Data.Map (Map)
import Data.List.Extra (sortOn)
import GHC.Real (round)
......@@ -40,21 +40,22 @@ import qualified Data.Vector.Storable as Vec
type GraphListSize = Int
type InclusionSize = Int
toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
toScored = map2scored
{-
toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
toScored' = map2scored
. (pcaReduceTo (Dimension 2))
. (Map.filter (\v -> Vec.length v > 1))
. (Map.unionsWith (<>))
-}
scored :: Ord t => Map (t,t) Int -> [Scored t]
scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
where
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
-- TODO change type with (x,y)
data Scored ts = Scored
......@@ -63,8 +64,8 @@ data Scored ts = Scored
, _scored_speGen :: !SpecificityGenericity
} deriving (Show)
localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
(Map.toList fi)
scores
where
......@@ -88,8 +89,8 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) s
$ DAA.zip (DAA.use is) (DAA.use ss)
takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
takeScored listSize incSize = map _scored_terms
takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> ([t],[t])
takeScored listSize incSize = both (map _scored_terms)
. linearTakes listSize incSize _scored_speGen
_scored_incExc
. scored
......@@ -100,8 +101,8 @@ takeScored listSize incSize = map _scored_terms
-- [(3,8),(6,5)]
linearTakes :: (Ord b1, Ord b2)
=> GraphListSize -> InclusionSize
-> (a -> b2) -> (a -> b1) -> [a] -> [a]
linearTakes gls incSize speGen incExc = take gls
-> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
linearTakes gls incSize speGen incExc = (List.splitAt gls)
. List.concat
. map (take $ round
$ (fromIntegral gls :: Double)
......
......@@ -78,8 +78,8 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
Nothing
Nothing
(Just $ cleanText $ langText t)
Nothing
(creator2text <$> as)
Nothing
(_sourceName <$> s)
(cleanText <$> langText <$> a)
(fmap (Text.pack . show) utcTime)
......
......@@ -83,8 +83,6 @@ extractTerms (Unsupervised l n s m) xs = mapM (terms (Unsupervised l n s (Just m
extractTerms termTypeLang xs = mapM (terms termTypeLang) xs
------------------------------------------------------------------------
-- | Terms from Text
-- Mono : mono terms
......@@ -132,8 +130,10 @@ newTries :: Int -> Text -> Tries Token ()
newTries n t = buildTries n (fmap toToken $ uniText t)
uniText :: Text -> [[Text]]
uniText = map (List.filter (not . isPunctuation))
uniText =
-- map (map (Text.toLower))
map (List.filter (not . isPunctuation))
. map tokenize
. sentences -- | TODO get sentences according to lang
. Text.toLower
......@@ -25,6 +25,7 @@ import Data.Text (Text, pack)
import GHC.Generics (Generic)
import GHC.IO (FilePath)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Core.Types (ListId)
import Gargantext.Database.Types.Node (NodeId)
import Gargantext.Prelude
import Test.QuickCheck (elements)
......@@ -91,6 +92,7 @@ makeLenses ''LegendField
data GraphMetadata = GraphMetadata { _gm_title :: Text -- title of the graph
, _gm_corpusId :: [NodeId] -- we can map with different corpus
, _gm_legend :: [LegendField] -- legend of the Graph
, _gm_listId :: ListId
}
deriving (Show, Generic)
$(deriveJSON (unPrefix "_gm_") ''GraphMetadata)
......
......@@ -60,15 +60,17 @@ getGraph :: NodeId -> GargServer (Get '[JSON] Graph)
getGraph nId = do
nodeGraph <- getNode nId HyperdataGraph
let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph
lId <- defaultList cId
let metadata = GraphMetadata "Title" [maybe 0 identity $ _node_parentId nodeGraph]
[ LegendField 1 "#FFF" "Cluster"
, LegendField 2 "#FFF" "Cluster"
]
lId
-- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10])
let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph
lIds <- selectNodesWithUsername NodeList userMaster
lId <- defaultList cId
ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms
myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal False)
......
......@@ -9,10 +9,8 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE NoImplicitPrelude #-}
......@@ -20,11 +18,14 @@ Portability : POSIX
{-# LANGUAGE OverloadedLists #-} -- allows to write Map and HashMap as lists
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Gargantext.Viz.Phylo.API
where
--import Control.Monad.Reader (ask)
import qualified Data.ByteString as DB
import qualified Data.ByteString.Lazy.Char8 as DBL (pack)
import Data.Text (Text)
import Data.Map (empty)
import Data.Swagger
......@@ -32,16 +33,19 @@ import Gargantext.API.Types
import Gargantext.Database.Types.Node (PhyloId, ListId, CorpusId)
import Gargantext.Prelude
import Gargantext.Viz.Phylo
import Gargantext.Viz.Phylo.Main
import Gargantext.Viz.Phylo.Aggregates
import Gargantext.Viz.Phylo.Example
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.View.ViewMaker
--import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Viz.Phylo.LevelMaker
import Servant
import Servant.Job.Utils (swaggerOptions)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Web.HttpApiData (parseUrlPiece, readTextData)
import Control.Monad.IO.Class (liftIO)
import Network.HTTP.Media ((//), (/:))
------------------------------------------------------------------------
type PhyloAPI = Summary "Phylo API"
......@@ -51,10 +55,29 @@ type PhyloAPI = Summary "Phylo API"
phyloAPI :: PhyloId -> GargServer PhyloAPI
phyloAPI n = getPhylo n
phyloAPI n = getPhylo' n
-- :<|> putPhylo n
:<|> postPhylo n
newtype SVG = SVG DB.ByteString
instance ToSchema SVG
where
declareNamedSchema = undefined
--genericDeclareNamedSchemaUnrestricted (swaggerOptions "")
instance Show SVG where
show (SVG a) = show a
instance Accept SVG where
contentType _ = "SVG" // "image/svg+xml" /: ("charset", "utf-8")
instance Show a => MimeRender PlainText a where
mimeRender _ val = cs ("" <> show val)
instance Show a => MimeRender SVG a where
mimeRender _ val = DBL.pack $ show val
------------------------------------------------------------------------
type GetPhylo = QueryParam "listId" ListId
:> QueryParam "level" Level
......@@ -71,11 +94,12 @@ type GetPhylo = QueryParam "listId" ListId
:> QueryParam "export" ExportMode
:> QueryParam "display" DisplayMode
:> QueryParam "verbose" Bool
:> Get '[JSON] PhyloView
:> Get '[SVG] SVG
-- | TODO
-- Add real text processing
-- Fix Filter parameters
{-
getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo _phyloId _lId l f b l' ms x y z ts s o e d b' = do
let
......@@ -85,7 +109,12 @@ getPhylo _phyloId _lId l f b l' ms x y z ts s o e d b' = do
-- | TODO remove phylo for real data here
pure (toPhyloView q phylo)
-- TODO remove phylo for real data here
-}
getPhylo' :: PhyloId -> GargServer GetPhylo
getPhylo' _phyloId _lId _l _f _b _l' _ms _x _y _z _ts _s _o _e _d _b' = do
p <- liftIO $ viewPhylo2Svg phyloView
pure (SVG p)
------------------------------------------------------------------------
{-
type PutPhylo = (Put '[JSON] Phylo )
......
......@@ -43,17 +43,15 @@ import qualified Data.Vector as Vector
-- | Foundations | --
---------------------
-- | Extract all the labels of a termList
termListToNgrams :: TermList -> [Ngrams]
termListToNgrams l = map (\(lbl,_) -> unwords lbl) l
termListToNgrams = map (\(lbl,_) -> unwords lbl)
-------------------
-- | Documents | --
-------------------
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
......@@ -84,7 +82,7 @@ countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "Doc" l, last' "Doc" l))
initPeriods g s (start,end) = map (\l -> (head' "initPeriods" l, last' "initPeriods" l))
$ chunkAlong g s [start .. end]
......@@ -220,4 +218,4 @@ traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <
--------------------------------------
ngrms :: [Double]
ngrms = sort $ map (\f -> fromIntegral $ size $ _phyloFis_clique f) $ concat $ elems m
--------------------------------------
\ No newline at end of file
--------------------------------------
......@@ -149,4 +149,4 @@ setPhyloBranches lvl p = alterGroupWithLevel (\g ->
--------------------------------------
-- trace' bs = trace bs
\ No newline at end of file
-- trace' bs = trace bs
......@@ -17,7 +17,6 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Cluster
where
import Control.Parallel.Strategies
import Data.Graph.Clustering.Louvain.CplusPlus
import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
......@@ -92,7 +91,7 @@ groupsToGraph nbDocs prox gs = case prox of
candidates' = candidates `using` parList rdeepseq
in candidates' )
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
_ -> undefined
_ -> undefined
-- | To filter a Graph of Proximity using a given threshold
......@@ -118,7 +117,7 @@ phyloToClusters lvl clus p = Map.fromList
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ let gs = map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
$ let gs = (trace $ "PROX: " <> show prox) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
gs' = gs `using` parList rdeepseq
in gs'
--------------------------------------
......
......@@ -28,7 +28,6 @@ TODO:
module Gargantext.Viz.Phylo.Example where
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.Text (Text, toLower)
import Data.List ((++))
import Data.Map (Map,empty)
......@@ -44,7 +43,8 @@ import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Viz.Phylo.LinkMaker
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.Main (writePhylo)
import GHC.IO (FilePath)
import qualified Data.List as List
......@@ -52,11 +52,9 @@ import qualified Data.List as List
-- | STEP 12 | -- Create a PhyloView from a user Query
------------------------------------------------------
export :: IO ()
export = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView
phyloExport :: FilePath -> IO FilePath
phyloExport fp = writePhylo fp phyloView
phyloView :: PhyloView
phyloView = toPhyloView (queryParser' queryViewEx) phyloFromQuery
......
......@@ -259,14 +259,14 @@ toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc f
cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
--------------------------------------
foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both date (head' "LevelMaker" c,last c)
$ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
--------------------------------------
......
{-|
Module : Gargantext.Viz.Phylo.Main
Description : Phylomemy Main
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Viz.Phylo.Main
where
import Debug.Trace (trace)
import qualified Data.Text as Text
import Data.Map (Map)
import Data.Text (Text)
import Data.Maybe
import Servant
import GHC.IO (FilePath)
import Data.GraphViz
import Gargantext.Prelude
import Gargantext.Text.Context (TermList)
import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Set as Set
import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Core.Types
import Gargantext.Text.Terms.WithList
import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Schema.NodeNode (selectDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Flow
import Gargantext.API.Ngrams.Tools (getTermsWith)
-- TODO : git mv ViewMaker Maker
import Gargantext.Viz.Phylo.View.ViewMaker
import Gargantext.Viz.Phylo hiding (Svg, Dot)
import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString as DB
type MinSizeBranch = Int
flowPhylo :: FlowCmdM env ServantErr m
=> CorpusId
-> Level -> MinSizeBranch
-> FilePath
-> m FilePath
flowPhylo cId l m fp = do
list <- defaultList cId
listMaster <- selectNodesWithUsername NodeList userMaster
termList <- Map.toList <$> getTermsWith Text.words [list] NgramsTerms GraphTerm
--printDebug "termList" termList
--x <- mapTermListRoot [list] NgramsTerms
--printDebug "mapTermListRoot" x
-- TODO optimize unwords
docs' <- catMaybes <$> map (\h -> (,) <$> _hyperdataDocument_publication_year h
<*> _hyperdataDocument_abstract h
) <$> selectDocs cId
let patterns = buildPatterns termList
let docs = map ( (\(y,t) -> Document y t) . filterTerms patterns) docs'
--printDebug "docs" docs
--printDebug "docs" termList
liftIO $ flowPhylo' (List.sortOn date docs) termList l m fp
parse :: TermList -> [(Date, Text)] -> IO [Document]
parse l c = do
let patterns = buildPatterns l
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) c
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns (y,d) = (y,termsInText patterns d)
where
--------------------------------------
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = List.nub $ List.concat $ map (map Text.unwords) $ extractTermsWithList pats txt
--------------------------------------
-- TODO SortedList Document
flowPhylo' :: [Document] -> TermList -- ^Build
-> Level -> MinSizeBranch -- ^View
-> FilePath
-> IO FilePath
flowPhylo' corpus terms l m fp = do
let
phylo = buildPhylo corpus terms
phVie = viewPhylo l m phylo
writePhylo fp phVie
defaultQuery :: PhyloQueryBuild
defaultQuery = defaultQueryBuild'
"Default Title"
"Default Description"
buildPhylo :: [Document] -> TermList -> Phylo
buildPhylo = trace (show defaultQuery) $ buildPhylo' defaultQuery
buildPhylo' :: PhyloQueryBuild -> [Document] -> TermList -> Phylo
buildPhylo' q corpus termList = toPhylo q corpus termList Map.empty
queryView :: Level -> MinSizeBranch -> PhyloQueryView
queryView level _minSizeBranch = PhyloQueryView level Merge False 2
[BranchAge]
[]
-- [SizeBranch $ SBParams minSizeBranch]
[BranchPeakFreq,GroupLabelCooc]
(Just (ByBranchAge,Asc))
Json Flat True
viewPhylo :: Level -> MinSizeBranch -> Phylo -> PhyloView
viewPhylo l b phylo = toPhyloView (queryView l b) phylo
writePhylo :: FilePath -> PhyloView -> IO FilePath
writePhylo fp phview = runGraphviz (viewToDot phview) Svg fp
viewPhylo2Svg :: PhyloView -> IO DB.ByteString
viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
......@@ -238,19 +238,18 @@ getFoundationsRoots p = (getFoundations p) ^. phylo_foundationsRoots
-- | To get the Index of a Ngrams in the foundationsRoots of a Phylo
getIdxInRoots :: Ngrams -> Phylo -> Int
getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Nothing -> panic $ "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots: " <> cs n
Just idx -> idx
getIdxInVector :: Ngrams -> Vector Ngrams -> Int
getIdxInVector n ns = case (elemIndex n ns) of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just idx -> idx
Nothing -> panic $ "[ERR][Viz.Phylo.Tools.getIdxInVector] Ngrams not in foundationsRoots: " <> cs n
Just idx -> idx
--------------------
-- | PhyloGroup | --
--------------------
-- | To alter a PhyloGroup matching a given Level
alterGroupWithLevel :: (PhyloGroup -> PhyloGroup) -> Level -> Phylo -> Phylo
alterGroupWithLevel f lvl p = over ( phylo_periods
......@@ -261,8 +260,7 @@ alterGroupWithLevel f lvl p = over ( phylo_periods
. traverse
) (\g -> if getGroupLevel g == lvl
then f g
else g ) p
else g ) p
-- | To alter each list of PhyloGroups following a given function
......@@ -830,17 +828,26 @@ initLouvain :: Maybe Proximity -> LouvainParams
initLouvain (def defaultWeightedLogJaccard -> proxi) = LouvainParams proxi
initRelatedComponents :: Maybe Proximity -> RCParams
initRelatedComponents (def Filiation -> proxi) = RCParams proxi
initRelatedComponents (def defaultWeightedLogJaccard -> proxi) = RCParams proxi
-- | TODO user param in main function
initWeightedLogJaccard :: Maybe Double -> Maybe Double -> WLJParams
initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens
initWeightedLogJaccard (def 0.3 -> thr) (def 20.0 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQueryBuild from given and default parameters
initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Int -> Maybe Double -> Maybe Double -> Maybe Int -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def [] -> filters)
(def defaultWeightedLogJaccard -> matching') (def 5 -> frame) (def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth) (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain steps cluster metrics filters matching' frame frameThr reBranchThr reBranchNth nthLevel nthCluster
initPhyloQueryBuild :: Text -> Text -> Maybe Int
-> Maybe Int -> Maybe Cluster -> Maybe [Metric]
-> Maybe [Filter]-> Maybe Proximity -> Maybe Int
-> Maybe Double -> Maybe Double -> Maybe Int
-> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain)
(def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics)
(def [] -> filters) (def defaultWeightedLogJaccard -> matching') (def 5 -> frame)
(def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth)
(def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain
steps cluster metrics filters matching' frame frameThr reBranchThr reBranchNth nthLevel nthCluster
-- | To initialize a PhyloQueryView default parameters
......@@ -890,13 +897,27 @@ defaultWeightedLogJaccard :: Proximity
defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing Nothing)
-- Queries
type Title = Text
type Desc = Text
defaultQueryBuild :: PhyloQueryBuild
defaultQueryBuild = initPhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
defaultQueryBuild = defaultQueryBuild'
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
defaultQueryBuild' :: Title -> Desc -> PhyloQueryBuild
defaultQueryBuild' t d = initPhyloQueryBuild t d
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing Nothing
defaultQueryView :: PhyloQueryView
defaultQueryView = initPhyloQueryView Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
defaultQueryView = initPhyloQueryView
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing
-- Software
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment