Commit c14f31a5 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PHYLO] flow phylo implemented (not optimized yet).

parent f3d9fe78
Pipeline #504 failed with stage
......@@ -25,6 +25,7 @@ module Gargantext.Viz.Phylo.API
--import Control.Monad.Reader (ask)
import qualified Data.ByteString as DB
import qualified Data.ByteString.Lazy.Char8 as DBL (pack)
import Data.Text (Text)
import Data.Map (empty)
import Data.Swagger
......@@ -75,7 +76,7 @@ instance Show a => MimeRender PlainText a where
mimeRender _ val = cs ("" <> show val)
instance Show a => MimeRender SVG a where
mimeRender _ val = cs ("" <> show val)
mimeRender _ val = DBL.pack $ show val
------------------------------------------------------------------------
type GetPhylo = QueryParam "listId" ListId
......
......@@ -17,7 +17,6 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Cluster
where
import Control.Parallel.Strategies
import Data.Graph.Clustering.Louvain.CplusPlus
import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
......@@ -92,7 +91,7 @@ groupsToGraph nbDocs prox gs = case prox of
candidates' = candidates `using` parList rdeepseq
in candidates' )
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
_ -> undefined
_ -> undefined
-- | To filter a Graph of Proximity using a given threshold
......@@ -118,7 +117,7 @@ phyloToClusters lvl clus p = Map.fromList
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ let gs = map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
$ let gs = (trace $ "PROX: " <> show prox) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
gs' = gs `using` parList rdeepseq
in gs'
--------------------------------------
......
......@@ -7,7 +7,6 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE FlexibleContexts #-}
......@@ -19,8 +18,10 @@ Portability : POSIX
module Gargantext.Viz.Phylo.Main
where
--import Debug.Trace (trace)
import Debug.Trace (trace)
import qualified Data.Text as Text
import Data.Map (Map)
import Data.Text (Text)
import Data.Maybe
import Servant
import GHC.IO (FilePath)
......@@ -34,9 +35,10 @@ import Gargantext.Viz.Phylo.View.Export
import Gargantext.Viz.Phylo.Tools
import Gargantext.Viz.Phylo.LevelMaker
import Gargantext.Core.Types
import Gargantext.Text.Terms.WithList
import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Schema.NodeNode (selectDocNodes)
import Gargantext.Database.Schema.NodeNode (selectDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Node.Select (selectNodesWithUsername)
......@@ -66,39 +68,36 @@ flowPhylo cId l m fp = do
--printDebug "mapTermListRoot" x
-- TODO optimize unwords
let terms = Set.fromList
$ List.concat
$ map (\(a,b) -> [a] <> b) termList
getDate n = maybe (panic "flowPhylo") identity
$ _hyperdataDocument_publication_year
$ _node_hyperdata n
--printDebug "terms" terms
-- TODO optimize this Database function below
docs' <- map (\n -> (_node_id n, getDate n)) <$> selectDocNodes cId
--printDebug "docs'" docs'
nidTerms' <- getNodesByNgramsOnlyUser cId (listMaster <> [list])
NgramsTerms
(map Text.unwords $ Set.toList terms)
let nidTerms = Map.fromList
$ List.concat
$ map (\(t, ns) -> List.zip (Set.toList ns) (List.repeat t))
$ Map.toList
$ nidTerms'
let docs = List.sortOn date
$ List.filter (\d -> text d /= [])
$ map (\(n,d) -> Document d (maybe [] (\x -> [x])
$ Map.lookup n nidTerms)) docs'
docs' <- catMaybes <$> map (\h -> (,) <$> _hyperdataDocument_publication_year h
<*> _hyperdataDocument_abstract h
) <$> selectDocs cId
printDebug "docs" docs
printDebug "docs" termList
let patterns = buildPatterns termList
let docs = map ( (\(y,t) -> Document y t) . filterTerms patterns) docs'
--printDebug "docs" docs
--printDebug "docs" termList
liftIO $ flowPhylo' (List.sortOn date docs) termList l m fp
parse :: TermList -> [(Date, Text)] -> IO [Document]
parse l c = do
let patterns = buildPatterns l
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) c
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns (y,d) = (y,termsInText patterns d)
where
--------------------------------------
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = List.nub $ List.concat $ map (map Text.unwords) $ extractTermsWithList pats txt
--------------------------------------
liftIO $ flowPhylo' docs termList l m fp
-- TODO SortedList Document
flowPhylo' :: [Document] -> TermList -- ^Build
......@@ -119,7 +118,7 @@ defaultQuery = defaultQueryBuild'
"Default Description"
buildPhylo :: [Document] -> TermList -> Phylo
buildPhylo = buildPhylo' defaultQuery
buildPhylo = trace (show defaultQuery) $ buildPhylo' defaultQuery
buildPhylo' :: PhyloQueryBuild -> [Document] -> TermList -> Phylo
buildPhylo' q corpus termList = toPhylo q corpus termList Map.empty
......
......@@ -817,17 +817,26 @@ initLouvain :: Maybe Proximity -> LouvainParams
initLouvain (def defaultWeightedLogJaccard -> proxi) = LouvainParams proxi
initRelatedComponents :: Maybe Proximity -> RCParams
initRelatedComponents (def Filiation -> proxi) = RCParams proxi
initRelatedComponents (def defaultWeightedLogJaccard -> proxi) = RCParams proxi
-- | TODO user param in main function
initWeightedLogJaccard :: Maybe Double -> Maybe Double -> WLJParams
initWeightedLogJaccard (def 0 -> thr) (def 0.01 -> sens) = WLJParams thr sens
initWeightedLogJaccard (def 0.3 -> thr) (def 20.0 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQueryBuild from given and default parameters
initPhyloQueryBuild :: Text -> Text -> Maybe Int -> Maybe Int -> Maybe Cluster -> Maybe [Metric] -> Maybe [Filter] -> Maybe Proximity -> Maybe Int -> Maybe Double -> Maybe Double -> Maybe Int -> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain) (def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics) (def [] -> filters)
(def defaultWeightedLogJaccard -> matching') (def 5 -> frame) (def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth) (def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain steps cluster metrics filters matching' frame frameThr reBranchThr reBranchNth nthLevel nthCluster
initPhyloQueryBuild :: Text -> Text -> Maybe Int
-> Maybe Int -> Maybe Cluster -> Maybe [Metric]
-> Maybe [Filter]-> Maybe Proximity -> Maybe Int
-> Maybe Double -> Maybe Double -> Maybe Int
-> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain)
(def 3 -> steps) (def defaultFis -> cluster) (def [] -> metrics)
(def [] -> filters) (def defaultWeightedLogJaccard -> matching') (def 5 -> frame)
(def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth)
(def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain
steps cluster metrics filters matching' frame frameThr reBranchThr reBranchNth nthLevel nthCluster
-- | To initialize a PhyloQueryView default parameters
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment