Commit 0f6c2e5b authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-phylo' into dev

parents 706dd25d 148b0dcc
......@@ -170,14 +170,25 @@ main = do
let sensibility = case (phyloProximity config) of
Hamming -> undefined
WeightedLogJaccard s -> (show s)
WeightedLogJaccard s -> (show s)
let sync = case (phyloSynchrony config) of
ByProximityThreshold t _ _ _ -> (show t)
ByProximityDistribution _ _ -> undefined
-- to be improved
-- let br_length = case (take 1 $ exportFilter config) of
-- ByBranchSize t -> (show t)
let output = (outputPath config)
<> (unpack $ phyloName config)
<> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
<> "-level_" <> (show (phyloLevel config))
<> "-" <> clq
<> "-sens_" <> sensibility
<> "-level_" <> (show (phyloLevel config))
<> "-sens_" <> sensibility
-- <> "-lenght_" <> br_length
<> "-scale_" <> (show (_qua_granularity $ phyloQuality config))
<> "-sync_" <> sync
<> ".dot"
dotToFile output dot
......@@ -20,10 +20,9 @@ import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..))
import Gargantext.Core.Types (ListType(..)) -- (MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Types (MasterCorpusId, UserCorpusId)
-- import Gargantext.Core.Text.List.Learn (Model(..))
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import qualified Data.Set as Set
......@@ -31,7 +30,7 @@ import qualified Data.Map as Map
import qualified Data.List as List
import qualified Data.Text as Text
{-
data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, stemX :: !Int
, stopSize :: !StopSize
......@@ -45,6 +44,7 @@ data NgramsListBuilder = BuilderStepO { stemSize :: !Int
, nlb_userCorpusId :: !UserCorpusId
, nlb_masterCorpusId :: !MasterCorpusId
}
-}
data StopSize = StopSize {unStopSize :: !Int}
......@@ -52,19 +52,19 @@ data StopSize = StopSize {unStopSize :: !Int}
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
data GroupParams = GroupParams { unGroupParams_lang :: !Lang
, unGroupParams_len :: !Int
, unGroupParams_limit :: !Int
, unGroupParams_stopSize :: !StopSize
}
| GroupIdentity
| GroupIdentity
ngramsGroup :: GroupParams
-> Text
-> Text
ngramsGroup GroupIdentity = identity
ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
ngramsGroup (GroupParams l _m _n _) =
Text.intercalate " "
. map (stem l)
-- . take n
. List.sort
......@@ -72,12 +72,18 @@ ngramsGroup (GroupParams l _m _n _) = Text.intercalate " "
. Text.splitOn " "
. Text.replace "-" " "
------------------------------------------------------------------------------
------------------------------------------------------------------------
mergeMapParent :: Map Text (GroupedText b)
-> Map Text (Map Text Int)
-> Map Text (GroupedText b)
mergeMapParent = undefined
------------------------------------------------------------------------
toGroupedText :: Ord b
=> (Text -> Text)
-> (a -> b)
-> (a -> Set Text)
-> (a -> Set NodeId)
=> (Text -> Text )
-> (a -> b )
-> (a -> Set Text )
-> (a -> Set NodeId)
-> [(Text,a)]
-> Map Stem (GroupedText b)
toGroupedText fun_stem fun_score fun_texts fun_nodeIds from = groupStems' $ map group from
......@@ -108,7 +114,7 @@ groupStems' = Map.fromListWith grouping
gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
------------------------------------------------------------------------------
------------------------------------------------------------------------
type Group = Lang -> Int -> Int -> Text -> Text
type Stem = Text
type Label = Text
......@@ -116,15 +122,15 @@ data GroupedText score =
GroupedText { _gt_listType :: !(Maybe ListType)
, _gt_label :: !Label
, _gt_score :: !score
, _gt_group :: !(Set Text)
, _gt_children :: !(Set Text)
, _gt_size :: !Int
, _gt_stem :: !Stem
, _gt_nodes :: !(Set NodeId)
} deriving Show
{-
} {-deriving Show--}
--{-
instance Show score => Show (GroupedText score) where
show (GroupedText lt l s _ _ _ _) = show l <> " : " <> show lt <> " : " <> show s
-}
--}
instance (Eq a) => Eq (GroupedText a) where
(==) (GroupedText _ _ score1 _ _ _ _)
......@@ -137,18 +143,13 @@ instance (Eq a, Ord a) => Ord (GroupedText a) where
-- Lenses Instances
makeLenses 'GroupedText
------------------------------------------------------------------------------
------------------------------------------------------------------------
addListType :: Map Text ListType -> GroupedText a -> GroupedText a
addListType m g = set gt_listType (hasListType m g) g
where
hasListType :: Map Text ListType -> GroupedText a -> Maybe ListType
hasListType m' (GroupedText _ label _ g' _ _ _) =
List.foldl' (<>) Nothing
List.foldl' (<>) Nothing
$ map (\t -> Map.lookup t m')
$ Set.toList
$ Set.insert label g'
......@@ -58,8 +58,8 @@ buildNgramsLists :: ( RepoCmdM env err m
buildNgramsLists user gp uCid mCid = do
ngTerms <- buildNgramsTermsList user uCid mCid gp
othersTerms <- mapM (buildNgramsOthersList user uCid (ngramsGroup GroupIdentity))
[ (Authors, MapListSize 9)
, (Sources, MapListSize 9)
[ (Authors , MapListSize 9)
, (Sources , MapListSize 9)
, (Institutes, MapListSize 9)
]
......@@ -83,12 +83,13 @@ buildNgramsOthersList user uCid groupIt (nt, MapListSize mapListSize) = do
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid nt
let
grouped = toGroupedText groupIt (Set.size . snd) fst snd (Map.toList $ Map.mapWithKey (\k (a,b) -> (Set.delete k a, b)) $ ngs)
grouped = toGroupedText groupIt (Set.size . snd) fst snd
(Map.toList $ Map.mapWithKey (\k (a,b) -> (Set.delete k a, b)) $ ngs)
socialLists <- flowSocialList user nt (Set.fromList $ Map.keys ngs)
let
groupedWithList = map (addListType (invertForw socialLists)) grouped
groupedWithList = map (addListType (invertForw socialLists)) grouped
(stopTerms, tailTerms) = Map.partition (\t -> t ^. gt_listType == Just StopTerm) groupedWithList
(mapTerms, tailTerms') = Map.partition (\t -> t ^. gt_listType == Just MapTerm) tailTerms
......
......@@ -35,6 +35,7 @@ import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
------------------------------------------------------------------------
flowSocialList :: ( RepoCmdM env err m
, CmdM env err m
, HasNodeError err
......@@ -56,7 +57,6 @@ flowSocialList user nt ngrams' = do
-- printDebug "* socialLists *: results \n" result
pure result
------------------------------------------------------------------------
unions :: (Ord a, Semigroup a, Semigroup b, Ord b)
=> [Map a (Set b)] -> Map a (Set b)
......@@ -75,13 +75,12 @@ invertBack = Map.fromListWith (<>)
unions_test :: Map ListType (Set Text)
unions_test = unions [m1, m2]
where
m1 = Map.fromList [ (StopTerm, Set.singleton "Candidate")]
m1 = Map.fromList [ (StopTerm , Set.singleton "Candidate")]
m2 = Map.fromList [ (CandidateTerm, Set.singleton "Candidate")
, (MapTerm, Set.singleton "Candidate")
, (MapTerm , Set.singleton "Candidate")
]
------------------------------------------------------------------------
termsByList :: ListType -> (Map (Maybe ListType) (Set Text)) -> Set Text
termsByList CandidateTerm m = Set.unions
$ map (\lt -> fromMaybe Set.empty $ Map.lookup lt m)
......@@ -108,7 +107,7 @@ flowSocialListByMode mode user nt ngrams' = do
-- printDebug "flowSocialListByMode r" r
pure r
---------------------------------------------------------------------------
------------------------------------------------------------------------
-- TODO: maybe use social groups too
toSocialList :: Map Text (Map ListType Int)
-> Set Text
......@@ -141,7 +140,7 @@ toSocialList1_testIsTrue = result == (Just MapTerm, Set.singleton token)
, (StopTerm , 3)
]
---------------------------------------------------------------------------
------------------------------------------------------------------------
-- | [ListId] does not merge the lists (it is for Master and User lists
-- here we need UserList only
countFilterList :: RepoCmdM env err m
......@@ -161,14 +160,83 @@ countFilterList' st nt ls input = do
-- printDebug "countFilterList'" ml
pure $ Set.foldl' (\m t -> countList t ml m) input st
---------------------------------------------------------------------------
------------------------------------------------------------------------
-- FIXME children have to herit the ListType of the parent
toMapTextListType :: Map Text NgramsRepoElement -> Map Text ListType
toMapTextListType m = Map.fromListWith (<>)
$ List.concat
$ (map (toList m))
$ Map.toList m
$ List.concat
$ map (toList m)
$ Map.toList m
----------------------
-- | Tools to inherit groupings
----------------------
type Parent = Text
parentUnionsMerge :: (Ord a, Ord b, Num c)
=> [Map a (Map b c)]
-> Map a (Map b c)
parentUnionsMerge = Map.unionsWith (Map.unionWith (+))
-- This Parent union is specific
-- [Private, Shared, Public]
-- means the following preferences:
-- Private > Shared > Public
-- if data have not been tagged privately, then use others tags
-- This unions behavior takes first key only and ignore others
parentUnionsExcl :: Ord a
=> [Map a b]
-> Map a b
parentUnionsExcl = Map.unions
hasParent :: Text
-> Map Text (Map Parent Int)
-> Maybe Parent
hasParent t m = case Map.lookup t m of
Nothing -> Nothing
Just m' -> (fst . fst) <$> Map.maxViewWithKey m'
toMapTextParent :: Set Text
-> Map Text (Map Parent Int)
-> [Map Text NgramsRepoElement]
-> Map Text (Map Parent Int)
toMapTextParent ts = foldl' (toMapTextParent' ts)
where
toMapTextParent' :: Set Text
-> Map Text (Map Parent Int)
-> Map Text NgramsRepoElement
-> Map Text (Map Parent Int)
toMapTextParent' ts' to from = Set.foldl' (toMapTextParent'' ts' from) to ts'
toMapTextParent'' :: Set Text
-> Map Text NgramsRepoElement
-> Map Text (Map Parent Int)
-> Text
-> Map Text (Map Parent Int)
toMapTextParent'' ss from to t = case Map.lookup t from of
Nothing -> to
Just nre -> case _nre_parent nre of
Just (NgramsTerm p') -> if Set.member p' ss
then Map.alter (addParent p') t to
else to
where
addParent p'' Nothing = Just $ addCountParent p'' Map.empty
addParent p'' (Just ps) = Just $ addCountParent p'' ps
addCountParent :: Parent -> Map Parent Int -> Map Parent Int
addCountParent p m = Map.alter addCount p m
where
addCount Nothing = Just 1
addCount (Just n) = Just $ n + 1
_ -> to
------------------------------------------------------------------------
toList :: Map Text NgramsRepoElement -> (Text, NgramsRepoElement) -> [(Text, ListType)]
toList m (t, nre@(NgramsRepoElement _ _ _ _ (MSet children))) =
List.zip terms (List.cycle [lt'])
......@@ -184,9 +252,10 @@ listOf m ng = case _nre_parent ng of
Nothing -> _nre_list ng
Just p -> case Map.lookup (unNgramsTerm p) m of
Just ng' -> listOf m ng'
Nothing -> panic "CandidateTerm -- Should Not happen"
Nothing -> CandidateTerm
-- panic "[G.C.T.L.Social.listOf] Nothing: Should Not happen"
---------------------------------------------------------------------------
------------------------------------------------------------------------
countList :: Text
-> Map Text ListType
-> Map Text (Map ListType Int)
......@@ -195,11 +264,11 @@ countList t m input = case Map.lookup t m of
Nothing -> input
Just l -> Map.alter addList t input
where
addList Nothing = Just $ addCount l Map.empty
addList (Just lm) = Just $ addCount l lm
addList Nothing = Just $ addCountList l Map.empty
addList (Just lm) = Just $ addCountList l lm
addCount :: ListType -> Map ListType Int -> Map ListType Int
addCount l m = Map.alter (plus l) l m
addCountList :: ListType -> Map ListType Int -> Map ListType Int
addCountList l m = Map.alter (plus l) l m
where
plus CandidateTerm Nothing = Just 1
plus CandidateTerm (Just x) = Just $ x + 1
......@@ -228,5 +297,3 @@ findNodes' Public r = findNodes Public r $ [NodeFolderPublic ] <> commonNodes
commonNodes:: [NodeType]
commonNodes = [NodeFolder, NodeCorpus, NodeList]
......@@ -47,7 +47,7 @@ cooc2graph' distance threshold myCooc = distanceMap
where
(ti, _) = createIndices myCooc
myCooc' = toIndex ti myCooc
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 0) myCooc'
matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 1) myCooc'
distanceMat = measure distance matCooc
distanceMap = Map.filter (> threshold) $ mat2map distanceMat
......
This diff is collapsed.
......@@ -17,6 +17,7 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.API
where
import Data.Maybe (fromMaybe)
import Control.Lens ((^.))
import Data.String.Conversions
--import Control.Monad.Reader (ask)
......@@ -98,13 +99,13 @@ getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo phId _lId l msb = do
phNode <- getNodeWith phId (Proxy :: Proxy HyperdataPhylo)
let
level = maybe 2 identity l
branc = maybe 2 identity msb
level = fromMaybe 2 l
branc = fromMaybe 2 msb
maybePhylo = phNode ^. (node_hyperdata . hp_data)
p <- liftBase $ viewPhylo2Svg
$ viewPhylo level branc
$ maybe phyloFromQuery identity maybePhylo
$ fromMaybe phyloFromQuery maybePhylo
pure (SVG p)
------------------------------------------------------------------------
type PostPhylo = QueryParam "listId" ListId
......@@ -112,16 +113,16 @@ type PostPhylo = QueryParam "listId" ListId
:> (Post '[JSON] NodeId)
postPhylo :: CorpusId -> UserId -> GargServer PostPhylo
postPhylo n userId _lId = do
postPhylo corpusId userId _lId = do
-- TODO get Reader settings
-- s <- ask
let
-- let
-- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
phy <- flowPhylo n
pId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just n) userId]
pure $ NodeId (fromIntegral pId)
phy <- flowPhylo corpusId -- params
phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
pure $ NodeId (fromIntegral phyloId)
------------------------------------------------------------------------
-- | DELETE Phylo == delete a node
......@@ -136,64 +137,25 @@ putPhylo = undefined
-- | Instances
instance Arbitrary PhyloView
where
arbitrary = elements [phyloView]
-- | TODO add phyloGroup ex
instance Arbitrary PhyloGroup
where
arbitrary = elements []
instance Arbitrary Phylo
where
arbitrary = elements [phylo]
instance ToSchema Order
instance ToParamSchema Order
instance FromHttpApiData Order
where
parseUrlPiece = readTextData
instance ToParamSchema Metric
instance FromHttpApiData [Metric]
where
parseUrlPiece = readTextData
instance FromHttpApiData Metric
where
parseUrlPiece = readTextData
instance Arbitrary Phylo where arbitrary = elements [phylo]
instance Arbitrary PhyloGroup where arbitrary = elements []
instance Arbitrary PhyloView where arbitrary = elements [phyloView]
instance FromHttpApiData DisplayMode where parseUrlPiece = readTextData
instance FromHttpApiData ExportMode where parseUrlPiece = readTextData
instance FromHttpApiData Filiation where parseUrlPiece = readTextData
instance FromHttpApiData Metric where parseUrlPiece = readTextData
instance FromHttpApiData Order where parseUrlPiece = readTextData
instance FromHttpApiData Sort where parseUrlPiece = readTextData
instance FromHttpApiData Tagger where parseUrlPiece = readTextData
instance FromHttpApiData [Metric] where parseUrlPiece = readTextData
instance FromHttpApiData [Tagger] where parseUrlPiece = readTextData
instance ToParamSchema DisplayMode
instance FromHttpApiData DisplayMode
where
parseUrlPiece = readTextData
instance ToParamSchema ExportMode
instance FromHttpApiData ExportMode
where
parseUrlPiece = readTextData
instance FromHttpApiData Sort
where
parseUrlPiece = readTextData
instance ToParamSchema Sort
instance FromHttpApiData [Tagger]
where
parseUrlPiece = readTextData
instance FromHttpApiData Tagger
where
parseUrlPiece = readTextData
instance ToParamSchema Tagger
instance FromHttpApiData Filiation
where
parseUrlPiece = readTextData
instance ToParamSchema Filiation
instance ToParamSchema Tagger
instance ToParamSchema Metric
instance ToParamSchema Order
instance ToParamSchema Sort
instance ToSchema Order
......@@ -44,7 +44,6 @@ import Numeric.Statistics (percentile)
-- | PhyloLevelMaker | --
-------------------------
-- | A typeClass for polymorphic PhyloLevel functions
class PhyloLevelMaker aggregate
where
......@@ -105,11 +104,15 @@ instance PhyloLevelMaker Document
addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
addPhyloLevel' lvl m p = alterPhyloPeriods
(\period -> let pId = _phylo_periodId period
in over (phylo_periodLevels)
in over phylo_periodLevels
(\phyloLevels ->
let groups = toPhyloGroups lvl pId (m ! pId) m p
in trace (show (length groups) <> " groups for " <> show (pId) ) $ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period) p
in trace (show (length groups)
<> " groups for "
<> show (pId) )
$ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period
) p
----------------------
......@@ -118,7 +121,14 @@ addPhyloLevel' lvl m p = alterPhyloPeriods
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloFis -> Map Date (Map (Int,Int) Double) -> Vector Ngrams -> PhyloGroup
cliqueToGroup :: PhyloPeriodId
-> Level
-> Int
-> Text
-> PhyloFis
-> Map Date (Map (Int,Int) Double)
-> Vector Ngrams
-> PhyloGroup
cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
......@@ -142,10 +152,17 @@ cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId -> Level -> Int -> Text -> PhyloCluster -> Map (Date,Date) [PhyloCluster]-> Phylo-> PhyloGroup
clusterToGroup :: PhyloPeriodId
-> Level
-> Int
-> Text
-> PhyloCluster
-> Map (Date,Date) [PhyloCluster]
-> Phylo
-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
empty
Nothing
......@@ -154,12 +171,14 @@ clusterToGroup prd lvl idx lbl groups _m p =
where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) (getPhyloCooc p)
cooc = getMiniCooc (listToFullCombi ngrams)
(periodsToYears [prd] )
(getPhyloCooc p )
--------------------------------------
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
--------------------------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
......@@ -195,9 +214,13 @@ toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
--------------------------------------
phyloBase :: Phylo
phyloBase = tracePhyloBase
$ toPhyloBase q (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just q)) c termList fis
--------------------------------------
phyloBase = tracePhyloBase
$ toPhyloBase q init c termList fis
where
init = initPhyloParam (Just defaultPhyloVersion)
(Just defaultSoftware )
(Just q )
---------------------------------------
-- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
......@@ -205,17 +228,16 @@ toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
toNthLevel lvlMax prox clus p
| lvl >= lvlMax = p
| otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1)
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1)
-- \$ transposePeriodLinks (lvl + 1)
$ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant
$ transposeLinks (lvl + 1) Ascendant
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1)
(clusters) p
$ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant
$ transposeLinks (lvl + 1) Ascendant
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1) (clusters) p
where
--------------------------------------
clusters :: Map (Date,Date) [PhyloCluster]
......
......@@ -35,12 +35,14 @@ import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Viz.Phylo hiding (Svg, Dot)
import Gargantext.Core.Viz.Phylo.LevelMaker
import Gargantext.Core.Viz.Phylo.LevelMaker (toPhylo)
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.View.Export
import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
type MinSizeBranch = Int
flowPhylo :: FlowCmdM env err m
......@@ -48,14 +50,14 @@ flowPhylo :: FlowCmdM env err m
-> m Phylo
flowPhylo cId = do
list <- defaultList cId
list <- defaultList cId
termList <- Map.toList <$> getTermsWith Text.words [list] NgramsTerms MapTerm
docs' <- catMaybes
<$> map (\h -> (,) <$> _hd_publication_year h
<*> _hd_abstract h
)
<$> selectDocs cId
<$> map (\h -> (,) <$> _hd_publication_year h
<*> _hd_abstract h
)
<$> selectDocs cId
let
patterns = buildPatterns termList
......@@ -65,10 +67,13 @@ flowPhylo cId = do
where
--------------------------------------
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = List.nub $ List.concat $ map (map Text.unwords) $ extractTermsWithList pats txt
termsInText pats txt = List.nub
$ List.concat
$ map (map Text.unwords)
$ extractTermsWithList pats txt
--------------------------------------
docs = map ( (\(y,t) -> Document y t) . filterTerms patterns) docs'
docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'
--liftBase $ flowPhylo' (List.sortOn date docs) termList l m fp
pure $ buildPhylo (List.sortOn date docs) termList
......@@ -76,9 +81,9 @@ flowPhylo cId = do
-- TODO SortedList Document
flowPhylo' :: [Document] -> TermList -- ^Build
-> Level -> MinSizeBranch -- ^View
-> FilePath
-> IO FilePath
-> Level -> MinSizeBranch -- ^View
-> FilePath
-> IO FilePath
flowPhylo' corpus terms l m fp = do
let
phylo = buildPhylo corpus terms
......
......@@ -142,6 +142,7 @@ groupToDotNode fdt g bId =
, toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
, toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
, toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
])
......@@ -192,7 +193,7 @@ exportToDot phylo export =
,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
])
{-
......@@ -201,7 +202,7 @@ exportToDot phylo export =
-- 2) create a layer for the branches labels -}
subgraph (Str "Branches peaks") $ do
graphAttrs [Rank SameRank]
-- graphAttrs [Rank SameRank]
{-
-- 3) group the branches by hierarchy
-- mapM (\branches ->
......@@ -368,8 +369,8 @@ inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
ngramsMetrics :: PhyloExport -> PhyloExport
ngramsMetrics export =
ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
ngramsMetrics phylo export =
over ( export_groups
. traverse )
(\g -> g & phylo_groupMeta %~ insert "genericity"
......@@ -378,6 +379,8 @@ ngramsMetrics export =
(map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "inclusion"
(map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "frequence"
(map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
) export
......@@ -397,9 +400,9 @@ branchDating export =
& branch_meta %~ insert "age" [fromIntegral age]
& branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
processMetrics :: PhyloExport -> PhyloExport
processMetrics export = ngramsMetrics
$ branchDating export
processMetrics :: Phylo -> PhyloExport -> PhyloExport
processMetrics phylo export = ngramsMetrics phylo
$ branchDating export
-----------------
......@@ -598,8 +601,10 @@ toHorizon phylo =
mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd ->
let groups = getGroupsFromLevelPeriods level [prd] phylo
childs = getPreviousChildIds level frame prd periods phylo
heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
childs = getPreviousChildIds level frame prd periods phylo
-- maybe add a better filter for non isolated ancestors
heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
$ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
......@@ -630,7 +635,7 @@ toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
$ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
$ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
$ processMetrics export
$ processMetrics phylo export
where
export :: PhyloExport
export = PhyloExport groups branches
......
......@@ -37,6 +37,17 @@ import qualified Data.Set as Set
-- | To Phylo | --
------------------
{-
-- TODO AD
data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
| PhyloN { _phylo'_phylo1 :: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> Config -> Phylo
toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo
-}
toPhylo :: [Document] -> TermList -> Config -> Phylo
toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1))
......@@ -48,9 +59,11 @@ toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFrom
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 docs phyloBase
-- > AD to db here
--------------------------------------
phyloBase :: Phylo
phyloBase :: Phylo
phyloBase = toPhyloBase docs lst conf
-- > AD to db here
--------------------------------------
......@@ -202,7 +215,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$ foldl sumCooc empty
$ map listToMatrix
$ map (\d -> ngramsToIdx (text d) (getRoots phylo)) docs
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.01 cooc))
in (prd, map (\cl -> PhyloClique cl 0 prd) $ getMaxCliques Conditional 0.001 cooc))
$ toList phyloDocs
mcl' = mcl `using` parList rdeepseq
in fromList mcl'
......@@ -232,9 +245,9 @@ docsToTimeScaleCooc docs fdt =
-----------------------
-- | to Phylo Base | --
-----------------------
-- TODO anoe
groupDocsByPeriodRec :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc] -> Map (date, date) [doc]
groupDocsByPeriodRec f prds docs acc =
groupDocsByPeriodRec f prds docs acc =
if ((null prds) || (null docs))
then acc
else
......@@ -245,7 +258,7 @@ groupDocsByPeriodRec f prds docs acc =
-- To group a list of Documents by fixed periods
groupDocsByPeriod' :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod' f pds docs =
groupDocsByPeriod' f pds docs =
let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs
periods = map (inPeriode f docs') pds
periods' = periods `using` parList rdeepseq
......@@ -262,7 +275,7 @@ groupDocsByPeriod' f pds docs =
-- To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
......
......@@ -19,6 +19,8 @@ import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithK
import Data.String (String)
import Data.Text (Text)
import Prelude (floor)
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Text.Printf
......@@ -56,6 +58,22 @@ printIOComment cmt =
-- | Misc | --
--------------
-- truncate' :: Double -> Int -> Double
-- truncate' x n = (fromIntegral (floor (x * t))) / t
-- where t = 10^n
truncate' :: Double -> Int -> Double
truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
where
--------------
t :: Double
t = 10 ^n
getInMap :: Int -> Map Int Double -> Double
getInMap k m =
if (member k m)
then m ! k
else 0
roundToStr :: (PrintfArg a, Floating a) => Int -> a -> String
roundToStr = printf "%0.*f"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment