Commit 48eb263b authored by Alexandre Delanoë's avatar Alexandre Delanoë

[TextFlow] Local SpeGen computed, needs Metrics/Scored refactoring

parent 7fd045e8
Pipeline #1087 failed with stage
...@@ -9,11 +9,13 @@ Portability : POSIX ...@@ -9,11 +9,13 @@ Portability : POSIX
-} -}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.List module Gargantext.Core.Text.List
where where
-- import Data.Either (partitionEithers, Either(..)) -- import Data.Either (partitionEithers, Either(..))
import Control.Lens (makeLenses, set)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Map (Map) import Data.Map (Map)
import Data.Set (Set) import Data.Set (Set)
...@@ -28,6 +30,8 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, NgramsTerm(..), Ro ...@@ -28,6 +30,8 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, NgramsTerm(..), Ro
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..)) -- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId, Ordering(..)) import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId, Ordering(..))
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.Metrics (scored')
import Gargantext.Database.Action.Metrics.NgramsByNode (ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser) import Gargantext.Database.Action.Metrics.NgramsByNode (ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Metrics.TFICF (getTficf) import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import Gargantext.Database.Query.Table.Node (defaultList) import Gargantext.Database.Query.Table.Node (defaultList)
...@@ -124,7 +128,7 @@ buildNgramsTermsList l n m s uCid mCid = do ...@@ -124,7 +128,7 @@ buildNgramsTermsList l n m s uCid mCid = do
let grouped = groupStems' let grouped = groupStems'
$ map (\(t,d) -> let stem = ngramsGroup l n m t $ map (\(t,d) -> let stem = ngramsGroup l n m t
in ( stem in ( stem
, GroupedText Nothing t d Set.empty (size t) stem , GroupedText Nothing t d Set.empty (size t) stem Set.empty
) )
) candidateTerms ) candidateTerms
...@@ -143,9 +147,9 @@ buildNgramsTermsList l n m s uCid mCid = do ...@@ -143,9 +147,9 @@ buildNgramsTermsList l n m s uCid mCid = do
-- Get Local Scores now for selected grouped ngrams -- Get Local Scores now for selected grouped ngrams
selectedTerms = Set.toList $ List.foldl' selectedTerms = Set.toList $ List.foldl'
(\set (GroupedText _ l _ g _ _) -> Set.union set (\set (GroupedText _ l _ g _ _ _ ) -> Set.union set
$ Set.union g $ Set.union g
$ Set.singleton l $ Set.singleton l
) )
Set.empty Set.empty
(groupedMonoHead <> groupedMultHead) (groupedMonoHead <> groupedMultHead)
...@@ -155,9 +159,36 @@ buildNgramsTermsList l n m s uCid mCid = do ...@@ -155,9 +159,36 @@ buildNgramsTermsList l n m s uCid mCid = do
masterListId <- defaultList mCid masterListId <- defaultList mCid
mapTextDocIds <- getNodesByNgramsOnlyUser uCid [userListId, masterListId] NgramsTerms selectedTerms mapTextDocIds <- getNodesByNgramsOnlyUser uCid [userListId, masterListId] NgramsTerms selectedTerms
-- groups Set NodeId let
mapGroups = Map.fromList
$ map (\g -> (_gt_stem g, g))
$ groupedMonoHead <> groupedMultHead
-- grouping with Set NodeId
contextsAdded = foldl' (\mapGroups' k -> let k' = ngramsGroup l n m k
in case Map.lookup k' mapGroups' of
Nothing -> mapGroups'
Just g -> case Map.lookup k mapTextDocIds of
Nothing -> mapGroups'
Just ns -> Map.insert k' ( g { _gt_nodes = Set.union ns (_gt_nodes g)}) mapGroups'
)
mapGroups
$ Map.keys mapTextDocIds
-- compute cooccurrences -- compute cooccurrences
-- compute scores mapCooc = Map.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2)
| (t1, s1) <- mapStemNodeIds
, (t2, s2) <- mapStemNodeIds
, t1 /= t2 -- Null Diagonal
]
where
mapStemNodeIds = Map.toList $ Map.map (_gt_nodes) contextsAdded
-- computing scores
scores = scored' mapCooc
-- dilate scores
-- sort / filter -- sort / filter
...@@ -175,56 +206,41 @@ buildNgramsTermsList l n m s uCid mCid = do ...@@ -175,56 +206,41 @@ buildNgramsTermsList l n m s uCid mCid = do
<> (map (toGargList $ Just MapTerm) (monoHead <> multiHead)) <> (map (toGargList $ Just MapTerm) (monoHead <> multiHead))
<> (map (toGargList $ Just CandidateTerm) (monoTail <> multiTail)) <> (map (toGargList $ Just CandidateTerm) (monoTail <> multiTail))
ngs = List.concat ngs = List.concat
$ map toNgramsElement $ map toNgramsElement
$ groupStems $ groupStems
$ map (\(listType, (t,d)) -> let stem = ngramsGroup l n m t $ map (\(listType, (t,d)) -> let stem = ngramsGroup l n m t
in ( stem in ( stem
, GroupedText listType t d Set.empty (size t) stem , GroupedText listType t d Set.empty (size t) stem Set.empty
) )
) termList ) termList
pure $ Map.fromList [(NgramsTerms, ngs)] pure $ Map.fromList [(NgramsTerms, ngs)]
type Group = Lang -> Int -> Int -> Text -> Text
type Stem = Text
type Label = Text
data GroupedText score =
GroupedText { _gt_listType :: !(Maybe ListType)
, _gt_label :: !Label
, _gt_score :: !score
, _gt_group :: !(Set Text)
, _gt_size :: !Int
, _gt_stem :: !Stem
}
instance (Eq a) => Eq (GroupedText a) where
(==) (GroupedText _ _ score1 _ _ _)
(GroupedText _ _ score2 _ _ _) = (==) score1 score2
instance (Eq a, Ord a) => Ord (GroupedText a) where
compare (GroupedText _ _ score1 _ _ _)
(GroupedText _ _ score2 _ _ _) = compare score1 score2
groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double] groupStems :: [(Stem, GroupedText Double)] -> [GroupedText Double]
groupStems = Map.elems . groupStems' groupStems = Map.elems . groupStems'
groupStems' :: [(Stem, GroupedText Double)] -> Map Stem (GroupedText Double) groupStems' :: [(Stem, GroupedText Double)] -> Map Stem (GroupedText Double)
groupStems' = Map.fromListWith grouping groupStems' = Map.fromListWith grouping
where where
grouping (GroupedText lt1 label1 score1 group1 s1 stem1) grouping (GroupedText lt1 label1 score1 group1 s1 stem1 nodes1)
(GroupedText lt2 label2 score2 group2 s2 stem2) (GroupedText lt2 label2 score2 group2 s2 stem2 nodes2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 | score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr) s1 stem1 nodes
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr) s2 stem2 nodes
where where
lt = lt1 <> lt2 lt = lt1 <> lt2
gr = Set.union group1 group2 gr = Set.union group1 group2
nodes = Set.union nodes1 nodes2
toNgramsElement :: GroupedText Double -> [NgramsElement] toNgramsElement :: GroupedText Double -> [NgramsElement]
toNgramsElement (GroupedText listType label _ setNgrams _ _) = toNgramsElement (GroupedText listType label _ setNgrams _ _ _) =
[parentElem] <> childrenElems [parentElem] <> childrenElems
where where
parent = label parent = label
...@@ -247,3 +263,32 @@ isStopTerm :: StopSize -> Text -> Bool ...@@ -247,3 +263,32 @@ isStopTerm :: StopSize -> Text -> Bool
isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x) isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x)
where where
isStopChar c = not (c `elem` ("- /()%" :: [Char]) || Char.isAlpha c) isStopChar c = not (c `elem` ("- /()%" :: [Char]) || Char.isAlpha c)
------------------------------------------------------------------------------
type Group = Lang -> Int -> Int -> Text -> Text
type Stem = Text
type Label = Text
data GroupedText score =
GroupedText { _gt_listType :: !(Maybe ListType)
, _gt_label :: !Label
, _gt_score :: !score
, _gt_group :: !(Set Text)
, _gt_size :: !Int
, _gt_stem :: !Stem
, _gt_nodes :: !(Set NodeId)
}
instance (Eq a) => Eq (GroupedText a) where
(==) (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = (==) score1 score2
instance (Eq a, Ord a) => Ord (GroupedText a) where
compare (GroupedText _ _ score1 _ _ _ _)
(GroupedText _ _ score2 _ _ _ _) = compare score1 score2
-- Lenses Instances
makeLenses 'GroupedText
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment