Commit 21f8b2e0 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[TextFlow] grouping fun

parent 23ef92fa
...@@ -21,7 +21,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, RootParent(..), mS ...@@ -21,7 +21,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, RootParent(..), mS
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..)) -- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId, Ordering(..)) import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId, Ordering(..))
import Gargantext.Database.Action.Metrics.NgramsByNode ({-ngramsGroup,-} getNodesByNgramsUser, groupNodesByNgramsWith) import Gargantext.Database.Action.Metrics.NgramsByNode (ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith)
import Gargantext.Database.Action.Metrics.TFICF (getTficf) import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import Gargantext.Core.Text.Metrics.TFICF (sortTficf) import Gargantext.Core.Text.Metrics.TFICF (sortTficf)
import Gargantext.Database.Prelude (Cmd) import Gargantext.Database.Prelude (Cmd)
...@@ -98,10 +98,10 @@ buildNgramsTermsList :: Lang ...@@ -98,10 +98,10 @@ buildNgramsTermsList :: Lang
-> UserCorpusId -> UserCorpusId
-> MasterCorpusId -> MasterCorpusId
-> Cmd err (Map NgramsType [NgramsElement]) -> Cmd err (Map NgramsType [NgramsElement])
buildNgramsTermsList _l _n _m s uCid mCid = do buildNgramsTermsList l n m s uCid mCid = do
candidates <- sortTficf Up <$> getTficf uCid mCid NgramsTerms candidates <- sortTficf Up <$> getTficf uCid mCid NgramsTerms
printDebug "head candidates" (List.take 10 $ candidates) -- printDebug "head candidates" (List.take 10 $ candidates)
printDebug "tail candidates" (List.take 10 $ List.reverse $ candidates) -- printDebug "tail candidates" (List.take 10 $ List.reverse $ candidates)
let let
(candidatesHead, candidatesTail0) = List.splitAt 3 candidates (candidatesHead, candidatesTail0) = List.splitAt 3 candidates
...@@ -113,43 +113,47 @@ buildNgramsTermsList _l _n _m s uCid mCid = do ...@@ -113,43 +113,47 @@ buildNgramsTermsList _l _n _m s uCid mCid = do
ngs = List.concat ngs = List.concat
$ map toNgramsElement $ map toNgramsElement
$ map (\(lt, (t,d)) -> (lt, ((t, (d,Set.singleton t))))) termList $ groupStems
$ map (\(listType, (t,d)) -> ( ngramsGroup l n m t
, GroupedText listType t d Set.empty
)
) termList
pure $ Map.fromList [(NgramsTerms, ngs)] pure $ Map.fromList [(NgramsTerms, ngs)]
type Group = Lang -> Int -> Int -> Text -> Text
toTermList :: Int type Stem = Text
-> Int type Label = Text
-> (a -> Bool) data GroupedText = GroupedText { _gt_listType :: ListType
-> [a] , _gt_label :: Label
-> [(ListType, a)] , _gt_score :: Double
toTermList _ _ _ [] = [] , _gt_group :: Set Text
toTermList a b stop ns = -- trace ("computing toTermList") $ }
map (toGargList stop CandidateTerm) xs groupStems :: [(Stem, GroupedText)] -> [GroupedText]
<> map (toGargList stop MapTerm) ys groupStems = Map.elems . Map.fromListWith grouping
<> toTermList a b stop zs where
grouping (GroupedText lt1 label1 score1 group1)
(GroupedText lt2 label2 score2 group2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr)
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr)
where
lt = lt1 <> lt2
gr = Set.union group1 group2
toNgramsElement :: GroupedText -> [NgramsElement]
toNgramsElement (GroupedText listType label _ setNgrams) =
[parentElem] <> childrenElems
where where
xs = take a ns parent = label
xz = drop a ns children = Set.toList setNgrams
parentElem = mkNgramsElement parent
ys = take b xz listType
zs = drop b xz Nothing
(mSetFromList children)
childrenElems = map (\t -> mkNgramsElement t listType
toNgramsElement :: (ListType, (Text, (Double, Set Text))) -> [NgramsElement] (Just $ RootParent parent parent)
toNgramsElement (listType, (_stem, (_score, setNgrams))) = (mSetFromList [])
case Set.toList setNgrams of ) children
[] -> []
(parent:children) -> [parentElem] <> childrenElems
where
parentElem = mkNgramsElement parent
listType
Nothing
(mSetFromList children)
childrenElems = map (\t -> mkNgramsElement t listType
(Just $ RootParent parent parent)
(mSetFromList [])
) children
toGargList :: (b -> Bool) -> ListType -> b -> (ListType, b) toGargList :: (b -> Bool) -> ListType -> b -> (ListType, b)
......
...@@ -126,10 +126,8 @@ instance Semigroup TokenTag where ...@@ -126,10 +126,8 @@ instance Semigroup TokenTag where
instance Monoid TokenTag where instance Monoid TokenTag where
mempty = TokenTag [] empty Nothing Nothing mempty = TokenTag [] empty Nothing Nothing
mappend t1 t2 = (<>) t1 t2
mconcat = foldl mappend mempty mconcat = foldl mappend mempty
-- mappend t1 t2 = (<>) t1 t2
class HasInvalidError e where class HasInvalidError e where
......
...@@ -23,6 +23,7 @@ import Data.Either (Either(..)) ...@@ -23,6 +23,7 @@ import Data.Either (Either(..))
import Data.Eq (Eq()) import Data.Eq (Eq())
import Data.Map (fromList, lookup) import Data.Map (fromList, lookup)
import Data.Monoid ((<>)) import Data.Monoid ((<>))
import Data.Semigroup (Semigroup(..))
import Data.Swagger import Data.Swagger
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
import GHC.Generics (Generic) import GHC.Generics (Generic)
...@@ -61,6 +62,15 @@ instance ToParamSchema ListType ...@@ -61,6 +62,15 @@ instance ToParamSchema ListType
instance Arbitrary ListType where instance Arbitrary ListType where
arbitrary = elements [minBound..maxBound] arbitrary = elements [minBound..maxBound]
instance Semigroup ListType
where
MapTerm <> _ = MapTerm
_ <> MapTerm = MapTerm
CandidateTerm <> _ = CandidateTerm
_ <> CandidateTerm = CandidateTerm
StopTerm <> StopTerm = StopTerm
instance FromHttpApiData ListType where instance FromHttpApiData ListType where
parseUrlPiece = Right . read . unpack parseUrlPiece = Right . read . unpack
......
...@@ -66,7 +66,7 @@ countNodesByNgramsWith f m = (total, m') ...@@ -66,7 +66,7 @@ countNodesByNgramsWith f m = (total, m')
where where
total = fromIntegral $ Set.size $ Set.unions $ elems m total = fromIntegral $ Set.size $ Set.unions $ elems m
m' = Map.map ( swap . second (fromIntegral . Set.size)) m' = Map.map ( swap . second (fromIntegral . Set.size))
$ groupNodesByNgramsWith f m $ groupNodesByNgramsWith f m
groupNodesByNgramsWith :: (Text -> Text) groupNodesByNgramsWith :: (Text -> Text)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment