Commit 21f8b2e0 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[TextFlow] grouping fun

parent 23ef92fa
...@@ -21,7 +21,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, RootParent(..), mS ...@@ -21,7 +21,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, RootParent(..), mS
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..)) -- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId, Ordering(..)) import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId, Ordering(..))
import Gargantext.Database.Action.Metrics.NgramsByNode ({-ngramsGroup,-} getNodesByNgramsUser, groupNodesByNgramsWith) import Gargantext.Database.Action.Metrics.NgramsByNode (ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith)
import Gargantext.Database.Action.Metrics.TFICF (getTficf) import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import Gargantext.Core.Text.Metrics.TFICF (sortTficf) import Gargantext.Core.Text.Metrics.TFICF (sortTficf)
import Gargantext.Database.Prelude (Cmd) import Gargantext.Database.Prelude (Cmd)
...@@ -98,10 +98,10 @@ buildNgramsTermsList :: Lang ...@@ -98,10 +98,10 @@ buildNgramsTermsList :: Lang
-> UserCorpusId -> UserCorpusId
-> MasterCorpusId -> MasterCorpusId
-> Cmd err (Map NgramsType [NgramsElement]) -> Cmd err (Map NgramsType [NgramsElement])
buildNgramsTermsList _l _n _m s uCid mCid = do buildNgramsTermsList l n m s uCid mCid = do
candidates <- sortTficf Up <$> getTficf uCid mCid NgramsTerms candidates <- sortTficf Up <$> getTficf uCid mCid NgramsTerms
printDebug "head candidates" (List.take 10 $ candidates) -- printDebug "head candidates" (List.take 10 $ candidates)
printDebug "tail candidates" (List.take 10 $ List.reverse $ candidates) -- printDebug "tail candidates" (List.take 10 $ List.reverse $ candidates)
let let
(candidatesHead, candidatesTail0) = List.splitAt 3 candidates (candidatesHead, candidatesTail0) = List.splitAt 3 candidates
...@@ -113,35 +113,39 @@ buildNgramsTermsList _l _n _m s uCid mCid = do ...@@ -113,35 +113,39 @@ buildNgramsTermsList _l _n _m s uCid mCid = do
ngs = List.concat ngs = List.concat
$ map toNgramsElement $ map toNgramsElement
$ map (\(lt, (t,d)) -> (lt, ((t, (d,Set.singleton t))))) termList $ groupStems
$ map (\(listType, (t,d)) -> ( ngramsGroup l n m t
, GroupedText listType t d Set.empty
)
) termList
pure $ Map.fromList [(NgramsTerms, ngs)] pure $ Map.fromList [(NgramsTerms, ngs)]
type Group = Lang -> Int -> Int -> Text -> Text
toTermList :: Int type Stem = Text
-> Int type Label = Text
-> (a -> Bool) data GroupedText = GroupedText { _gt_listType :: ListType
-> [a] , _gt_label :: Label
-> [(ListType, a)] , _gt_score :: Double
toTermList _ _ _ [] = [] , _gt_group :: Set Text
toTermList a b stop ns = -- trace ("computing toTermList") $ }
map (toGargList stop CandidateTerm) xs groupStems :: [(Stem, GroupedText)] -> [GroupedText]
<> map (toGargList stop MapTerm) ys groupStems = Map.elems . Map.fromListWith grouping
<> toTermList a b stop zs
where where
xs = take a ns grouping (GroupedText lt1 label1 score1 group1)
xz = drop a ns (GroupedText lt2 label2 score2 group2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr)
ys = take b xz | otherwise = GroupedText lt label2 score2 (Set.insert label1 gr)
zs = drop b xz where
lt = lt1 <> lt2
gr = Set.union group1 group2
toNgramsElement :: (ListType, (Text, (Double, Set Text))) -> [NgramsElement] toNgramsElement :: GroupedText -> [NgramsElement]
toNgramsElement (listType, (_stem, (_score, setNgrams))) = toNgramsElement (GroupedText listType label _ setNgrams) =
case Set.toList setNgrams of [parentElem] <> childrenElems
[] -> []
(parent:children) -> [parentElem] <> childrenElems
where where
parent = label
children = Set.toList setNgrams
parentElem = mkNgramsElement parent parentElem = mkNgramsElement parent
listType listType
Nothing Nothing
......
...@@ -126,10 +126,8 @@ instance Semigroup TokenTag where ...@@ -126,10 +126,8 @@ instance Semigroup TokenTag where
instance Monoid TokenTag where instance Monoid TokenTag where
mempty = TokenTag [] empty Nothing Nothing mempty = TokenTag [] empty Nothing Nothing
mappend t1 t2 = (<>) t1 t2
mconcat = foldl mappend mempty mconcat = foldl mappend mempty
-- mappend t1 t2 = (<>) t1 t2
class HasInvalidError e where class HasInvalidError e where
......
...@@ -23,6 +23,7 @@ import Data.Either (Either(..)) ...@@ -23,6 +23,7 @@ import Data.Either (Either(..))
import Data.Eq (Eq()) import Data.Eq (Eq())
import Data.Map (fromList, lookup) import Data.Map (fromList, lookup)
import Data.Monoid ((<>)) import Data.Monoid ((<>))
import Data.Semigroup (Semigroup(..))
import Data.Swagger import Data.Swagger
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
import GHC.Generics (Generic) import GHC.Generics (Generic)
...@@ -61,6 +62,15 @@ instance ToParamSchema ListType ...@@ -61,6 +62,15 @@ instance ToParamSchema ListType
instance Arbitrary ListType where instance Arbitrary ListType where
arbitrary = elements [minBound..maxBound] arbitrary = elements [minBound..maxBound]
instance Semigroup ListType
where
MapTerm <> _ = MapTerm
_ <> MapTerm = MapTerm
CandidateTerm <> _ = CandidateTerm
_ <> CandidateTerm = CandidateTerm
StopTerm <> StopTerm = StopTerm
instance FromHttpApiData ListType where instance FromHttpApiData ListType where
parseUrlPiece = Right . read . unpack parseUrlPiece = Right . read . unpack
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment