Commit 21f8b2e0 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[TextFlow] grouping fun

parent 23ef92fa
......@@ -21,7 +21,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, RootParent(..), mS
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId, Ordering(..))
import Gargantext.Database.Action.Metrics.NgramsByNode ({-ngramsGroup,-} getNodesByNgramsUser, groupNodesByNgramsWith)
import Gargantext.Database.Action.Metrics.NgramsByNode (ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith)
import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import Gargantext.Core.Text.Metrics.TFICF (sortTficf)
import Gargantext.Database.Prelude (Cmd)
......@@ -98,10 +98,10 @@ buildNgramsTermsList :: Lang
-> UserCorpusId
-> MasterCorpusId
-> Cmd err (Map NgramsType [NgramsElement])
buildNgramsTermsList _l _n _m s uCid mCid = do
buildNgramsTermsList l n m s uCid mCid = do
candidates <- sortTficf Up <$> getTficf uCid mCid NgramsTerms
printDebug "head candidates" (List.take 10 $ candidates)
printDebug "tail candidates" (List.take 10 $ List.reverse $ candidates)
-- printDebug "head candidates" (List.take 10 $ candidates)
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ candidates)
let
(candidatesHead, candidatesTail0) = List.splitAt 3 candidates
......@@ -113,43 +113,47 @@ buildNgramsTermsList _l _n _m s uCid mCid = do
ngs = List.concat
$ map toNgramsElement
$ map (\(lt, (t,d)) -> (lt, ((t, (d,Set.singleton t))))) termList
$ groupStems
$ map (\(listType, (t,d)) -> ( ngramsGroup l n m t
, GroupedText listType t d Set.empty
)
) termList
pure $ Map.fromList [(NgramsTerms, ngs)]
toTermList :: Int
-> Int
-> (a -> Bool)
-> [a]
-> [(ListType, a)]
toTermList _ _ _ [] = []
toTermList a b stop ns = -- trace ("computing toTermList") $
map (toGargList stop CandidateTerm) xs
<> map (toGargList stop MapTerm) ys
<> toTermList a b stop zs
type Group = Lang -> Int -> Int -> Text -> Text
type Stem = Text
type Label = Text
data GroupedText = GroupedText { _gt_listType :: ListType
, _gt_label :: Label
, _gt_score :: Double
, _gt_group :: Set Text
}
groupStems :: [(Stem, GroupedText)] -> [GroupedText]
groupStems = Map.elems . Map.fromListWith grouping
where
grouping (GroupedText lt1 label1 score1 group1)
(GroupedText lt2 label2 score2 group2)
| score1 >= score2 = GroupedText lt label1 score1 (Set.insert label2 gr)
| otherwise = GroupedText lt label2 score2 (Set.insert label1 gr)
where
lt = lt1 <> lt2
gr = Set.union group1 group2
toNgramsElement :: GroupedText -> [NgramsElement]
toNgramsElement (GroupedText listType label _ setNgrams) =
[parentElem] <> childrenElems
where
xs = take a ns
xz = drop a ns
ys = take b xz
zs = drop b xz
toNgramsElement :: (ListType, (Text, (Double, Set Text))) -> [NgramsElement]
toNgramsElement (listType, (_stem, (_score, setNgrams))) =
case Set.toList setNgrams of
[] -> []
(parent:children) -> [parentElem] <> childrenElems
where
parentElem = mkNgramsElement parent
listType
Nothing
(mSetFromList children)
childrenElems = map (\t -> mkNgramsElement t listType
(Just $ RootParent parent parent)
(mSetFromList [])
) children
parent = label
children = Set.toList setNgrams
parentElem = mkNgramsElement parent
listType
Nothing
(mSetFromList children)
childrenElems = map (\t -> mkNgramsElement t listType
(Just $ RootParent parent parent)
(mSetFromList [])
) children
toGargList :: (b -> Bool) -> ListType -> b -> (ListType, b)
......
......@@ -126,10 +126,8 @@ instance Semigroup TokenTag where
instance Monoid TokenTag where
mempty = TokenTag [] empty Nothing Nothing
mappend t1 t2 = (<>) t1 t2
mconcat = foldl mappend mempty
-- mappend t1 t2 = (<>) t1 t2
class HasInvalidError e where
......
......@@ -23,6 +23,7 @@ import Data.Either (Either(..))
import Data.Eq (Eq())
import Data.Map (fromList, lookup)
import Data.Monoid ((<>))
import Data.Semigroup (Semigroup(..))
import Data.Swagger
import Data.Text (Text, unpack)
import GHC.Generics (Generic)
......@@ -61,6 +62,15 @@ instance ToParamSchema ListType
instance Arbitrary ListType where
arbitrary = elements [minBound..maxBound]
instance Semigroup ListType
where
MapTerm <> _ = MapTerm
_ <> MapTerm = MapTerm
CandidateTerm <> _ = CandidateTerm
_ <> CandidateTerm = CandidateTerm
StopTerm <> StopTerm = StopTerm
instance FromHttpApiData ListType where
parseUrlPiece = Right . read . unpack
......
......@@ -66,7 +66,7 @@ countNodesByNgramsWith f m = (total, m')
where
total = fromIntegral $ Set.size $ Set.unions $ elems m
m' = Map.map ( swap . second (fromIntegral . Set.size))
$ groupNodesByNgramsWith f m
$ groupNodesByNgramsWith f m
groupNodesByNgramsWith :: (Text -> Text)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment