Commit 82387646 authored by Alexandre Delanoë

[TextFlow] Score with normalization

parent 9e3e9e0f
...@@ -31,7 +31,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, NgramsTerm(..), Ro ...@@ -31,7 +31,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, NgramsTerm(..), Ro
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId) import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId) import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.Metrics (scored', Scored(..)) import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
import Gargantext.Database.Action.Metrics.NgramsByNode (ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser) import Gargantext.Database.Action.Metrics.NgramsByNode (ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Metrics.TFICF (getTficf) import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import Gargantext.Database.Query.Table.Node (defaultList) import Gargantext.Database.Query.Table.Node (defaultList)
...@@ -195,7 +195,11 @@ buildNgramsTermsList l n m s uCid mCid = do ...@@ -195,7 +195,11 @@ buildNgramsTermsList l n m s uCid mCid = do
let let
-- computing scores -- computing scores
mapScores f = Map.fromList $ map (\(Scored t g s') -> (t, f (g,s'))) $ scored' mapCooc mapScores f = Map.fromList
$ map (\(Scored t g s') -> (t, f (g,s')))
$ normalizeGlobal
$ map normalizeLocal
$ scored' mapCooc
groupsWithScores = catMaybes groupsWithScores = catMaybes
$ map (\(stem, g) $ map (\(stem, g)
...@@ -204,9 +208,7 @@ buildNgramsTermsList l n m s uCid mCid = do ...@@ -204,9 +208,7 @@ buildNgramsTermsList l n m s uCid mCid = do
Just s' -> Just $ g { _gt_score = s'} Just s' -> Just $ g { _gt_score = s'}
) $ Map.toList contextsAdded ) $ Map.toList contextsAdded
where where
mapScores' = mapScores adapt1 -- identity mapScores' = mapScores identity
adapt1 (s1,s2) = (log' 5 s1, log' 2 s2)
log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)
-- adapt2 TOCHECK with DC -- adapt2 TOCHECK with DC
-- printDebug "groupsWithScores" groupsWithScores -- printDebug "groupsWithScores" groupsWithScores
let let
......
...@@ -73,3 +73,28 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) score ...@@ -73,3 +73,28 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) score
$ DAA.zip (DAA.use is) (DAA.use ss) $ DAA.zip (DAA.use is) (DAA.use ss)
-- | Rescale both scores of every term relative to the whole list.
--
-- For each of the two scores, the list-wide minimum is subtracted and the
-- result is divided by the list-wide maximum of that score. An empty list is
-- returned as-is: the extrema are only evaluated when an element is rescaled.
--
-- When the maximum of a score is zero the division is undefined (NaN or
-- Infinity for floating-point scores), so that score is set to 0 for every
-- term instead of propagating an unusable value.
--
-- NOTE(review): the divisor is the maximum, not the range (max - min), so
-- this is not a classic min-max normalisation onto [0, 1] -- confirm this is
-- intended before changing the formula.
normalizeGlobal :: [Scored a] -> [Scored a]
normalizeGlobal ss = map rescaleBoth ss
  where
    rescaleBoth (Scored t s1 s2) =
      Scored t (rescale s1min s1max s1) (rescale s2min s2max s2)

    -- Shift by the minimum, scale by the maximum; guard the zero divisor.
    rescale lo hi x
      | hi == 0   = 0
      | otherwise = (x - lo) / hi

    ss1 = map _scored_genInc ss
    ss2 = map _scored_speExc ss

    s1min = minimum ss1
    s1max = maximum ss1
    s2min = minimum ss2
    s2max = maximum ss2
-- | Rescale the two scores of a single term on a logarithmic scale.
--
-- The first score is multiplied by 10^5 and the second by 10^2 before the
-- natural logarithm is taken; 1 is then added to the result. A score that is
-- zero or negative has no logarithm and is mapped to 1.
normalizeLocal :: Scored a -> Scored a
normalizeLocal (Scored term genInc speExc) =
  Scored term (rescale 5 genInc) (rescale 2 speExc)
  where
    -- @rescale k x@ is @1 + log (10^k * x)@ for positive @x@, and 1 otherwise.
    rescale magnitude x
      | x <= 0    = 1
      | otherwise = 1 + log ((10 ^ (magnitude :: Int)) * x)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment