Commit 82387646 authored by Alexandre Delanoë

[TextFlow] Score with normalization

parent 9e3e9e0f
......@@ -31,7 +31,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, NgramsTerm(..), Ro
import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.Metrics (scored', Scored(..))
import Gargantext.Core.Text.Metrics (scored', Scored(..), normalizeGlobal, normalizeLocal)
import Gargantext.Database.Action.Metrics.NgramsByNode (ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith, getNodesByNgramsOnlyUser)
import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import Gargantext.Database.Query.Table.Node (defaultList)
......@@ -195,7 +195,11 @@ buildNgramsTermsList l n m s uCid mCid = do
let
-- computing scores
mapScores f = Map.fromList $ map (\(Scored t g s') -> (t, f (g,s'))) $ scored' mapCooc
mapScores f = Map.fromList
$ map (\(Scored t g s') -> (t, f (g,s')))
$ normalizeGlobal
$ map normalizeLocal
$ scored' mapCooc
groupsWithScores = catMaybes
$ map (\(stem, g)
......@@ -204,9 +208,7 @@ buildNgramsTermsList l n m s uCid mCid = do
Just s' -> Just $ g { _gt_score = s'}
) $ Map.toList contextsAdded
where
mapScores' = mapScores adapt1 -- identity
adapt1 (s1,s2) = (log' 5 s1, log' 2 s2)
log' n' x = 1 + (if x <= 0 then 0 else log $ (10^(n'::Int)) * x)
mapScores' = mapScores identity
-- adapt2 TOCHECK with DC
-- printDebug "groupsWithScores" groupsWithScores
let
......
......@@ -73,3 +73,28 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) score
$ DAA.zip (DAA.use is) (DAA.use ss)
-- | Rescale both scores of every 'Scored' element relative to the whole list
-- (min-max scaling), so that each score ends up in the interval @[0, 1]@.
--
-- For each of the two score fields ('_scored_genInc' and '_scored_speExc')
-- the smallest value in the list is mapped to @0@ and the largest to @1@.
--
-- The previous implementation divided by the maximum instead of the range
-- @(max - min)@: the largest score was never mapped to @1@, a maximum of
-- zero produced @NaN@ / @Infinity@, and a negative maximum inverted the
-- ordering of the scores.
--
-- When all values of a field are equal (including the single-element list)
-- there is no range to scale against, and that field is set to @0@.
-- The empty list is returned unchanged.
normalizeGlobal :: [Scored a] -> [Scored a]
normalizeGlobal ss = map rescale ss
  where
    rescale (Scored t s1 s2) =
      Scored t (minMax s1min s1max s1) (minMax s2min s2max s2)

    ss1 = map _scored_genInc ss
    ss2 = map _scored_speExc ss

    -- 'minimum' / 'maximum' are partial on the empty list, but these
    -- bindings are only forced from 'rescale', which is never called when
    -- 'ss' is empty.
    s1min = minimum ss1
    s1max = maximum ss1
    s2min = minimum ss2
    s2max = maximum ss2

    -- Min-max scaling with a guard against a degenerate (zero-width) range.
    minMax lo hi x
      | hi > lo   = (x - lo) / (hi - lo)
      | otherwise = 0
-- | Rescale the two scores of a single 'Scored' element independently of any
-- other element, using a shifted logarithm.
--
-- The first score is transformed with a scale exponent of 5 and the second
-- with a scale exponent of 2. The term itself is left untouched.
normalizeLocal :: Scored a -> Scored a
normalizeLocal (Scored term genInc speExc) =
  Scored term (shiftedLog 5 genInc) (shiftedLog 2 speExc)
  where
    -- @1 + log (10^k * v)@ for strictly positive @v@; non-positive scores
    -- have no logarithm and collapse to the constant @1@.
    shiftedLog k v
      | v <= 0    = 1
      | otherwise = 1 + log ((10 ^ (k :: Int)) * v)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment