Commit d4e1dd93 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] map list size filtered (others terms do not appear any more)

parent 17fbec42
Pipeline #1458 failed with stage
...@@ -130,13 +130,13 @@ reIndexWith cId lId nt lts = do ...@@ -130,13 +130,13 @@ reIndexWith cId lId nt lts = do
-- Get all documents of the corpus -- Get all documents of the corpus
docs <- selectDocNodes cId docs <- selectDocNodes cId
-- Checking Text documents where orphans match -- Checking Text documents where orphans match
-- TODO Tests here -- TODO Tests here
let let
ngramsByDoc = HashMap.fromList ngramsByDoc = map (HashMap.fromList)
$ map (\(k,v) -> (SimpleNgrams (text2ngrams k), v)) $ map (map (\(k,v) -> (SimpleNgrams (text2ngrams k), v)))
$ List.concat
$ map (\doc -> List.zip $ map (\doc -> List.zip
(termsInText (buildPatterns $ map (\k -> ([unNgramsTerm k], [])) orphans) (termsInText (buildPatterns $ map (\k -> ([unNgramsTerm k], [])) orphans)
$ Text.unlines $ catMaybes $ Text.unlines $ catMaybes
...@@ -150,7 +150,7 @@ reIndexWith cId lId nt lts = do ...@@ -150,7 +150,7 @@ reIndexWith cId lId nt lts = do
printDebug "ngramsByDoc" ngramsByDoc printDebug "ngramsByDoc" ngramsByDoc
-- Saving the indexation in database -- Saving the indexation in database
_ <- saveDocNgramsWith lId ngramsByDoc _ <- mapM (saveDocNgramsWith lId) ngramsByDoc
pure () -- ngramsByDoc pure () -- ngramsByDoc
......
...@@ -104,20 +104,9 @@ buildNgramsOthersList user uCid _groupParams (nt, MapListSize mapListSize) = do ...@@ -104,20 +104,9 @@ buildNgramsOthersList user uCid _groupParams (nt, MapListSize mapListSize) = do
$ List.zip (HashMap.keys allTerms) $ List.zip (HashMap.keys allTerms)
(List.cycle [mempty]) (List.cycle [mempty])
) )
{-
if nt == Sources -- Authors
then printDebug "flowSocialList" socialLists
else printDebug "flowSocialList" ""
-}
let let
groupedWithList = toGroupedTree {- groupParams -} socialLists allTerms groupedWithList = toGroupedTree {- groupParams -} socialLists allTerms
{-
if nt == Sources -- Authors
then printDebug "groupedWithList" groupedWithList
else printDebug "groupedWithList" ""
-}
let
(stopTerms, tailTerms) = HashMap.partition ((== Just StopTerm) . viewListType) (stopTerms, tailTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
$ view flc_scores groupedWithList $ view flc_scores groupedWithList
...@@ -129,10 +118,10 @@ buildNgramsOthersList user uCid _groupParams (nt, MapListSize mapListSize) = do ...@@ -129,10 +118,10 @@ buildNgramsOthersList user uCid _groupParams (nt, MapListSize mapListSize) = do
$ List.sortOn (Down . viewScore . snd) $ List.sortOn (Down . viewScore . snd)
$ HashMap.toList tailTerms' $ HashMap.toList tailTerms'
pure $ Map.fromList [( nt, (toNgramsElement stopTerms) pure $ Map.fromList [( nt, (toNgramsElement stopTerms)
<> (toNgramsElement mapTerms ) <> (toNgramsElement mapTerms )
<> (toNgramsElement $ setListType (Just MapTerm ) mapTerms' ) <> (toNgramsElement $ setListType (Just MapTerm ) mapTerms' )
<> (toNgramsElement $ setListType (Just CandidateTerm) candiTerms) <> (toNgramsElement $ setListType (Just CandidateTerm) candiTerms)
)] )]
...@@ -149,7 +138,6 @@ getGroupParams gp@(GroupWithPosTag l a _m) ng = do ...@@ -149,7 +138,6 @@ getGroupParams gp@(GroupWithPosTag l a _m) ng = do
getGroupParams gp _ = pure gp getGroupParams gp _ = pure gp
-- TODO use ListIds -- TODO use ListIds
buildNgramsTermsList :: ( HasNodeError err buildNgramsTermsList :: ( HasNodeError err
, CmdM env err m , CmdM env err m
...@@ -178,18 +166,19 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do ...@@ -178,18 +166,19 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
let ngramsKeys = HashMap.keysSet allTerms let ngramsKeys = HashMap.keysSet allTerms
groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) ngramsKeys) groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) ngramsKeys)
let socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists
let
socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists
--printDebug "socialLists_Stemmed" socialLists_Stemmed --printDebug "socialLists_Stemmed" socialLists_Stemmed
let groupedWithList = toGroupedTree socialLists_Stemmed allTerms groupedWithList = toGroupedTree socialLists_Stemmed allTerms
(stopTerms, candidateTerms) = HashMap.partition ((== Just StopTerm) . viewListType) (stopTerms, candidateTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
$ view flc_scores groupedWithList $ view flc_scores groupedWithList
(groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms (groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms
-- printDebug "stopTerms" stopTerms -- printDebug "stopTerms" stopTerms
-- splitting monterms and multiterms to take proportional candidates -- splitting monterms and multiterms to take proportional candidates
let
-- use % of list if to big, or Int if too small -- use % of list if to big, or Int if too small
listSizeGlobal = 2000 :: Double listSizeGlobal = 2000 :: Double
monoSize = 0.4 :: Double monoSize = 0.4 :: Double
...@@ -208,11 +197,10 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do ...@@ -208,11 +197,10 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
selectedTerms = Set.toList $ hasTerms (groupedMonoHead <> groupedMultHead) selectedTerms = Set.toList $ hasTerms (groupedMonoHead <> groupedMultHead)
-- TO remove (and remove HasNodeError instance) -- TODO remove (and remove HasNodeError instance)
userListId <- defaultList uCid userListId <- defaultList uCid
masterListId <- defaultList mCid masterListId <- defaultList mCid
mapTextDocIds <- getNodesByNgramsOnlyUser uCid mapTextDocIds <- getNodesByNgramsOnlyUser uCid
[userListId, masterListId] [userListId, masterListId]
nt nt
...@@ -220,11 +208,15 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do ...@@ -220,11 +208,15 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
let let
groupedTreeScores_SetNodeId :: HashMap NgramsTerm (GroupedTreeScores (Set NodeId)) groupedTreeScores_SetNodeId :: HashMap NgramsTerm (GroupedTreeScores (Set NodeId))
groupedTreeScores_SetNodeId = setScoresWithMap mapTextDocIds (groupedMonoHead <> groupedMultHead) groupedTreeScores_SetNodeId = HashMap.filter (\g -> Set.size (view gts'_score g) > 1) -- removing hapax
$ setScoresWithMap mapTextDocIds (groupedMonoHead <> groupedMultHead)
printDebug "groupedTreeScores_SetNodeId" groupedTreeScores_SetNodeId
-- | Coocurrences computation -- | Coocurrences computation
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric --, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
let mapCooc = HashMap.filter (>2) let mapCooc = HashMap.filter (>1) -- removing cooc of 1
$ HashMap.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2) $ HashMap.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2)
| (t1, s1) <- mapStemNodeIds | (t1, s1) <- mapStemNodeIds
, (t2, s2) <- mapStemNodeIds , (t2, s2) <- mapStemNodeIds
...@@ -245,10 +237,7 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do ...@@ -245,10 +237,7 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
let let
groupedTreeScores_SpeGen :: HashMap NgramsTerm (GroupedTreeScores (Scored NgramsTerm)) groupedTreeScores_SpeGen :: HashMap NgramsTerm (GroupedTreeScores (Scored NgramsTerm))
groupedTreeScores_SpeGen = setScoresWithMap (mapScores identity) groupedTreeScores_SpeGen = setScoresWithMap (mapScores identity) groupedTreeScores_SetNodeId
( groupedMonoHead
<> groupedMultHead
)
let let
-- sort / partition / split -- sort / partition / split
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment