Commit 461124e8 authored by Przemyslaw Kaminski

[flow] fix hardcoded list size in build terms

parent e71c52ec
Pipeline #3488 passed with stage
in 92 minutes and 24 seconds
...@@ -29,4 +29,3 @@ data OutputFlow ...@@ -29,4 +29,3 @@ data OutputFlow
flow :: Flow -> OutputFlow flow :: Flow -> OutputFlow
flow = undefined flow = undefined
...@@ -63,4 +63,3 @@ toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do ...@@ -63,4 +63,3 @@ toDoc' la (HAL.Corpus i t ab d s aus affs struct_id) = do
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (pack . show) la } , _hd_language_iso2 = Just $ (pack . show) la }
...@@ -159,7 +159,7 @@ buildNgramsTermsList :: ( HasNodeError err ...@@ -159,7 +159,7 @@ buildNgramsTermsList :: ( HasNodeError err
-> GroupParams -> GroupParams
-> (NgramsType, MapListSize) -> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSize)= do
-- Filter 0 With Double -- Filter 0 With Double
-- Computing global speGen score -- Computing global speGen score
...@@ -179,13 +179,13 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do ...@@ -179,13 +179,13 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
) )
printDebug "[buildNgramsTermsList: Flow Social List / end]" nt printDebug "[buildNgramsTermsList: Flow Social List / end]" nt
let !ngramsKeys = HashSet.fromList $ List.take 1000 $ HashSet.toList $ HashMap.keysSet allTerms let !ngramsKeys = HashSet.fromList $ List.take mapListSize $ HashSet.toList $ HashMap.keysSet allTerms
printDebug "[buildNgramsTermsList: ngramsKeys]" (HashSet.size ngramsKeys) printDebug "[buildNgramsTermsList: ngramsKeys]" (HashSet.size ngramsKeys)
!groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) ngramsKeys) !groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) ngramsKeys)
printDebug "[buildNgramsTermsList: groupParams']" (""::Text) printDebug "[buildNgramsTermsList: groupParams']" ("" :: Text)
let let
!socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists !socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists
...@@ -217,7 +217,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do ...@@ -217,7 +217,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
------------------------- -------------------------
-- Filter 1 With Set NodeId and SpeGen -- Filter 1 With Set NodeId and SpeGen
!selectedTerms = Set.toList $ hasTerms (groupedMonoHead <> groupedMultHead) !selectedTerms = Set.toList $ hasTerms (groupedMonoHead <> groupedMultHead)
printDebug "[buildNgramsTermsList: selectedTerms]" selectedTerms printDebug "[buildNgramsTermsList: selectedTerms]" selectedTerms
-- TODO remove (and remove HasNodeError instance) -- TODO remove (and remove HasNodeError instance)
...@@ -283,7 +283,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do ...@@ -283,7 +283,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
-- use % of list if too big, or Int if too small -- use % of list if too big, or Int if too small
!mapSize = 1000 :: Double !mapSize = 1000 :: Double
!canSize = mapSize * 2 :: Double !canSize = mapSize * 2 :: Double
!inclSize = 0.4 :: Double !inclSize = 0.4 :: Double
!exclSize = 1 - inclSize !exclSize = 1 - inclSize
...@@ -318,7 +318,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do ...@@ -318,7 +318,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
<> mapMultScoredExclHead <> mapMultScoredExclHead
-- An original way to filter to start with -- An original way to filter to start with
!cands = setListType (Just CandidateTerm) !cands = setListType (Just CandidateTerm)
$ canMonoScoredIncHead $ canMonoScoredIncHead
<> canMonoScoredExclHead <> canMonoScoredExclHead
<> canMulScoredInclHead <> canMulScoredInclHead
......
...@@ -325,7 +325,7 @@ flowCorpusUser l user corpusName ctype ids mfslw = do ...@@ -325,7 +325,7 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
--let gp = (GroupParams l 2 3 (StopSize 3)) --let gp = (GroupParams l 2 3 (StopSize 3))
-- Here the PosTagAlgo should be chosen according to the Lang -- Here the PosTagAlgo should be chosen according to the Lang
_ <- case mfslw of _ <- case mfslw of
(Just (NoList _)) -> do (Just (NoList _)) -> do
printDebug "Do not build list" mfslw printDebug "Do not build list" mfslw
pure () pure ()
......
...@@ -8,7 +8,7 @@ Stability : experimental ...@@ -8,7 +8,7 @@ Stability : experimental
Portability : POSIX Portability : POSIX
Add Documents/Contact to a Corpus/Annuaire. Add Documents/Contact to a Corpus/Annuaire.
-} -}
------------------------------------------------------------------------ ------------------------------------------------------------------------
{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveDataTypeable #-}
...@@ -78,4 +78,3 @@ instance ToRow InputData where ...@@ -78,4 +78,3 @@ instance ToRow InputData where
, toField (0 :: Int) , toField (0 :: Int)
, toField (1 :: Int) , toField (1 :: Int)
] ]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment