Restore and refactor isStopTerm

parent 00058186
...@@ -60,8 +60,8 @@ buildNgramsTermsList l n m uCid mCid = do ...@@ -60,8 +60,8 @@ buildNgramsTermsList l n m uCid mCid = do
candidates <- sortTficf <$> getTficf' uCid mCid (ngramsGroup l n m) candidates <- sortTficf <$> getTficf' uCid mCid (ngramsGroup l n m)
--printDebug "candidate" (length candidates) --printDebug "candidate" (length candidates)
--let termList = toTermList (isStopTerm . fst) candidates let termList = toTermList (isStopTerm . fst) candidates
let termList = toTermList ((\_ -> False) . fst) candidates --let termList = toTermList ((\_ -> False) . fst) candidates
--printDebug "termlist" (length termList) --printDebug "termlist" (length termList)
let ngs = List.concat $ map toNgramsElement termList let ngs = List.concat $ map toNgramsElement termList
...@@ -104,10 +104,6 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs ...@@ -104,10 +104,6 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
b = 400 b = 400
-- | Decide whether a candidate term must be rejected as a stop term.
--
-- A term is a stop term when either of the following holds:
--
-- * it is shorter than three characters, or
-- * it contains at least one character that is neither alphabetic nor one
--   of the tolerated separators (hyphen, space, slash, parentheses).
--
-- Separators are tolerated so that multi-word or hyphenated terms are not
-- rejected merely for containing them.
isStopTerm :: Text -> Bool
isStopTerm x = Text.length x < 3 || Text.any isStopChar x
  where
    -- Characters that may appear inside a term without disqualifying it.
    allowedSeparators :: [Char]
    allowedSeparators = "- /()"

    -- A character disqualifies the term when it is neither a tolerated
    -- separator nor alphabetic.
    isStopChar :: Char -> Bool
    isStopChar c = not (c `elem` allowedSeparators || Char.isAlpha c)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment