Commit bb33a3e9 authored by Alexandre Delanoë

[FIX] Start to clean the text

parent d7aeb114
......@@ -9,10 +9,9 @@ Portability : POSIX
-}
module OpenAlex.Utils where
import Protolude
import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import Protolude
-- | https://docs.openalex.org/api-entities/works/work-object#abstract_inverted_index
-- https://en.wikipedia.org/wiki/Inverted_index
......@@ -21,6 +20,9 @@ import Protolude
-- The index is of form: { word : [ positions-in-text ] }
-- Rebuild the abstract text from an inverted index.
--
-- 'Nothing' yields the empty text. Otherwise every (position, word) pair is
-- sorted, the words are joined with single spaces, and everything from the
-- first occurrence of the marker "Abstract Full Text" onwards is dropped.
reconstructAbstract :: Maybe (Map Text [Int]) -> Text
reconstructAbstract Nothing = ""
reconstructAbstract (Just m) = clean . T.intercalate " " $ snd <$> sort wordPositions
  where
    -- Flatten { word : [positions] } into (position, word) pairs so that
    -- sorting puts the words back in reading order.
    wordPositions =
      [ (pos, word)
      | (word, positions) <- Map.toList m
      , pos <- positions
      ]

    -- Keep only the text before the marker. 'T.breakOn' returns the whole
    -- input as the prefix when the marker is absent, so this is total.
    clean :: Text -> Text
    clean = fst . T.breakOn "Abstract Full Text"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment