Commit bb33a3e9 authored by Alexandre Delanoë

[FIX] Start to clean the text

parent d7aeb114
...@@ -9,10 +9,9 @@ Portability : POSIX ...@@ -9,10 +9,9 @@ Portability : POSIX
-} -}
module OpenAlex.Utils where module OpenAlex.Utils where
import Protolude
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import qualified Data.Text as T import qualified Data.Text as T
import Protolude
-- | https://docs.openalex.org/api-entities/works/work-object#abstract_inverted_index -- | https://docs.openalex.org/api-entities/works/work-object#abstract_inverted_index
-- https://en.wikipedia.org/wiki/Inverted_index -- https://en.wikipedia.org/wiki/Inverted_index
...@@ -21,6 +20,9 @@ import Protolude ...@@ -21,6 +20,9 @@ import Protolude
-- The index is of form: { word : [ positions-in-text ] } -- The index is of form: { word : [ positions-in-text ] }
reconstructAbstract :: Maybe (Map Text [Int]) -> Text reconstructAbstract :: Maybe (Map Text [Int]) -> Text
reconstructAbstract Nothing = "" reconstructAbstract Nothing = ""
reconstructAbstract (Just m) = T.intercalate " " $ snd <$> sort wordPositions reconstructAbstract (Just m) = clean <$> T.intercalate " " $ snd <$> sort wordPositions
where where
wordPositions = concatMap (\(word, positions) -> (\pos -> (pos, word)) <$> positions) $ Map.toList m wordPositions = concatMap (\(word, positions) -> (\pos -> (pos, word)) <$> positions) $ Map.toList m
clean :: Text -> Text
clean = (fromMaybe "") . head . T.splitOn "Abstract Full Text"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment