diff --git a/src/Gargantext/Core/NodeStory.hs b/src/Gargantext/Core/NodeStory.hs index c8b4b05444c4e865087e3ed42b890f2be41a2100..b308b0710f20dd6e5e239e4712331eb19ff4e08a 100644 --- a/src/Gargantext/Core/NodeStory.hs +++ b/src/Gargantext/Core/NodeStory.hs @@ -235,9 +235,9 @@ fixChildrenInNgrams ns = archiveStateFromList $ nsParents <> nsChildrenFixed ) ) <$> nsChildren --- | Sometimes, when we upload a new list, a child can be left without --- a parent. Find such ngrams and set their 'root' and 'parent' to --- 'Nothing'. +-- | (#281) Sometimes, when we upload a new list, a child can be left +-- without a parent. Find such ngrams and set their 'root' and +-- 'parent' to 'Nothing'. fixChildrenWithNoParent :: NgramsState' -> NgramsState' fixChildrenWithNoParent ns = archiveStateFromList $ nsParents <> nsChildrenFixed where diff --git a/src/Gargantext/Core/Text/List.hs b/src/Gargantext/Core/Text/List.hs index 55110d96e27d8638d546a3f4d8b62722bd9343aa..f708790c971266915b2d4219e207289970e7280a 100644 --- a/src/Gargantext/Core/Text/List.hs +++ b/src/Gargantext/Core/Text/List.hs @@ -9,14 +9,13 @@ Portability : POSIX -} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ScopedTypeVariables #-} module Gargantext.Core.Text.List where -import Control.Lens hiding (both) -- ((^.), view, over, set, (_1), (_2)) +import Control.Lens ( view, over ) -- ((^.), view, over, set, (_1), (_2)) import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HashMap import Data.HashSet (HashSet) @@ -27,26 +26,26 @@ import Data.Set qualified as Set import Data.Tuple.Extra (both) import Gargantext.API.Ngrams.Types (NgramsElement, NgramsTerm(..)) import Gargantext.Core.NLP (HasNLPServer) -import Gargantext.Core.NodeStory +import Gargantext.Core.NodeStory.Types ( HasNodeStory ) import Gargantext.Core.Text (size) -import Gargantext.Core.Text.List.Group +import Gargantext.Core.Text.List.Group ( toGroupedTree, setScoresWithMap ) import Gargantext.Core.Text.List.Group.Prelude import Gargantext.Core.Text.List.Group.WithStem -import Gargantext.Core.Text.List.Social -import Gargantext.Core.Text.List.Social.Prelude +import Gargantext.Core.Text.List.Social ( FlowSocialListWith, flowSocialList ) +import Gargantext.Core.Text.List.Social.Prelude ( FlowListScores, FlowCont(FlowCont), flc_scores ) import Gargantext.Core.Text.Metrics (scored', Scored(..), scored_speExc, scored_genInc, normalizeGlobal, normalizeLocal, scored_terms) -import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId, ContextId) import Gargantext.Core.Types.Individu (User(..)) +import Gargantext.Core.Types.Main ( ListType(..) ) import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsUser, getContextsByNgramsOnlyUser) import Gargantext.Database.Action.Metrics.TFICF (getTficf_withSample) +import Gargantext.Database.Admin.Types.Node ( MasterCorpusId, UserCorpusId, ContextId ) import Gargantext.Database.Prelude (DBCmd) -import Gargantext.Database.Query.Table.Ngrams (text2ngrams) import Gargantext.Database.Query.Table.NgramsPostag (selectLems) import Gargantext.Database.Query.Table.Node (defaultList) import Gargantext.Database.Query.Table.Node.Error (HasNodeError()) import Gargantext.Database.Query.Tree.Error (HasTreeError) -import Gargantext.Database.Schema.Ngrams (NgramsType(..), Ngrams(..)) +import Gargantext.Database.Schema.Ngrams (NgramsType(..), Ngrams(..), text2ngrams) import Gargantext.Prelude {- @@ -81,8 +80,8 @@ buildNgramsLists user uCid mCid mfslw gp = do pure $ Map.unions $ [ngTerms] <> othersTerms -data MapListSize = MapListSize { unMapListSize :: !Int } -data MaxListSize = MaxListSize { unMaxListSize :: !Int } +newtype MapListSize = MapListSize { unMapListSize :: Int } +newtype MaxListSize = MaxListSize { unMaxListSize :: Int } buildNgramsOthersList :: ( HasNodeError err , HasNLPServer env @@ -103,7 +102,7 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize, <- flowSocialList mfslw user nt ( FlowCont HashMap.empty $ HashMap.fromList $ List.zip (HashMap.keys allTerms) - (List.cycle [mempty]) + (repeat mempty) ) let groupedWithList = toGroupedTree {- groupParams -} socialLists allTerms @@ -113,7 +112,7 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize, (mapTerms, tailTerms') = HashMap.partition ((== Just MapTerm) . viewListType) tailTerms - listSize = mapListSize - (List.length mapTerms) + listSize = mapListSize - List.length mapTerms (mapTerms', candiTerms) = both HashMap.fromList $ List.splitAt listSize $ List.take maxListSize @@ -121,10 +120,10 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize, $ HashMap.toList tailTerms' - pure $ Map.fromList [( nt, List.take maxListSize $ (toNgramsElement stopTerms) - <> (toNgramsElement mapTerms ) - <> (toNgramsElement $ setListType (Just MapTerm ) mapTerms' ) - <> (toNgramsElement $ setListType (Just CandidateTerm) candiTerms) + pure $ Map.fromList [( nt, List.take maxListSize $ toNgramsElement stopTerms + <> toNgramsElement mapTerms + <> toNgramsElement (setListType (Just MapTerm ) mapTerms') + <> toNgramsElement (setListType (Just CandidateTerm) candiTerms) )] @@ -135,7 +134,7 @@ getGroupParams :: ( HasNodeError err getGroupParams gp@(GroupWithPosTag l nsc _m) ng = do !hashMap <- HashMap.fromList <$> selectLems l nsc (HashSet.toList ng) -- printDebug "hashMap" hashMap - pure $ over gwl_map (\x -> x <> hashMap) gp + pure $ over gwl_map (<> hashMap) gp getGroupParams gp _ = pure gp @@ -168,7 +167,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi <- flowSocialList mfslw user nt ( FlowCont HashMap.empty $ HashMap.fromList $ List.zip (HashMap.keys allTerms) - (List.cycle [mempty]) + (repeat mempty) ) -- printDebug "[buildNgramsTermsList: Flow Social List / end]" nt @@ -187,7 +186,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi !socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists !groupedWithList = toGroupedTree socialLists_Stemmed allTerms !(stopTerms, candidateTerms) = HashMap.partition ((== Just StopTerm) . viewListType) - $ HashMap.filter (\g -> (view gts'_score g) > 1) + $ HashMap.filter (\g -> view gts'_score g > 1) $ view flc_scores groupedWithList !(groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms @@ -269,8 +268,8 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi !(monoScored, multScored) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) groupedTreeScores_SpeGen -- filter with max score - partitionWithMaxScore = HashMap.partition (\g -> (view scored_genInc $ view gts'_score g) - > (view scored_speExc $ view gts'_score g) + partitionWithMaxScore = HashMap.partition (\g -> view scored_genInc (view gts'_score g) + > view scored_speExc (view gts'_score g) ) !(monoScoredIncl, monoScoredExcl) = partitionWithMaxScore monoScored @@ -285,25 +284,25 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi !inclSize = 0.4 :: Double !exclSize = 1 - inclSize - splitAt'' max' n' = (both (HashMap.fromList)) . (List.splitAt (round $ n' * max')) - sortOn' f = (List.sortOn (Down . (view (gts'_score . f)) . snd)) . HashMap.toList + splitAt'' max' n' = both HashMap.fromList . List.splitAt (round $ n' * max') + sortOn' f = List.sortOn (Down . view (gts'_score . f) . snd) . HashMap.toList monoInc_size n = splitAt'' n $ monoSize * inclSize / 2 multExc_size n = splitAt'' n $ multSize * exclSize / 2 - !(mapMonoScoredInclHead, monoScoredInclTail) = monoInc_size mapSize $ (sortOn' scored_genInc) monoScoredIncl - !(mapMonoScoredExclHead, monoScoredExclTail) = monoInc_size mapSize $ (sortOn' scored_speExc) monoScoredExcl + !(mapMonoScoredInclHead, monoScoredInclTail) = monoInc_size mapSize $ sortOn' scored_genInc monoScoredIncl + !(mapMonoScoredExclHead, monoScoredExclTail) = monoInc_size mapSize $ sortOn' scored_speExc monoScoredExcl - !(mapMultScoredInclHead, multScoredInclTail) = multExc_size mapSize $ (sortOn' scored_genInc) multScoredIncl - !(mapMultScoredExclHead, multScoredExclTail) = multExc_size mapSize $ (sortOn' scored_speExc) multScoredExcl + !(mapMultScoredInclHead, multScoredInclTail) = multExc_size mapSize $ sortOn' scored_genInc multScoredIncl + !(mapMultScoredExclHead, multScoredExclTail) = multExc_size mapSize $ sortOn' scored_speExc multScoredExcl - !(canMonoScoredIncHead , _) = monoInc_size canSize $ (sortOn' scored_genInc) monoScoredInclTail - !(canMonoScoredExclHead, _) = monoInc_size canSize $ (sortOn' scored_speExc) monoScoredExclTail + !(canMonoScoredIncHead , _) = monoInc_size canSize $ sortOn' scored_genInc monoScoredInclTail + !(canMonoScoredExclHead, _) = monoInc_size canSize $ sortOn' scored_speExc monoScoredExclTail - !(canMulScoredInclHead, _) = multExc_size canSize $ (sortOn' scored_genInc) multScoredInclTail - !(canMultScoredExclHead, _) = multExc_size canSize $ (sortOn' scored_speExc) multScoredExclTail + !(canMulScoredInclHead, _) = multExc_size canSize $ sortOn' scored_genInc multScoredInclTail + !(canMultScoredExclHead, _) = multExc_size canSize $ sortOn' scored_speExc multScoredExclTail ------------------------------------------------------------ -- Final Step building the Typed list diff --git a/src/Gargantext/Core/Text/List/Group.hs b/src/Gargantext/Core/Text/List/Group.hs index f7eb1653eceacf100395c1d3673211612c9e62bb..53833218c1f5d3e5f2b838d710ffa7d7d3a9ee2f 100644 --- a/src/Gargantext/Core/Text/List/Group.hs +++ b/src/Gargantext/Core/Text/List/Group.hs @@ -9,11 +9,8 @@ Portability : POSIX -} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE FunctionalDependencies #-} -{-# LANGUAGE InstanceSigs #-} module Gargantext.Core.Text.List.Group where @@ -23,8 +20,8 @@ import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict qualified as HashMap import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.Core.Text.List.Group.Prelude -import Gargantext.Core.Text.List.Group.WithScores -import Gargantext.Core.Text.List.Social.Prelude +import Gargantext.Core.Text.List.Group.WithScores ( groupWithScores' ) +import Gargantext.Core.Text.List.Social.Prelude ( FlowListScores, FlowCont ) import Gargantext.Prelude ------------------------------------------------------------------------ toGroupedTree :: (Ord a, Monoid a, HasSize a) @@ -43,9 +40,7 @@ setScoresWithMap :: (Ord a, Ord b, Monoid b) => HashMap NgramsTerm b -> HashMap NgramsTerm (GroupedTreeScores b) setScoresWithMap m = setScoresWith (score m) where - score m' t = case HashMap.lookup t m' of - Nothing -> mempty - Just r -> r + score m' t = fromMaybe mempty (HashMap.lookup t m') setScoresWith :: (Ord a, Ord b) => (NgramsTerm -> b) @@ -58,8 +53,7 @@ setScoresWith f = Map.mapWithKey (\k v -> over gts'_children (setScoresWith f) ) -} setScoresWith f = HashMap.mapWithKey (\k v -> v { _gts'_score = f k - , _gts'_children = setScoresWith f - $ view gts'_children v - } - ) + , _gts'_children = setScoresWith f $ view gts'_children v + } + ) ------------------------------------------------------------------------ diff --git a/src/Gargantext/Core/Text/Terms.hs b/src/Gargantext/Core/Text/Terms.hs index 68526aad6a552758d2c2ea1ae9e1219a5913e21c..5984f195d61afb10ad0d78adf36139f3c4a6e52b 100644 --- a/src/Gargantext/Core/Text/Terms.hs +++ b/src/Gargantext/Core/Text/Terms.hs @@ -103,8 +103,7 @@ withLang (Unsupervised {..}) ns = Unsupervised { _tt_model = m', .. } $ fmap toToken $ uniText $ Text.intercalate " . " - $ List.concat - $ map hasText ns + $ concatMap hasText ns just_m -> just_m withLang l _ = l diff --git a/src/Gargantext/Core/Text/Terms/Multi.hs b/src/Gargantext/Core/Text/Terms/Multi.hs index ad22aa3bba0f2754dcc5a6197d7c6400006df3b6..4d70ea8d7ba3de263f65dc5f6fb6b7f4fb8040fa 100644 --- a/src/Gargantext/Core/Text/Terms/Multi.hs +++ b/src/Gargantext/Core/Text/Terms/Multi.hs @@ -11,25 +11,21 @@ Multi-terms are ngrams where n > 1. -} -{-# LANGUAGE OverloadedStrings #-} - module Gargantext.Core.Text.Terms.Multi (multiterms, multiterms_rake, tokenTagsWith, tokenTags, cleanTextForNLP) where -import Control.Applicative -import Data.Attoparsec.Text as DAT -import Data.Text hiding (map, group, filter, concat) +import Data.Attoparsec.Text as DAT ( digit, space, notChar, string ) import Gargantext.Core (Lang(..), NLPServerConfig(..), PosTagAlgo(..)) import Gargantext.Core.Text.Terms.Multi.Lang.En qualified as En import Gargantext.Core.Text.Terms.Multi.Lang.Fr qualified as Fr -import Gargantext.Core.Text.Terms.Multi.PosTagging -import Gargantext.Core.Text.Terms.Multi.PosTagging.Types +import Gargantext.Core.Text.Terms.Multi.PosTagging ( corenlp, tokens2tokensTags ) +import Gargantext.Core.Text.Terms.Multi.PosTagging.Types ( PosSentences(_sentences), Sentence(_sentenceTokens) ) import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake) -import Gargantext.Core.Types +import Gargantext.Core.Types ( POS(NP), Terms(Terms), TermsWithCount, TokenTag(TokenTag, _my_token_pos) ) import Gargantext.Core.Utils (groupWithCounts) import Gargantext.Prelude import Gargantext.Utils.SpacyNLP qualified as SpacyNLP -import Replace.Attoparsec.Text as RAT +import Replace.Attoparsec.Text as RAT ( streamEdit ) ------------------------------------------------------------------- type NLP_API = Lang -> Text -> IO PosSentences diff --git a/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs b/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs index f0f29c5f54e96bdf5dc9f08e07776f3ea1ae0b1a..04817e54c0bf7071f287e8f5039a6779408ae98f 100644 --- a/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs +++ b/src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs @@ -17,8 +17,8 @@ module Gargantext.Core.Text.Terms.Multi.Lang.En (groupTokens) where import Gargantext.Prelude -import Gargantext.Core.Types -import Gargantext.Core.Text.Terms.Multi.Group +import Gargantext.Core.Types ( POS(CC, IN, DT, NP, JJ), TokenTag ) +import Gargantext.Core.Text.Terms.Multi.Group ( group2 ) ------------------------------------------------------------------------ -- | Rule grammar to group tokens @@ -31,8 +31,7 @@ groupTokens ntags = group2 NP NP -- $ group2 VB NP $ group2 JJ NP $ group2 JJ JJ - $ group2 JJ CC - $ ntags + $ group2 JJ CC ntags ------------------------------------------------------------------------ --groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs) diff --git a/src/Gargantext/Database/Action/Flow/Extract.hs b/src/Gargantext/Database/Action/Flow/Extract.hs index 2b03eb100c13003d98d057144f5e1352b246a674..6556826605fe699ec0c8db4ccf15820884c2be5a 100644 --- a/src/Gargantext/Database/Action/Flow/Extract.hs +++ b/src/Gargantext/Database/Action/Flow/Extract.hs @@ -20,15 +20,16 @@ module Gargantext.Database.Action.Flow.Extract import Control.Lens ((^.), _Just, view) import Data.HashMap.Strict qualified as HashMap import Data.Map.Strict qualified as DM -import Gargantext.Core (Lang, NLPServerConfig, PosTagAlgo(CoreNLP)) +import Gargantext.Core (Lang, NLPServerConfig, PosTagAlgo(CoreNLP)) import Gargantext.Core.Text (HasText(..)) import Gargantext.Core.Text.Corpus.Parsers (splitOn) import Gargantext.Core.Text.Terms (ExtractNgramsT, ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractNgramsT, extractTerms, tt_lang) import Gargantext.Core.Types (POS(NP), TermsCount) -import Gargantext.Database.Admin.Types.Hyperdata (HyperdataContact, HyperdataDocument, cw_lastName, hc_who, hd_authors, hd_bdd, hd_institutes, hd_source) -import Gargantext.Database.Admin.Types.Node +import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_lastName, hc_who ) +import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_authors, hd_bdd, hd_institutes, hd_source ) +import Gargantext.Database.Admin.Types.Node ( Node ) import Gargantext.Database.Prelude (DBCmd) -import Gargantext.Database.Schema.Ngrams +import Gargantext.Database.Schema.Ngrams ( NgramsType(..), text2ngrams ) import Gargantext.Database.Schema.Node (NodePoly(..)) import Gargantext.Prelude @@ -49,6 +50,9 @@ instance ExtractNgramsT HyperdataContact pure $ HashMap.fromList $ [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ] +-- | Main ngrams extraction functionality. +-- For NgramsTerms, this calls NLP server under the hood. +-- For Sources, Institutes, Authors, this uses simple split on " ". instance ExtractNgramsT HyperdataDocument where extractNgramsT :: NLPServerConfig @@ -72,9 +76,8 @@ instance ExtractNgramsT HyperdataDocument $ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd)) $ doc ^. hd_authors - termsWithCounts' <- map (\(t, cnt) -> (enrichedTerms (lang ^. tt_lang) CoreNLP NP t, cnt)) - <$> concat - <$> liftBase (extractTerms ncs lang $ hasText doc) + termsWithCounts' <- map (first (enrichedTerms (lang ^. tt_lang) CoreNLP NP)) . concat <$> + liftBase (extractTerms ncs lang $ hasText doc) pure $ HashMap.fromList $ [(SimpleNgrams source, (DM.singleton Sources 1, 1)) ] diff --git a/src/Gargantext/Database/Action/Search.hs b/src/Gargantext/Database/Action/Search.hs index 2ccc2cc1147cec496af2e943c756aa197d8212f5..1ed8e41a35d184e26a67e899ec8eba9d817e1300 100644 --- a/src/Gargantext/Database/Action/Search.hs +++ b/src/Gargantext/Database/Action/Search.hs @@ -23,7 +23,7 @@ module Gargantext.Database.Action.Search ( import Control.Arrow (returnA) import Control.Lens ((^.), view) -import Data.BoolExpr +import Data.BoolExpr ( BoolExpr(..), Signed(Negative, Positive) ) import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Profunctor.Product (p4) @@ -31,25 +31,26 @@ import Data.Set qualified as Set import Data.Text (unpack) import Data.Text qualified as T import Data.Time (UTCTime) -import Gargantext.Core +import Gargantext.Core ( Lang(EN), HasDBid(toDBid) ) import Gargantext.Core.Text.Corpus.Query qualified as API import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..)) import Gargantext.Core.Types import Gargantext.Core.Types.Query (IsTrash, Limit, Offset) -import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..)) +import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact(..) ) +import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) ) import Gargantext.Database.Prelude (runOpaQuery, runCountOpaQuery, DBCmd) import Gargantext.Database.Query.Facet -import Gargantext.Database.Query.Filter -import Gargantext.Database.Query.Table.Context +import Gargantext.Database.Query.Filter ( limit', offset' ) +import Gargantext.Database.Query.Table.Context ( queryContextSearchTable ) import Gargantext.Database.Query.Table.ContextNodeNgrams (queryContextNodeNgramsTable) -import Gargantext.Database.Query.Table.Node +import Gargantext.Database.Query.Table.Node ( queryNodeSearchTable, defaultList ) import Gargantext.Database.Query.Table.Node.Error (HasNodeError()) import Gargantext.Database.Query.Table.NodeContext -import Gargantext.Database.Query.Table.NodeContext_NodeContext +import Gargantext.Database.Schema.NodeContext_NodeContext ( NodeContext_NodeContextRead, queryNodeContext_NodeContextTable, ncnc_nodecontext2, ncnc_nodecontext1 ) import Gargantext.Database.Schema.Context import Gargantext.Database.Schema.ContextNodeNgrams (ContextNodeNgramsPoly(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..)) -import Gargantext.Database.Schema.Node +import Gargantext.Database.Schema.Node ( NodePolySearch(_ns_hyperdata, _ns_search, _ns_typename, _ns_id) ) import Gargantext.Prelude hiding (groupBy) import Opaleye hiding (Order) import Opaleye qualified as O hiding (Order) @@ -59,7 +60,7 @@ import Opaleye qualified as O hiding (Order) -- queryToTsSearch :: API.Query -> Field SqlTSQuery -queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST) +queryToTsSearch q = sqlToTSQuery $ T.unpack $ API.interpretQuery q transformAST where -- It's important to understand how things work under the hood: When we perform diff --git a/src/Gargantext/Utils/SpacyNLP.hs b/src/Gargantext/Utils/SpacyNLP.hs index 2696a9da27afe8e577bd213a12caaedd7d78c303..4cdc63a3215b78c1975569916440dcf497034f7b 100644 --- a/src/Gargantext/Utils/SpacyNLP.hs +++ b/src/Gargantext/Utils/SpacyNLP.hs @@ -13,7 +13,6 @@ Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server -} -{-# LANGUAGE TemplateHaskell #-} module Gargantext.Utils.SpacyNLP ( module Gargantext.Utils.SpacyNLP.Types @@ -24,9 +23,8 @@ module Gargantext.Utils.SpacyNLP ( ) where import Data.Aeson (encode) -import Data.Text hiding (map, group, filter, concat, zip) import Gargantext.Core (Lang(..)) -import Gargantext.Core.Text.Terms.Multi.PosTagging.Types +import Gargantext.Core.Text.Terms.Multi.PosTagging.Types ( PosSentences(PosSentences), Sentence(Sentence), Token(Token) ) import Gargantext.Prelude import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response) import Network.URI (URI(..)) @@ -42,22 +40,22 @@ spacyRequest uri txt = do ---------------------------------------------------------------- spacyTagsToToken :: SpacyTags -> Token -spacyTagsToToken st = Token (_spacyTags_index st) - (_spacyTags_normalized st) - (_spacyTags_text st) - (_spacyTags_lemma st) - (_spacyTags_head_index st) - (_spacyTags_char_offset st) - (Just $ _spacyTags_pos st) - (Just $ _spacyTags_ent_type st) - (Just $ _spacyTags_prefix st) - (Just $ _spacyTags_suffix st) +spacyTagsToToken st = + Token (_spacyTags_index st) + (_spacyTags_normalized st) + (_spacyTags_text st) + (_spacyTags_lemma st) + (_spacyTags_head_index st) + (_spacyTags_char_offset st) + (Just $ _spacyTags_pos st) + (Just $ _spacyTags_ent_type st) + (Just $ _spacyTags_prefix st) + (Just $ _spacyTags_suffix st) spacyDataToPosSentences :: SpacyData -> PosSentences spacyDataToPosSentences (SpacyData ds) = PosSentences - $ map (\(i, ts) -> Sentence i ts) - $ zip [1..] - $ map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds + $ zipWith Sentence [1..] + (map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds) -----------------------------------------------------------------