[cooc] first draft implementation

parent abf437c8
Pipeline #5947 passed with stages
in 109 minutes and 21 seconds
......@@ -19,18 +19,24 @@ Count API part of Gargantext.
module Gargantext.API.Count
where
import Data.Map.Strict qualified as DMS
import Data.Set qualified as Set
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema )
import Data.Text (pack)
import Gargantext.API.Ngrams.Prelude (getTermList)
import Gargantext.API.Prelude (GargNoServer, GargServer)
import Gargantext.Core.Text.Metrics.Count qualified as Metrics
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Core.Types.Main (ListType(MapTerm))
import Gargantext.Core.Types (Terms(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Node ( ListId )
import Gargantext.Prelude
import Servant (JSON, Post)
-- import Gargantext.Utils.Servant (ZIP)
import Servant ((:<|>)((:<|>)), (:>), JSON, Capture, Get, Header, Headers(..), Post, ReqBody, Summary, addHeader)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary(..))
-----------------------------------------------------------------------
-- TODO-ACCESS: CanCount
-- TODO-EVENTS: No events as this is a read only query.
type CountAPI = Post '[JSON] Counts
-----------------------------------------------------------------------
data Scraper = Pubmed | Hal | IsTex | Isidore
......@@ -143,3 +149,46 @@ $(deriveJSON (unPrefix "count_") ''Count)
instance FromJSON Counts
instance ToJSON Counts
------------------------
-- we can't reliably represent a Map ([Text], [Text]) in JSON so we just represent that as a list
data CoocEntry =
CoocEntry { ce_left :: [Text]
, ce_right :: [Text]
, ce_cooc :: Int }
deriving (Show, Eq, Generic)
instance ToSchema CoocEntry
$(deriveJSON (unPrefix "ce_") ''CoocEntry)
data CoocExport =
CoocExport { cex_entries :: [CoocEntry] }
deriving (Show, Eq, Generic)
instance ToSchema CoocExport
$(deriveJSON (unPrefix "cex_") ''CoocExport)
-----------------------------------------------------------------------
-- TODO-ACCESS: CanCount
-- TODO-EVENTS: No events as this is a read only query.
type CountAPI = Summary "Count API endpoint"
:> ( ReqBody '[JSON] Query
:> Post '[JSON] Counts)
:<|> ( Capture "list_id" ListId
:> "cooc.json"
:> Get '[JSON] (Headers '[Header "Content-Disposition" Text] CoocExport))
countApi :: GargServer CountAPI
countApi = count
:<|> cooc
cooc :: ListId -> GargNoServer (Headers '[Header "Content-Disposition" Text] CoocExport)
cooc listId = do
mTermList <- getTermList listId MapTerm NgramsTerms
let termsWithLabels = fromMaybe [] mTermList
let terms = (\(label, terms_) -> (\ts -> Terms { _terms_label = label
, _terms_stem = Set.fromList ts }) <$> terms_) <$> termsWithLabels
let cooc' = Metrics.cooc terms
let cex_entries = (\((l, r), c) -> CoocEntry { ce_left = l, ce_right = r, ce_cooc = c }) <$> DMS.toList cooc'
pure $
addHeader ("attachment; filename=CoocExport.json") $ CoocExport { cex_entries = cex_entries }
......@@ -79,3 +79,5 @@ patchMSet_toList = HM.toList . unPatchMapToHashMap . unPatchMSet
unMSet :: MSet a -> [a]
unMSet (MSet a) = Map.keys a
......@@ -10,27 +10,26 @@ Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UndecidableInstances #-}
module Gargantext.API.Routes
where
import Control.Lens (view)
import Data.Validity
import Data.List qualified as L
import Gargantext.API.Admin.Auth (ForgotPasswordAPI, ForgotPasswordAsyncAPI, withAccess, withPolicyT)
import Gargantext.API.Admin.Auth.Types (AuthRequest, AuthResponse, AuthenticatedUser(..), PathId(..))
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.FrontEnd (FrontEndAPI)
import Gargantext.API.Auth.PolicyCheck
import Gargantext.API.Context
import Gargantext.API.Count (CountAPI, count, Query)
import Gargantext.API.Auth.PolicyCheck ( PolicyChecked, nodeChecks )
import Gargantext.API.Context ( ContextAPI, contextAPI )
import Gargantext.API.Count (CountAPI, countApi)
import Gargantext.API.Errors (GargErrorScheme (..))
import Gargantext.API.Errors.Types
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.GraphQL qualified as GraphQL
import Gargantext.API.Members (MembersAPI, members)
import Gargantext.API.Ngrams (TableNgramsApi, apiNgramsTableDoc)
......@@ -43,26 +42,26 @@ import Gargantext.API.Node.Corpus.Export.Types qualified as CorpusExport
import Gargantext.API.Node.Corpus.New qualified as New
import Gargantext.API.Node.Document.Export qualified as DocumentExport
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude
import Gargantext.API.Prelude ( GargM, GargServer )
import Gargantext.API.Public qualified as Public
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Viz.Graph.API
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Core.Viz.Graph.API ( GraphAPI, graphAPI )
import Gargantext.Database.Admin.Types.Hyperdata.Any ( HyperdataAny )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire, HyperdataCorpus )
import Gargantext.Database.Admin.Types.Node (AnnuaireId, DocId, CorpusId, ContextId, NodeId)
import Gargantext.Database.Prelude (HasConfig(..))
import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_max_docs_scrapers)
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Network.Wai (requestHeaders)
import Servant
import Servant.Auth as SA
import Servant.Auth as SA ( Auth, Cookie, JWT )
import Servant.Auth.Swagger ()
import Servant.Ekg
import Servant.Server.Internal.Delayed
import Servant.Server.Internal.DelayedIO
import Servant.Swagger
import Servant.Swagger.UI
import qualified Data.List as L
import Servant.Ekg (HasEndpoint(..))
import Servant.Server.Internal.Delayed ( addHeaderCheck )
import Servant.Server.Internal.DelayedIO ( withRequest, DelayedIO )
import Servant.Swagger ( HasSwagger(..) )
import Servant.Swagger.UI ( SwaggerSchemaUI )
data WithCustomErrorScheme a
......@@ -182,9 +181,7 @@ type GargPrivateAPI' =
-- :<|> "counts" :> Stream GET NewLineFraming '[JSON] Count :> CountAPI
-- TODO-SECURITY
:<|> "count" :> Summary "Count endpoint"
:> ReqBody '[JSON] Query
:> CountAPI
:<|> "count" :> CountAPI
-- Corpus endpoint --> TODO rename s/search/filter/g
-- :<|> "search" :> Capture "corpus" NodeId
......@@ -280,7 +277,7 @@ serverPrivateGargAPI' authenticatedUser@(AuthenticatedUser userNodeId userId)
:<|> DocumentExport.api userNodeId
:<|> count -- TODO: undefined
:<|> countApi -- TODO: undefined
-- :<|> withAccess (Proxy :: Proxy (Search.API Search.SearchResult)) Proxy uid
-- <$> PathNode <*> Search.api -- TODO: move elsewhere
......
......@@ -67,12 +67,13 @@ type Threshold = Int
removeApax :: Threshold -> Map ([Text], [Text]) Int -> Map ([Text], [Text]) Int
removeApax t = DMS.filter (> t)
cooc :: [[Terms]] -> Map ([Text], [Text]) Int
cooc tss = coocOnWithLabel _terms_stem (useLabelPolicy label_policy) tss
where
terms_occs = occurrencesOn _terms_stem (List.concat tss)
label_policy = mkLabelPolicy terms_occs
coocOnWithLabel :: (Ord label, Ord b) => (a -> b) -> (b -> label)
-> [[a]] -> Map (label, label) Coocs
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment