Commit f56e8fc8 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev' into testing

parents c870f857 fa24156a
Pipeline #6048 failed with stages in 8 minutes and 44 seconds
......@@ -5,7 +5,7 @@ cabal-version: 3.4
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.7
version: 0.0.7.1.3
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -133,6 +133,7 @@ library
Gargantext.API.Node.Corpus.Update
Gargantext.API.Node.File
Gargantext.API.Node.Share
Gargantext.API.Node.ShareURL
Gargantext.API.Node.Update
Gargantext.API.Prelude
Gargantext.API.Routes
......
......@@ -70,6 +70,7 @@ data Query m
, languages :: m [GQLNLP.LanguageTuple]
, nodes :: GQLNode.NodeArgs -> m [GQLNode.Node]
, nodes_corpus :: GQLNode.CorpusArgs -> m [GQLNode.Corpus]
, node_children :: GQLNode.NodeChildrenArgs -> m [GQLNode.Node]
, node_parent :: GQLNode.NodeParentArgs -> m [GQLNode.Node]
, user_infos :: GQLUserInfo.UserInfoArgs -> m [GQLUserInfo.UserInfo]
, users :: GQLUser.UserArgs -> m [GQLUser.User m]
......@@ -121,6 +122,7 @@ rootResolver authenticatedUser policyManager =
, languages = GQLNLP.resolveLanguages
, nodes = GQLNode.resolveNodes authenticatedUser policyManager
, nodes_corpus = GQLNode.resolveNodesCorpus
, node_children = GQLNode.resolveNodeChildren
, node_parent = GQLNode.resolveNodeParent
, user_infos = GQLUserInfo.resolveUserInfos authenticatedUser policyManager
, users = GQLUser.resolveUsers authenticatedUser policyManager
......
......@@ -14,23 +14,21 @@ Portability : POSIX
module Gargantext.API.GraphQL.Node where
import Data.Aeson
import Data.Aeson ( Result(..), Value(..) )
import Data.Aeson.KeyMap qualified as KM
import Data.Morpheus.Types ( GQLType )
import Data.Text qualified as T
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Auth.PolicyCheck
import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser )
import Gargantext.API.Auth.PolicyCheck ( nodeChecks, AccessPolicyManager )
import Gargantext.API.GraphQL.PolicyCheck (withPolicy)
import Gargantext.API.GraphQL.Types
import Gargantext.Core
import Gargantext.API.GraphQL.Types ( GqlM )
import Gargantext.Core ( HasDBid(lookupDBid) )
import Gargantext.Database.Admin.Types.Node (NodeType)
import Gargantext.Database.Admin.Types.Node qualified as NN
import Gargantext.Database.Prelude (CmdCommon) -- , JSONB)
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNode)
import Gargantext.Database.Query.Table.Node (getClosestChildrenByType, getClosestParentIdByType, getNode)
import Gargantext.Database.Schema.Node qualified as N
import Gargantext.Prelude
import PUBMED.Types qualified as PUBMED
import Prelude qualified
data Corpus = Corpus
{ id :: Int
......@@ -89,7 +87,13 @@ dbNodesCorpus corpus_id = do
data NodeParentArgs
= NodeParentArgs
{ node_id :: Int
, parent_type :: Text
, parent_type :: NodeType
} deriving (Generic, GQLType)
data NodeChildrenArgs
= NodeChildrenArgs
{ node_id :: Int
, child_type :: NodeType
} deriving (Generic, GQLType)
resolveNodeParent
......@@ -97,16 +101,21 @@ resolveNodeParent
=> NodeParentArgs -> GqlM e env [Node]
resolveNodeParent NodeParentArgs { node_id, parent_type } = dbParentNodes node_id parent_type
resolveNodeChildren
:: (CmdCommon env)
=> NodeChildrenArgs -> GqlM e env [Node]
resolveNodeChildren NodeChildrenArgs { node_id, child_type } = dbChildNodes node_id child_type
dbParentNodes
:: (CmdCommon env)
=> Int -> Text -> GqlM e env [Node]
dbParentNodes node_id parent_type = do
let mParentType = readEither (T.unpack parent_type) :: Either Prelude.String NodeType
case mParentType of
Left err -> do
lift $ printDebug "[dbParentNodes] error reading parent type" (T.pack err)
pure []
Right parentType -> do
=> Int -> NodeType -> GqlM e env [Node]
dbParentNodes node_id parentType = do
-- let mParentType = readEither (T.unpack parent_type) :: Either Prelude.String NodeType
-- case mParentType of
-- Left err -> do
-- lift $ printDebug "[dbParentNodes] error reading parent type" (T.pack err)
-- pure []
-- Right parentType -> do
mNodeId <- lift $ getClosestParentIdByType (NN.UnsafeMkNodeId node_id) parentType -- (fromNodeTypeId parent_type_id)
case mNodeId of
Nothing -> pure []
......@@ -114,6 +123,13 @@ dbParentNodes node_id parent_type = do
node <- lift $ getNode id
pure [toNode node]
dbChildNodes :: (CmdCommon env)
=> Int -> NodeType -> GqlM e env [Node]
dbChildNodes node_id childType = do
childIds <- lift $ getClosestChildrenByType (NN.UnsafeMkNodeId node_id) childType -- (fromNodeTypeId parent_type_id)
children <- lift $ mapM getNode childIds
pure $ toNode <$> children
toNode :: NN.Node json -> Node
toNode N.Node { .. } = Node { id = nid
, name = _node_name
......
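Together, the new node_children field on Query, the NodeChildrenArgs record, and the resolveNodeChildren/dbChildNodes pair expose first-level child lookup over GraphQL. A hypothetical query against this schema (the argument values and NodeList are made-up examples; the field names come from the records above):

  query {
    node_children(node_id: 42, child_type: NodeList) {
      id
      name
    }
  }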
......@@ -3,11 +3,11 @@ module Gargantext.API.GraphQL.PolicyCheck where
import Prelude
import Control.Monad.Except
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Auth.PolicyCheck
import Gargantext.API.Errors.Types
import Gargantext.API.GraphQL.Types
import Control.Monad.Except (MonadError(..), MonadTrans(..))
import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser )
import Gargantext.API.Auth.PolicyCheck ( BoolExpr, AccessCheck, AccessPolicyManager(..), AccessResult(..))
import Gargantext.API.Errors.Types ( BackendInternalError(..) )
import Gargantext.API.GraphQL.Types (GqlM)
import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
withPolicy :: (HasConnectionPool env, HasConfig env)
......
......@@ -15,15 +15,15 @@ Portability : POSIX
module Gargantext.API.GraphQL.TreeFirstLevel where
import Data.Morpheus.Types (GQLType)
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Auth.PolicyCheck
import Gargantext.API.GraphQL.PolicyCheck
import Gargantext.API.GraphQL.Types
import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser )
import Gargantext.API.Auth.PolicyCheck (AccessPolicyManager, nodeChecks)
import Gargantext.API.GraphQL.PolicyCheck (withPolicy)
import Gargantext.API.GraphQL.Types ( GqlM )
import Gargantext.Core (fromDBid)
import Gargantext.Core.Types (Tree, NodeTree, NodeType)
import Gargantext.Core.Types.Main ( Tree(TreeN), _tn_node, _tn_children, NodeTree(NodeTree, _nt_id, _nt_type), _nt_name )
-- import Gargantext.Core.Types (ContextId, CorpusId, ListId)
import Gargantext.Core.Types.Main ( Tree(..), _tn_node, _tn_children, NodeTree(..), _nt_name )
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Admin.Types.Node (allNodeTypes, NodeId (UnsafeMkNodeId))
import Gargantext.Database.Admin.Types.Node (allNodeTypes, NodeId(..), NodeType)
import Gargantext.Database.Admin.Types.Node qualified as NN
import Gargantext.Database.Prelude (CmdCommon)
import Gargantext.Database.Query.Table.Node (getNode)
......@@ -51,7 +51,8 @@ data TreeFirstLevel m = TreeFirstLevel
, parent :: m (Maybe TreeNode)
, children :: [TreeNode]
} deriving (Generic, GQLType)
data BreadcrumbArgs = BreadcrumbArgs
{
node_id :: Int
......@@ -105,31 +106,33 @@ resolveParent Nothing = pure Nothing
nodeToTreeNode :: HasCallStack => NN.Node json -> Maybe TreeNode
nodeToTreeNode N.Node {..} = if (fromDBid _node_typename /= NN.NodeFolderShared) && (fromDBid _node_typename /= NN.NodeTeam)
then
Just TreeNode { id = NN.unNodeId _node_id
, name = _node_name
, node_type = fromDBid _node_typename
, parent_id = NN.unNodeId <$> _node_parent_id
}
else
Nothing
resolveBreadcrumb :: (CmdCommon env) => BreadcrumbArgs -> GqlM e env (BreadcrumbInfo)
nodeToTreeNode N.Node {..} =
if (fromDBid _node_typename /= NN.NodeFolderShared) && (fromDBid _node_typename /= NN.NodeTeam)
then
Just TreeNode { id = NN.unNodeId _node_id
, name = _node_name
, node_type = fromDBid _node_typename
, parent_id = NN.unNodeId <$> _node_parent_id
}
else
Nothing
resolveBreadcrumb :: (CmdCommon env) => BreadcrumbArgs -> GqlM e env BreadcrumbInfo
resolveBreadcrumb BreadcrumbArgs { node_id } = dbRecursiveParents node_id
convertDbTreeToTreeNode :: HasCallStack => T.DbTreeNode -> TreeNode
convertDbTreeToTreeNode T.DbTreeNode { _dt_name, _dt_nodeId, _dt_typeId, _dt_parentId } = TreeNode
{ name = _dt_name
, id = NN.unNodeId _dt_nodeId
, node_type = fromDBid _dt_typeId
, parent_id = NN.unNodeId <$> _dt_parentId
}
dbRecursiveParents :: (CmdCommon env) => Int -> GqlM e env (BreadcrumbInfo)
dbRecursiveParents node_id = do
let nId = UnsafeMkNodeId node_id
convertDbTreeToTreeNode T.DbTreeNode { _dt_name, _dt_nodeId, _dt_typeId, _dt_parentId } =
TreeNode
{ name = _dt_name
, id = NN.unNodeId _dt_nodeId
, node_type = fromDBid _dt_typeId
, parent_id = NN.unNodeId <$> _dt_parentId
}
dbRecursiveParents :: (CmdCommon env) => Int -> GqlM e env BreadcrumbInfo
dbRecursiveParents nodeId = do
let nId = UnsafeMkNodeId nodeId
dbParents <- lift $ T.recursiveParents nId allNodeTypes
let treeNodes = map convertDbTreeToTreeNode dbParents
let breadcrumbInfo = BreadcrumbInfo { parents = treeNodes }
pure breadcrumbInfo
pure $ BreadcrumbInfo { parents = treeNodes }
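For intuition, the breadcrumb for a corpus nested under a folder comes back roughly as follows (a sketch with hypothetical ids and names; the ordering is whatever T.recursiveParents returns):

  -- Root > My Folder > My Corpus
  -- BreadcrumbInfo { parents = [ TreeNode { id = 1,  name = "Root",      ... }
  --                            , TreeNode { id = 7,  name = "My Folder", ... }
  --                            , TreeNode { id = 42, name = "My Corpus", ... } ] }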
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.API.Node.ShareURL where
import Data.Text
import Gargantext.Prelude
import Gargantext.API.Prelude
import Servant
import Gargantext.Core.Types (NodeType, NodeId, unNodeId)
import Gargantext.Database.Prelude (HasConfig (hasConfig), CmdCommon)
import Control.Lens.Getter (view)
import Gargantext.Prelude.Config (gc_url)
import Gargantext.API.Admin.EnvTypes (Env)
import Gargantext.API.Errors (BackendInternalError)
type API = Summary "Fetch URL for sharing a node"
:> QueryParam "type" NodeType
:> QueryParam "id" NodeId
:> Get '[JSON] Text
api :: ServerT API (GargM Env BackendInternalError)
api = getUrl
getUrl :: (CmdCommon env) =>
Maybe NodeType -> Maybe NodeId -> GargM env BackendInternalError Text
getUrl nt id = do
-- TODO add check that the node is able to be shared (in a shared folder)
case nt of
Nothing -> pure "Invalid node Type"
Just t ->
case id of
Nothing -> pure "Invalid node ID"
Just i -> do
url <- view $ hasConfig . gc_url
pure $ url <> "/#/share/" <> show t <> "/" <> show (unNodeId i)
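A minimal sketch of the Text this handler produces, mirroring the concatenation above (the base URL and ids are hypothetical; it assumes show on the NodeType renders constructor names such as NodeCorpus):

  -- gc_url = "https://garg.example.org", t = NodeCorpus, i = NodeId 1234
  -- getUrl (Just t) (Just i)
  --   ==> "https://garg.example.org/#/share/NodeCorpus/1234"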
......@@ -43,6 +43,7 @@ import Gargantext.API.Node.Corpus.Export.Types qualified as CorpusExport
import Gargantext.API.Node.Corpus.New qualified as New
import Gargantext.API.Node.Document.Export qualified as DocumentExport
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Node.ShareURL qualified as ShareURL
import Gargantext.API.Prelude
import Gargantext.API.Public qualified as Public
import Gargantext.Core.Types.Individu (User(..))
......@@ -222,6 +223,7 @@ type GargPrivateAPI' =
:<|> List.GETAPI
:<|> List.JSONAPI
:<|> List.CSVAPI
:<|> "shareurl" :> ShareURL.API
{-
:<|> "wait" :> Summary "Wait test"
:> Capture "x" Int
......@@ -305,6 +307,7 @@ serverPrivateGargAPI' authenticatedUser@(AuthenticatedUser userNodeId userId)
:<|> List.getApi
:<|> List.jsonApi
:<|> List.csvApi
:<|> ShareURL.api
-- :<|> waitAPI
......
......@@ -66,7 +66,7 @@ class Collage sup inf where
instance Collage Texte Paragraphe where
dec (Texte t) = map Paragraphe $ DT.splitOn "\n" t
inc = Texte . DT.intercalate "\n" . map (\(Paragraphe t) -> t)
inc = Texte . DT.unlines . map (\(Paragraphe t) -> t)
instance Collage Paragraphe Phrase where
dec (Paragraphe t) = map Phrase $ sentences t
......@@ -78,7 +78,7 @@ instance Collage Phrase MultiTerme where
instance Collage MultiTerme Mot where
dec (MultiTerme mt) = map Mot $ DT.words mt
inc = MultiTerme . DT.intercalate " " . map (\(Mot m) -> m)
inc = MultiTerme . DT.unwords . map (\(Mot m) -> m)
-------------------------------------------------------------------
-- Contexts of text
......@@ -92,7 +92,7 @@ isCharStop :: Char -> Bool
isCharStop c = c `elem` ['.','?','!']
unsentences :: [Text] -> Text
unsentences txts = DT.intercalate " " txts
unsentences txts = DT.unwords txts
-- | Ngrams size
size :: Text -> Int
......
......@@ -71,7 +71,7 @@ isidoreToDoc lang (IsidoreDoc t a d u s as) = do
langText :: LangText -> Text
langText (LangText _l t1) = t1
langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
langText (ArrayText ts ) = Text.unwords $ map langText ts
let mDateS = maybe (Just $ Text.pack $ show Defaults.year) Just d
let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
......
......@@ -247,7 +247,7 @@ text2titleParagraphs n = catMaybes
n' = n + (round $ (fromIntegral n) / (2 :: Double))
doTitle :: [Text] -> Maybe (Text, Text)
doTitle (t:ts) = Just (t, DT.intercalate " " ts)
doTitle (t:ts) = Just (t, DT.unwords ts)
doTitle [] = Nothing
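-- e.g. doTitle ["Some title", "first", "words"] == Just ("Some title", "first words")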
......
......@@ -58,6 +58,11 @@ isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x)
-}
-- | Good value from users' requests and anthropological analysis
goodMapListSize :: Int
goodMapListSize = 350
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists :: ( HasNodeStory env err m
, HasNLPServer env
......@@ -71,7 +76,7 @@ buildNgramsLists :: ( HasNodeStory env err m
-> GroupParams
-> m (Map NgramsType [NgramsElement])
buildNgramsLists user uCid mCid mfslw gp = do
ngTerms <- buildNgramsTermsList user uCid mCid mfslw gp (NgramsTerms, MapListSize 350)
ngTerms <- buildNgramsTermsList user uCid mCid mfslw gp (NgramsTerms, MapListSize goodMapListSize)
othersTerms <- mapM (buildNgramsOthersList user uCid mfslw GroupIdentity)
[ (Authors , MapListSize 9, MaxListSize 1000)
, (Sources , MapListSize 9, MaxListSize 1000)
......@@ -179,23 +184,24 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
)
-- printDebug "[buildNgramsTermsList: Flow Social List / end]" nt
let !ngramsKeys = HashSet.fromList
$ List.take mapListSize
$ HashSet.toList
$ HashMap.keysSet allTerms
let !allKeys = HashMap.keysSet allTerms
-- printDebug "[buildNgramsTermsList: ngramsKeys]" (HashSet.size ngramsKeys)
!groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) ngramsKeys)
!groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) allKeys)
let
!socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists
!socialLists_Stemmed = addScoreStem groupParams' allKeys socialLists
!groupedWithList = toGroupedTree socialLists_Stemmed allTerms
!(stopTerms, candidateTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
$ HashMap.fromList
$ List.take mapListSize
$ HashMap.toList
$ HashMap.filter (\g -> view gts'_score g > 1)
$ view flc_scores groupedWithList
!(groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms
-- | Split candidateTerms into mono-terms and multi-terms.
!(groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms
-- void $ panicTrace $ "groupedWithList: " <> show groupedWithList
......@@ -211,6 +217,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
!monoSize = 0.4 :: Double
!multSize = 1 - monoSize
-- | Splits given hashmap into 2 pieces, based on score
splitAt' n' ns = both (HashMap.fromListWith (<>))
$ List.splitAt (round $ n' * listSizeGlobal)
$ List.sortOn (viewScore . snd)
......@@ -254,8 +261,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
]
where
mapStemNodeIds = HashMap.toList
$ HashMap.map viewScores
$ groupedTreeScores_SetNodeId
$ HashMap.map viewScores groupedTreeScores_SetNodeId
let
-- computing scores
mapScores f = HashMap.fromList
......
......@@ -69,7 +69,7 @@ groupWith :: GroupParams
groupWith GroupIdentity t = identity t
groupWith (GroupParams { unGroupParams_lang = l }) t =
NgramsTerm
$ Text.intercalate " "
$ Text.unwords
$ map (stem l PorterAlgorithm)
-- . take n
$ List.sort
......
......@@ -14,14 +14,15 @@ TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93id
-}
module Gargantext.Core.Text.Metrics.TFICF ( TFICF
, TficfContext(..)
, Total(..)
, Count(..)
, tficf
, sortTficf
)
where
module Gargantext.Core.Text.Metrics.TFICF
( TFICF
, TficfContext(..)
, Total(..)
, Count(..)
, tficf
, sortTficf
)
where
import Data.List qualified as List
import Data.Map.Strict (toList)
......@@ -34,12 +35,19 @@ path = "[G.T.Metrics.TFICF]"
type TFICF = Double
-- https://www.researchgate.net/publication/221226686_TF-ICF_A_New_Term_Weighting_Scheme_for_Clustering_Dynamic_Data_Streams
-- TficfSupra n m
--  - m is the total number of documents in the corpus
--  - n is the number of documents in which the given term occurred more than once
-- TficfInfra n m
--  - n and m are the analogous count and total at the local (infra) level
--    (cf. the Count/Total arguments matched by tficf below)
data TficfContext n m = TficfInfra n m
| TficfSupra n m
deriving (Show)
data Total = Total {unTotal :: !Double}
data Count = Count {unCount :: !Double}
newtype Total = Total { unTotal :: Double }
newtype Count = Count { unCount :: Double }
tficf :: TficfContext Count Total
-> TficfContext Count Total
......@@ -50,7 +58,11 @@ tficf (TficfInfra (Count ic) (Total it) )
| otherwise = panicTrace
$ "[ERR]"
<> path
<> " Frequency impossible"
<> " Frequency impossible: "
<> "ic = " <> show ic
<> ", it = " <> show it
<> ", sc = " <> show sc
<> ", st = " <> show st
tficf _ _ = panicTrace $ "[ERR]" <> path <> "Undefined for these contexts"
......
......@@ -92,7 +92,8 @@ instance Hashable Ngrams
makeLenses ''Ngrams
instance PGS.ToRow Ngrams where
toRow (UnsafeNgrams t s) = [toField t, toField s]
toRow (UnsafeNgrams { .. }) = [ toField _ngramsTerms
, toField _ngramsSize ]
------------------------------------------------------------------------
-------------------------------------------------------------------------
......
......@@ -50,7 +50,7 @@ data Paragraph = Uniform Grain | AuthorLike
-- Grain: number of Sentences by block of Text
-- Step : overlap of sentence between connex block of Text
groupUniform :: Grain -> [Text] -> [Text]
groupUniform g ts = map (Text.intercalate " ")
groupUniform g ts = map Text.unwords
$ chunkAlong g g
$ sentences
$ Text.concat ts
......
......@@ -52,7 +52,7 @@ import Gargantext.Core.Text.Terms.Mono (monoTerms)
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize)
import Gargantext.Core.Text.Terms.Multi (multiterms)
import Gargantext.Core.Types ( TermsCount, POS, Terms(Terms), TermsWithCount )
import Gargantext.Core.Types ( TermsCount, POS, Terms(..), TermsWithCount )
import Gargantext.Core.Utils (groupWithCounts)
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Query.Table.Ngrams (insertNgrams)
......@@ -60,6 +60,7 @@ import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag(..), insertNgr
import Gargantext.Database.Schema.Ngrams (text2ngrams, NgramsId)
import Gargantext.Prelude
data TermType lang
= Mono { _tt_lang :: !lang }
| Multi { _tt_lang :: !lang }
......@@ -86,7 +87,7 @@ extractTerms ncs (Unsupervised {..}) xs = mapM (terms ncs (Unsupervised { _tt_mo
where
m' = case _tt_model of
Just m''-> m''
Nothing -> newTries _tt_windowSize (Text.intercalate " " xs)
Nothing -> newTries _tt_windowSize (Text.unwords xs)
extractTerms ncs termTypeLang xs = mapM (terms ncs termTypeLang) xs
......@@ -124,15 +125,15 @@ class ExtractNgramsT h
-> DBCmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
------------------------------------------------------------------------
enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
enrichedTerms l pa po (Terms ng1 ng2) =
enrichedTerms l pa po (Terms { .. }) =
NgramsPostag { _np_lang = l
, _np_algo = pa
, _np_postag = po
, _np_form = form
, _np_lem = lem }
where
form = text2ngrams $ Text.intercalate " " ng1
lem = text2ngrams $ Text.intercalate " " $ Set.toList ng2
form = text2ngrams $ Text.unwords _terms_label
lem = text2ngrams $ Text.unwords $ Set.toList _terms_stem
------------------------------------------------------------------------
cleanNgrams :: Int -> Ngrams -> Ngrams
......
......@@ -114,7 +114,7 @@ extractTermsWithList' pats = map (concat . map concat . replaceTerms KeepAll pat
--------------------------------------------------------------------------
addSpaces :: Text -> Text
addSpaces = (Text.intercalate " ") . (Text.chunksOf 1)
addSpaces = Text.unwords . (Text.chunksOf 1)
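-- e.g. addSpaces "abc" == "a b c"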
--------------------------------------------------------------------------
......
......@@ -68,7 +68,7 @@ data Terms = Terms { _terms_label :: Label
, _terms_stem :: Stems
} deriving (Ord, Show)
instance Eq Terms where
(==) (Terms _ s1) (Terms _ s2) = s1 == s2
(==) (Terms { _terms_stem = s1 }) (Terms { _terms_stem = s2 }) = s1 == s2
type TermsCount = Int
......
......@@ -151,6 +151,6 @@ instance ToHyperdataRow HyperdataContact where
toHyperdataRow (HyperdataContact { _hc_who = Just (ContactWho _ fn ln _ _ _), _hc_where = ou} ) =
HyperdataRowContact (fromMaybe "FirstName" fn) (fromMaybe "LastName" ln) ou'
where
ou' = maybe "CNRS" (Text.intercalate " " . _cw_organization) (head ou)
ou' = maybe "CNRS" (Text.unwords . _cw_organization) (head ou)
toHyperdataRow (HyperdataContact {}) =
HyperdataRowContact "FirstName" "LastName" "Labs"
......@@ -118,14 +118,14 @@ cooc2graphWith' _doPartitions _bridgenessMethod multi similarity threshold stren
partitions <- if (Map.size distanceMap > 0)
then recursiveClustering' (spinglass' 1) distanceMap
else panic $ Text.intercalate " " [ "I can not compute the graph you request"
, "because either the quantity of documents"
, "or the quantity of terms"
, "are lacking."
, "Solution: add more either Documents or Map Terms to your analysis."
, "Follow the available tutorials on the Training EcoSystems."
, "Ask your co-users of GarganText how to have access to it."
]
else panic $ Text.unwords [ "I can not compute the graph you request"
, "because either the quantity of documents"
, "or the quantity of terms"
, "are lacking."
, "Solution: add more either Documents or Map Terms to your analysis."
, "Follow the available tutorials on the Training EcoSystems."
, "Ask your co-users of GarganText how to have access to it."
]
length partitions `seq` pure ()
let
......
......@@ -185,12 +185,13 @@ selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt tms =
( int
, toDBid NodeDocument
, cId
, Values fields ((DPS.Only . unNgramsTerm) <$> (List.take 10000 tms))
-- , Values fields ((DPS.Only . unNgramsTerm) <$> (List.take 10000 tms))
, DPS.In (unNgramsTerm <$> (List.take 10000 tms))
, cId
, toDBid nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
-- where
-- fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
......@@ -198,18 +199,42 @@ queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
JOIN nodes_contexts nn ON n.id = nn.context_id
WHERE n.typename = ?
AND nn.node_id = ?),
input_rows(terms) AS (?)
SELECT ng.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
input_rows AS (
SELECT id, terms
FROM ngrams
WHERE terms IN ?
)
SELECT ir.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
JOIN input_rows ir ON cng.ngrams_id = ir.id
JOIN nodes_contexts nn ON nn.context_id = cng.context_id
JOIN nodes_sample n ON nn.context_id = n.id
WHERE nn.node_id = ? -- CorpusId
AND cng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY cng.node_id, ng.terms
GROUP BY cng.node_id, ir.terms
|]
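-- Note: the rewritten query resolves input terms to ids with a subselect on
-- the ngrams table (WHERE terms IN ?) instead of a VALUES-based input_rows
-- CTE, dropping the extra JOIN against ngrams.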
-- queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
-- queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
-- WITH nodes_sample AS (SELECT c.id FROM contexts c TABLESAMPLE SYSTEM_ROWS (?)
-- JOIN nodes_contexts nc ON c.id = nc.context_id
-- WHERE c.typename = ?
-- AND nc.node_id = ?),
-- input_rows(terms) AS (?)
-- SELECT ng.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
-- JOIN ngrams ng ON cng.ngrams_id = ng.id
-- JOIN input_rows ir ON ir.terms = ng.terms
-- JOIN nodes_contexts nc ON nc.context_id = cng.context_id
-- JOIN nodes_sample ns ON nc.context_id = ns.id
-- WHERE nc.node_id = ? -- CorpusId
-- AND cng.ngrams_type = ? -- NgramsTypeId
-- AND nc.category > 0
-- -- AND nc.context_id IN (SELECT id FROM nodes_sample)
-- GROUP BY cng.node_id, ng.terms
-- |]
selectNgramsOccurrencesOnlyByContextUser_withSample' :: HasDBid NodeType
=> CorpusId
-> Int
......
......@@ -70,6 +70,9 @@ getTficf_withSample cId mId nt = do
<$> getOccByNgramsOnlyFast_withSample mId countGlobal nt
(HM.keys mapTextDoubleLocal)
printDebug "[getTficf_withSample] mapTextDoubleLocal: " mapTextDoubleLocal
printDebug "[getTficf_withSample] mapTextDoubleGlobal: " mapTextDoubleGlobal
--printDebug "getTficf_withSample" (mapTextDoubleLocal, mapTextDoubleGlobal, countLocal, countGlobal)
pure $ HM.mapWithKey (\t n ->
tficf (TficfInfra (Count n )
......
......@@ -207,9 +207,9 @@ fromField' field mb = do
valueToHyperdata v = case fromJSON v of
Success a -> pure a
Error _err -> returnError ConversionFailed field
$ DL.intercalate " " [ "cannot parse hyperdata for JSON: "
, show v
]
$ DL.unwords [ "cannot parse hyperdata for JSON: "
, show v
]
printSqlOpa :: Default Unpackspec a a => Select a -> IO ()
printSqlOpa = putStrLn . maybe "Empty query" identity . showSql
......
......@@ -28,7 +28,7 @@ import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Database.PostgreSQL.Simple qualified as PGS
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Text.Ngrams (Ngrams(..), NgramsType)
import Gargantext.Database.Admin.Types.Node ( pgNodeId, CorpusId, ListId, DocId )
import Gargantext.Database.Prelude (runOpaQuery, formatPGSQuery, runPGSQuery, DBCmd)
import Gargantext.Database.Query.Join (leftJoin3)
......@@ -79,14 +79,15 @@ insertNgrams ns =
-- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
insertNgrams' :: [Ngrams] -> DBCmd err [Indexed Int Text]
insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns')
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
ns' = (\n -> (_ngramsTerms n, _ngramsSize n)) <$> ns
fields = map (QualifiedIdentifier Nothing) ["text", "int4"]
_insertNgrams_Debug :: [(Text, Size)] -> DBCmd err ByteString
_insertNgrams_Debug ns = formatPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
fields = map (QualifiedIdentifier Nothing) ["text", "int4"]
----------------------
queryInsertNgrams :: PGS.Query
......
......@@ -53,14 +53,14 @@ type NgramsPostagInsert = ( Int
)
toInsert :: NgramsPostag -> NgramsPostagInsert
toInsert (NgramsPostag l a p form lem) =
( toDBid l
, toDBid a
, show p
, view ngramsTerms form
, view ngramsSize form
, view ngramsTerms lem
, view ngramsSize lem
toInsert (NgramsPostag { .. }) =
( toDBid _np_lang
, toDBid _np_algo
, show _np_postag
, view ngramsTerms _np_form
, view ngramsSize _np_form
, view ngramsTerms _np_lem
, view ngramsSize _np_lem
)
insertNgramsPostag :: [NgramsPostag] -> DBCmd err (HashMap Text NgramsId)
......@@ -154,17 +154,18 @@ SELECT terms,id FROM ins_form_ret
-- TODO add lang and postag algo
-- TODO remove when form == lem in insert
selectLems :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
selectLems l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems (PGS.In (map _ngramsTerms ns), toDBid l, toDBid server)
selectLems l (NLPServerConfig { server }) ns =
runPGSQuery querySelectLems (PGS.In (_ngramsTerms <$> ns), toDBid l, toDBid server)
----------------------
querySelectLems :: PGS.Query
querySelectLems = [sql|
WITH
trms
AS (SELECT id, terms, n
AS (SELECT id, terms
FROM ngrams
WHERE terms IN ?)
, input_rows(lang_id, algo_id, terms,n)
AS (SELECT ? as lang_id, ? as algo_id, terms, n, id
, input_rows
AS (SELECT ? as lang_id, ? as algo_id, terms, id
FROM trms)
, lems AS ( select ir.terms as t1, n2.terms as t2, sum(np.score) as score from input_rows ir
JOIN ngrams_postag np ON np.ngrams_id = ir.id
......@@ -179,29 +180,29 @@ querySelectLems = [sql|
|]
-- | This is the same as 'selectLems', but slower.
selectLems' :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
selectLems' l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems' (PGS.Only $ Values fields datas)
where
fields = map (QualifiedIdentifier Nothing) ["int4","int4","text", "int4"]
datas = map (\d -> [toField $ toDBid l, toField $ toDBid server] <> toRow d) ns
-- selectLems' :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
-- selectLems' l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems' (PGS.Only $ Values fields datas)
-- where
-- fields = map (QualifiedIdentifier Nothing) ["int4","int4","text", "int4"]
-- datas = map (\d -> [toField $ toDBid l, toField $ toDBid server] <> toRow d) ns
querySelectLems' :: PGS.Query
querySelectLems' = [sql|
WITH input_rows(lang_id, algo_id, terms,n)
AS (?) -- ((VALUES ('automata' :: "text")))
, lems AS ( select n1.terms as t1 ,n2.terms as t2 ,sum(np.score) as score from input_rows ir
JOIN ngrams n1 ON ir.terms = n1.terms
JOIN ngrams_postag np ON np.ngrams_id = n1.id
JOIN ngrams n2 ON n2.id = np.lemm_id
WHERE np.lang_id = ir.lang_id
AND np.algo_id = ir.algo_id
GROUP BY n1.terms, n2.terms
ORDER BY score DESC
)
SELECT t1,t2 from lems
|]
-- querySelectLems' :: PGS.Query
-- querySelectLems' = [sql|
-- WITH input_rows(lang_id, algo_id, terms,n)
-- AS (?) -- ((VALUES ('automata' :: "text")))
-- , lems AS ( select n1.terms as t1 ,n2.terms as t2 ,sum(np.score) as score from input_rows ir
-- JOIN ngrams n1 ON ir.terms = n1.terms
-- JOIN ngrams_postag np ON np.ngrams_id = n1.id
-- JOIN ngrams n2 ON n2.id = np.lemm_id
-- WHERE np.lang_id = ir.lang_id
-- AND np.algo_id = ir.algo_id
-- GROUP BY n1.terms, n2.terms
-- ORDER BY score DESC
-- )
-- SELECT t1,t2 from lems
-- |]
-- | Insert Table
createTable_NgramsPostag :: DBCmd err [Int]
......
......@@ -173,15 +173,26 @@ getChildrenByType :: HasDBid NodeType
-> NodeType
-> DBCmd err [NodeId]
getChildrenByType nId nType = do
result <- runPGSQuery query (PGS.Only nId)
children_lst <- mapM (\(id, _) -> getChildrenByType id nType) result
pure $ concat $ [fst <$> filter (\(_, pTypename) -> pTypename == toDBid nType) result] ++ children_lst
childrenFirstLevel <- getClosestChildrenByType nId nType
childrenLst <- mapM (\id -> getChildrenByType id nType) childrenFirstLevel
pure $ childrenFirstLevel ++ concat childrenLst
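-- Note: the recursion descends only through children that already match
-- nType, i.e. it collects connected same-typed descendants at every depth.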
-- | Given a node id, find all of its children (first level only) of
-- the given node type.
getClosestChildrenByType :: HasDBid NodeType
=> NodeId
-> NodeType
-> DBCmd err [NodeId]
getClosestChildrenByType nId nType = do
results <- runPGSQuery query (nId, toDBid nType)
pure $ (\(PGS.Only nodeId) -> nodeId) <$> results
where
query :: PGS.Query
query = [sql|
SELECT n.id, n.typename
SELECT n.id
FROM nodes n
WHERE n.parent_id = ?;
WHERE n.parent_id = ?
AND n.typename = ?;
|]
------------------------------------------------------------------------
......
......@@ -317,6 +317,15 @@ nodeContextsScore inputData = map (\(PGS.Only a) -> a)
------------------------------------------------------------------------
-- | Counts the number of documents in a corpus.
-- Also applies filter for category to be at least 1 (i.e. not in trash).
-- select count(*)
-- from contexts c
-- join nodes_contexts nc on c.id = nc.context_id
-- where
-- nc.node_id = 88
-- and nc.category >= 1
-- and c.typename = 4
selectCountDocs :: HasDBid NodeType => CorpusId -> DBCmd err Int
selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
where
......
......@@ -39,6 +39,10 @@ import Gargantext.Prelude
type NgramsId = Int
type Size = Int
-- | Ngrams table
-- 'n' is the size; see the insertNgrams' function in
-- G.D.Q.T.Ngrams. I.e. ngrams with 1 term have size 1, ngrams with
-- 2 terms have size 2, etc.
data NgramsPoly id terms n = NgramsDB { _ngrams_id :: !id
, _ngrams_terms :: !terms
, _ngrams_n :: !n
......@@ -90,7 +94,8 @@ instance PGS.ToRow Text where
toRow t = [toField t]
text2ngrams :: Text -> Ngrams
text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
text2ngrams txt = UnsafeNgrams { _ngramsTerms = txt'
, _ngramsSize = length $ splitOn " " txt' }
where
txt' = strip txt
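-- e.g. text2ngrams " foo bar " == UnsafeNgrams { _ngramsTerms = "foo bar", _ngramsSize = 2 }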
......
......@@ -21,7 +21,9 @@ import Gargantext.Database.Schema.Prelude
import Gargantext.Prelude
-- | Index memory of any type in Gargantext
-- | Index memory of any type in Gargantext.
-- I.e. given entity 'a', we use this type to mark that it has a DB id of type 'i'.
-- An un-indexed entity 'a' might not have been INSERTed into the DB yet.
data Indexed i a =
Indexed { _index :: !i
, _unIndex :: !a
......
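For example (hypothetical values), an Ngrams row that received DB id 42 on insertion would be carried as:

  -- Indexed { _index = 42, _unIndex = ng } :: Indexed Int Ngrams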