Commit 376ddea3 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 145-dev-graph-explorer-search-tfidf

parents b0b6a491 ca606da8
Pipeline #3385 passed with stage
in 92 minutes and 10 seconds
## Version 0.0.6.8.5
* [BACK][FIX][Ngrams Table, page sort / limit (#149)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/149)
* [FRONT][FIX][Security Issue with Teams (#452)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/452)
* [FRONT][FIX][Darkster Mode: when creating a node we miss informations (#461)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/461)
## Version 0.0.6.8.4
* [BACK][FEAT] Fix reindexing functions
* [FRONT][FIX][FEEDBACK ON 0.0.6.8.2 (#459)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/459)
## Version 0.0.6.8.3
* [FRONT][SECU][Security Issue with Teams (#452)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/452)
* [FRONT][FEAT][Graph Explorer: disable controls when ForceAtlas is running (#451)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/451)
* [FRONT][FIX][[Doc date filter] Filter by multiple dates (#450)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/450)
* [FRONT][ERGO][[layout] Description node block hide feature (#447)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/447)
* [FRONT][ERGO][[layout] Node view additions (#448)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/448)
* [BACK][FIX][Problem at list import (#148)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/148)
* [FRONT][FIX][CSS in Graph Explorer toolbar (#460)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/460)
* [BACK][FEAT] Bridgeness with more links
## Version 0.0.6.8.2
* [FRONT][FIX][[Doc view] By default sort document from latest to oldest (#457)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/457)
* [FRONT][FIX][Regression on Documents' annotation (#456)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/456)
* [FRONT][FIX][[layout] Move Cache CTA to topbar (#446)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/446)
* [FRONT][FIX][Graph Explorer: node status change to either candidates or stop (#454)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/454)
## Version 0.0.6.8.1
......
......@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.8.2
version: 0.0.6.8.5
synopsis: Search, map, share
description: Please see README.md
category: Data
......
......@@ -6,7 +6,7 @@ name: gargantext
# | | | +----- Layers * : New versions with API additions
# | | | | +--- Layers * : New versions without API breaking changes
# | | | | |
version: '0.0.6.8.2'
version: '0.0.6.8.5'
synopsis: Search, map, share
description: Please see README.md
category: Data
......
......@@ -78,13 +78,17 @@ childrenToTreeNodes (TreeN {_tn_node}, rId) = toTreeNode (Just rId) _tn_node
resolveParent :: (HasConnectionPool env, HasConfig env, HasMail env) => Maybe NodeId -> GqlM e env (Maybe TreeNode)
resolveParent (Just pId) = do
node <- lift $ getNode pId
pure $ Just $ nodeToTreeNode node
pure $ nodeToTreeNode node
resolveParent Nothing = pure Nothing
nodeToTreeNode :: NN.Node json -> TreeNode
nodeToTreeNode N.Node {..} = TreeNode { id = NN.unNodeId _node_id
, name = _node_name
, node_type = fromNodeTypeId _node_typename
, parent_id = NN.unNodeId <$> _node_parent_id
}
nodeToTreeNode :: NN.Node json -> Maybe TreeNode
nodeToTreeNode N.Node {..} = if (fromNodeTypeId _node_typename /= NN.NodeFolderShared) && (fromNodeTypeId _node_typename /= NN.NodeTeam)
then
Just TreeNode { id = NN.unNodeId _node_id
, name = _node_name
, node_type = fromNodeTypeId _node_typename
, parent_id = NN.unNodeId <$> _node_parent_id
}
else
Nothing
......@@ -69,6 +69,9 @@ module Gargantext.API.Ngrams
, tableNgramsPull
, tableNgramsPut
, getNgramsTable'
, setNgramsTableScores
, Version
, Versioned(..)
, VersionedWithCount(..)
......@@ -80,7 +83,7 @@ module Gargantext.API.Ngrams
where
import Control.Concurrent
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex)
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex, over)
import Control.Monad.Reader
import Data.Aeson hiding ((.=))
import Data.Either (Either(..))
......@@ -532,6 +535,11 @@ getTableNgrams _nType nId tabType listId limit_ offset
minSize' = maybe (const True) (<=) minSize
maxSize' = maybe (const True) (>=) maxSize
rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
(tableMap ^. at r)
)
(ne ^. ne_root)
selected_node n = minSize' s
&& maxSize' s
&& searchQuery (n ^. ne_ngrams)
......@@ -549,109 +557,115 @@ getTableNgrams _nType nId tabType listId limit_ offset
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
---------------------------------------
-- | Filter the given `tableMap` with the search criteria.
filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
filteredNodes tableMap = rootOf <$> list & filter selected_node
filteredNodes tableMap = roots
where
rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
(tableMap ^. at r)
)
(ne ^. ne_root)
list = tableMap ^.. each
selected_nodes = list & filter selected_node
roots = rootOf tableMap <$> selected_nodes
---------------------------------------
selectAndPaginate :: Map NgramsTerm NgramsElement -> [NgramsElement]
selectAndPaginate tableMap = roots <> inners
-- | Appends subitems (selected from `tableMap`) for given `roots`.
withInners :: Map NgramsTerm NgramsElement -> [NgramsElement] -> [NgramsElement]
withInners tableMap roots = roots <> inners
where
list = tableMap ^.. each
rootOf ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
(tableMap ^. at r)
)
(ne ^. ne_root)
selected_nodes = list & take limit_
. drop offset'
. filter selected_node
. sortOnOrder orderBy
roots = rootOf <$> selected_nodes
rootsSet = Set.fromList (_ne_ngrams <$> roots)
inners = list & filter (selected_inner rootsSet)
rootSet = Set.fromList (_ne_ngrams <$> roots)
inners = list & filter (selected_inner rootSet)
---------------------------------------
setScores :: forall t. Each t t NgramsElement NgramsElement => Bool -> t -> m t
setScores False table = pure table
setScores True table = do
let ngrams_terms = table ^.. each . ne_ngrams
-- printDebug "ngrams_terms" ngrams_terms
t1 <- getTime
occurrences <- getOccByNgramsOnlyFast nId
listId
ngramsType
--printDebug "occurrences" occurrences
t2 <- getTime
liftBase $ hprint stderr
("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
(length ngrams_terms) t1 t2
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
-- | Paginate the results
sortAndPaginate :: [NgramsElement] -> [NgramsElement]
sortAndPaginate = take limit_
. drop offset'
. sortOnOrder orderBy
pure $ table & each %~ setOcc
---------------------------------------
-- lists <- catMaybes <$> listsWith userMaster
-- trace (show lists) $
-- getNgramsTableMap ({-lists <>-} listIds) ngramsType
let scoresNeeded = needsScores orderBy
tableMap1 <- getNgramsTableMap listId ngramsType
t1 <- getTime
tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
. Map.mapWithKey ngramsElementFromRepo
tableMap2 <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
. filteredNodes
let fltr = tableMap2 & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
let fltrCount = length $ fltr ^. v_data . _NgramsTable
t2 <- getTime
tableMap3 <- tableMap2 & v_data %%~ fmap NgramsTable
. setScores (not scoresNeeded)
. selectAndPaginate
let tableMap3 = over (v_data . _NgramsTable) ((withInners (tableMap2 ^. v_data)) . sortAndPaginate) fltr
t3 <- getTime
liftBase $ hprint stderr
("getTableNgrams total=" % hasTime
% " map1=" % hasTime
% " map2=" % hasTime
% " map3=" % hasTime
% " sql=" % (if scoresNeeded then "map2" else "map3")
% "\n"
) t0 t3 t0 t1 t1 t2 t2 t3
liftBase $ do
hprint stderr
("getTableNgrams total=" % hasTime
% " map1=" % hasTime
% " map2=" % hasTime
% " map3=" % hasTime
% " sql=" % (if scoresNeeded then "map2" else "map3")
% "\n"
) t0 t3 t0 t1 t1 t2 t2 t3
-- printDebug "[getTableNgrams] tableMap3" $ show tableMap3
pure $ toVersionedWithCount fltrCount tableMap3
-- | Helper function to get the ngrams table with scores.
getNgramsTable' :: forall env err m.
( HasNodeStory env err m
, HasNodeError err
, HasConnectionPool env
, HasConfig env
, HasMail env)
=> NodeId
-> ListId
-> TableNgrams.NgramsType
-> m (Versioned (Map.Map NgramsTerm NgramsElement))
getNgramsTable' nId listId ngramsType = do
tableMap1 <- getNgramsTableMap listId ngramsType
tableMap1 & v_data %%~ (setNgramsTableScores nId listId ngramsType)
. Map.mapWithKey ngramsElementFromRepo
-- | Helper function to set scores on an `NgramsTable`.
setNgramsTableScores :: forall env err m t.
( Each t t NgramsElement NgramsElement
, HasNodeStory env err m
, HasNodeError err
, HasConnectionPool env
, HasConfig env
, HasMail env)
=> NodeId
-> ListId
-> TableNgrams.NgramsType
-> t
-> m t
setNgramsTableScores nId listId ngramsType table = do
let ngrams_terms = table ^.. each . ne_ngrams
-- printDebug "ngrams_terms" ngrams_terms
t1 <- getTime
occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
--printDebug "occurrences" occurrences
t2 <- getTime
liftBase $ hprint stderr
("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
(length ngrams_terms) t1 t2
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
pure $ table & each %~ setOcc
scoresRecomputeTableNgrams :: forall env err m.
(HasNodeStory env err m, HasNodeError err, HasConnectionPool env, HasConfig env, HasMail env)
=> NodeId -> TabType -> ListId -> m Int
scoresRecomputeTableNgrams nId tabType listId = do
tableMap <- getNgramsTableMap listId ngramsType
_ <- tableMap & v_data %%~ setScores
_ <- tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
. Map.mapWithKey ngramsElementFromRepo
pure $ 1
where
ngramsType = ngramsTypeFromTabType tabType
setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
setScores table = do
occurrences <- getOccByNgramsOnlyFast nId
listId
ngramsType
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
pure $ table & each %~ setOcc
-- APIs
......
......@@ -158,6 +158,7 @@ reIndexWith cId lId nt lts = do
-- Checking Text documents where orphans match
-- TODO Tests here
let
-- fromListWith (<>)
ngramsByDoc = map (HashMap.fromList)
$ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v)))
$ map (\doc -> List.zip
......@@ -266,7 +267,7 @@ parseCsvData lst = Map.fromList $ conv <$> lst
, _nre_children = MSet
$ Map.fromList
$ map (\form -> (NgramsTerm form, ()))
$ filter (/= "")
$ filter (\w -> w /= "" && w /= label)
$ splitOn "|&|" forms
}
)
......
......@@ -72,9 +72,8 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
--------------------------------------------------------------------------
-- Utils
type BlockText = Text
type MatchedText = Text
termsInText :: Patterns -> BlockText -> [(MatchedText, TermsCount)]
termsInText :: Patterns -> Text -> [(MatchedText, TermsCount)]
termsInText pats txt = groupWithCounts
$ List.concat
$ map (map unwords)
......
......@@ -109,10 +109,10 @@ filterComs _b m = Map.filter (\n -> length n > 0) $ mapWithKey filter' m
filter' (c1,c2) a
| c1 == c2 = a
-- TODO use n here
| otherwise = take 1 $ List.sortOn (Down . snd) a
| otherwise = take n $ List.sortOn (Down . snd) a
where
_n :: Int
_n = round $ 100 * a' / t
n :: Int
n = round $ 100 * a' / t
a'= fromIntegral $ length a
t :: Double
t = fromIntegral $ length $ List.concat $ elems m
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment