Commit 5f31a345 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[REFACT] before scoring new ngrams lists.

parent bfac1c97
...@@ -45,8 +45,11 @@ main = do ...@@ -45,8 +45,11 @@ main = do
let createUsers :: Cmd ServantErr Int64 let createUsers :: Cmd ServantErr Int64
createUsers = insertUsersDemo createUsers = insertUsersDemo
let cmd :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId let
cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) (Unsupervised EN 5 1 Nothing) CsvHalFormat corpusPath --tt = (Unsupervised EN 5 1 Nothing)
tt = (Mono EN)
cmd :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
cmd = flowCorpusFile (cs user) (cs name) (read limit :: Int) tt CsvHalFormat corpusPath
{- {-
let debatCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId let debatCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId
debatCorpus = do debatCorpus = do
......
...@@ -18,7 +18,7 @@ Loads all static file for the front-end. ...@@ -18,7 +18,7 @@ Loads all static file for the front-end.
--------------------------------------------------------------------- ---------------------------------------------------------------------
module Gargantext.API.FrontEnd where module Gargantext.API.FrontEnd where
import Servant.Static.TH (createApiAndServerDecs) import Servant.Static.TH (createApiAndServerDecs)
--------------------------------------------------------------------- ---------------------------------------------------------------------
$(createApiAndServerDecs "FrontEndAPI" "frontEndServer" "purescript-gargantext/dist") $(createApiAndServerDecs "FrontEndAPI" "frontEndServer" "purescript-gargantext/dist")
......
...@@ -33,14 +33,18 @@ import GHC.Generics (Generic) ...@@ -33,14 +33,18 @@ import GHC.Generics (Generic)
import Gargantext.Core.Types (ListType(..)) import Gargantext.Core.Types (ListType(..))
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Utils import Gargantext.Database.Utils
import Gargantext.Core.Types (CorpusId) import Gargantext.Core.Types (CorpusId, ListId, Limit)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.API.Ngrams import Gargantext.API.Ngrams
import Gargantext.Text.Metrics (Scored(..))
import Gargantext.API.Ngrams.NTree import Gargantext.API.Ngrams.NTree
import Gargantext.Database.Flow import Gargantext.Database.Flow
import Gargantext.Viz.Chart import Gargantext.Viz.Chart
import Servant
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Map as Map
import qualified Gargantext.Database.Metrics as Metrics
data Metrics = Metrics data Metrics = Metrics
{ metrics_data :: [Metric]} { metrics_data :: [Metric]}
...@@ -97,6 +101,30 @@ instance Arbitrary MyTree ...@@ -97,6 +101,30 @@ instance Arbitrary MyTree
arbitrary = MyTree <$> arbitrary <*> arbitrary <*> arbitrary arbitrary = MyTree <$> arbitrary <*> arbitrary <*> arbitrary
-------------------------------------------------------------
-- | Scatter metrics API
type ScatterAPI = Summary "SepGen IncExc metrics"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParam "limit" Int
:> Get '[JSON] Metrics
getScatter :: FlowCmdM env err m =>
CorpusId
-> Maybe ListId
-> TabType
-> Maybe Limit
-> m Metrics
getScatter cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics cId maybeListId tabType maybeLimit
let
metrics = map (\(Scored t s1 s2) -> Metric t (log' 5 s1) (log' 2 s2) (listType t ngs')) scores
log' n x = 1 + (if x <= 0 then 0 else (log $ (10^(n::Int)) * x))
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent"
pure $ Metrics metrics
......
...@@ -65,7 +65,6 @@ import Gargantext.Database.Types.Node ...@@ -65,7 +65,6 @@ import Gargantext.Database.Types.Node
import Gargantext.Database.Utils -- (Cmd, CmdM) import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Utils (hash) import Gargantext.Prelude.Utils (hash)
import Gargantext.Text.Metrics (Scored(..))
import Gargantext.Viz.Chart import Gargantext.Viz.Chart
import Gargantext.Viz.Phylo.API (PhyloAPI, phyloAPI) import Gargantext.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Servant import Servant
...@@ -74,8 +73,6 @@ import Servant.Swagger (HasSwagger(toSwagger)) ...@@ -74,8 +73,6 @@ import Servant.Swagger (HasSwagger(toSwagger))
import Servant.Swagger.Internal import Servant.Swagger.Internal
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Map as Map
import qualified Gargantext.Database.Metrics as Metrics
import qualified Gargantext.Database.Node.Update as U (update, Update(..)) import qualified Gargantext.Database.Node.Update as U (update, Update(..))
{- {-
...@@ -144,7 +141,7 @@ type NodeAPI a = Get '[JSON] (Node a) ...@@ -144,7 +141,7 @@ type NodeAPI a = Get '[JSON] (Node a)
:> SearchAPI :> SearchAPI
-- VIZ -- VIZ
:<|> "metrics" :> MetricsAPI :<|> "metrics" :> ScatterAPI
:<|> "chart" :> ChartApi :<|> "chart" :> ChartApi
:<|> "pie" :> PieApi :<|> "pie" :> PieApi
:<|> "tree" :> TreeApi :<|> "tree" :> TreeApi
...@@ -187,7 +184,7 @@ nodeAPI p uId id ...@@ -187,7 +184,7 @@ nodeAPI p uId id
:<|> delDocs id :<|> delDocs id
:<|> searchIn id :<|> searchIn id
:<|> getMetrics id :<|> getScatter id
:<|> getChart id :<|> getChart id
:<|> getPie id :<|> getPie id
:<|> getTree id :<|> getTree id
...@@ -375,27 +372,6 @@ putNode = undefined -- TODO ...@@ -375,27 +372,6 @@ putNode = undefined -- TODO
query :: Monad m => Text -> m Text query :: Monad m => Text -> m Text
query s = pure s query s = pure s
-------------------------------------------------------------
type MetricsAPI = Summary "SepGen IncExc metrics"
:> QueryParam "list" ListId
:> QueryParamR "ngramsType" TabType
:> QueryParam "limit" Int
:> Get '[JSON] Metrics
getMetrics :: NodeId -> GargServer MetricsAPI
getMetrics cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics' cId maybeListId tabType maybeLimit
let
metrics = map (\(Scored t s1 s2) -> Metric t (log' 5 s1) (log' 2 s2) (listType t ngs')) scores
log' n x = 1 + (if x <= 0 then 0 else (log $ (10^(n::Int)) * x))
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent"
pure $ Metrics metrics
------------------------------------------------------------- -------------------------------------------------------------
type Hash = Text type Hash = Text
data FileType = CSV | PresseRIS data FileType = CSV | PresseRIS
......
...@@ -217,8 +217,14 @@ insertMasterDocs c lang hs = do ...@@ -217,8 +217,14 @@ insertMasterDocs c lang hs = do
fixLang (Unsupervised l n s m) = Unsupervised l n s m' fixLang (Unsupervised l n s m) = Unsupervised l n s m'
where where
m' = case m of m' = case m of
Nothing -> trace ("buildTries here" :: String) $ Just $ buildTries n (fmap toToken $ uniText $ Text.intercalate " " $ List.concat $ map hasText documentsWithId) Nothing -> trace ("buildTries here" :: String)
m'' -> m'' $ Just
$ buildTries n ( fmap toToken $ uniText
$ Text.intercalate " . "
$ List.concat
$ map hasText documentsWithId
)
just_m -> just_m
fixLang l = l fixLang l = l
lang' = fixLang lang lang' = fixLang lang
......
...@@ -39,7 +39,7 @@ import qualified Gargantext.Database.Metrics as Metrics ...@@ -39,7 +39,7 @@ import qualified Gargantext.Database.Metrics as Metrics
{- {-
trainModel :: FlowCmdM env ServantErr m trainModel :: FlowCmdM env ServantErr m
=> Username -> m Score => Username -> m Score
trainMode u = do trainModel u = do
rootId <- _node_id <$> getRoot u rootId <- _node_id <$> getRoot u
(id:ids) <- getCorporaWithParentId rootId (id:ids) <- getCorporaWithParentId rootId
(s,_model) <- case length ids >0 of (s,_model) <- case length ids >0 of
...@@ -48,11 +48,11 @@ trainMode u = do ...@@ -48,11 +48,11 @@ trainMode u = do
--} --}
getMetrics :: FlowCmdM env err m getMetrics' :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Int => CorpusId -> Maybe ListId -> TabType -> Maybe Int
-> m (Map.Map ListType [Vec.Vector Double]) -> m (Map.Map ListType [Vec.Vector Double])
getMetrics cId maybeListId tabType maybeLimit = do getMetrics' cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics' cId maybeListId tabType maybeLimit (ngs', scores) <- Metrics.getMetrics cId maybeListId tabType maybeLimit
let let
metrics = map (\(Scored t s1 s2) -> (listType t ngs', [Vec.fromList [s1,s2]])) scores metrics = map (\(Scored t s1 s2) -> (listType t ngs', [Vec.fromList [s1,s2]])) scores
......
...@@ -24,26 +24,27 @@ import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType) ...@@ -24,26 +24,27 @@ import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm) import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm)
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..)) import Gargantext.Core.Types (ListType(..), Limit, NodeType(..))
import Gargantext.Database.Flow (FlowCmdM) import Gargantext.Database.Flow (FlowCmdM)
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser, getTficfWith) import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser{-, getTficfWith-})
import Gargantext.Database.Node.Select import Gargantext.Database.Node.Select
import Gargantext.Database.Schema.Node (defaultList) import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Types.Node (ListId, CorpusId, HyperdataCorpus) import Gargantext.Database.Types.Node (ListId, CorpusId{-, HyperdataCorpus-})
import Gargantext.Database.Flow (getOrMkRootWithCorpus) --import Gargantext.Database.Flow (getOrMkRootWithCorpus)
import Gargantext.Database.Config (userMaster) import Gargantext.Database.Config (userMaster)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Metrics (scored, Scored(..), localMetrics, toScored) import Gargantext.Text.Metrics (scored, Scored(..), localMetrics{-, toScored-})
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Vector.Storable as Vec import qualified Data.Vector.Storable as Vec
getMetrics' :: FlowCmdM env err m getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text]) -> m (Map Text (ListType, Maybe Text), [Scored Text])
getMetrics' cId maybeListId tabType maybeLimit = do getMetrics cId maybeListId tabType maybeLimit = do
(ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
pure (ngs, scored myCooc) pure (ngs, scored myCooc)
{- | TODO remove unused function
getMetrics :: FlowCmdM env err m getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text]) -> m (Map Text (ListType, Maybe Text), [Scored Text])
...@@ -58,7 +59,7 @@ getMetrics cId maybeListId tabType maybeLimit = do ...@@ -58,7 +59,7 @@ getMetrics cId maybeListId tabType maybeLimit = do
metrics' <- getTficfWith cId masterCorpusId (lIds <> [lId]) (ngramsTypeFromTabType tabType) ngs' metrics' <- getTficfWith cId masterCorpusId (lIds <> [lId]) (ngramsTypeFromTabType tabType) ngs'
pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics']) pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics'])
-}
getLocalMetrics :: (FlowCmdM env err m) getLocalMetrics :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
......
...@@ -43,7 +43,6 @@ data NgramsListBuilder = BuilderStepO { stemSize :: Int ...@@ -43,7 +43,6 @@ data NgramsListBuilder = BuilderStepO { stemSize :: Int
| BuilderStepN { withModel :: Model } | BuilderStepN { withModel :: Model }
data StopSize = StopSize {unStopSize :: Int} data StopSize = StopSize {unStopSize :: Int}
-- | TODO improve grouping functions of Authors, Sources, Institutes.. -- | TODO improve grouping functions of Authors, Sources, Institutes..
...@@ -111,8 +110,8 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs ...@@ -111,8 +110,8 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
ys = take b $ drop a ns ys = take b $ drop a ns
zs = drop b $ drop a ns zs = drop b $ drop a ns
a = 3 a = 300
b = 500 b = 350
isStopTerm :: StopSize -> Text -> Bool isStopTerm :: StopSize -> Text -> Bool
isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x) isStopTerm (StopSize n) x = Text.length x < n || any isStopChar (Text.unpack x)
......
...@@ -83,8 +83,6 @@ extractTerms (Unsupervised l n s m) xs = mapM (terms (Unsupervised l n s (Just m ...@@ -83,8 +83,6 @@ extractTerms (Unsupervised l n s m) xs = mapM (terms (Unsupervised l n s (Just m
extractTerms termTypeLang xs = mapM (terms termTypeLang) xs extractTerms termTypeLang xs = mapM (terms termTypeLang) xs
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Terms from Text -- | Terms from Text
-- Mono : mono terms -- Mono : mono terms
...@@ -132,8 +130,10 @@ newTries :: Int -> Text -> Tries Token () ...@@ -132,8 +130,10 @@ newTries :: Int -> Text -> Tries Token ()
newTries n t = buildTries n (fmap toToken $ uniText t) newTries n t = buildTries n (fmap toToken $ uniText t)
uniText :: Text -> [[Text]] uniText :: Text -> [[Text]]
uniText = map (List.filter (not . isPunctuation)) uniText =
-- map (map (Text.toLower))
map (List.filter (not . isPunctuation))
. map tokenize . map tokenize
. sentences -- | TODO get sentences according to lang . sentences -- | TODO get sentences according to lang
. Text.toLower
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment