Commit ae55f357 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[LEARN] moreLike func and apply.

parent c9a2ffdd
{-|
Module : Gargantext.Database.Learn
Description : Learn Small Data Analytics with big data connection (DB)
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.Database.Learn where
import Data.Text (Text)
import Data.Tuple (snd)
import Data.Maybe
import Gargantext.Database.Facet
import Gargantext.Database.Types.Node
import Gargantext.Prelude
import Gargantext.Text.Learn
import qualified Data.List as List
import qualified Data.Text as Text
import Gargantext.Database.Schema.NodeNode (nodesToFavorite)
import Gargantext.API.Node (delDocs, Documents(..))
import Gargantext.Database.Utils (Cmd)
import Gargantext.Database.Schema.Node (HasNodeError)
text :: FacetDoc -> (NodeId, Text)
text (FacetDoc nId _ _ h _ _) = (nId, title <> "" <> Text.take 100 abstr)
where
title = maybe "" identity (_hyperdataDocument_title h)
abstr = maybe "" identity (_hyperdataDocument_abstract h)
--moreLike docs_fav docs_trash docs_test = do
data FavTrash = IsFav | IsTrash
deriving (Eq)
--{-
moreLike :: HasNodeError err => FavTrash -> CorpusId -> Cmd err [(NodeId, Maybe Bool)]
moreLike ft cId = do
let b = if (==) ft IsFav then True else False
docs_trash <- map text <$> runViewDocuments cId True Nothing Nothing Nothing
docs_fav <- map text <$> filter (\(FacetDoc _ _ _ _ f _) -> f == True) <$> runViewDocuments cId False Nothing Nothing Nothing
docs_test <- map text <$> filter (\(FacetDoc _ _ _ _ f _) -> f == False) <$> runViewDocuments cId False Nothing Nothing Nothing
let priors = priorEventsWith snd b ( List.zip (repeat False) docs_fav
<> List.zip (repeat True ) docs_trash
)
pure $ filter ((==) (Just $ not b) . snd) $ map (\x -> (fst x, detectDefaultWithPriors snd priors x)) docs_test
learnModify :: HasNodeError err => FavTrash -> CorpusId -> [NodeId] -> Cmd err [Int]
learnModify favTrash cId ns = case favTrash of
IsFav -> nodesToFavorite $ map (\n -> (cId, n, True)) ns
IsTrash -> delDocs cId (Documents ns)
learnAndApply :: HasNodeError err => FavTrash -> CorpusId -> Cmd err [Int]
learnAndApply ft cId = do
ids <- map fst <$> moreLike ft cId
learnModify ft cId ids
--}
......@@ -7,6 +7,9 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TODO:
- generalize to byteString
Stop words and (how to learn it).
Main type here is String.
......@@ -30,7 +33,7 @@ import Data.String (String)
import Data.Text (Text)
import Data.Text (pack, unpack, toLower)
import Data.Tuple.Extra (both)
import Data.Tuple.Extra (both, second)
import Gargantext.Prelude
import Gargantext.Core (Lang(..), allLangs)
......@@ -60,6 +63,30 @@ type CatProb a = Map a Double
type Events a = Map a EventBook
------------------------------------------------------------------------
detectStopDefault :: Text -> Maybe Bool
detectStopDefault = undefined
detectBool :: [(Bool, Text)] -> Text -> Maybe Bool
detectBool events = detectDefault False events
detectDefault :: Ord a => a -> [(a, Text)] -> Text -> Maybe a
detectDefault = detectDefaultWith identity
detectDefaultWith :: Ord a => (b -> Text) -> a -> [(a, b)] -> b -> Maybe a
detectDefaultWith f d events = detectDefaultWithPriors f ps
where
ps = priorEventsWith f d events
detectDefaultWithPriors :: Ord b => (a -> Text) -> Events b -> a -> Maybe b
detectDefaultWithPriors f priors = detectCat 99 priors . f
priorEventsWith :: Ord a => (t -> Text) -> a -> [(a, t)] -> Events a
priorEventsWith f d e = toEvents d [0..2] 10 es
where
es = map (\(a,b) -> CatWord a (unpack $ toLower $ f b)) e
------------------------------------------------------------------------
detectLangDefault :: Text -> Maybe Lang
detectLangDefault = detectCat 99 eventLang
......@@ -76,15 +103,6 @@ detectLangDefault = detectCat 99 eventLang
--textSample DE = DE.textSample
--textSample SP = SP.textSample
--textSample CH = CH.textSample
detectStopDefault :: Text -> Maybe Bool
detectStopDefault = undefined
detectDefault :: [(Bool, Text)] -> Text -> Maybe Bool
detectDefault events = detectCat 99 (priorEvents events)
where
priorEvents events' = toEvents True [0..2] 10 (map (\(a,b) -> CatWord a (unpack $ toLower b)) events')
------------------------------------------------------------------------
detectCat :: Ord a => Int -> Events a -> Text -> Maybe a
detectCat n es = head . map fst . (detectCat' n es) . unpack
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment