Commit 311d7f20 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[LEARN] algo first implem.

parent ed19ff44
Pipeline #309 failed with stage
...@@ -111,6 +111,7 @@ library: ...@@ -111,6 +111,7 @@ library:
- http-types - http-types
- hsparql - hsparql
- hstatistics - hstatistics
- HSvm
- hxt - hxt
- hlcm - hlcm
- ini - ini
......
...@@ -68,6 +68,9 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) ...@@ -68,6 +68,9 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Gargantext.Database.Node.Update as U (update, Update(..)) import qualified Gargantext.Database.Node.Update as U (update, Update(..))
import qualified Gargantext.Text.List.Learn as Learn
import qualified Data.Vector as Vec
type GargServer api = type GargServer api =
forall env m. forall env m.
(CmdM env ServantErr m, HasRepo env) (CmdM env ServantErr m, HasRepo env)
...@@ -400,6 +403,12 @@ getMetrics cId maybeListId tabType maybeLimit = do ...@@ -400,6 +403,12 @@ getMetrics cId maybeListId tabType maybeLimit = do
metrics = map (\(Scored t s1 s2) -> Metric t s1 s2 (listType t ngs')) scores metrics = map (\(Scored t s1 s2) -> Metric t s1 s2 (listType t ngs')) scores
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent" errorMsg = "API.Node.metrics: key absent"
--{-
let metrics' = Map.fromListWith (<>) $ map (\(Metric _ s1 s2 lt) -> (lt, [Vec.fromList [s1,s2]])) metrics
_ <- liftIO $ Learn.grid metrics'
--}
pure $ Metrics metrics pure $ Metrics metrics
......
...@@ -59,8 +59,8 @@ getMetrics cId maybeListId tabType maybeLimit = do ...@@ -59,8 +59,8 @@ getMetrics cId maybeListId tabType maybeLimit = do
getLocalMetrics :: (FlowCmdM env ServantErr m) getLocalMetrics :: (FlowCmdM env ServantErr m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m ( Map Text (ListType, Maybe Text) -> m ( Map Text (ListType, Maybe Text)
, Map Text (Maybe RootTerm) , Map Text (Maybe RootTerm)
, Map Text (Vec.Vector Double) , Map Text (Vec.Vector Double)
) )
getLocalMetrics cId maybeListId tabType maybeLimit = do getLocalMetrics cId maybeListId tabType maybeLimit = do
......
{-|
Module : Gargantext.Text.List.Learn
Description : Learn to make lists
Copyright : (c) CNRS, 2018-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
CSV parser for Gargantext corpus files.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.List.Learn
where
import Data.Maybe (maybe)
import GHC.IO (FilePath)
import qualified Data.SVM as SVM
import Gargantext.Prelude
import Data.Map (Map)
import Gargantext.Core.Types.Main (ListType(..), listTypeId, fromListTypeId)
import Gargantext.Text.Metrics.Count (occurrencesWith)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.IntMap as IntMap
import qualified Data.Vector as Vec
------------------------------------------------------------------------
train :: Double -> Double -> SVM.Problem -> IO SVM.Model
train x y = (SVM.train (SVM.CSvc x) (SVM.RBF y))
predict :: SVM.Model -> [Vec.Vector Double] -> IO [Double]
predict m vs = mapM (predict' m) vs
where
predict' m' vs' = SVM.predict m' (IntMap.fromList $ (zip [1..]) $ Vec.toList vs')
------------------------------------------------------------------------
trainList :: Double -> Double -> Map ListType [Vec.Vector Double] -> IO SVM.Model
trainList x y = (train x y) . trainList'
where
trainList' :: Map ListType [Vec.Vector Double] -> SVM.Problem
trainList' = mapVec2problem . (Map.mapKeys (fromIntegral . listTypeId))
mapVec2problem :: Map Double [Vec.Vector Double] -> SVM.Problem
mapVec2problem = List.concat . (map (\(a,as) -> zip (repeat a) as)) . Map.toList . (Map.map vecs2maps)
vecs2maps :: [Vec.Vector Double] -> [IntMap.IntMap Double]
vecs2maps = map (IntMap.fromList . (zip [1..]) . Vec.toList)
predictList :: SVM.Model -> [Vec.Vector Double] -> IO [Maybe ListType]
predictList m vs = map (fromListTypeId . round) <$> predict m vs
------------------------------------------------------------------------
save :: SVM.Model -> FilePath -> IO ()
save = SVM.saveModel
load :: FilePath -> IO SVM.Model
load = SVM.loadModel
------------------------------------------------------------------------
grid :: Map ListType [Vec.Vector Double] -> IO () -- Map (ListType, Maybe ListType) Int)
grid m = do
let
grid' :: Double -> Double
-> Map ListType [Vec.Vector Double]
-> IO (Double, (Double,Double))
grid' x y ls = do
model <- trainList x y ls
let (res, toGuess) = List.unzip $ List.concat
$ map (\(k,vs) -> zip (repeat k) vs)
$ Map.toList ls
res' <- predictList model toGuess
pure (score'' $ score' $ List.zip res res', (x,y))
{-
score :: [(ListType, Maybe ListType)] -> Map (ListType, Maybe ListType) Int
score = occurrencesWith identity
-}
score' :: [(ListType, Maybe ListType)] -> Map (Maybe Bool) Int
score' = occurrencesWith (\(a,b) -> (==) <$> Just a <*> b)
score'' :: Map (Maybe Bool) Int -> Double
score'' m'' = maybe 0 (\t -> (fromIntegral t)/total) (Map.lookup (Just True) m'')
where
total = fromIntegral $ foldl (+) 0 $ Map.elems m''
r <- List.take 10 <$> List.reverse <$> List.sortOn fst <$> mapM (\(x,y) -> grid' x y m) [(x,y) | x <- [500..600], y <- [500..600]]
printDebug "GRID SEARCH" r
-- save best result
...@@ -21,7 +21,8 @@ extra-deps: ...@@ -21,7 +21,8 @@ extra-deps:
commit: ba5347e7d8a13ce5275af8470c15b2305fbb23af commit: ba5347e7d8a13ce5275af8470c15b2305fbb23af
- git: https://github.com/delanoe/hstatistics.git - git: https://github.com/delanoe/hstatistics.git
commit: 90eef7604bb230644c2246eccd094d7bfefcb135 commit: 90eef7604bb230644c2246eccd094d7bfefcb135
- git: https://github.com/paulrzcz/HSvm.git
commit: 3fe28b683aba5ddf05e3b5f8eced0bd05c5a29f9
#- opaleye-0.6.7002.0 #- opaleye-0.6.7002.0
- KMP-0.1.0.2 - KMP-0.1.0.2
- accelerate-1.2.0.0 - accelerate-1.2.0.0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment