Commit d1a3103e authored by Alexandre Delanoë's avatar Alexandre Delanoë

[LEARN] model saving tools.

parent f169660f
......@@ -142,6 +142,7 @@ library:
- protolude
- pureMD5
- SHA
- random
- rake
- regex-compat
- resourcet
......
......@@ -32,8 +32,9 @@ module Gargantext.API.Node
, HyperdataDocumentV3(..)
) where
import Control.Lens (prism', set)
import Control.Lens (prism', set, view)
import Control.Monad ((>>))
import Control.Monad.Reader (ask)
import Control.Monad.IO.Class (liftIO)
import Data.Aeson (FromJSON, ToJSON)
import Data.Swagger
......@@ -59,6 +60,7 @@ import Gargantext.Database.Types.Node
import Gargantext.Database.Types.Node (CorpusId, ContactId)
import Gargantext.Database.Utils -- (Cmd, CmdM)
import Gargantext.Prelude
import Gargantext.API.Settings
import Gargantext.Text.Metrics
import Gargantext.Viz.Graph hiding (Node)-- (Graph(_graph_metadata),LegendField(..), GraphMetadata(..),readGraphFromJson,defaultGraph)
import Gargantext.Viz.Graph.Tools (cooc2graph)
......@@ -68,8 +70,8 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Map as Map
import qualified Gargantext.Database.Node.Update as U (update, Update(..))
--import qualified Gargantext.Text.List.Learn as Learn
--import qualified Data.Vector as Vec
import qualified Gargantext.Text.List.Learn as Learn
import qualified Data.Vector as Vec
type GargServer api =
forall env m.
......@@ -404,11 +406,12 @@ getMetrics cId maybeListId tabType maybeLimit = do
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent"
--{-
--let metrics' = Map.fromListWith (<>) $ map (\(Metric _ s1 s2 lt) -> (lt, [Vec.fromList [s1,s2]])) metrics
--_ <- liftIO $ Learn.grid metrics'
{-
let metrics' = Map.fromListWith (<>) $ map (\(Metric _ s1 s2 lt) -> (lt, [Vec.fromList [s1,s2]])) metrics
_ <- liftIO $ Learn.grid metrics'
en <- ask
printDebug "path" $ _fileFolder $ _env_settings en
--}
pure $ Metrics metrics
......@@ -80,6 +80,7 @@ data Settings = Settings
, _jwtSecret :: Jose.Jwk -- ^ key from the jose-jwt package
, _sendLoginEmails :: SendEmailType
, _scrapydUrl :: BaseUrl
, _fileFolder :: FilePath
}
makeLenses ''Settings
......@@ -107,6 +108,7 @@ devSettings = Settings
, _jwtSecret = parseJwk "MVg0YAPVSPiYQc/qIs/rV/X32EFR0zOJWfHFgMbszMw="
, _sendLoginEmails = LogEmailToConsole
, _scrapydUrl = fromMaybe (panic "Invalid scrapy URL") $ parseBaseUrl "http://localhost:6800"
, _fileFolder = "data"
}
......
......@@ -38,7 +38,6 @@ userMaster = "gargantua"
userArbitrary :: Text
userArbitrary = "user1"
nodeTypeId :: NodeType -> NodeTypeId
nodeTypeId n =
case n of
......@@ -53,6 +52,7 @@ nodeTypeId n =
---- Lists
NodeList -> 5
NodeListModel -> 10
---- Scores
-- NodeOccurrences -> 10
......
......@@ -81,7 +81,7 @@ import Gargantext.Prelude
import qualified Data.ByteString.Lazy.Char8 as DC (pack)
import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
import qualified Data.Text as DT (pack, unpack, concat, take)
import Gargantext.Prelude.Utils (hash)
-- TODO : the import of Document constructor below does not work
-- import Gargantext.Database.Types.Node (Document)
--import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..)
......@@ -212,13 +212,10 @@ instance ToRow InputData where
addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
addUniqIdsDoc doc = set hyperdataDocument_uniqIdBdd (Just hashBdd)
$ set hyperdataDocument_uniqId (Just hash) doc
$ set hyperdataDocument_uniqId (Just hashUni) doc
where
hash = uniqId $ DT.concat $ map ($ doc) hashParametersDoc
hashBdd = uniqId $ DT.concat $ map ($ doc) ([(\d -> maybe' (_hyperdataDocument_bdd d))] <> hashParametersDoc)
uniqId :: Text -> Text
uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
hashUni = hash $ DT.concat $ map ($ doc) hashParametersDoc
hashBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybe' (_hyperdataDocument_bdd d))] <> hashParametersDoc)
hashParametersDoc :: [(HyperdataDocument -> Text)]
......@@ -232,9 +229,9 @@ hashParametersDoc = [ \d -> maybe' (_hyperdataDocument_title d)
-- TODO factorize with above (use the function below for tests)
addUniqIdsContact :: HyperdataContact -> HyperdataContact
addUniqIdsContact hc = set (hc_uniqIdBdd) (Just hashBdd)
$ set (hc_uniqId) (Just hash) hc
$ set (hc_uniqId ) (Just hashUni) hc
where
hash = uniqId $ DT.concat $ map ($ hc) hashParametersContact
hashUni = uniqId $ DT.concat $ map ($ hc) hashParametersContact
hashBdd = uniqId $ DT.concat $ map ($ hc) ([\d -> maybe' (view hc_bdd d)] <> hashParametersContact)
uniqId :: Text -> Text
......
......@@ -95,6 +95,10 @@ instance FromField HyperdataList
where
fromField = fromField'
instance FromField HyperdataListModel
where
fromField = fromField'
instance FromField HyperdataGraph
where
fromField = fromField'
......@@ -131,6 +135,10 @@ instance QueryRunnerColumnDefault PGJsonb HyperdataList
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataListModel
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataGraph
where
queryRunnerColumnDefault = fieldQueryRunnerColumn
......@@ -331,6 +339,9 @@ getDocumentsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeDocument
getListsWithParentId :: NodeId -> Cmd err [Node HyperdataList]
getListsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeList)
getListsModelWithParentId :: NodeId -> Cmd err [Node HyperdataListModel]
getListsModelWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeListModel)
getCorporaWithParentId :: NodeId -> Cmd err [Node HyperdataCorpus]
getCorporaWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeCorpus)
......@@ -400,7 +411,6 @@ nodeAnnuaireW maybeName maybeAnnuaire pId = node NodeAnnuaire name annuaire (Jus
where
name = maybe "Annuaire" identity maybeName
annuaire = maybe defaultAnnuaire identity maybeAnnuaire
--------------------------
------------------------------------------------------------------------
arbitraryList :: HyperdataList
......@@ -412,6 +422,20 @@ nodeListW maybeName maybeList pId = node NodeList name list (Just pId)
name = maybe "Listes" identity maybeName
list = maybe arbitraryList identity maybeList
--------------------
arbitraryListModel :: HyperdataListModel
arbitraryListModel = HyperdataListModel (400,500) "data/models/test.model" (Just 0.83)
mkListModelNode :: HasNodeError err => ParentId -> UserId -> Cmd err [NodeId]
mkListModelNode p u = insertNodesR [nodeListModelW Nothing Nothing p u]
nodeListModelW :: Maybe Name -> Maybe HyperdataListModel -> ParentId -> UserId -> NodeWrite
nodeListModelW maybeName maybeListModel pId = node NodeListModel name list (Just pId)
where
name = maybe "List Model" identity maybeName
list = maybe arbitraryListModel identity maybeListModel
------------------------------------------------------------------------
arbitraryGraph :: HyperdataGraph
arbitraryGraph = HyperdataGraph (Just "Preferences")
......@@ -551,6 +575,7 @@ defaultList cId =
mkList :: HasNodeError err => ParentId -> UserId -> Cmd err [NodeId]
mkList p u = insertNodesR [nodeListW Nothing Nothing p u]
mkGraph :: ParentId -> UserId -> Cmd err [GraphId]
mkGraph p u = insertNodesR [nodeGraphW Nothing Nothing p u]
......
......@@ -336,6 +336,19 @@ instance Hyperdata HyperdataList
instance Arbitrary HyperdataList where
arbitrary = elements [HyperdataList (Just "from list A")]
----
data HyperdataListModel = HyperdataListModel { _hlm_params :: !(Int, Int)
, _hlm_path :: !Text
, _hlm_score :: !(Maybe Double)
} deriving (Show, Generic)
instance Hyperdata HyperdataListModel
instance Arbitrary HyperdataListModel where
arbitrary = elements [HyperdataListModel (100,100) "models/example.model" Nothing]
$(deriveJSON (unPrefix "_hlm_") ''HyperdataListModel)
$(makeLenses ''HyperdataListModel)
------------------------------------------------------------------------
data HyperdataScore = HyperdataScore { hyperdataScore_preferences :: !(Maybe Text)
} deriving (Show, Generic)
......@@ -418,7 +431,7 @@ data NodeType = NodeUser
| NodeGraph
| NodeDashboard | NodeChart
-- | Classification
| NodeList
| NodeList | NodeListModel
-- | Metrics
deriving (Show, Read, Eq, Generic, Bounded, Enum)
......
{-|
Module : Gargantext.Prelude.Utils
Description : Useful Tools near Prelude of the project
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Prelude.Utils
where
--import Gargantext.Config (dataPath)
import Data.Text (Text)
import GHC.IO (FilePath)
import Gargantext.Prelude
import System.Random (newStdGen)
import System.Directory (createDirectoryIfMissing)
import qualified Data.ByteString.Lazy.Char8 as Char
import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
import qualified Data.Text as Text
type FolderPath = FilePath
type FileName = FilePath
-- | TODO Env Monad
dataPath :: Text
dataPath = "data"
hash :: Text -> Text
hash = Text.pack
. SHA.showDigest
. SHA.sha256
. Char.pack
. Text.unpack
toPath :: Int -> Text -> (FolderPath,FileName)
toPath n x = (Text.unpack $ Text.intercalate "/" [x1,x2], Text.unpack xs)
where
(x1,x') = Text.splitAt n x
(x2,xs) = Text.splitAt n x'
class SaveFile a where
saveFile' :: FilePath -> a -> IO ()
class ReadFile a where
readFile' :: FilePath -> IO a
-- | Empreinte is a uniq sequence of Text to identify the Type
-- we want to save
type Empreinte = Text
saveFile :: SaveFile a => a -> IO FilePath
saveFile a = do
let n = 3
(fp,fn) <- (toPath n) . hash . Text.pack . show <$> newStdGen
let foldPath = (Text.unpack dataPath) <> "/" <> fp
let filePath = foldPath <> "/" <> fn
_ <- createDirectoryIfMissing True foldPath
_ <- saveFile' filePath a
pure filePath
readFile :: ReadFile a => FilePath -> IO a
readFile fp = readFile' ((Text.unpack dataPath) <> "/" <> fp)
......@@ -11,6 +11,8 @@ CSV parser for Gargantext corpus files.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
......@@ -19,9 +21,9 @@ module Gargantext.Text.List.Learn
import Data.Map (Map)
import Data.Maybe (maybe)
import GHC.IO (FilePath)
import Gargantext.Core.Types.Main (ListType(..), listTypeId, fromListTypeId)
import Gargantext.Prelude
import Gargantext.Prelude.Utils
import Gargantext.Text.Metrics.Count (occurrencesWith)
import qualified Data.IntMap as IntMap
import qualified Data.List as List
......@@ -56,12 +58,17 @@ predictList :: SVM.Model -> [Vec.Vector Double] -> IO [Maybe ListType]
predictList m vs = map (fromListTypeId . round) <$> predict m vs
------------------------------------------------------------------------
save :: SVM.Model -> FilePath -> IO ()
save = SVM.saveModel
data Model = ModelSVM { model :: SVM.Model }
load :: FilePath -> IO SVM.Model
load = SVM.loadModel
instance SaveFile Model
where
saveFile' p (ModelSVM m) = SVM.saveModel m p
instance ReadFile Model
where
readFile' fp = do
m <- SVM.loadModel fp
pure $ ModelSVM m
------------------------------------------------------------------------
-- | TODO
-- shuffle list
......@@ -74,11 +81,13 @@ grid m = do
-> Map ListType [Vec.Vector Double]
-> IO (Double, (Double,Double))
grid' x y ls = do
model <- trainList x y ls
model' <- trainList x y ls
fp <- saveFile (ModelSVM model')
printDebug "file" fp
let (res, toGuess) = List.unzip $ List.concat
$ map (\(k,vs) -> zip (repeat k) vs)
$ Map.toList ls
res' <- predictList model toGuess
res' <- predictList model' toGuess
pure (score'' $ score' $ List.zip res res', (x,y))
{-
......@@ -94,9 +103,9 @@ grid m = do
where
total = fromIntegral $ foldl (+) 0 $ Map.elems m''
r <- List.take 10 <$> List.reverse
<$> List.sortOn fst
<$> mapM (\(x,y) -> grid' x y m) [(x,y) | x <- [500..600], y <- [500..600]]
r <- List.take 10 . List.reverse
. (List.sortOn fst)
<$> mapM (\(x,y) -> grid' x y m) [(x,y) | x <- [500..510], y <- [500..510]]
printDebug "GRID SEARCH" r
-- save best result
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment