Commit d1a3103e authored by Alexandre Delanoë

[LEARN] model saving tools.

parent f169660f
@@ -142,6 +142,7 @@ library:
   - protolude
   - pureMD5
   - SHA
+  - random
   - rake
   - regex-compat
   - resourcet
...
@@ -32,8 +32,9 @@ module Gargantext.API.Node
   , HyperdataDocumentV3(..)
   ) where
-import Control.Lens (prism', set)
+import Control.Lens (prism', set, view)
 import Control.Monad ((>>))
+import Control.Monad.Reader (ask)
 import Control.Monad.IO.Class (liftIO)
 import Data.Aeson (FromJSON, ToJSON)
 import Data.Swagger
@@ -59,6 +60,7 @@ import Gargantext.Database.Types.Node
 import Gargantext.Database.Types.Node (CorpusId, ContactId)
 import Gargantext.Database.Utils -- (Cmd, CmdM)
 import Gargantext.Prelude
+import Gargantext.API.Settings
 import Gargantext.Text.Metrics
 import Gargantext.Viz.Graph hiding (Node)-- (Graph(_graph_metadata),LegendField(..), GraphMetadata(..),readGraphFromJson,defaultGraph)
 import Gargantext.Viz.Graph.Tools (cooc2graph)
@@ -68,8 +70,8 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
 import qualified Data.Map as Map
 import qualified Gargantext.Database.Node.Update as U (update, Update(..))
---import qualified Gargantext.Text.List.Learn as Learn
---import qualified Data.Vector as Vec
+import qualified Gargantext.Text.List.Learn as Learn
+import qualified Data.Vector as Vec

 type GargServer api =
   forall env m.
@@ -404,11 +406,12 @@ getMetrics cId maybeListId tabType maybeLimit = do
     listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
     errorMsg = "API.Node.metrics: key absent"
---{-
---let metrics' = Map.fromListWith (<>) $ map (\(Metric _ s1 s2 lt) -> (lt, [Vec.fromList [s1,s2]])) metrics
---_ <- liftIO $ Learn.grid metrics'
+{-
+let metrics' = Map.fromListWith (<>) $ map (\(Metric _ s1 s2 lt) -> (lt, [Vec.fromList [s1,s2]])) metrics
+_ <- liftIO $ Learn.grid metrics'
+en <- ask
+printDebug "path" $ _fileFolder $ _env_settings en
 --}
   pure $ Metrics metrics
@@ -80,6 +80,7 @@ data Settings = Settings
     , _jwtSecret :: Jose.Jwk -- ^ key from the jose-jwt package
     , _sendLoginEmails :: SendEmailType
     , _scrapydUrl :: BaseUrl
+    , _fileFolder :: FilePath
     }
 makeLenses ''Settings
@@ -107,6 +108,7 @@ devSettings = Settings
     , _jwtSecret = parseJwk "MVg0YAPVSPiYQc/qIs/rV/X32EFR0zOJWfHFgMbszMw="
     , _sendLoginEmails = LogEmailToConsole
     , _scrapydUrl = fromMaybe (panic "Invalid scrapy URL") $ parseBaseUrl "http://localhost:6800"
+    , _fileFolder = "data"
     }
...
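Side note, not part of the commit: a minimal, self-contained sketch of how a handler could read the new `_fileFolder` setting through a Reader-style environment, mirroring the (still commented-out) `getMetrics` code above. The `Env`/`Settings` records below are stand-ins for the project's real types, not its actual definitions.

```haskell
-- Minimal sketch only: stand-in Env/Settings records, not Gargantext's real ones.
import Control.Monad.Reader (Reader, ask, runReader)

data Settings = Settings { _fileFolder :: FilePath }    -- stand-in for API.Settings
data Env      = Env      { _env_settings :: Settings }  -- stand-in for the server env

-- Read the folder where models are written, as getMetrics would do via `ask`.
modelFolder :: Reader Env FilePath
modelFolder = _fileFolder . _env_settings <$> ask

main :: IO ()
main = putStrLn $ runReader modelFolder (Env (Settings "data"))
```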
@@ -38,7 +38,6 @@ userMaster = "gargantua"
 userArbitrary :: Text
 userArbitrary = "user1"
-
 nodeTypeId :: NodeType -> NodeTypeId
 nodeTypeId n =
   case n of
@@ -52,7 +51,8 @@ nodeTypeId n =
     --NodeSwap -> 19
     ---- Lists
     NodeList -> 5
+    NodeListModel -> 10
     ---- Scores
     -- NodeOccurrences -> 10
...
@@ -81,7 +81,7 @@ import Gargantext.Prelude
 import qualified Data.ByteString.Lazy.Char8 as DC (pack)
 import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
 import qualified Data.Text as DT (pack, unpack, concat, take)
+import Gargantext.Prelude.Utils (hash)
 -- TODO : the import of Document constructor below does not work
 -- import Gargantext.Database.Types.Node (Document)
 --import Gargantext.Database.Types.Node (docExample, hyperdataDocument, HyperdataDocument(..
@@ -212,13 +212,10 @@ instance ToRow InputData where
 addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
 addUniqIdsDoc doc = set hyperdataDocument_uniqIdBdd (Just hashBdd)
-                  $ set hyperdataDocument_uniqId (Just hash) doc
+                  $ set hyperdataDocument_uniqId (Just hashUni) doc
   where
-    hash = uniqId $ DT.concat $ map ($ doc) hashParametersDoc
-    hashBdd = uniqId $ DT.concat $ map ($ doc) ([(\d -> maybe' (_hyperdataDocument_bdd d))] <> hashParametersDoc)
-    uniqId :: Text -> Text
-    uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
+    hashUni = hash $ DT.concat $ map ($ doc) hashParametersDoc
+    hashBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybe' (_hyperdataDocument_bdd d))] <> hashParametersDoc)
 hashParametersDoc :: [(HyperdataDocument -> Text)]
@@ -232,9 +229,9 @@ hashParametersDoc = [ \d -> maybe' (_hyperdataDocument_title d)
 -- TODO factorize with above (use the function below for tests)
 addUniqIdsContact :: HyperdataContact -> HyperdataContact
 addUniqIdsContact hc = set (hc_uniqIdBdd) (Just hashBdd)
-                     $ set (hc_uniqId) (Just hash) hc
+                     $ set (hc_uniqId ) (Just hashUni) hc
   where
-    hash = uniqId $ DT.concat $ map ($ hc) hashParametersContact
+    hashUni = uniqId $ DT.concat $ map ($ hc) hashParametersContact
     hashBdd = uniqId $ DT.concat $ map ($ hc) ([\d -> maybe' (view hc_bdd d)] <> hashParametersContact)
     uniqId :: Text -> Text
...
@@ -95,6 +95,10 @@ instance FromField HyperdataList
   where
     fromField = fromField'
+instance FromField HyperdataListModel
+  where
+    fromField = fromField'
+
 instance FromField HyperdataGraph
   where
     fromField = fromField'
@@ -131,6 +135,10 @@ instance QueryRunnerColumnDefault PGJsonb HyperdataList
   where
     queryRunnerColumnDefault = fieldQueryRunnerColumn
+instance QueryRunnerColumnDefault PGJsonb HyperdataListModel
+  where
+    queryRunnerColumnDefault = fieldQueryRunnerColumn
+
 instance QueryRunnerColumnDefault PGJsonb HyperdataGraph
   where
     queryRunnerColumnDefault = fieldQueryRunnerColumn
@@ -331,6 +339,9 @@ getDocumentsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeDocument
 getListsWithParentId :: NodeId -> Cmd err [Node HyperdataList]
 getListsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeList)
+getListsModelWithParentId :: NodeId -> Cmd err [Node HyperdataListModel]
+getListsModelWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeListModel)
+
 getCorporaWithParentId :: NodeId -> Cmd err [Node HyperdataCorpus]
 getCorporaWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeCorpus)
@@ -400,7 +411,6 @@ nodeAnnuaireW maybeName maybeAnnuaire pId = node NodeAnnuaire name annuaire (Jus
   where
     name = maybe "Annuaire" identity maybeName
     annuaire = maybe defaultAnnuaire identity maybeAnnuaire
---------------------------
 ------------------------------------------------------------------------
 arbitraryList :: HyperdataList
@@ -412,6 +422,20 @@ nodeListW maybeName maybeList pId = node NodeList name list (Just pId)
     name = maybe "Listes" identity maybeName
     list = maybe arbitraryList identity maybeList
+--------------------
+arbitraryListModel :: HyperdataListModel
+arbitraryListModel = HyperdataListModel (400,500) "data/models/test.model" (Just 0.83)
+
+mkListModelNode :: HasNodeError err => ParentId -> UserId -> Cmd err [NodeId]
+mkListModelNode p u = insertNodesR [nodeListModelW Nothing Nothing p u]
+
+nodeListModelW :: Maybe Name -> Maybe HyperdataListModel -> ParentId -> UserId -> NodeWrite
+nodeListModelW maybeName maybeListModel pId = node NodeListModel name list (Just pId)
+  where
+    name = maybe "List Model" identity maybeName
+    list = maybe arbitraryListModel identity maybeListModel
+
 ------------------------------------------------------------------------
 arbitraryGraph :: HyperdataGraph
 arbitraryGraph = HyperdataGraph (Just "Preferences")
@@ -551,6 +575,7 @@ defaultList cId =
 mkList :: HasNodeError err => ParentId -> UserId -> Cmd err [NodeId]
 mkList p u = insertNodesR [nodeListW Nothing Nothing p u]
+
 mkGraph :: ParentId -> UserId -> Cmd err [GraphId]
 mkGraph p u = insertNodesR [nodeGraphW Nothing Nothing p u]
...
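Side note, not part of the commit: a hedged usage fragment, inside the project's `Cmd` monad, of the node helpers introduced above. It only uses names from this diff, assumes the surrounding node module's imports are in scope, and is not standalone code.

```haskell
-- Sketch: create a NodeListModel under a parent node, then read it back.
mkAndFetchListModel :: HasNodeError err => ParentId -> UserId -> Cmd err [Node HyperdataListModel]
mkAndFetchListModel p u = do
  _ids <- mkListModelNode p u   -- inserts a node with arbitraryListModel as default hyperdata
  getListsModelWithParentId p   -- queries the NodeListModel children of the parent
```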
@@ -336,6 +336,19 @@ instance Hyperdata HyperdataList
 instance Arbitrary HyperdataList where
   arbitrary = elements [HyperdataList (Just "from list A")]
+----
+data HyperdataListModel = HyperdataListModel { _hlm_params :: !(Int, Int)
+                                             , _hlm_path   :: !Text
+                                             , _hlm_score  :: !(Maybe Double)
+                                             } deriving (Show, Generic)
+
+instance Hyperdata HyperdataListModel
+instance Arbitrary HyperdataListModel where
+  arbitrary = elements [HyperdataListModel (100,100) "models/example.model" Nothing]
+
+$(deriveJSON (unPrefix "_hlm_") ''HyperdataListModel)
+$(makeLenses ''HyperdataListModel)
+
 ------------------------------------------------------------------------
 data HyperdataScore = HyperdataScore { hyperdataScore_preferences :: !(Maybe Text)
                                      } deriving (Show, Generic)
@@ -418,7 +431,7 @@ data NodeType = NodeUser
               | NodeGraph
               | NodeDashboard | NodeChart
               -- | Classification
-              | NodeList
+              | NodeList | NodeListModel
               -- | Metrics
               deriving (Show, Read, Eq, Generic, Bounded, Enum)
...
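Side note, not part of the commit: a standalone aeson sketch of the JSON shape one can expect from `deriveJSON (unPrefix "_hlm_")`, i.e. field names with the `_hlm_` prefix stripped. The exact option set of Gargantext's `unPrefix` (for instance whether `Nothing` fields are omitted) is assumed here, not verified.

```haskell
{-# LANGUAGE DeriveGeneric     #-}
{-# LANGUAGE OverloadedStrings #-}
-- Approximation of deriveJSON (unPrefix "_hlm_") using plain generic aeson options.
import Data.Aeson (FromJSON(..), ToJSON(..), defaultOptions, encode,
                   genericParseJSON, genericToJSON)
import Data.Aeson.Types (Options(..))
import Data.List (stripPrefix)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import GHC.Generics (Generic)

data HyperdataListModel = HyperdataListModel
  { _hlm_params :: (Int, Int)
  , _hlm_path   :: Text
  , _hlm_score  :: Maybe Double
  } deriving (Show, Generic)

-- Strip the record prefix so JSON keys become "params", "path", "score".
opts :: Options
opts = defaultOptions { fieldLabelModifier = \f -> fromMaybe f (stripPrefix "_hlm_" f) }

instance ToJSON   HyperdataListModel where toJSON    = genericToJSON   opts
instance FromJSON HyperdataListModel where parseJSON = genericParseJSON opts

main :: IO ()
main = print $ encode $ HyperdataListModel (400,500) "data/models/test.model" (Just 0.83)
-- expected shape: {"params":[400,500],"path":"data/models/test.model","score":0.83}
```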
{-|
Module      : Gargantext.Prelude.Utils
Description : Useful Tools near Prelude of the project
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}

{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}

module Gargantext.Prelude.Utils
  where

--import Gargantext.Config (dataPath)
import Data.Text (Text)
import GHC.IO (FilePath)
import Gargantext.Prelude
import System.Random (newStdGen)
import System.Directory (createDirectoryIfMissing)
import qualified Data.ByteString.Lazy.Char8 as Char
import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
import qualified Data.Text as Text

type FolderPath = FilePath
type FileName = FilePath

-- | TODO Env Monad
dataPath :: Text
dataPath = "data"

hash :: Text -> Text
hash = Text.pack
     . SHA.showDigest
     . SHA.sha256
     . Char.pack
     . Text.unpack

toPath :: Int -> Text -> (FolderPath, FileName)
toPath n x = (Text.unpack $ Text.intercalate "/" [x1,x2], Text.unpack xs)
  where
    (x1,x') = Text.splitAt n x
    (x2,xs) = Text.splitAt n x'

class SaveFile a where
  saveFile' :: FilePath -> a -> IO ()

class ReadFile a where
  readFile' :: FilePath -> IO a

-- | Empreinte is a uniq sequence of Text to identify the Type
-- we want to save
type Empreinte = Text

saveFile :: SaveFile a => a -> IO FilePath
saveFile a = do
  let n = 3
  (fp,fn)  <- (toPath n) . hash . Text.pack . show <$> newStdGen
  let foldPath = (Text.unpack dataPath) <> "/" <> fp
  let filePath = foldPath <> "/" <> fn
  _ <- createDirectoryIfMissing True foldPath
  _ <- saveFile' filePath a
  pure filePath

readFile :: ReadFile a => FilePath -> IO a
readFile fp = readFile' ((Text.unpack dataPath) <> "/" <> fp)
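Side note, not part of the commit: a hedged usage sketch of the module above. The `Text` instances are illustrative orphans, not shipped by the commit. Note that `saveFile` already returns a path prefixed with `dataPath`, while `readFile` prepends `dataPath` itself, so the sketch reads back through `readFile'`.

```haskell
{-# LANGUAGE OverloadedStrings #-}
-- Illustrative only: orphan SaveFile/ReadFile instances for Text.
import Data.Text (Text)
import qualified Data.Text.IO as TextIO
import Gargantext.Prelude.Utils

instance SaveFile Text where
  saveFile' p t = TextIO.writeFile p t

instance ReadFile Text where
  readFile' p = TextIO.readFile p

demo :: IO ()
demo = do
  -- hash yields a hex SHA-256 digest of the Text; toPath 3 shards it into a
  -- two-level folder plus a file name, e.g. ("2c2/6b4", "6b68ffc…") for "foo".
  print $ toPath 3 (hash "foo")
  -- saveFile picks a random hash-based path under "data/" and returns it.
  fp <- saveFile ("some model bytes" :: Text)
  -- readFile would prepend "data/" again, so use readFile' with the returned path.
  txt <- readFile' fp :: IO Text
  print txt
```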
@@ -11,6 +11,8 @@ CSV parser for Gargantext corpus files.
 -}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}
@@ -19,9 +21,9 @@ module Gargantext.Text.List.Learn
 import Data.Map (Map)
 import Data.Maybe (maybe)
-import GHC.IO (FilePath)
 import Gargantext.Core.Types.Main (ListType(..), listTypeId, fromListTypeId)
 import Gargantext.Prelude
+import Gargantext.Prelude.Utils
 import Gargantext.Text.Metrics.Count (occurrencesWith)
 import qualified Data.IntMap as IntMap
 import qualified Data.List as List
@@ -56,12 +58,17 @@ predictList :: SVM.Model -> [Vec.Vector Double] -> IO [Maybe ListType]
 predictList m vs = map (fromListTypeId . round) <$> predict m vs
 ------------------------------------------------------------------------
-save :: SVM.Model -> FilePath -> IO ()
-save = SVM.saveModel
-load :: FilePath -> IO SVM.Model
-load = SVM.loadModel
+data Model = ModelSVM { model :: SVM.Model }
+
+instance SaveFile Model
+  where
+    saveFile' p (ModelSVM m) = SVM.saveModel m p
+
+instance ReadFile Model
+  where
+    readFile' fp = do
+      m <- SVM.loadModel fp
+      pure $ ModelSVM m
 ------------------------------------------------------------------------
 -- | TODO
 -- shuffle list
@@ -74,11 +81,13 @@ grid m = do
              -> Map ListType [Vec.Vector Double]
              -> IO (Double, (Double,Double))
     grid' x y ls = do
-      model <- trainList x y ls
+      model' <- trainList x y ls
+      fp <- saveFile (ModelSVM model')
+      printDebug "file" fp
      let (res, toGuess) = List.unzip $ List.concat
                          $ map (\(k,vs) -> zip (repeat k) vs)
                          $ Map.toList ls
-      res' <- predictList model toGuess
+      res' <- predictList model' toGuess
      pure (score'' $ score' $ List.zip res res', (x,y))
 {-
@@ -94,9 +103,9 @@ grid m = do
         where
          total = fromIntegral $ foldl (+) 0 $ Map.elems m''
-  r <- List.take 10 <$> List.reverse
-       <$> List.sortOn fst
-       <$> mapM (\(x,y) -> grid' x y m) [(x,y) | x <- [500..600], y <- [500..600]]
+  r <- List.take 10 . List.reverse
+       . (List.sortOn fst)
+       <$> mapM (\(x,y) -> grid' x y m) [(x,y) | x <- [500..510], y <- [500..510]]
  printDebug "GRID SEARCH" r
 -- save best result
...
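Side note, not part of the commit: a hedged fragment showing how a model written by `grid'` (at the path printed via `printDebug "file" fp`) could be reloaded and reused. It assumes `Model(..)` and `predictList` are exported by the module, and uses `readFile'` directly since the saved path already contains the `data/` folder.

```haskell
import qualified Data.Vector as Vec
import Gargantext.Core.Types.Main (ListType)
import Gargantext.Prelude.Utils (ReadFile(..))
import Gargantext.Text.List.Learn (Model(..), predictList)

-- Reload a model saved during the grid search and predict with it again.
reuseModel :: FilePath -> [Vec.Vector Double] -> IO [Maybe ListType]
reuseModel fp vs = do
  ModelSVM m <- readFile' fp
  predictList m vs
```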