Commit f7791341 authored by Mael NICOLAS's avatar Mael NICOLAS

Merge branch 'master' into lang-parser

parents 8e99f0b6 0dfe162f
...@@ -21,5 +21,6 @@ purescript-gargantext ...@@ -21,5 +21,6 @@ purescript-gargantext
doc doc
deps deps
_darcs _darcs
*.pdf
# Runtime # Runtime
module Paths_hastext (
version,
getBinDir, getLibDir, getDataDir, getLibexecDir,
getDataFileName, getSysconfDir
) where
import qualified Control.Exception as Exception
import Data.Version (Version(..))
import System.Environment (getEnv)
import Prelude
catchIO :: IO a -> (Exception.IOException -> IO a) -> IO a
catchIO = Exception.catch
version :: Version
version = Version [0,1,0,0] []
bindir, libdir, datadir, libexecdir, sysconfdir :: FilePath
bindir = "/home/alexandre/.cabal/bin"
libdir = "/home/alexandre/.cabal/lib/x86_64-linux-ghc-7.8.3/hastext-0.1.0.0"
datadir = "/home/alexandre/.cabal/share/x86_64-linux-ghc-7.8.3/hastext-0.1.0.0"
libexecdir = "/home/alexandre/.cabal/libexec"
sysconfdir = "/home/alexandre/.cabal/etc"
getBinDir, getLibDir, getDataDir, getLibexecDir, getSysconfDir :: IO FilePath
getBinDir = catchIO (getEnv "hastext_bindir") (\_ -> return bindir)
getLibDir = catchIO (getEnv "hastext_libdir") (\_ -> return libdir)
getDataDir = catchIO (getEnv "hastext_datadir") (\_ -> return datadir)
getLibexecDir = catchIO (getEnv "hastext_libexecdir") (\_ -> return libexecdir)
getSysconfDir = catchIO (getEnv "hastext_sysconfdir") (\_ -> return sysconfdir)
getDataFileName :: FilePath -> IO FilePath
getDataFileName name = do
dir <- getDataDir
return (dir ++ "/" ++ name)
/* DO NOT EDIT: This file is automatically generated by Cabal */
/* package base-4.7.0.1 */
#define VERSION_base "4.7.0.1"
#define MIN_VERSION_base(major1,major2,minor) (\
(major1) < 4 || \
(major1) == 4 && (major2) < 7 || \
(major1) == 4 && (major2) == 7 && (minor) <= 0)
/* tool alex-3.1.3 */
#define TOOL_VERSION_alex "3.1.3"
#define MIN_TOOL_VERSION_alex(major1,major2,minor) (\
(major1) < 3 || \
(major1) == 3 && (major2) < 1 || \
(major1) == 3 && (major2) == 1 && (minor) <= 3)
/* tool c2hs-0.17.2 */
#define TOOL_VERSION_c2hs "0.17.2"
#define MIN_TOOL_VERSION_c2hs(major1,major2,minor) (\
(major1) < 0 || \
(major1) == 0 && (major2) < 17 || \
(major1) == 0 && (major2) == 17 && (minor) <= 2)
/* tool cpphs-1.19.3 */
#define TOOL_VERSION_cpphs "1.19.3"
#define MIN_TOOL_VERSION_cpphs(major1,major2,minor) (\
(major1) < 1 || \
(major1) == 1 && (major2) < 19 || \
(major1) == 1 && (major2) == 19 && (minor) <= 3)
/* tool gcc-4.9.2 */
#define TOOL_VERSION_gcc "4.9.2"
#define MIN_TOOL_VERSION_gcc(major1,major2,minor) (\
(major1) < 4 || \
(major1) == 4 && (major2) < 9 || \
(major1) == 4 && (major2) == 9 && (minor) <= 2)
/* tool ghc-7.8.3 */
#define TOOL_VERSION_ghc "7.8.3"
#define MIN_TOOL_VERSION_ghc(major1,major2,minor) (\
(major1) < 7 || \
(major1) == 7 && (major2) < 8 || \
(major1) == 7 && (major2) == 8 && (minor) <= 3)
/* tool ghc-pkg-7.8.3 */
#define TOOL_VERSION_ghc_pkg "7.8.3"
#define MIN_TOOL_VERSION_ghc_pkg(major1,major2,minor) (\
(major1) < 7 || \
(major1) == 7 && (major2) < 8 || \
(major1) == 7 && (major2) == 8 && (minor) <= 3)
/* tool haddock-2.15.0.2 */
#define TOOL_VERSION_haddock "2.15.0.2"
#define MIN_TOOL_VERSION_haddock(major1,major2,minor) (\
(major1) < 2 || \
(major1) == 2 && (major2) < 15 || \
(major1) == 2 && (major2) == 15 && (minor) <= 0)
/* tool happy-1.19.4 */
#define TOOL_VERSION_happy "1.19.4"
#define MIN_TOOL_VERSION_happy(major1,major2,minor) (\
(major1) < 1 || \
(major1) == 1 && (major2) < 19 || \
(major1) == 1 && (major2) == 19 && (minor) <= 4)
/* tool hpc-0.67 */
#define TOOL_VERSION_hpc "0.67"
#define MIN_TOOL_VERSION_hpc(major1,major2,minor) (\
(major1) < 0 || \
(major1) == 0 && (major2) < 67 || \
(major1) == 0 && (major2) == 67 && (minor) <= 0)
/* tool hsc2hs-0.67 */
#define TOOL_VERSION_hsc2hs "0.67"
#define MIN_TOOL_VERSION_hsc2hs(major1,major2,minor) (\
(major1) < 0 || \
(major1) == 0 && (major2) < 67 || \
(major1) == 0 && (major2) == 67 && (minor) <= 0)
/* tool pkg-config-0.28 */
#define TOOL_VERSION_pkg_config "0.28"
#define MIN_TOOL_VERSION_pkg_config(major1,major2,minor) (\
(major1) < 0 || \
(major1) == 0 && (major2) < 28 || \
(major1) == 0 && (major2) == 28 && (minor) <= 0)
/* tool strip-2.25 */
#define TOOL_VERSION_strip "2.25"
#define MIN_TOOL_VERSION_strip(major1,major2,minor) (\
(major1) < 2 || \
(major1) == 2 && (major2) < 25 || \
(major1) == 2 && (major2) == 25 && (minor) <= 0)
[InstalledPackageInfo {installedPackageId = InstalledPackageId "hastext-0.1.0.0-inplace", sourcePackageId = PackageIdentifier {pkgName = PackageName "hastext", pkgVersion = Version {versionBranch = [0,1,0,0], versionTags = []}}, license = BSD3, copyright = "Copyright: (c) 2016 Alexandre Delano\235", maintainer = "alexandre+dev@delanoe.org", author = "Alexandre Delano\235", stability = "", homepage = "http://github.com/adelanoe/hastext#readme", pkgUrl = "", synopsis = "Text mining project", description = "Please see README.md", category = "Data", exposed = True, exposedModules = ["Hastext","Hastext.Db"], hiddenModules = [], trusted = False, importDirs = ["/home/alexandre/projets/hastext/dist/build"], libraryDirs = ["/home/alexandre/projets/hastext/dist/build"], hsLibraries = ["HShastext-0.1.0.0"], extraLibraries = [], extraGHCiLibraries = [], includeDirs = [], includes = [], depends = [InstalledPackageId "base-4.7.0.1-1a55ebc8256b39ccbff004d48b3eb834"], hugsOptions = [], ccOptions = [], ldOptions = [], frameworkDirs = [], frameworks = [], haddockInterfaces = ["/home/alexandre/projets/hastext/dist/doc/html/hastext/hastext.haddock"], haddockHTMLs = ["/home/alexandre/projets/hastext/dist/doc/html/hastext"]}
]
\ No newline at end of file
import Distribution.Simple; main = defaultMain
Version {versionBranch = [1,22,6,0], versionTags = []}
...@@ -40,6 +40,8 @@ library: ...@@ -40,6 +40,8 @@ library:
- Gargantext.Database.Node.Document.Import - Gargantext.Database.Node.Document.Import
- Gargantext.Database.Types.Node - Gargantext.Database.Types.Node
- Gargantext.Database.User - Gargantext.Database.User
- Gargantext.Database.Cooc
- Gargantext.Database.Tree
- Gargantext.Prelude - Gargantext.Prelude
- Gargantext.Text - Gargantext.Text
- Gargantext.Text.Context - Gargantext.Text.Context
...@@ -54,6 +56,7 @@ library: ...@@ -54,6 +56,7 @@ library:
- Gargantext.Text.Parsers.WOS - Gargantext.Text.Parsers.WOS
- Gargantext.Text.Search - Gargantext.Text.Search
- Gargantext.Text.Terms - Gargantext.Text.Terms
- Gargantext.Text.Terms.Stop
- Gargantext.Text.Terms.Mono - Gargantext.Text.Terms.Mono
- Gargantext.Text.Terms.Multi.Lang.En - Gargantext.Text.Terms.Multi.Lang.En
- Gargantext.Text.Terms.Multi.Lang.Fr - Gargantext.Text.Terms.Multi.Lang.Fr
...@@ -99,9 +102,10 @@ library: ...@@ -99,9 +102,10 @@ library:
- hlcm - hlcm
- ini - ini
- jose-jwt - jose-jwt
- kmeans-vector # - kmeans-vector
- KMP - KMP
- lens - lens
- located-base
- logging-effect - logging-effect
- matrix - matrix
- monad-logger - monad-logger
...@@ -113,7 +117,8 @@ library: ...@@ -113,7 +117,8 @@ library:
- path - path
- path-io - path-io
- postgresql-simple - postgresql-simple
- pretty - pretty-simple
- probability
- product-profunctors - product-profunctors
- profunctors - profunctors
- protolude - protolude
...@@ -169,6 +174,7 @@ executables: ...@@ -169,6 +174,7 @@ executables:
- -with-rtsopts=-N - -with-rtsopts=-N
- -O2 - -O2
- -Wmissing-signatures - -Wmissing-signatures
- -Wcompat
dependencies: dependencies:
- base - base
- containers - containers
......
...@@ -189,7 +189,15 @@ makeDevApp env = do ...@@ -189,7 +189,15 @@ makeDevApp env = do
type SwaggerAPI = SwaggerSchemaUI "swagger-ui" "swagger.json" type SwaggerAPI = SwaggerSchemaUI "swagger-ui" "swagger.json"
-- | API for serving main operational routes of @gargantext.org@ -- | API for serving main operational routes of @gargantext.org@
type GargAPI =
type GargAPI = "api" :> Summary "API " :> GargAPIVersion
-- | TODO :<|> Summary "Latest API" :> GargAPI'
type GargAPIVersion = "v1.0" :> Summary "v1.0: " :> GargAPI'
type GargAPI' =
-- Roots endpoint -- Roots endpoint
"user" :> Summary "First user endpoint" "user" :> Summary "First user endpoint"
...@@ -250,9 +258,9 @@ server env = do ...@@ -250,9 +258,9 @@ server env = do
:<|> nodeAPI conn :<|> nodeAPI conn
:<|> nodeAPI conn :<|> nodeAPI conn
:<|> nodesAPI conn :<|> nodesAPI conn
:<|> count :<|> count -- TODO: undefined
:<|> search conn :<|> search conn
:<|> graphAPI conn :<|> graphAPI conn -- TODO: mock
:<|> treeAPI conn :<|> treeAPI conn
-- :<|> orchestrator -- :<|> orchestrator
where where
......
...@@ -12,7 +12,6 @@ Count API part of Gargantext. ...@@ -12,7 +12,6 @@ Count API part of Gargantext.
{-# OPTIONS_GHC -fno-warn-name-shadowing #-} {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE DataKinds #-} {-# LANGUAGE DataKinds #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
......
...@@ -10,7 +10,7 @@ Portability : POSIX ...@@ -10,7 +10,7 @@ Portability : POSIX
Node API Node API
-} -}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-} {-# OPTIONS_GHC -fno-warn-name-shadowing -fno-warn-orphans #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE DataKinds #-} {-# LANGUAGE DataKinds #-}
...@@ -23,6 +23,7 @@ module Gargantext.API.Node ...@@ -23,6 +23,7 @@ module Gargantext.API.Node
where where
------------------------------------------------------------------- -------------------------------------------------------------------
import Control.Lens (prism')
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Control.Monad ((>>)) import Control.Monad ((>>))
--import System.IO (putStrLn, readFile) --import System.IO (putStrLn, readFile)
...@@ -39,11 +40,13 @@ import Servant ...@@ -39,11 +40,13 @@ import Servant
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Types.Node import Gargantext.Database.Types.Node
import Gargantext.Database.Node ( getNodesWithParentId import Gargantext.Database.Node ( runCmd
, getNodesWithParentId
, getNode, getNodesWith , getNode, getNodesWith
, deleteNode, deleteNodes) , deleteNode, deleteNodes)
import Gargantext.Database.Facet (FacetDoc, getDocFacet import Gargantext.Database.Facet (FacetDoc, getDocFacet
,FacetChart) ,FacetChart)
import Gargantext.Database.Tree (treeDB, HasTreeError(..), TreeError(..))
-- Graph -- Graph
import Gargantext.TextFlow import Gargantext.TextFlow
...@@ -55,13 +58,15 @@ import Gargantext.Text.Terms (TermType(..)) ...@@ -55,13 +58,15 @@ import Gargantext.Text.Terms (TermType(..))
------------------------------------------------------------------- -------------------------------------------------------------------
-- | Node API Types management -- | Node API Types management
type Roots = Get '[JSON] [Node Value] type Roots = Get '[JSON] [Node Value]
:<|> Post '[JSON] Int :<|> Post '[JSON] Int -- TODO
:<|> Put '[JSON] Int :<|> Put '[JSON] Int -- TODO
:<|> Delete '[JSON] Int :<|> Delete '[JSON] Int -- TODO
type NodesAPI = Delete '[JSON] Int type NodesAPI = Delete '[JSON] Int
type NodeAPI = Get '[JSON] (Node Value) type NodeAPI = Get '[JSON] (Node Value)
:<|> Post '[JSON] Int
:<|> Put '[JSON] Int
:<|> Delete '[JSON] Int :<|> Delete '[JSON] Int
:<|> "children" :> Summary " Summary children" :<|> "children" :> Summary " Summary children"
:> QueryParam "type" NodeType :> QueryParam "type" NodeType
...@@ -102,23 +107,33 @@ type FacetDocAPI = "table" ...@@ -102,23 +107,33 @@ type FacetDocAPI = "table"
-- | Node API functions -- | Node API functions
roots :: Connection -> Server Roots roots :: Connection -> Server Roots
roots conn = liftIO (putStrLn ( "/user" :: Text) >> getNodesWithParentId conn 0 Nothing) roots conn = liftIO (putStrLn ( "/user" :: Text) >> getNodesWithParentId 0 Nothing conn)
:<|> pure (panic "not implemented yet") :<|> pure (panic "not implemented yet") -- TODO
:<|> pure (panic "not implemented yet") :<|> pure (panic "not implemented yet") -- TODO
:<|> pure (panic "not implemented yet") :<|> pure (panic "not implemented yet") -- TODO
type GraphAPI = Get '[JSON] Graph type GraphAPI = Get '[JSON] Graph
graphAPI :: Connection -> NodeId -> Server GraphAPI graphAPI :: Connection -> NodeId -> Server GraphAPI
graphAPI _ _ = liftIO $ textFlow (Mono EN) (Contexts contextText) graphAPI _ _ = liftIO $ textFlow (Mono EN) (Contexts contextText)
-- TODO what do we get about the node? to replace contextText
-- TODO(orphan): There should be a proper APIError data type with a case TreeError.
instance HasTreeError ServantErr where
_TreeError = prism' mk (const Nothing) -- Note a prism
where
mk NoRoot = err404 { errBody = "Root node not found" }
mk EmptyRoot = err500 { errBody = "Root node should not be empty" }
mk TooManyRoots = err500 { errBody = "Too many root nodes" }
type TreeAPI = Get '[JSON] (Tree NodeTree) type TreeAPI = Get '[JSON] (Tree NodeTree)
treeAPI :: Connection -> NodeId -> Server TreeAPI treeAPI :: Connection -> NodeId -> Server TreeAPI
treeAPI _ _ = undefined treeAPI = treeDB
nodeAPI :: Connection -> NodeId -> Server NodeAPI nodeAPI :: Connection -> NodeId -> Server NodeAPI
nodeAPI conn id = liftIO (putStrLn ("/node" :: Text) >> getNode conn id ) nodeAPI conn id = liftIO (putStrLn ("/node" :: Text) >> getNode conn id )
:<|> postNode conn id
:<|> putNode conn id
:<|> deleteNode' conn id :<|> deleteNode' conn id
:<|> getNodesWith' conn id :<|> getNodesWith' conn id
:<|> getFacet conn id :<|> getFacet conn id
...@@ -126,16 +141,20 @@ nodeAPI conn id = liftIO (putStrLn ("/node" :: Text) >> getNode co ...@@ -126,16 +141,20 @@ nodeAPI conn id = liftIO (putStrLn ("/node" :: Text) >> getNode co
-- :<|> upload -- :<|> upload
-- :<|> query -- :<|> query
nodesAPI :: Connection -> [NodeId] -> Server NodesAPI nodesAPI :: Connection -> [NodeId] -> Server NodesAPI
nodesAPI conn ids = deleteNodes' conn ids nodesAPI conn ids = deleteNodes' conn ids
postNode :: Connection -> NodeId -> Handler Int
postNode = undefined -- TODO
putNode :: Connection -> NodeId -> Handler Int
putNode = undefined -- TODO
deleteNodes' :: Connection -> [NodeId] -> Handler Int deleteNodes' :: Connection -> [NodeId] -> Handler Int
deleteNodes' conn ids = liftIO (deleteNodes conn ids) deleteNodes' conn ids = liftIO (runCmd conn $ deleteNodes ids)
deleteNode' :: Connection -> NodeId -> Handler Int deleteNode' :: Connection -> NodeId -> Handler Int
deleteNode' conn id = liftIO (deleteNode conn id) deleteNode' conn id = liftIO (runCmd conn $ deleteNode id)
getNodesWith' :: Connection -> NodeId -> Maybe NodeType -> Maybe Int -> Maybe Int getNodesWith' :: Connection -> NodeId -> Maybe NodeType -> Maybe Int -> Maybe Int
-> Handler [Node Value] -> Handler [Node Value]
...@@ -148,7 +167,7 @@ getFacet conn id offset limit = liftIO (putStrLn ( "/facet" :: Text)) >> liftIO ...@@ -148,7 +167,7 @@ getFacet conn id offset limit = liftIO (putStrLn ( "/facet" :: Text)) >> liftIO
getChart :: Connection -> NodeId -> Maybe UTCTime -> Maybe UTCTime getChart :: Connection -> NodeId -> Maybe UTCTime -> Maybe UTCTime
-> Handler [FacetChart] -> Handler [FacetChart]
getChart _ _ _ _ = undefined getChart _ _ _ _ = undefined -- TODO
query :: Text -> Handler Text query :: Text -> Handler Text
......
...@@ -64,7 +64,7 @@ pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput ...@@ -64,7 +64,7 @@ pipeline :: FromJSON e => URL -> ClientEnv -> ScraperInput
-> (e -> IO ()) -> IO ScraperStatus -> (e -> IO ()) -> IO ScraperStatus
pipeline scrapyurl client_env input log_status = do pipeline scrapyurl client_env input log_status = do
e <- runJobMLog client_env log_status $ callScraper scrapyurl input e <- runJobMLog client_env log_status $ callScraper scrapyurl input
either (panic . cs . show) pure e either (panic . cs . show) pure e -- TODO throwError
scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI)) scrapyOrchestrator :: Env -> IO (Server (WithCallbacks ScraperAPI))
scrapyOrchestrator env = do scrapyOrchestrator env = do
......
...@@ -25,4 +25,8 @@ module Gargantext.Core ...@@ -25,4 +25,8 @@ module Gargantext.Core
-- - SP == spanish (not implemented yet) -- - SP == spanish (not implemented yet)
-- --
-- ... add your language and help us to implement it (: -- ... add your language and help us to implement it (:
data Lang = EN | FR data Lang = EN | FR -- | DE | SP | CH
deriving (Show, Eq, Ord, Bounded, Enum)
allLangs :: [Lang]
allLangs = [minBound ..]
...@@ -24,6 +24,7 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main ...@@ -24,6 +24,7 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
import GHC.Generics import GHC.Generics
import Data.Aeson import Data.Aeson
import Data.Semigroup
import Data.Monoid import Data.Monoid
import Data.Set (Set, empty) import Data.Set (Set, empty)
--import qualified Data.Set as S --import qualified Data.Set as S
...@@ -103,16 +104,19 @@ data TokenTag = TokenTag { _my_token_word :: [Text] ...@@ -103,16 +104,19 @@ data TokenTag = TokenTag { _my_token_word :: [Text]
, _my_token_ner :: Maybe NER , _my_token_ner :: Maybe NER
} deriving (Show) } deriving (Show)
instance Semigroup TokenTag where
(<>) (TokenTag w1 s1 p1 n1) (TokenTag w2 s2 p2 _) = TokenTag (w1 <> w2) (s1 <> s2) p3 n1
where
p3 = case (p1,p2) of
(Just JJ, Just NP) -> Just NP
(Just VB, Just NP) -> Just NP
_ -> p1
instance Monoid TokenTag where instance Monoid TokenTag where
mempty = TokenTag [] empty Nothing Nothing mempty = TokenTag [] empty Nothing Nothing
mappend (TokenTag w1 s1 p1 n1) (TokenTag w2 s2 p2 _) mappend t1 t2 = (<>) t1 t2
= TokenTag (w1 <> w2) (s1 <> s2) p3 n1
where
p3 = case (p1,p2) of
(Just JJ, Just NP) -> Just NP
(Just VB, Just NP) -> Just NP
_ -> p1
mconcat = foldl mappend mempty mconcat = foldl mappend mempty
...@@ -56,35 +56,28 @@ gargNode = [userTree] ...@@ -56,35 +56,28 @@ gargNode = [userTree]
-- | User Tree simplified -- | User Tree simplified
userTree :: Tree NodeTree userTree :: Tree NodeTree
userTree = TreeN (NodeTree "user name" NodeUser 1) $ userTree = TreeN (NodeTree "user name" NodeUser 1) [annuaireTree, projectTree]
[leafT $ NodeTree "MyPage" UserPage 0] <>
[annuaireTree, projectTree]
-- | Project Tree -- | Project Tree
projectTree :: Tree NodeTree projectTree :: Tree NodeTree
projectTree = TreeN (NodeTree "Project CNRS/IMT" Project 2) [corpusTree 10 "A", corpusTree 20 "B"] projectTree = TreeN (NodeTree "Project CNRS/IMT" Folder 2) [corpusTree 10 "A", corpusTree 20 "B"]
type Individu = Document type Individu = Document
-- | Corpus Tree -- | Corpus Tree
annuaireTree :: Tree NodeTree annuaireTree :: Tree NodeTree
annuaireTree = TreeN (NodeTree "Annuaire" Annuaire 41) ( [leafT $ NodeTree "IMT" Individu 42] annuaireTree = (leafT $ NodeTree "Annuaire" Annuaire 41)
<> [leafT $ NodeTree "CNRS" Individu 43]
)
corpusTree :: NodeId -> Text -> Tree NodeTree corpusTree :: NodeId -> Text -> Tree NodeTree
corpusTree nId t = TreeN (NodeTree ("Corpus " <> t) NodeCorpus nId) ( [ leafT $ NodeTree "Documents" Document (nId +1)] corpusTree nId t = TreeN (NodeTree ("Corpus " <> t) NodeCorpus nId) ( [ leafT $ NodeTree "Dashboard" Dashboard (nId +1)
, leafT $ NodeTree "Graph" Graph (nId +2)
]
-- <> [ leafT $ NodeTree "My lists" Lists 5] -- <> [ leafT $ NodeTree "My lists" Lists 5]
-- <> [ leafT (NodeTree "Metrics A" Metrics 6) ] -- <> [ leafT (NodeTree "Metrics A" Metrics 6) ]
-- <> [ leafT (NodeTree "Class A" Classification 7)] -- <> [ leafT (NodeTree "Class A" Classification 7)]
) )
-- TODO make instances of Nodes
-- NP
-- - why NodeUser and not just User ?
-- - is this supposed to hold data ?
-- AD : Yes, some preferences for instances
data Parent = NodeType NodeId data Parent = NodeType NodeId
--data Classification = Favorites | MyClassifcation --data Classification = Favorites | MyClassifcation
......
...@@ -54,5 +54,5 @@ parseJSONFromString :: (Read a) => Value -> Parser a ...@@ -54,5 +54,5 @@ parseJSONFromString :: (Read a) => Value -> Parser a
parseJSONFromString v = do parseJSONFromString v = do
numString <- parseJSON v numString <- parseJSON v
case readMaybe (numString :: String) of case readMaybe (numString :: String) of
Nothing -> fail $ "Invalid number for TransactionID: " ++ show v Nothing -> fail $ "Invalid number for TransactionID: " ++ show v -- TODO error message too specific
Just n -> return n Just n -> return n
...@@ -72,9 +72,9 @@ module Gargantext.Database.Bashql ( get, get' ...@@ -72,9 +72,9 @@ module Gargantext.Database.Bashql ( get, get'
import Control.Monad.Reader -- (Reader, ask) import Control.Monad.Reader -- (Reader, ask)
import Database.PostgreSQL.Simple (Connection)
import Data.Text (Text, pack) import Data.Text (Text, pack)
import Data.Aeson import Data.Aeson
import Data.Aeson.Types
import Data.List (last, concat) import Data.List (last, concat)
import Gargantext.Core.Types import Gargantext.Core.Types
...@@ -92,44 +92,43 @@ type PWD = [NodeId] ...@@ -92,44 +92,43 @@ type PWD = [NodeId]
--data PWD' a = a | PWD' [a] --data PWD' a = a | PWD' [a]
-- | TODO get Children or Node -- | TODO get Children or Node
get :: Connection -> PWD -> IO [Node Value] get :: PWD -> Cmd [Node Value]
get _ [] = pure [] get [] = pure []
get conn pwd = runQuery conn $ selectNodesWithParentID (last pwd) get pwd = Cmd . ReaderT $ \conn -> runQuery conn $ selectNodesWithParentID (last pwd)
-- | Home, need to filter with UserId -- | Home, need to filter with UserId
home :: Connection -> IO PWD home :: Cmd PWD
home c = map node_id <$> getNodesWithParentId c 0 Nothing home = map node_id <$> Cmd (ReaderT (getNodesWithParentId 0 Nothing))
-- | ls == get Children -- | ls == get Children
ls :: Connection -> PWD -> IO [Node Value] ls :: PWD -> Cmd [Node Value]
ls = get ls = get
tree :: Connection -> PWD -> IO [Node Value] tree :: PWD -> Cmd [Node Value]
tree c p = do tree p = do
ns <- get c p ns <- get p
children <- mapM (\p' -> get c [p']) $ map node_id ns children <- mapM (\n -> get [node_id n]) ns
pure $ ns <> (concat children) pure $ ns <> concat children
-- | TODO -- | TODO
post :: Connection -> PWD -> [NodeWrite'] -> IO Int64 post :: PWD -> [NodeWrite'] -> Cmd Int64
post _ [] _ = pure 0 post [] _ = pure 0
post _ _ [] = pure 0 post _ [] = pure 0
post c pth ns = mkNode c (last pth) ns post pth ns = Cmd . ReaderT $ mkNode (last pth) ns
--postR :: Connection -> PWD -> [NodeWrite'] -> IO [Int] --postR :: PWD -> [NodeWrite'] -> Cmd [Int]
--postR _ [] _ = pure [0] --postR [] _ _ = pure [0]
--postR _ _ [] = pure [0] --postR _ [] _ = pure [0]
--postR c pth ns = mkNodeR c (last pth) ns --postR pth ns c = mkNodeR (last pth) ns c
--
--rm :: Connection -> PWD -> [NodeId] -> IO Int --rm :: Connection -> PWD -> [NodeId] -> IO Int
--rm = del --rm = del
del :: Connection -> [NodeId] -> IO Int del :: [NodeId] -> Cmd Int
del _ [] = pure 0 del [] = pure 0
del c ns = deleteNodes c ns del ns = deleteNodes ns
-- | TODO -- | TODO
--put :: Connection -> PWD -> [a] -> IO Int64 --put :: Connection -> PWD -> [a] -> IO Int64
...@@ -141,84 +140,70 @@ del c ns = deleteNodes c ns ...@@ -141,84 +140,70 @@ del c ns = deleteNodes c ns
-- jump NodeId -- jump NodeId
-- touch Dir -- touch Dir
type CorpusName = Text
postCorpus :: ToJSON a => CorpusName -> (a -> Text) -> [a] -> Cmd NewNode
postCorpus corpusName title ns = do
pid <- last <$> home
let uid = 1
postNode uid pid ( Node' NodeCorpus corpusName emptyObject
(map (\n -> Node' Document (title n) (toJSON n) []) ns)
)
-- |
-- import IMTClient as C
-- postAnnuaire "Annuaire IMT" (\n -> (maybe "" identity (C.prenom n)) <> " " <> (maybe "" identity (C.nom n))) (take 30 annuaire)
postAnnuaire :: ToJSON a => CorpusName -> (a -> Text) -> [a] -> Cmd NewNode
postAnnuaire corpusName title ns = do
pid <- last <$> home
let uid = 1
postNode uid pid ( Node' Annuaire corpusName emptyObject
(map (\n -> Node' UserPage (title n) (toJSON n) []) ns)
)
-------------------------------------------------------------- --------------------------------------------------------------
-- Tests -- Tests
-------------------------------------------------------------- --------------------------------------------------------------
get' :: PWD -> Reader Connection (IO [Node Value]) get' :: PWD -> IO [Node Value]
get' [] = pure $ pure [] get' = runCmd' . get
get' pwd = do
connection <- ask
pure $ runQuery connection $ selectNodesWithParentID (last pwd)
home' :: IO PWD home' :: IO PWD
home' = do home' = runCmd' home
c <- connectGargandb "gargantext.ini"
home c
--home'' :: Reader Connection (IO PWD)
--home'' = do
-- c <- ask
-- liftIO $ home c
ls' :: IO [Node Value] ls' :: IO [Node Value]
ls' = do ls' = runCmd' $ do
c <- connectGargandb "gargantext.ini" h <- home
h <- home c ls h
ls c h
tree' :: IO [Node Value] tree' :: IO [Node Value]
tree' = do tree' = runCmd' $ do
c <- connectGargandb "gargantext.ini" h <- home
h <- home c tree h
tree c h
post' :: IO [Int] post' :: IO NewNode
post' = do post' = runCmd' $ do
c <- connectGargandb "gargantext.ini" pid <- last <$> home
pid <- last <$> home c
let uid = 1 let uid = 1
postNode c uid pid ( Node' NodeCorpus (pack "Premier corpus") (toJSON (pack "{}"::Text)) [ Node' Document (pack "Doc1") (toJSON (pack "{}" :: Text)) [] postNode uid pid ( Node' NodeCorpus (pack "Premier corpus") emptyObject [ Node' Document (pack "Doc1") emptyObject []
, Node' Document (pack "Doc2") (toJSON (pack "{}" :: Text)) [] , Node' Document (pack "Doc2") emptyObject []
, Node' Document (pack "Doc3") (toJSON (pack "{}" :: Text)) [] , Node' Document (pack "Doc3") emptyObject []
] ]
) )
type CorpusName = Text
-- | -- |
-- myCorpus <- Prelude.map doc2hyperdataDocument <$> toDocs <$> snd <$> readCsv "doc/corpus_imt/Gargantext_Corpus_small.csv" -- myCorpus <- Prelude.map doc2hyperdataDocument <$> toDocs <$> snd <$> readCsv "doc/corpus_imt/Gargantext_Corpus_small.csv"
-- There is an error in the CSV parsing... -- There is an error in the CSV parsing...
-- let myCorpus' = Prelude.filter (\n -> T.length (maybe "" identity (hyperdataDocument_title n)) > 30) myCorpus -- let myCorpus' = Prelude.filter (\n -> T.length (maybe "" identity (hyperdataDocument_title n)) > 30) myCorpus
postCorpus :: ToJSON a => CorpusName -> (a -> Text) -> [a] -> IO [Int]
postCorpus corpusName title ns = do
c <- connectGargandb "gargantext.ini"
pid <- last <$> home c
let uid = 1
postNode c uid pid ( Node' NodeCorpus corpusName (toJSON (pack "{}"::Text))
(map (\n -> Node' Document (title n) (toJSON n) []) ns)
)
-- |
-- import IMTClient as C
-- postAnnuaire "Annuaire IMT" (\n -> (maybe "" identity (C.prenom n)) <> " " <> (maybe "" identity (C.nom n))) (take 30 annuaire)
postAnnuaire :: ToJSON a => CorpusName -> (a -> Text) -> [a] -> IO [Int]
postAnnuaire corpusName title ns = do
c <- connectGargandb "gargantext.ini"
pid <- last <$> home c
let uid = 1
postNode c uid pid ( Node' Annuaire corpusName (toJSON (pack "{}"::Text))
(map (\n -> Node' UserPage (title n) (toJSON n) []) ns)
)
del' :: [NodeId] -> IO Int del' :: [NodeId] -> IO Int
del' ns = do del' ns = runCmd' $ del ns
c <- connectGargandb "gargantext.ini"
del c ns
-- corporaOf :: Username -> IO [Corpus] -- corporaOf :: Username -> IO [Corpus]
runCmd' :: Cmd a -> IO a
runCmd' f = do
c <- connectGargandb "gargantext.ini"
runCmd c f
...@@ -19,50 +19,69 @@ Gargantext's database. ...@@ -19,50 +19,69 @@ Gargantext's database.
module Gargantext.Database.Config module Gargantext.Database.Config
where where
import Data.Text (pack)
import Data.Maybe (fromMaybe) import Data.Text (pack)
import Data.List (lookup) import Data.Tuple.Extra (swap)
import Data.Maybe (fromMaybe)
import Data.List (lookup)
import Gargantext.Database.Types.Node import Gargantext.Database.Types.Node
import Gargantext.Prelude import Gargantext.Prelude
-- | Nodes are typed in the database according to a specific ID nodeTypeId :: NodeType -> NodeTypeId
-- nodeTypeId n =
nodeTypes :: [(NodeType, NodeTypeId)] case n of
nodeTypes = [ (NodeUser , 1) NodeUser -> 1
, (Folder , 2) Folder -> 2
, (NodeCorpus , 30) --NodeCorpus -> 3
, (Annuaire , 31) NodeCorpus -> 30 -- TODO ERRR
, (Document , 4) Annuaire -> 31
, (UserPage , 41) Document -> 4
--, (NodeSwap , 19) UserPage -> 41
------ Lists --NodeSwap -> 19
-- , (StopList , 5)
-- , (GroupList , 6) ---- Lists
-- , (MainList , 7) -- StopList -> 5
-- , (MapList ,  8) -- GroupList -> 6
-- MainList -> 7
-- MapList -> 8
---- Scores ---- Scores
, (Occurrences , 10) Occurrences -> 10
-- , (Cooccurrences , 9) Graph -> 9
-- Dashboard -> 5
-- , (Specclusion , 11) Chart -> 51
-- , (Genclusion , 18)
-- , (Cvalue , 12) -- Cooccurrences -> 9
-- --
-- , (TfidfCorpus , 13) -- Specclusion -> 11
-- , (TfidfGlobal , 14) -- Genclusion -> 18
-- Cvalue -> 12
-- --
-- , (TirankLocal , 16) -- TfidfCorpus -> 13
-- , (TirankGlobal , 17) -- TfidfGlobal -> 14
-- --
-- TirankLocal -> 16
-- TirankGlobal -> 17
---- Node management ---- Node management
, (Favorites , 15) Favorites -> 15
-- Project -> TODO
-- Individu -> TODO
-- Classification -> TODO
-- Lists -> TODO
-- Metrics -> TODO
-- --
] -- | Nodes are typed in the database according to a specific ID
-- --
nodeTypeId :: NodeType -> NodeTypeId nodeTypeInv :: [(NodeTypeId, NodeType)]
nodeTypeId tn = fromMaybe (panic $ pack $ "Typename " <> show tn <> " does not exist") nodeTypeInv = map swap nodeTypes
(lookup tn nodeTypes)
nodeTypes :: [(NodeType, NodeTypeId)]
nodeTypes = [ (n, nodeTypeId n) | n <- allNodeTypes ]
typeId2node :: NodeTypeId -> NodeType
typeId2node tId = fromMaybe (panic $ pack $ "Type Id " <> show tId <> " does not exist")
(lookup tId nodeTypeInv)
{-| {-|
Module : Gargantext.Database.TextSearch Module : Gargantext.Database.Cooc
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
......
...@@ -15,6 +15,8 @@ Portability : POSIX ...@@ -15,6 +15,8 @@ Portability : POSIX
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
...@@ -41,11 +43,14 @@ import Gargantext.Prelude hiding (sum) ...@@ -41,11 +43,14 @@ import Gargantext.Prelude hiding (sum)
import Database.PostgreSQL.Simple.Internal (Field) import Database.PostgreSQL.Simple.Internal (Field)
import Control.Applicative (Applicative)
import Control.Arrow (returnA) import Control.Arrow (returnA)
import Control.Lens.TH (makeLensesWith, abbreviatedFields) import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Control.Monad.IO.Class
import Control.Monad.Reader
import Data.Aeson import Data.Aeson
import Data.Maybe (Maybe, fromMaybe) import Data.Maybe (Maybe, fromMaybe)
import Data.Text (Text, pack) import Data.Text (Text)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance) import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Typeable (Typeable) import Data.Typeable (Typeable)
...@@ -60,6 +65,26 @@ import qualified Data.Profunctor.Product as PP ...@@ -60,6 +65,26 @@ import qualified Data.Profunctor.Product as PP
-- | Types for Node Database Management -- | Types for Node Database Management
data PGTSVector data PGTSVector
newtype Cmd a = Cmd (ReaderT Connection IO a)
deriving (Functor, Applicative, Monad, MonadReader Connection, MonadIO)
runCmd :: Connection -> Cmd a -> IO a
runCmd c (Cmd f) = runReaderT f c
mkCmd :: (Connection -> IO a) -> Cmd a
mkCmd = Cmd . ReaderT
{-
newtype Cmd a = Cmd { unCmd :: Connection -> IO a }
instance Monad Cmd where
return a = Cmd $ \_ -> return a
m >>= f = Cmd $ \c -> do
a <- unCmd m c
unCmd (f a) c
-}
------------------------------------------------------------------------ ------------------------------------------------------------------------
type CorpusId = Int type CorpusId = Int
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -161,8 +186,8 @@ selectNode id = proc () -> do ...@@ -161,8 +186,8 @@ selectNode id = proc () -> do
restrict -< node_id row .== id restrict -< node_id row .== id
returnA -< row returnA -< row
runGetNodes :: Connection -> Query NodeRead -> IO [Node Value] runGetNodes :: Query NodeRead -> Cmd [Node Value]
runGetNodes = runQuery runGetNodes q = mkCmd $ \conn -> runQuery conn q
-- | order by publication date -- | order by publication date
-- Favorites (Bool), node_ngrams -- Favorites (Bool), node_ngrams
...@@ -187,13 +212,20 @@ selectNodesWith' parentId maybeNodeType = proc () -> do ...@@ -187,13 +212,20 @@ selectNodesWith' parentId maybeNodeType = proc () -> do
returnA -< node returnA -< node
--type Cmd' a = forall m. (MonadReader env m, HasConnection env, MonadIO m) => m a
deleteNode :: Connection -> Int -> IO Int
deleteNode conn n = fromIntegral <$> runDelete conn nodeTable -- deleteNode :: (MonadReader Connection m, MonadIO m) => Int -> m Int
-- deleteNode :: Int -> Cmd' Int
deleteNode :: Int -> Cmd Int
deleteNode n = mkCmd $ \conn ->
fromIntegral <$> runDelete conn nodeTable
(\(Node n_id _ _ _ _ _ _) -> n_id .== pgInt4 n) (\(Node n_id _ _ _ _ _ _) -> n_id .== pgInt4 n)
deleteNodes :: Connection -> [Int] -> IO Int deleteNodes :: [Int] -> Cmd Int
deleteNodes conn ns = fromIntegral <$> runDelete conn nodeTable deleteNodes ns = mkCmd $ \conn ->
fromIntegral <$> runDelete conn nodeTable
(\(Node n_id _ _ _ _ _ _) -> in_ ((map pgInt4 ns)) n_id) (\(Node n_id _ _ _ _ _ _) -> in_ ((map pgInt4 ns)) n_id)
...@@ -205,13 +237,13 @@ getNodesWith conn parentId nodeType maybeOffset maybeLimit = ...@@ -205,13 +237,13 @@ getNodesWith conn parentId nodeType maybeOffset maybeLimit =
-- NP check type -- NP check type
getNodesWithParentId :: Connection -> Int getNodesWithParentId :: Int
-> Maybe Text -> IO [Node Value] -> Maybe Text -> Connection -> IO [Node Value]
getNodesWithParentId conn n _ = runQuery conn $ selectNodesWithParentID n getNodesWithParentId n _ conn = runQuery conn $ selectNodesWithParentID n
getNodesWithParentId' :: Connection -> Int getNodesWithParentId' :: Int
-> Maybe Text -> IO [Node Value] -> Maybe Text -> Connection -> IO [Node Value]
getNodesWithParentId' conn n _ = runQuery conn $ selectNodesWithParentID n getNodesWithParentId' n _ conn = runQuery conn $ selectNodesWithParentID n
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -285,11 +317,11 @@ node2write pid (Node id tn ud _ nm dt hp) = ((pgInt4 <$> id) ...@@ -285,11 +317,11 @@ node2write pid (Node id tn ud _ nm dt hp) = ((pgInt4 <$> id)
) )
mkNode :: Connection -> ParentId -> [NodeWrite'] -> IO Int64 mkNode :: ParentId -> [NodeWrite'] -> Connection -> IO Int64
mkNode conn pid ns = runInsertMany conn nodeTable' $ map (node2write pid) ns mkNode pid ns conn = runInsertMany conn nodeTable' $ map (node2write pid) ns
mkNodeR :: Connection -> ParentId -> [NodeWrite'] -> IO [Int] mkNodeR :: ParentId -> [NodeWrite'] -> Connection -> IO [Int]
mkNodeR conn pid ns = runInsertManyReturning conn nodeTable' (map (node2write pid) ns) (\(i,_,_,_,_,_,_) -> i) mkNodeR pid ns conn = runInsertManyReturning conn nodeTable' (map (node2write pid) ns) (\(i,_,_,_,_,_,_) -> i)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -309,10 +341,10 @@ post c uid pid [ Node' Corpus "name" "{}" [] ...@@ -309,10 +341,10 @@ post c uid pid [ Node' Corpus "name" "{}" []
-- TODO -- TODO
-- currently this function remove the child relation -- currently this function remove the child relation
-- needs a Temporary type between Node' and NodeWriteT -- needs a Temporary type between Node' and NodeWriteT
node2table :: UserId -> ParentId -> Node' -> [NodeWriteT] node2table :: UserId -> ParentId -> Node' -> NodeWriteT
node2table uid pid (Node' nt txt v []) = [( Nothing, (pgInt4$ nodeTypeId nt), (pgInt4 uid), (pgInt4 pid) node2table uid pid (Node' nt txt v []) = ( Nothing, (pgInt4$ nodeTypeId nt), (pgInt4 uid), (pgInt4 pid)
, pgStrictText txt, Nothing, pgStrictJSONB $ DB.pack $ DBL.unpack $ encode v)] , pgStrictText txt, Nothing, pgStrictJSONB $ DB.pack $ DBL.unpack $ encode v)
node2table _ _ (Node' _ _ _ _) = panic $ pack "node2table: should not happen, Tree insert not implemented yet" node2table _ _ (Node' _ _ _ _) = panic "node2table: should not happen, Tree insert not implemented yet"
data Node' = Node' { _n_type :: NodeType data Node' = Node' { _n_type :: NodeType
...@@ -330,30 +362,37 @@ type NodeWriteT = ( Maybe (Column PGInt4) ...@@ -330,30 +362,37 @@ type NodeWriteT = ( Maybe (Column PGInt4)
) )
mkNode' :: Connection -> [NodeWriteT] -> IO Int64 mkNode' :: [NodeWriteT] -> Cmd Int64
mkNode' conn ns = runInsertMany conn nodeTable' ns mkNode' ns = mkCmd $ \conn -> runInsertMany conn nodeTable' ns
mkNodeR' :: Connection -> [NodeWriteT] -> IO [Int] mkNodeR' :: [NodeWriteT] -> Cmd [Int]
mkNodeR' conn ns = runInsertManyReturning conn nodeTable' ns (\(i,_,_,_,_,_,_) -> i) mkNodeR' ns = mkCmd $ \conn -> runInsertManyReturning conn nodeTable' ns (\(i,_,_,_,_,_,_) -> i)
-- | postNode
postNode :: Connection -> UserId -> ParentId -> Node' -> IO [Int]
postNode c uid pid (Node' nt txt v []) = mkNodeR' c (node2table uid pid (Node' nt txt v []))
postNode c uid pid (Node' NodeCorpus txt v ns) = do data NewNode = NewNode { _newNodeId :: Int
[pid'] <- postNode c uid pid (Node' NodeCorpus txt v []) , _newNodeChildren :: [Int] }
pids <- mkNodeR' c $ concat $ map (\n -> childWith uid pid' n) ns
pure (pids)
postNode c uid pid (Node' Annuaire txt v ns) = do -- | postNode
[pid'] <- postNode c uid pid (Node' Annuaire txt v []) postNode :: UserId -> ParentId -> Node' -> Cmd NewNode
pids <- mkNodeR' c $ concat $ map (\n -> childWith uid pid' n) ns postNode uid pid (Node' nt txt v []) = do
pure (pids) pids <- mkNodeR' [node2table uid pid (Node' nt txt v [])]
postNode _ _ _ (Node' _ _ _ _) = panic $ pack "postNode for this type not implemented yet" case pids of
[pid] -> pure $ NewNode pid []
_ -> panic "postNode: only one pid expected"
childWith :: UserId -> ParentId -> Node' -> [NodeWriteT]
postNode uid pid (Node' NodeCorpus txt v ns) = do
NewNode pid' _ <- postNode uid pid (Node' NodeCorpus txt v [])
pids <- mkNodeR' (concat $ map (\n -> [childWith uid pid' n]) ns)
pure $ NewNode pid' pids
postNode uid pid (Node' Annuaire txt v ns) = do
NewNode pid' _ <- postNode uid pid (Node' Annuaire txt v [])
pids <- mkNodeR' (concat $ map (\n -> [childWith uid pid' n]) ns)
pure $ NewNode pid' pids
postNode _ _ (Node' _ _ _ _) = panic "TODO: postNode for this type not implemented yet"
childWith :: UserId -> ParentId -> Node' -> NodeWriteT
childWith uId pId (Node' Document txt v []) = node2table uId pId (Node' Document txt v []) childWith uId pId (Node' Document txt v []) = node2table uId pId (Node' Document txt v [])
childWith uId pId (Node' UserPage txt v []) = node2table uId pId (Node' UserPage txt v []) childWith uId pId (Node' UserPage txt v []) = node2table uId pId (Node' UserPage txt v [])
childWith _ _ (Node' _ _ _ _) = panic $ pack "This NodeType can not be a child" childWith _ _ (Node' _ _ _ _) = panic "This NodeType can not be a child"
...@@ -143,7 +143,7 @@ queryInsert = [sql| ...@@ -143,7 +143,7 @@ queryInsert = [sql|
|] |]
prepare :: UserId -> ParentId -> [HyperdataDocument] -> [InputData] prepare :: UserId -> ParentId -> [HyperdataDocument] -> [InputData]
prepare uId pId = map (\h -> InputData tId uId pId (DT.pack "Doc") (toJSON $ unicize h)) prepare uId pId = map (\h -> InputData tId uId pId (DT.pack "Doc") (toJSON $ addUniqId h))
where where
tId = nodeTypeId Document tId = nodeTypeId Document
...@@ -195,19 +195,13 @@ hashParameters = [ \d -> maybe' (_hyperdataDocument_title d) ...@@ -195,19 +195,13 @@ hashParameters = [ \d -> maybe' (_hyperdataDocument_title d)
where where
maybe' = maybe (DT.pack "") identity maybe' = maybe (DT.pack "") identity
unicize :: HyperdataDocument -> HyperdataDocument addUniqId :: HyperdataDocument -> HyperdataDocument
unicize = unicize' hashParameters addUniqId doc = set hyperdataDocument_uniqId (Just hash) doc
where where
unicize' :: [(HyperdataDocument -> Text)] -> HyperdataDocument -> HyperdataDocument hash = uniqId $ DT.concat $ map ($ doc) hashParameters
unicize' fields doc = set hyperdataDocument_uniqId (Just hash) doc
where
hash = uniqId $ DT.concat $ map (\f -> f doc) fields
uniqId :: Text -> Text uniqId :: Text -> Text
uniqId txt = (sha256 txt) uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
where
sha256 :: Text -> Text
sha256 = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
-- * Tests -- * Tests
......
{-|
Module : Gargantext.Database.Tree
Description : Tree of Resource Nodes built from Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Let a Root Node, return the Tree of the Node as a directed acyclic graph
(Tree).
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Tree (treeDB, TreeError(..), HasTreeError(..)) where
import Control.Lens (Prism', (#), (^..), at, each, _Just, to)
import Control.Monad.Error.Class (MonadError(throwError))
import Control.Monad.IO.Class (MonadIO(liftIO))
import Data.Map (Map, fromListWith, lookup)
import Data.Text (Text)
import Database.PostgreSQL.Simple
import Database.PostgreSQL.Simple.SqlQQ
import Gargantext.Prelude
import Gargantext.Core.Types.Main (NodeTree(..), Tree(..))
import Gargantext.Database.Config (typeId2node)
------------------------------------------------------------------------
-- import Gargantext (connectGargandb)
-- import Control.Monad ((>>=))
-- treeTest :: IO (Tree NodeTree)
-- treeTest = connectGargandb "gargantext.ini" >>= \c -> treeDB c 347474
------------------------------------------------------------------------
-- | Errors that can occur while rebuilding the node tree from database rows
-- (see 'toTree' for where each constructor is produced).
data TreeError = NoRoot | EmptyRoot | TooManyRoots
  deriving (Show)

-- | Classy prism so any application-level error type can embed a 'TreeError'.
class HasTreeError e where
  _TreeError :: Prism' e TreeError

-- | Throw a 'TreeError' through the ambient 'MonadError', wrapping it with
-- the '_TreeError' prism.
treeError :: (MonadError e m, HasTreeError e) => TreeError -> m a
treeError te = throwError $ _TreeError # te
-- | Returns the Tree of Nodes in Database
treeDB :: (MonadIO m, MonadError e m, HasTreeError e)
=> Connection -> RootId -> m (Tree NodeTree)
treeDB c r = toTree =<< (toTreeParent <$> liftIO (dbTree c r))
type RootId = Int
type ParentId = Int
------------------------------------------------------------------------
-- | Turn the parent-grouped rows into a 'Tree'.  The root rows are the ones
-- stored under the 'Nothing' key; exactly one is required, otherwise the
-- corresponding 'TreeError' is thrown.
toTree :: (MonadError e m, HasTreeError e)
       => Map (Maybe ParentId) [DbTreeNode] -> m (Tree NodeTree)
toTree m =
  case lookup Nothing m of
    Nothing     -> treeError NoRoot
    Just []     -> treeError EmptyRoot
    Just [root] -> pure (toTree' m root)
    Just _      -> treeError TooManyRoots
-- | Recursively attach children: the children of node @n@ are the rows stored
-- under key @Just (dt_nodeId n)@ in the grouping map.
toTree' :: Map (Maybe ParentId) [DbTreeNode] -> DbTreeNode -> Tree NodeTree
toTree' m n =
  TreeN (toNodeTree n) $
    -- the 'at'/_Just/each traversal yields [] when the node has no entry,
    -- so leaves terminate the recursion naturally
    m ^.. at (Just $ dt_nodeId n) . _Just . each . to (toTree' m)
------------------------------------------------------------------------
-- | Convert one database row into the API-facing 'NodeTree', translating the
-- numeric type id into a 'NodeType' via 'typeId2node'.
toNodeTree :: DbTreeNode -> NodeTree
toNodeTree (DbTreeNode nodeId typeId _ name) =
  NodeTree name (typeId2node typeId) nodeId
------------------------------------------------------------------------
-- | Group rows by their parent id; root rows end up under the 'Nothing' key.
toTreeParent :: [DbTreeNode] -> Map (Maybe ParentId) [DbTreeNode]
toTreeParent ns = fromListWith (<>) [ (dt_parentId n, [n]) | n <- ns ]
------------------------------------------------------------------------
-- | One row of the recursive tree query (see 'dbTree').
data DbTreeNode = DbTreeNode { dt_nodeId   :: Int        -- ^ node id
                             , dt_typeId   :: Int        -- ^ numeric node type, decoded by 'typeId2node'
                             , dt_parentId :: Maybe Int  -- ^ 'Nothing' marks a root row (see 'toTree')
                             , dt_name     :: Text
                             } deriving (Show)
-- | Fetch the rows making up the tree around @rootId@: the starting nodes,
-- all their descendants (restricted to a few type ids), and all ancestors up
-- to the real root.
--
-- NOTE(review): the @starting@ CTE filters on @parent_id = ?@, so @rootId@ is
-- used as a *parent* id, not matched against @n.id@ — confirm callers pass the
-- intended node id.
-- NOTE(review): the descendant walk is limited to @typename in (2,30,31)@,
-- hard-coded magic numbers — presumably specific 'NodeType' ids; consider
-- deriving them from 'Gargantext.Database.Config' instead.
dbTree :: Connection -> RootId -> IO [DbTreeNode]
dbTree conn rootId = map (\(nId, tId, pId, n) -> DbTreeNode nId tId pId n) <$> query conn [sql|
  WITH RECURSIVE
      -- starting node(s)
      starting (id, typename, parent_id, name) AS
      (
        SELECT n.id, n.typename, n.parent_id, n.name
        FROM nodes AS n
        WHERE n.parent_id = ?           -- this can be arbitrary
      ),
      descendants (id, typename, parent_id, name) AS
      (
        SELECT id, typename, parent_id, name
        FROM starting
        UNION ALL
        SELECT n.id, n.typename, n.parent_id, n.name
        FROM nodes AS n JOIN descendants AS d ON n.parent_id = d.id
        where n.typename in (2,30,31)
      ),
      ancestors (id, typename, parent_id, name) AS
      (
        SELECT n.id, n.typename, n.parent_id, n.name
        FROM nodes AS n
        WHERE n.id IN (SELECT parent_id FROM starting)
        UNION ALL
        SELECT n.id, n.typename, n.parent_id, n.name
        FROM nodes AS n JOIN ancestors AS a ON n.id = a.parent_id
      )
  TABLE ancestors
  UNION ALL
  TABLE descendants ;
  |] (Only rootId)
...@@ -20,7 +20,7 @@ Portability : POSIX ...@@ -20,7 +20,7 @@ Portability : POSIX
module Gargantext.Database.Types.Node where module Gargantext.Database.Types.Node where
import Prelude (Enum, Bounded, minBound, maxBound) import Prelude (Enum, Bounded, minBound, maxBound, mempty)
import GHC.Generics (Generic) import GHC.Generics (Generic)
...@@ -274,11 +274,21 @@ type NodeCorpus = Node HyperdataCorpus ...@@ -274,11 +274,21 @@ type NodeCorpus = Node HyperdataCorpus
type Document = Node HyperdataDocument type Document = Node HyperdataDocument
------------------------------------------------------------------------ ------------------------------------------------------------------------
data NodeType = NodeUser | Project | Folder | NodeCorpus | Annuaire | Document | Individu | UserPage | DocumentCopy | Favorites data NodeType = NodeUser
| Classification -- | Project
| Lists | Folder
| Metrics | Occurrences | NodeCorpus | Annuaire
deriving (Show, Read, Eq, Generic) | Document -- | Individu
| UserPage | Favorites
| Graph | Dashboard | Chart
-- | Classification
-- | Lists
-- | Metrics
| Occurrences
deriving (Show, Read, Eq, Generic, Bounded, Enum)
-- | Every constructor of 'NodeType', via the derived 'Bounded' and 'Enum'
-- instances.
allNodeTypes :: [NodeType]
allNodeTypes = [minBound .. maxBound]
instance FromJSON NodeType instance FromJSON NodeType
instance ToJSON NodeType instance ToJSON NodeType
...@@ -307,11 +317,11 @@ $(deriveJSON (unPrefix "node_") ''NodePoly) ...@@ -307,11 +317,11 @@ $(deriveJSON (unPrefix "node_") ''NodePoly)
instance Arbitrary (NodePoly NodeId NodeTypeId (Maybe NodeUserId) NodeParentId NodeName UTCTime Value) where instance Arbitrary (NodePoly NodeId NodeTypeId (Maybe NodeUserId) NodeParentId NodeName UTCTime Value) where
arbitrary = elements [Node 1 1 (Just 1) 1 "name" (jour 2018 01 01) (toJSON ("{}"::Text))] arbitrary = elements [Node 1 1 (Just 1) 1 "name" (jour 2018 01 01) (Object mempty)]
instance Arbitrary (NodePoly NodeId NodeTypeId NodeUserId (Maybe NodeParentId) NodeName UTCTime Value) where instance Arbitrary (NodePoly NodeId NodeTypeId NodeUserId (Maybe NodeParentId) NodeName UTCTime Value) where
arbitrary = elements [Node 1 1 1 (Just 1) "name" (jour 2018 01 01) (toJSON ("{}"::Text))] arbitrary = elements [Node 1 1 1 (Just 1) "name" (jour 2018 01 01) (Object mempty)]
instance Arbitrary (NodePoly NodeId NodeTypeId (Maybe NodeUserId) NodeParentId NodeName UTCTime HyperdataDocument) where instance Arbitrary (NodePoly NodeId NodeTypeId (Maybe NodeUserId) NodeParentId NodeName UTCTime HyperdataDocument) where
arbitrary = elements [Node 1 1 (Just 1) 1 "name" (jour 2018 01 01) ((hyperdataDocument))] arbitrary = elements [Node 1 1 (Just 1) 1 "name" (jour 2018 01 01) ((hyperdataDocument))]
...@@ -343,7 +353,7 @@ instance ToSchema HyperdataDocument where ...@@ -343,7 +353,7 @@ instance ToSchema HyperdataDocument where
instance ToSchema Value where instance ToSchema Value where
declareNamedSchema proxy = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions proxy declareNamedSchema proxy = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions proxy
L.& mapped.schema.description ?~ "a document" L.& mapped.schema.description ?~ "a document"
L.& mapped.schema.example ?~ toJSON ("" :: Text) L.& mapped.schema.example ?~ toJSON ("" :: Text) -- TODO
instance ToSchema (NodePoly NodeId NodeTypeId NodeUserId instance ToSchema (NodePoly NodeId NodeTypeId NodeUserId
......
...@@ -20,6 +20,7 @@ module Gargantext.Prelude ...@@ -20,6 +20,7 @@ module Gargantext.Prelude
( module Gargantext.Prelude ( module Gargantext.Prelude
, module Protolude , module Protolude
, headMay, lastMay , headMay, lastMay
, module GHC.Err.Located
, module Text.Show , module Text.Show
, module Text.Read , module Text.Read
, cs , cs
...@@ -29,32 +30,32 @@ module Gargantext.Prelude ...@@ -29,32 +30,32 @@ module Gargantext.Prelude
where where
import GHC.Exts (sortWith) import GHC.Exts (sortWith)
import GHC.Err.Located (undefined)
import Control.Monad.IO.Class (MonadIO) import Control.Monad.IO.Class (MonadIO)
import Data.Maybe (isJust, fromJust, maybe) import Data.Maybe (isJust, fromJust, maybe)
import Protolude ( Bool(True, False), Int, Int64, Double, Integer import Protolude ( Bool(True, False), Int, Int64, Double, Integer
, Fractional, Num, Maybe(Just,Nothing) , Fractional, Num, Maybe(Just,Nothing)
, Enum, Bounded, Float , Enum, Bounded, Float
, Floating, Char, IO , Floating, Char, IO
, pure, (>>=), (=<<), (<*>), (<$>), panic , pure, (>>=), (=<<), (<*>), (<$>)
, putStrLn , putStrLn
, head, flip , head, flip
, Ord, Integral, Foldable, RealFrac, Monad, filter , Ord, Integral, Foldable, RealFrac, Monad, filter
, reverse, map, mapM, zip, drop, take, zipWith , reverse, map, mapM, zip, drop, take, zipWith
, sum, fromIntegral, length, fmap, foldl, foldl' , sum, fromIntegral, length, fmap, foldl, foldl'
, takeWhile, sqrt, undefined, identity , takeWhile, sqrt, identity
, abs, min, max, maximum, minimum, return, snd, truncate , abs, min, max, maximum, minimum, return, snd, truncate
, (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
, Eq, (==), (>=), (<=), (<>), (/=) , Eq, (==), (>=), (<=), (<>), (/=)
, (&&), (||), not, any , (&&), (||), not, any, all
, fst, snd, toS , fst, snd, toS
, elem, die, mod, div, const, either , elem, die, mod, div, const, either
, curry, uncurry, repeat , curry, uncurry, repeat
, otherwise, when , otherwise, when
, undefined
, IO() , IO()
, compare , compare
, on , on
, panic
) )
-- TODO import functions optimized in Utils.Count -- TODO import functions optimized in Utils.Count
...@@ -136,8 +137,8 @@ chunkAlong' a b l = only (while dropAlong) ...@@ -136,8 +137,8 @@ chunkAlong' a b l = only (while dropAlong)
dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..]) dropAlong = V.scanl (\x _y -> V.drop b x) l (V.fromList [1..])
-- | TODO Inverse of chunk ? unchunkAlong ? -- | TODO Inverse of chunk ? unchunkAlong ?
unchunkAlong :: Int -> Int -> [[a]] -> [a] -- unchunkAlong :: Int -> Int -> [[a]] -> [a]
unchunkAlong = undefined -- unchunkAlong = undefined
-- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"] -- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
......
...@@ -100,6 +100,7 @@ useLabelPolicy :: Map Grouped [Text] -> Grouped -> [Text] ...@@ -100,6 +100,7 @@ useLabelPolicy :: Map Grouped [Text] -> Grouped -> [Text]
useLabelPolicy m g = case DMS.lookup g m of useLabelPolicy m g = case DMS.lookup g m of
Just label -> label Just label -> label
Nothing -> panic $ "Label of Grouped not found: " <> (pack $ show g) Nothing -> panic $ "Label of Grouped not found: " <> (pack $ show g)
-- TODO: use a non-fatal error if this can happen in practice
{- {-
labelPolicy :: Map Grouped (Map Terms Occs) -> Grouped -> Label labelPolicy :: Map Grouped (Map Terms Occs) -> Grouped -> Label
labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList <$> lookup g m of labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList <$> lookup g m of
...@@ -144,6 +145,9 @@ occurrences = occurrencesOn _terms_stem ...@@ -144,6 +145,9 @@ occurrences = occurrencesOn _terms_stem
occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int) occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
-- | Count occurrences grouped by a key function: each element @el@ adds 1 to
-- the bucket @key el@.
occurrencesWith :: (Foldable list, Ord k, Num a) => (b -> k) -> list b -> Map k a
occurrencesWith key = foldl' step empty
  where
    -- bump the counter for this element's key (insert 1 if absent)
    step acc el = insertWith (+) (key el) 1 acc
-- TODO add groups and filter stops -- TODO add groups and filter stops
sumOcc :: Ord a => [Occ a] -> Occ a sumOcc :: Ord a => [Occ a] -> Occ a
......
...@@ -16,7 +16,7 @@ Domain Specific Language to manage Frequent Item Set (FIS) ...@@ -16,7 +16,7 @@ Domain Specific Language to manage Frequent Item Set (FIS)
module Gargantext.Text.Metrics.FrequentItemSet module Gargantext.Text.Metrics.FrequentItemSet
( Fis, Size(..) ( Fis, Size(..)
, occ_hlcm, cooc_hlcm , occ_hlcm, cooc_hlcm
, all, between , allFis, between
, fisWithSize , fisWithSize
, fisWith , fisWith
, fisWithSizePoly , fisWithSizePoly
...@@ -33,7 +33,7 @@ import qualified Data.Set as Set ...@@ -33,7 +33,7 @@ import qualified Data.Set as Set
import Data.Set (Set) import Data.Set (Set)
import qualified Data.Vector as V import qualified Data.Vector as V
import Data.List (filter, concat) import Data.List (filter, concat, null)
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes)
import HLCM import HLCM
...@@ -51,8 +51,8 @@ occ_hlcm = fisWithSize (Point 1) ...@@ -51,8 +51,8 @@ occ_hlcm = fisWithSize (Point 1)
cooc_hlcm :: Frequency -> [[Item]] -> [Fis] cooc_hlcm :: Frequency -> [[Item]] -> [Fis]
cooc_hlcm = fisWithSize (Point 2) cooc_hlcm = fisWithSize (Point 2)
all :: Frequency -> [[Item]] -> [Fis] allFis :: Frequency -> [[Item]] -> [Fis]
all = fisWith Nothing allFis = fisWith Nothing
------------------------------------------------------------------------ ------------------------------------------------------------------------
between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis] between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
...@@ -93,7 +93,9 @@ fisWithSize n f is = case n of ...@@ -93,7 +93,9 @@ fisWithSize n f is = case n of
--- Filter on Fis and not on [Item] --- Filter on Fis and not on [Item]
fisWith :: Maybe ([Item] -> Bool) -> Frequency -> [[Item]] -> [Fis] fisWith :: Maybe ([Item] -> Bool) -> Frequency -> [[Item]] -> [Fis]
fisWith s f is = catMaybes $ map items2fis $ filter' $ runLCMmatrix is f fisWith s f is = case filter (not . null) is of
[] -> []
js -> catMaybes $ map items2fis $ filter' $ runLCMmatrix js f
-- drop unMaybe -- drop unMaybe
where where
filter' = case s of filter' = case s of
......
...@@ -68,7 +68,7 @@ import Text.XML.HXT.DOM.Util (decimalStringToInt) ...@@ -68,7 +68,7 @@ import Text.XML.HXT.DOM.Util (decimalStringToInt)
parserLang :: Lang -> DC.Lang parserLang :: Lang -> DC.Lang
parserLang FR = DC.FR parserLang FR = DC.FR
parserLang EN = DC.EN parserLang EN = DC.EN
-- parserLang _ = panic "not implemented"
-- | Final Date parser API -- | Final Date parser API
-- IO can be avoided here: -- IO can be avoided here:
...@@ -76,6 +76,7 @@ parserLang EN = DC.EN ...@@ -76,6 +76,7 @@ parserLang EN = DC.EN
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseDate1 :: Context -> Text -> SomeErrorHandling Text -- parseDate1 :: Context -> Text -> SomeErrorHandling Text
-- TODO error handling
parseDate1 :: Lang -> Text -> IO Text parseDate1 :: Lang -> Text -> IO Text
parseDate1 lang text = do parseDate1 lang text = do
maybeJson <- map jsonValue <$> parseDateWithDuckling lang text maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
......
-- | Date-parsing helpers (placeholder module).
module Data.Gargantext.Parsers.Utils where

-- use Duckling here
-- NOTE(review): placeholder with no type signature; evaluating 'parseDate'
-- crashes at runtime.  Implement or remove before relying on this module.
parseDate = undefined
-- | Chinese sample text about text mining, for language-processing tests.
module Gargantext.Text.Samples.CH where

import Data.String (String)

-- | Sample paragraph (source text kept verbatim).
textMining :: String
textMining = "文本挖掘有时也被称为文字探勘、文本数据挖掘等,大致相当于文字分析,一般指文本处理过程中产生高质量的信息。高质量的信息通常通过分类和预测来产生,如模式识别。文本挖掘通常涉及输入文本的处理过程(通常进行分析,同时加上一些衍生语言特征以及消除杂音,随后插入到数据库中) ,产生结构化数据,并最终评价和解释输出。'高品质'的文本挖掘通常是指某种组合的相关性,新颖性和趣味性。典型的文本挖掘方法包括文本分类,文本聚类,概念/实体挖掘,生产精确分类,观点分析,文档摘要和实体关系模型(即,学习已命名实体之间的关系) 。 文本分析包括了信息检索、词典分析来研究词语的频数分布、模式识别、标签 注释、信息抽取,数据挖掘技术包括链接和关联分析、可视化和预测分析。本质上,首要的任务是,通过自然语言处理和分析方法,将文本转化为数据进行分析"
-- | German sample text about text mining, for language-processing tests.
module Gargantext.Text.Samples.DE where

import Data.String (String)

-- | Sample paragraph (source text kept verbatim).
textMining :: String
textMining = "Text Mining, seltener auch Textmining, Text Data Mining oder Textual Data Mining, ist ein Bündel von Algorithmus-basierten Analyseverfahren zur Entdeckung von Bedeutungsstrukturen aus un- oder schwachstrukturierten Textdaten. Mit statistischen und linguistischen Mitteln erschließt Text-Mining-Software aus Texten Strukturen, die die Benutzer in die Lage versetzen sollen, Kerninformationen der verarbeiteten Texte schnell zu erkennen. Im Optimalfall liefern Text-Mining-Systeme Informationen, von denen die Benutzer zuvor nicht wissen, ob und dass sie in den verarbeiteten Texten enthalten sind. Bei zielgerichteter Anwendung sind Werkzeuge des Text Mining außerdem in der Lage, Hypothesen zu generieren, diese zu überprüfen und schrittweise zu verfeinern."
-- | English sample text about text mining, for language-processing tests.
module Gargantext.Text.Samples.EN where

import Data.String (String)

-- | Sample paragraph (source text kept verbatim).
textMining :: String
textMining = "Text mining, also referred to as text data mining, roughly equivalent to text analytics, is the process of deriving high-quality information from text. High-quality information is typically derived through the devising of patterns and trends through means such as statistical pattern learning. Text mining usually involves the process of structuring the input text (usually parsing, along with the addition of some derived linguistic features and the removal of others, and subsequent insertion into a database), deriving patterns within the structured data, and finally evaluation and interpretation of the output. 'High quality' in text mining usually refers to some combination of relevance, novelty, and interestingness. Typical text mining tasks include text categorization, text clustering, concept/entity extraction, production of granular taxonomies, sentiment analysis, document summarization, and entity relation modeling (i.e., learning relations between named entities). Text analysis involves information retrieval, lexical analysis to study word frequency distributions, pattern recognition, tagging/annotation, information extraction, data mining techniques including link and association analysis, visualization, and predictive analytics. The overarching goal is, essentially, to turn text into data for analysis, via application of natural language processing (NLP) and analytical methods. A typical application is to scan a set of documents written in a natural language and either model the document set for predictive classification purposes or populate a database or search index with the information extracted."
-- | French sample text about text mining, for language-processing tests.
module Gargantext.Text.Samples.FR where

import Data.String (String)

-- | Sample paragraph (source text kept verbatim).
textMining :: String
textMining = "La fouille de textes ou « l'extraction de connaissances » dans les textes est une spécialisation de la fouille de données et fait partie du domaine de l'intelligence artificielle. Cette technique est souvent désignée sous l'anglicisme text mining. Elle désigne un ensemble de traitements informatiques consistant à extraire des connaissances selon un critère de nouveauté ou de similarité dans des textes produits par des humains pour des humains. Dans la pratique, cela revient à mettre en algorithme un modèle simplifié des théories linguistiques dans des systèmes informatiques d'apprentissage et de statistiques. Les disciplines impliquées sont donc la linguistique calculatoire, l'ingénierie des langues, l'apprentissage artificiel, les statistiques et l'informatique."
-- | Spanish sample text about text mining, for language-processing tests.
module Gargantext.Text.Samples.SP where

import Data.String (String)

-- | Sample paragraph (source text kept verbatim).
textMining :: String
textMining = "La minería de textos se refiere al proceso de derivar información nueva de textos. A comienzos de los años ochenta surgieron los primeros esfuerzos de minería de textos que necesitaban una gran cantidad de esfuerzo humano, pero los avances tecnológicos han permitido que esta área progrese de manera rápida en la última década. La minería de textos es un área multidisciplinar basada en la recuperación de información, minería de datos, aprendizaje automático, estadísticas y la lingüística computacional. Como la mayor parte de la información (más de un 80%) se encuentra actualmente almacenada como texto, se cree que la minería de textos tiene un gran valor comercial."
...@@ -46,8 +46,8 @@ import Gargantext.Text.Terms.Mono (monoTerms) ...@@ -46,8 +46,8 @@ import Gargantext.Text.Terms.Mono (monoTerms)
data TermType lang = Mono lang | Multi lang | MonoMulti lang data TermType lang = Mono lang | Multi lang | MonoMulti lang
group :: [Text] -> [Text] --group :: [Text] -> [Text]
group = undefined --group = undefined
-- remove Stop Words -- remove Stop Words
-- map (filter (\t -> not . elem t)) $ -- map (filter (\t -> not . elem t)) $
......
...@@ -13,7 +13,7 @@ Mono-terms are Nterms where n == 1. ...@@ -13,7 +13,7 @@ Mono-terms are Nterms where n == 1.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence) module Gargantext.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence, words)
where where
import Prelude (String) import Prelude (String)
...@@ -34,10 +34,12 @@ import Gargantext.Prelude ...@@ -34,10 +34,12 @@ import Gargantext.Prelude
-- | TODO remove Num ? -- | TODO remove Num ?
--isGram c = isAlphaNum c --isGram c = isAlphaNum c
-- | Split a 'Text' into mono-term tokens; alias for 'monoTexts'.
-- NOTE(review): this shadows 'Prelude.words' / 'Data.Text.words' — importers
-- should use an explicit import list or qualify the import.
words :: Text -> [Text]
words = monoTexts
-- | Sentence split separators -- | Sentence split separators
isSep :: Char -> Bool isSep :: Char -> Bool
isSep = (`elem` (",.:;?!(){}[]\"" :: String)) isSep = (`elem` (",.:;?!(){}[]\"\'" :: String))
monoTerms :: Lang -> Text -> [Terms] monoTerms :: Lang -> Text -> [Terms]
monoTerms l txt = map (monoText2term l) $ monoTexts txt monoTerms l txt = map (monoText2term l) $ monoTexts txt
......
...@@ -55,6 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack ...@@ -55,6 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
lang' = case lang of lang' = case lang of
EN -> N.English EN -> N.English
FR -> N.French FR -> N.French
--_ -> panic $ DT.pack "not implemented yet"
...@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags ...@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
group :: Lang -> [TokenTag] -> [TokenTag] group :: Lang -> [TokenTag] -> [TokenTag]
group EN = En.group group EN = En.group
group FR = Fr.group group FR = Fr.group
-- group _ = panic $ pack "group :: Lang not implemeted yet"
...@@ -124,6 +124,7 @@ corenlp' lang txt = do ...@@ -124,6 +124,7 @@ corenlp' lang txt = do
EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}" EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}" -- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}" FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
-- _ -> panic $ pack "not implemented yet"
url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
let request = setRequestBodyLBS (cs txt) url let request = setRequestBodyLBS (cs txt) url
httpJSON request httpJSON request
......
...@@ -29,13 +29,22 @@ list quality in time. ...@@ -29,13 +29,22 @@ list quality in time.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Terms.Multi.RAKE (multiterms_rake) module Gargantext.Text.Terms.Multi.RAKE (multiterms_rake, select, hardStopList)
where where
import GHC.Real (round)
import Data.Text (Text) import Data.Text (Text)
import NLP.RAKE.Text import NLP.RAKE.Text
import Gargantext.Text.Terms.Stop (stopList)
import Gargantext.Prelude import Gargantext.Prelude
select :: Double -> [a] -> [a]
select part ns = take n ns
where
n = round $ part * (fromIntegral $ length ns)
multiterms_rake :: Text -> [WordScore] multiterms_rake :: Text -> [WordScore]
multiterms_rake = candidates hardStopList multiterms_rake = candidates hardStopList
defaultNosplit defaultNosplit
...@@ -43,74 +52,4 @@ multiterms_rake = candidates hardStopList ...@@ -43,74 +52,4 @@ multiterms_rake = candidates hardStopList
-- | StopList -- | StopList
hardStopList :: StopwordsMap hardStopList :: StopwordsMap
hardStopList = mkStopwordsStr [ hardStopList = mkStopwordsStr stopList
"a","a's","able","about","above","apply","according","accordingly",
"across","actually","after","afterwards","again","against",
"ain't","all","allow","allows","almost","alone","along",
"already","also","although","always","am","among","amongst",
"an","and","another","any","anybody","anyhow","anyone","anything",
"anyway","anyways","anywhere","analyze","apart","appear","appreciate","appropriate",
"are","aren't","around","as","aside","ask","asking","associated","at",
"available","away","awfully","based", "b","be","became","because","become",
"becomes","becoming","been","before","beforehand","behind","being",
"believe","below","beside","besides","best","better","between","beyond",
"both","brief","but","by","c","c'mon","c's","came","can","can't","cannot",
"cant","cause","causes","certain","certainly","changes","clearly","co",
"com","come","comes","common","concerning","consequently","consider","considering",
"contain","containing","contains","corresponding","could","couldn't","course",
"currently","d","definitely","described","detects","detecting","despite","did","didn't","different",
"do","does","doesn't","doing","don't","done","down","downwards","during","e",
"each","edu","eg","eight","either","else","elsewhere","enough","entirely",
"especially","et","etc","even","ever","every","everybody","everyone",
"everything","everywhere","ex","exactly","example","except","f","far",
"few","find","fifth","first","five","followed","following","follows","for",
"former","formerly","forth","four","from","further","furthermore","g",
"get","gets","getting","given","gives","go","goes","going","gone","got",
"gotten","greetings","h","had","hadn't","happens","hardly","has","hasn't",
"have","haven't","having","he","he's","hello","help","hence","her","here",
"here's","hereafter","hereby","herein","hereupon","hers","herself","hi",
"him","himself","his","hither","hopefully","how","howbeit","however","i",
"i'd","identify","i'll","i'm","i've","ie","if","ignored","immediate","in","inasmuch",
"inc","indeed","indicate","indicated","indicates","inner","insofar",
"instead","into","inward","is","isn't","it","it'd","it'll","it's","its",
"itself","j","just","k","keep","keeps","kept","know","known","knows","l",
"last","lately","later","latter","latterly","least","less","lest","let",
"let's","like","liked","likely","little","look","looking","looks","ltd",
"m","mainly","many","may","maybe","me","mean","meanwhile","merely","might",
"more","moreover","most","mostly","much","must","my","myself","n",
"name","namely","nd","near","nearly","necessary","need","needs","neither",
"never","nevertheless","new","next","nine","no","nobody","non","none",
"noone","nor","normally","not","nothing","novel","now","nowhere","o",
"obviously","of","off","often","oh","ok","okay","old","on","once","one",
"ones","only","onto","or","other","others","otherwise","ought","our",
"ours","ourselves","out","outside","over","overall","own","p","particular",
"particularly","per","perhaps","placed","please","plus","possible",
"presents","presumably","probably","provides","q","que","quite","qv","r","rather",
"rd","re","really","reasonably","regarding","regardless","regards",
"relatively","respectively","right","s","said","same","saw","say",
"saying","says","second","secondly","see","seeing","seem","seemed",
"seeming","seems","seen","self","selves","sensible","sent","serious",
"seriously","seven","several","shall","she","should","shouldn't","since",
"six","so","some","somebody","somehow","someone","something","sometime",
"sometimes","somewhat","somewhere","soon","sorry","specified","specify",
"specifying","still","sub","such","sup","sure","t","t's","take","taken",
"tell","tends","th","than","thank","thanks","thanx","that","that's",
"thats","the","their","theirs","them","themselves","then","thence","there",
"there's","thereafter","thereby","therefore","therein","theres",
"thereupon","these","they","they'd","they'll","they're","they've",
"think","third","this","thorough","thoroughly","those","though","three",
"through","throughout","thru","thus","to","together","too","took","toward",
"towards","tried","tries","truly","try","trying","twice","two","u","un",
"under","unfortunately","unless","unlikely","until","unto","up","upon",
"us","use","used","useful","uses","using","usually","uucp","v","value",
"various","very","via","viz","vs","w","want","wants","was","wasn't","way",
"we","we'd","we'll","we're","we've","welcome","well","went","were",
"weren't","what","what's","whatever","when","whence","whenever","where",
"where's","whereafter","whereas","whereby","wherein","whereupon",
"wherever","whether","which","while","whither","who","who's","whoever",
"whole","whom","whose","why","will","willing","wish","with","within",
"without","won't","wonder","would","wouldn't","x","y","yes","yet","you",
"you'd","you'll","you're","you've","your","yours","yourself","yourselves",
"z","zero"]
This diff is collapsed.
...@@ -86,7 +86,7 @@ textFlow termType workType = do ...@@ -86,7 +86,7 @@ textFlow termType workType = do
CSV path -> readCsvOn [csv_title, csv_abstract] path CSV path -> readCsvOn [csv_title, csv_abstract] path
Contexts ctxt -> pure ctxt Contexts ctxt -> pure ctxt
DB con corpusId -> catMaybes <$> map (\n -> hyperdataDocumentV3_title (node_hyperdata n) <> hyperdataDocumentV3_abstract (node_hyperdata n))<$> getDocumentsV3WithParentId con corpusId DB con corpusId -> catMaybes <$> map (\n -> hyperdataDocumentV3_title (node_hyperdata n) <> hyperdataDocumentV3_abstract (node_hyperdata n))<$> getDocumentsV3WithParentId con corpusId
_ -> undefined _ -> undefined -- TODO Query not supported
textFlow' termType contexts textFlow' termType contexts
......
...@@ -38,7 +38,7 @@ import qualified Data.Set as S ...@@ -38,7 +38,7 @@ import qualified Data.Set as S
import Data.Map (Map) import Data.Map (Map)
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import Data.Vector (Vector) -- import Data.Vector (Vector)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -86,13 +86,12 @@ indexConversion index ms = M.fromList $ map (\((k1,k2),c) -> ( ((M.!) index k1, ...@@ -86,13 +86,12 @@ indexConversion index ms = M.fromList $ map (\((k1,k2),c) -> ( ((M.!) index k1,
--------------------------------------------------------------------------------- ---------------------------------------------------------------------------------
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
-- TODO
--fromIndex' :: Ord t => Vector t -> Map (Index, Index) a -> Map (t,t) a --fromIndex' :: Ord t => Vector t -> Map (Index, Index) a -> Map (t,t) a
--fromIndex' vi ns = undefined --fromIndex' vi ns = undefined
-- TODO -- TODO: returning a Vector should be faster than a Map
createIndices' :: Ord t => Map (t, t) b -> (Map t Index, Vector t) -- createIndices' :: Ord t => Map (t, t) b -> (Map t Index, Vector t)
createIndices' = undefined -- createIndices' = undefined
createIndices :: Ord t => Map (t, t) b -> (Map t Index, Map Index t) createIndices :: Ord t => Map (t, t) b -> (Map t Index, Map Index t)
createIndices = set2indices . map2set createIndices = set2indices . map2set
......
-- | Cesar et Cleôpatre
-- Exemple de phylomemie
-- French without accents
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
module Gargantext.Viz.Phylo.Example where
import qualified Data.List as DL
import Data.String (String)
import Data.Text (Text, pack, unwords, toLower, words)
import Data.Tuple.Extra (both)
import Data.Map (Map)
import qualified Data.Map as DM
import Data.Set (Set)
import Gargantext.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Text.Terms.Mono (monoTexts)
import Gargantext.Prelude
------------------------------------------------------------------------
-- | A history: the list of dated events forming the example corpus.
type Histoire = [Event]

-- | One corpus item: a date ('Double'; the example uses negative values for
-- years BC) together with its raw text.
data Event = Event {date:: Double, text :: Text}
  deriving (Show)

-- | The whitelist of terms kept when cleaning a 'Histoire'
-- (see 'cleanHistoire').
type MapList = [Text]

-- | Size of a periode. NOTE(review): not referenced anywhere in this chunk —
-- confirm it is still needed.
type PeriodeSize = Int
-- data Periodes b a = Map (b,b) a
------------------------------------------------------------------------
-- | TODO FIS on monotexts
-- Frequent item sets (minimum size 1, minimum support 1) computed over the
-- word bags of each periode's events.
phyloFIS :: Map (Double, Double) [Event] -> Map (Double, Double) (Map (Set Text) Int)
phyloFIS = DM.map (fisWithSizePolyMap (Point 1) 1 . map (words . text))
-- | Example phylomemy input: the cleaned corpus grouped into periodes of one
-- time unit.
phyloExample :: Map (Double, Double) [Event]
phyloExample =
  toPeriodes date 1 (cleanHistoire mapList phyloCorpus)
------------------------------------------------------------------------
-- | Group a history into consecutive periodes of size @s@, keyed by their
-- (start, end) bounds.  Panics on an empty history, since no bounds can be
-- derived from it.
toPeriodes :: (Enum b, Fractional b, Ord b) => (t -> b) -> b -> [t] -> Map (b, b) [t]
toPeriodes _ _ [] = panic $ pack "Empty history can not have any periode"
toPeriodes f s hs = periodes f bounds hs
  where
    sorted = DL.sortOn f hs
    -- NOTE(review): only the sorted copy feeds the bounds; the partition in
    -- 'periode' does not depend on input order, so 'hs' is passed unsorted.
    bounds = steps s $ both f (DL.head sorted, DL.last sorted)
-- | Pair each periode's bounds with the events falling inside it.
periodes :: Ord b => (t -> b) -> [(b, b)] -> [t] -> Map (b, b) [t]
periodes f ds h = DM.fromList (zip ds (periodes' f ds h))
-- | Split the events along the list of periode bounds.  For the last periode
-- the leftover bucket is also emitted (it is later dropped by the 'zip' in
-- 'periodes').
periodes' :: Ord b => (t -> b) -> [(b, b)] -> [t] -> [[t]]
periodes' _ []       _ = []
periodes' f (d : ds) h =
  case ds of
    [] -> [inside, rest]
    _  -> inside : periodes' f ds rest
  where
    (inside, rest) = periode f d h
-- | Partition events into those dated within the closed interval
-- [start, end] and all the others.
periode :: Ord b => (t -> b) -> (b, b) -> [t] -> ([t],[t])
periode f (start, end) = DL.partition inRange
  where
    inRange d = let v = f d in start <= v && v <= end
------------------------------------------------------------------------
-- | Consecutive (start, end) bound pairs cutting the interval (b, e) into
-- slices of size s.
steps :: (Ord a, Fractional a, Enum a) => a -> (a, a) -> [(a, a)]
steps s (b, e) =
  let cuts = steps' s (b, e)
  in  zip (DL.init cuts) (DL.tail cuts)
-- | Guarded version of 'steps''': panics on a non-positive step size.
steps' :: (Enum b, Fractional b, Ord b) => b -> (b, b) -> [b]
steps' s interval
  | s > 0     = steps'' s interval
  | otherwise = panic $ pack "Steps size can not be < 0"
-- | Evenly spaced cut points: start, start + s, start + 2s, …, covering the
-- interval up to (end + 1).
steps'' :: (Fractional b, Enum b) => b -> (b, b) -> [b]
steps'' s (start, end) = [ k * s + start | k <- [0 .. count] ]
  where
    count = ((end + 1)- start) / s
------------------------------------------------------------------------
-- | Keep, in every event's text, only the mono-terms present in the map list.
cleanHistoire :: MapList -> Histoire -> Histoire
cleanHistoire ml = map keep
  where
    keep (Event d t) = Event d (unwords (filter (`elem` ml) (monoTexts t)))
-- | Lower-cased actant names, used as the whitelist for 'cleanHistoire'.
mapList :: [Text]
mapList = [ toLower (pack a) | a <- actants ]
-- | The protagonists of the example corpus: people and places of the Caesar /
-- Cleopatra story, spelled without accents (see module header) so they match
-- the corpus text.
actants :: [String]
actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
          , "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
          , "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus"]
-- | The example corpus lifted into dated 'Event's.
phyloCorpus :: Histoire
phyloCorpus = [ Event d (pack t) | (d, t) <- corpus ]
-- | Example documents: French excerpts (accents mostly stripped) about
-- Cleopatra, each tagged with a year — negative values are years BC.
corpus :: [(Double, String)]
corpus = [ (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. 
Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]
resolver: lts-12.10
flags: {} flags: {}
extra-package-dbs: [] extra-package-dbs: []
packages: packages:
...@@ -15,32 +16,16 @@ extra-deps: ...@@ -15,32 +16,16 @@ extra-deps:
- git: https://github.com/delanoe/servant-static-th.git - git: https://github.com/delanoe/servant-static-th.git
commit: ba5347e7d8a13ce5275af8470c15b2305fbb23af commit: ba5347e7d8a13ce5275af8470c15b2305fbb23af
- accelerate-1.2.0.0 - accelerate-1.2.0.0
- hashtables-1.2.3.0 # needed by accelerate-1.2.0.0 - opaleye-0.6.7002.0
- aeson-1.2.4.0
- aeson-lens-0.5.0.0 - aeson-lens-0.5.0.0
- duckling-0.1.3.0 - duckling-0.1.3.0
- extra-1.5.3
- full-text-search-0.2.1.4 - full-text-search-0.2.1.4
- fullstop-0.1.4 - fullstop-0.1.4
- haskell-src-exts-1.18.2
- http-types-0.12.1
- kmeans-vector-0.3.2
- probable-0.1.3 - probable-0.1.3
- protolude-0.2
- rake-0.0.1 - rake-0.0.1
- servant-0.13 - located-base-0.1.1.1
- servant-auth-0.3.0.1 - servant-multipart-0.11.2
- servant-client-0.13
- servant-client-core-0.13
- servant-docs-0.11.1
- servant-multipart-0.11.1
- servant-server-0.13
- servant-swagger-ui-0.2.3.2.2.8
- stemmer-0.5.2 - stemmer-0.5.2
# - text-1.2.3.0
- text-show-3.6.2
- servant-flatten-0.2 - servant-flatten-0.2
- serialise-0.2.0.0 # imt-api-client - serialise-0.2.0.0 # imt-api-client
- cborg-0.2.0.0 # imt-api-client
- KMP-0.1.0.2 - KMP-0.1.0.2
resolver: lts-11.10
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment