Commit 8f1a5663 authored by Alexandre Delanoë's avatar Alexandre Delanoë

First commit to start with.

parents
This diff is collapsed.
module Main where
import Hastext.Db
main :: IO ()
main = fonction
name: gargantext
version: 0.1.0.0
synopsis: Deep (Collaborative) Text mining project
description: Please see README.md
homepage: http://gargantext.org
license: BSD3
license-file: LICENSE
author: Alexandre Delanoë
maintainer: gargantext@iscpif.fr
copyright: Copyright: (c) 2016,2017 CNRS Alexandre Delanoë
category: Data
build-type: Simple
-- extra-source-files:
cabal-version: >=1.10
library
hs-source-dirs: src
build-depends: base >= 4.7 && < 5
, aeson
, attoparsec
, base16-bytestring
, bytestring
, case-insensitive
, containers
, contravariant
, directory
, extra
, filepath
, http-client
, lens
, opaleye
, postgresql-simple
, pretty
, product-profunctors
, profunctors
, protolude
, pureMD5
, regex-compat
, semigroups
, servant-multipart
, servant-server
, split
-- , stemmer
, tagsoup
, text
, time
, time-locale-compat
, transformers
--, utc
, uuid
, vector
, wai
, warp
, zlib
exposed-modules: Data.Gargantext
, Data.Gargantext.Analysis
, Data.Gargantext.DSL
, Data.Gargantext.Database
, Data.Gargantext.Database.Instances
, Data.Gargantext.Database.Ngram
, Data.Gargantext.Database.Node
, Data.Gargantext.Database.NodeNgram
, Data.Gargantext.Database.NodeNgramNgram
, Data.Gargantext.Database.NodeNode
, Data.Gargantext.Database.NodeNodeNgram
, Data.Gargantext.Database.Private
, Data.Gargantext.Database.User
, Data.Gargantext.Parsers
, Data.Gargantext.Parsers.Occurrences
, Data.Gargantext.Prelude
, Data.Gargantext.Server
, Data.Gargantext.Types
, Data.Gargantext.Types.Node
, Data.Gargantext.Types.Main
, Data.Gargantext.Utils.DateUtils
, Data.Gargantext.Utils.Prefix
default-language: Haskell2010
ghc-options: -Wall
--executable gargantext-exe
-- hs-source-dirs: app
-- main-is: Main.hs
-- ghc-options: -threaded -rtsopts -with-rtsopts=-N
-- build-depends: base
-- , hastext
-- default-language: Haskell2010
test-suite garg-test-parsers
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Parsers.hs
build-depends: base
, extra
, text
, gargantext
, hspec
, QuickCheck
ghc-options: -threaded -rtsopts -with-rtsopts=-N
default-language: Haskell2010
module Data.Gargantext (
module Data.Gargantext.Database,
-- module Data.Gargantext.Ngrams,
-- module Data.Gargantext.Utils,
) where
import Data.Gargantext.Database
-- import Data.Gargantext.Ngrams
-- import Data.Gargantext.Utils
module Data.Gargantext.Analysis where
-- import qualified Data.Text.Lazy as DTL
import Data.Either.Extra (fromRight)
import Data.Gargantext.Database.Node
import Data.Gargantext.Parsers.Occurrences
import Data.Gargantext.Prelude
import Data.Gargantext.Types
import Data.Text
import Opaleye (Column, PGInt4)
-- | Simple function to count Occurrences in a context of text.
occOfDocument :: Column PGInt4 -> Text -> IO Int
occOfDocument = undefined
--occOfDocument c_id txt = do
-- docs <- pm (hyperdataDocument_Abstract . node_hyperdata) <$> getCorpusDocument c_id
-- let occs = pm (\x -> maybe "" identity x) docs
-- let result = case sequence $ pm (parseOccurrences txt) occs of
-- -- TODO find a way to get nice message d'errors (file, function, line)
-- Left str -> error $ "[ERRROR] at file/function/line" ++ str
-- Right xs -> xs
-- pure (sum result)
module Data.Gargantext.DSL where
import Data.Gargantext.Database
import Data.Gargantext.Prelude
import Data.Gargantext.Types
import Data.Text
type Username = Text
type Password = Text
--user :: Username -> Maybe User
--user username = undefined
--
--
--getNode :: Int -> IO Node
--getNode = undefined
--
--saveNode :: Node -> IO ()
--saveNode = undefined
--
--updateNode :: Node -> IO ()
--updateNode = undefined
--
--
--
--
--parents :: Node -> [Node]
--parents = undefined
--
--children :: Node -> [Node]
--children = undefined
--
--
--
-- projects :: User -> [Project]
-- projects u = undefined
module Data.Gargantext.Database ( module Data.Gargantext.Database.Private
, module Data.Gargantext.Database.Instances
, module Data.Gargantext.Database.User
, module Data.Gargantext.Database.Node
, module Data.Gargantext.Database.NodeNode
, module Data.Gargantext.Database.Ngram
, module Data.Gargantext.Database.NodeNgram
, module Data.Gargantext.Database.NodeNodeNgram
, module Data.Gargantext.Database.NodeNgramNgram
-- , module Data.Gargantext.Database.Gargandb
-- , module Data.Gargantext.Database.Simple
-- , module Data.Gargantext.Database.InsertNode
-- , module Data.Gargantext.Database.NodeType
) where
import Data.Gargantext.Database.Private
import Data.Gargantext.Database.Instances
--import Data.Gargantext.Database.Gargandb
import Data.Gargantext.Database.User
import Data.Gargantext.Database.Node
import Data.Gargantext.Database.NodeNode
import Data.Gargantext.Database.Ngram
import Data.Gargantext.Database.NodeNgram
import Data.Gargantext.Database.NodeNodeNgram
import Data.Gargantext.Database.NodeNgramNgram
--import Data.Gargantext.Database.Simple
--import Data.Gargantext.Database.NodeType
--import Data.Gargantext.Database.InsertNode
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module Data.Gargantext.Database.Instances where
import Data.Time (UTCTime)
import Opaleye (Column, PGBool, PGInt4, PGText, PGTimestamptz, PGFloat8
, QueryRunnerColumnDefault
, queryRunnerColumnDefault
, fieldQueryRunnerColumn
)
instance QueryRunnerColumnDefault PGInt4 Integer where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGTimestamptz (Maybe UTCTime) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
module Data.Gargantext.Database.Ngram where
import Prelude
import Data.Time (UTCTime)
import Data.Text (Text)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Control.Arrow (returnA)
import qualified Database.PostgreSQL.Simple as PGS
import qualified Opaleye as O
import Opaleye (Column, PGBool, PGInt4, PGText, PGTimestamptz
, Table(Table), Query
, QueryRunnerColumnDefault, queryRunnerColumnDefault
, fieldQueryRunnerColumn
, (.==), (.>)
)
import Data.Gargantext.Database.Private (infoGargandb)
import Data.Gargantext.Database.Instances
-- Functions only
import Data.List (find)
data NgramPoly id terms n = Ngram { ngram_id :: id
, ngram_terms :: terms
, ngram_n :: n
} deriving (Show)
type NgramWrite = NgramPoly (Maybe (Column PGInt4)) (Column PGText) (Column PGInt4)
type NgramRead = NgramPoly (Column PGInt4) (Column PGText) (Column PGInt4)
type Ngram = NgramPoly Int Text Int
$(makeAdaptorAndInstance "pNgram" ''NgramPoly)
$(makeLensesWith abbreviatedFields ''NgramPoly)
ngramTable :: O.Table NgramWrite NgramRead
ngramTable = O.Table "ngrams" (pNgram Ngram { ngram_id = O.optional "id"
, ngram_terms = O.required "terms"
, ngram_n = O.required "n"
}
)
queryNgramTable :: Query NgramRead
queryNgramTable = O.queryTable ngramTable
--selectUsers :: Query UserRead
--selectUsers = proc () -> do
-- --user@(i, p, ll, is, un, fn, ln, m, iff, ive, dj) <- queryUserTable -< ()
-- row@(User i p ll is un fn ln m iff ive dj) <- queryUserTable -< ()
-- O.restrict -< i .== 1
-- --returnA -< User i p ll is un fn ln m iff ive dj
-- returnA -< row
--
findWith :: (Eq a1, Foldable t) => (a -> a1) -> a1 -> t a -> Maybe a
findWith f t = find (\x -> f x == t)
--userWithUsername :: Text -> [User] -> Maybe User
--userWithUsername t xs = userWith userUsername t xs
--
--userWithId :: Integer -> [User] -> Maybe User
--userWithId t xs = userWith userUserId t xs
-- | not optimized (get all ngrams without filters)
ngrams :: IO [Ngram]
ngrams = do
conn <- PGS.connect infoGargandb
O.runQuery conn queryNgramTable
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
module Data.Gargantext.Database.Node where
import Database.PostgreSQL.Simple.FromField (Conversion, ResultError(ConversionFailed), FromField, fromField, returnError)
import Database.PostgreSQL.Simple.Internal (Field)
import Control.Arrow (returnA)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Data.Aeson
import Data.Aeson.TH
import Data.Aeson.Types
import Data.Gargantext.Database.Instances
import Data.Gargantext.Database.Private (infoGargandb)
import Data.Gargantext.Prelude
import Data.Gargantext.Types
import Data.Gargantext.Utils.Prefix
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Text (Text)
import Data.Time (UTCTime)
import Data.Typeable.Internal (Typeable)
import GHC.Generics (Generic)
import qualified Data.ByteString.Internal as DBI
import qualified Database.PostgreSQL.Simple as PGS
import qualified Opaleye as O
import Opaleye (Column, PGBool, PGInt4, PGText, PGTimestamptz, PGFloat8
, Table(Table), PGJsonb, Query
, QueryRunnerColumnDefault, queryRunnerColumnDefault
, fieldQueryRunnerColumn
, (.==), (.>)
)
-- | Types for Node Database Management
type NodeWrite = NodePoly (Maybe (Column PGInt4)) (Column PGInt4)
(Column PGInt4) (Column PGInt4)
(Column PGText) (Maybe (Column PGTimestamptz))
(Column PGJsonb)
type NodeRead = NodePoly (Column PGInt4) (Column PGInt4)
(Column PGInt4) (Column PGInt4)
(Column PGText) (Column PGTimestamptz)
(Column PGJsonb)
instance FromField HyperdataCorpus where
fromField = fromField'
instance FromField HyperdataDocument where
fromField = fromField'
--instance FromField HyperdataProject where
-- fromField = fromField'
--instance FromField HyperdataUser where
-- fromField = fromField'
fromField' :: (Typeable b, FromJSON b) => Field -> Maybe DBI.ByteString -> Conversion b
fromField' field mb = do
v <- fromField field mb
valueToHyperdata v
where
valueToHyperdata v = case fromJSON v of
Success a -> pure a
Error err -> returnError ConversionFailed field "cannot parse hyperdata"
instance O.QueryRunnerColumnDefault PGJsonb HyperdataDocument where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance O.QueryRunnerColumnDefault PGJsonb HyperdataCorpus where
queryRunnerColumnDefault = fieldQueryRunnerColumn
$(makeAdaptorAndInstance "pNode" ''NodePoly)
$(makeLensesWith abbreviatedFields ''NodePoly)
nodeTable :: O.Table NodeWrite NodeRead
nodeTable = O.Table "nodes" (pNode Node { node_id = O.optional "id"
, node_typename = O.required "typename"
, node_userId = O.required "user_id"
, node_parentId = O.required "parent_id"
, node_name = O.required "name"
, node_date = O.optional "date"
, node_hyperdata = O.required "hyperdata"
}
)
selectNodes :: Column PGInt4 -> Query (Column O.PGText)
selectNodes node_id = proc () -> do
row@(Node n_id tn u p n d h) <- queryNodeTable -< ()
O.restrict -< n_id .== node_id
returnA -< n
runGetNodes :: PGS.Connection -> Query NodeRead -> IO [Document]
runGetNodes = O.runQuery
queryNodeTable :: Query NodeRead
queryNodeTable = O.queryTable nodeTable
selectNode :: Column PGInt4 -> Query NodeRead
selectNode node_id = proc () -> do
row@(Node id tn u p_id n d h) <- queryNodeTable -< ()
O.restrict -< p_id .== node_id
returnA -< row
getNodes :: Column PGInt4 -> IO [Document]
getNodes node_id = do
conn <- PGS.connect infoGargandb
O.runQuery conn $ selectNode node_id
getCorpusDocument :: Column PGInt4 -> IO [Document]
getCorpusDocument node_id = PGS.connect infoGargandb >>=
\conn -> O.runQuery conn (selectNode node_id)
getProjectCorpora :: Column PGInt4 -> IO [Corpus]
getProjectCorpora node_id = do
conn <- PGS.connect infoGargandb
O.runQuery conn $ selectNode node_id
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
module Data.Gargantext.Database.NodeNgram where
import Prelude
import Data.Time (UTCTime)
import Data.Text (Text)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Control.Arrow (returnA)
import qualified Database.PostgreSQL.Simple as PGS
import qualified Opaleye as O
import Opaleye (Column, PGBool, PGInt4, PGText, PGTimestamptz, PGFloat8
, Table(Table), Query
, QueryRunnerColumnDefault, queryRunnerColumnDefault
, fieldQueryRunnerColumn
, (.==), (.>)
)
import Data.Gargantext.Database.Private (infoGargandb)
import Data.Gargantext.Database.Instances
data NodeNgramPoly id node_id ngram_id weight
= NodeNgram { nodeNgram_NodeNgramId :: id
, nodeNgram_NodeNgramNodeId :: node_id
, nodeNgram_NodeNgramNgramId :: ngram_id
, nodeNgram_NodeNgramWeight :: weight
} deriving (Show)
type NodeNgramWrite = NodeNgramPoly (Maybe (Column PGInt4)) (Column PGInt4) (Column PGInt4) (Maybe (Column PGFloat8))
type NodeNgramRead = NodeNgramPoly (Column PGInt4) (Column PGInt4) (Column PGInt4) ((Column PGFloat8))
type NodeNgram = NodeNgramPoly (Maybe Int) Int Int (Maybe Double)
$(makeAdaptorAndInstance "pNodeNgram" ''NodeNgramPoly)
$(makeLensesWith abbreviatedFields ''NodeNgramPoly)
nodeNgramTable :: O.Table NodeNgramWrite NodeNgramRead
nodeNgramTable = O.Table "nodes_ngrams" (pNodeNgram NodeNgram { nodeNgram_NodeNgramId = O.optional "id"
, nodeNgram_NodeNgramNodeId = O.required "node_id"
, nodeNgram_NodeNgramNgramId = O.required "ngram_id"
, nodeNgram_NodeNgramWeight = O.optional "weight"
}
)
queryNodeNgramTable :: Query NodeNgramRead
queryNodeNgramTable = O.queryTable nodeNgramTable
-- | not optimized (get all ngrams without filters)
nodeNgrams :: IO [NodeNgram]
nodeNgrams = do
conn <- PGS.connect infoGargandb
O.runQuery conn queryNodeNgramTable
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
module Data.Gargantext.Database.NodeNgramNgram where
import Prelude
import Data.Time (UTCTime)
import Data.Text (Text)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Control.Arrow (returnA)
import qualified Database.PostgreSQL.Simple as PGS
import qualified Opaleye as O
import Opaleye (Column, PGBool, PGInt4, PGText, PGTimestamptz, PGFloat8
, Table(Table), Query
, QueryRunnerColumnDefault, queryRunnerColumnDefault
, fieldQueryRunnerColumn
, (.==), (.>)
)
import Data.Gargantext.Database.Private (infoGargandb)
import Data.Gargantext.Database.Instances
data NodeNgramNgramPoly node_id ngram1_id ngram2_id weight
= NodeNgramNgram { nodeNgramNgram_NodeNgramNgram_NodeId :: node_id
, nodeNgramNgram_NodeNgramNgram_Ngram1Id :: ngram1_id
, nodeNgramNgram_NodeNgramNgram_Ngram2Id :: ngram2_id
, nodeNgramNgram_NodeNgramNgram_Weight :: weight
} deriving (Show)
type NodeNgramNgramWrite = NodeNgramNgramPoly (Maybe (Column PGInt4)) (Column PGInt4) (Column PGInt4) (Maybe (Column PGFloat8))
type NodeNgramNgramRead = NodeNgramNgramPoly (Column PGInt4) (Column PGInt4) (Column PGInt4) (Column PGFloat8)
type NodeNgramNgram = NodeNgramNgramPoly (Maybe Int) Int Int (Maybe Double)
$(makeAdaptorAndInstance "pNodeNgramNgram" ''NodeNgramNgramPoly)
$(makeLensesWith abbreviatedFields ''NodeNgramNgramPoly)
nodeNgramNgramTable :: O.Table NodeNgramNgramWrite NodeNgramNgramRead
nodeNgramNgramTable = O.Table "nodes_ngrams_ngrams" ( pNodeNgramNgram NodeNgramNgram
{ nodeNgramNgram_NodeNgramNgram_NodeId = O.optional "node_id"
, nodeNgramNgram_NodeNgramNgram_Ngram1Id = O.required "ngram1_id"
, nodeNgramNgram_NodeNgramNgram_Ngram2Id = O.required "ngram2_id"
, nodeNgramNgram_NodeNgramNgram_Weight = O.optional "weight"
}
)
queryNodeNgramNgramTable :: Query NodeNgramNgramRead
queryNodeNgramNgramTable = O.queryTable nodeNgramNgramTable
-- | not optimized (get all ngrams without filters)
nodeNgramNgrams :: IO [NodeNgramNgram]
nodeNgramNgrams = do
conn <- PGS.connect infoGargandb
O.runQuery conn queryNodeNgramNgramTable
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
module Data.Gargantext.Database.NodeNode where
import Prelude
import Data.Time (UTCTime)
import Data.Text (Text)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Control.Arrow (returnA)
import qualified Database.PostgreSQL.Simple as PGS
import qualified Opaleye as O
import Opaleye (Column, PGBool, PGInt4, PGText, PGTimestamptz, PGFloat8
, Table(Table), Query
, QueryRunnerColumnDefault, queryRunnerColumnDefault
, fieldQueryRunnerColumn
, (.==), (.>)
, required, optional
)
import Data.Gargantext.Database.Private (infoGargandb)
import Data.Gargantext.Database.Instances
data NodeNodePoly node1_id node2_id score
= NodeNode { nodeNode_node1_id :: node1_id
, nodeNode_node2_id :: node2_id
, nodeNode_score :: score
} deriving (Show)
type NodeNodeWrite = NodeNodePoly (Column PGInt4) (Column PGInt4) (Maybe (Column PGFloat8))
type NodeNodeRead = NodeNodePoly (Column PGInt4) (Column PGInt4) (Column PGFloat8)
type NodeNode = NodeNodePoly Int Int (Maybe Double)
$(makeAdaptorAndInstance "pNodeNode" ''NodeNodePoly)
$(makeLensesWith abbreviatedFields ''NodeNodePoly)
nodeNodeTable :: O.Table NodeNodeWrite NodeNodeRead
nodeNodeTable = O.Table "nodes_nodes" (pNodeNode NodeNode { nodeNode_node1_id = required "node1_id"
, nodeNode_node2_id = required "node2_id"
, nodeNode_score = optional "score"
}
)
queryNodeNodeTable :: Query NodeNodeRead
queryNodeNodeTable = O.queryTable nodeNodeTable
-- | not optimized (get all ngrams without filters)
nodeNodes :: IO [NodeNode]
nodeNodes = do
conn <- PGS.connect infoGargandb
O.runQuery conn queryNodeNodeTable
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
module Data.Gargantext.Database.NodeNodeNgram where
import Prelude
import Data.Time (UTCTime)
import Data.Text (Text)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Control.Arrow (returnA)
import qualified Database.PostgreSQL.Simple as PGS
import qualified Opaleye as O
import Opaleye (Column, PGBool, PGInt4, PGText, PGTimestamptz, PGFloat8
, Table(Table), Query
, QueryRunnerColumnDefault, queryRunnerColumnDefault
, fieldQueryRunnerColumn
, (.==), (.>)
)
import Data.Gargantext.Database.Private (infoGargandb)
import Data.Gargantext.Database.Instances
data NodeNodeNgramPoly node1_id node2_id ngram_id score
= NodeNodeNgram { nodeNodeNgram_node1_id :: node1_id
, nodeNodeNgram_node2_id :: node2_id
, nodeNodeNgram_ngram_id :: ngram_id
, nodeNodeNgram_score :: score
} deriving (Show)
type NodeNodeNgramWrite = NodeNodeNgramPoly (Column PGInt4) (Column PGInt4) (Column PGInt4) (Maybe (Column PGFloat8))
type NodeNodeNgramRead = NodeNodeNgramPoly (Column PGInt4) (Column PGInt4) (Column PGInt4) (Column PGFloat8)
type NodeNodeNgram = NodeNodeNgramPoly Int Int Int (Maybe Double)
$(makeAdaptorAndInstance "pNodeNodeNgram" ''NodeNodeNgramPoly)
$(makeLensesWith abbreviatedFields ''NodeNodeNgramPoly)
nodeNodeNgramTable :: O.Table NodeNodeNgramWrite NodeNodeNgramRead
nodeNodeNgramTable = O.Table "nodes_nodes_ngrams" ( pNodeNodeNgram NodeNodeNgram
{ nodeNodeNgram_node1_id = O.required "node1_id"
, nodeNodeNgram_node2_id = O.required "node2_id"
, nodeNodeNgram_ngram_id = O.required "ngram_id"
, nodeNodeNgram_score = O.optional "score"
}
)
queryNodeNodeNgramTable :: Query NodeNodeNgramRead
queryNodeNodeNgramTable = O.queryTable nodeNodeNgramTable
-- | not optimized (get all ngrams without filters)
nodeNodeNgrams :: IO [NodeNodeNgram]
nodeNodeNgrams = do
conn <- PGS.connect infoGargandb
O.runQuery conn queryNodeNodeNgramTable
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Database.Private where
import qualified Database.PostgreSQL.Simple as PGS
-- TODO add a reader Monad here
infoGargandb :: PGS.ConnectInfo
infoGargandb = PGS.ConnectInfo { PGS.connectHost = "127.0.0.1"
, PGS.connectPort = 5432
, PGS.connectUser = "gargantua"
, PGS.connectPassword = "C8kdcUrAQy66U"
, PGS.connectDatabase = "gargandb" }
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
module Data.Gargantext.Database.User where
import Prelude
import Data.Gargantext.Prelude
import Data.Time (UTCTime)
import Data.Text (Text)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Control.Arrow (returnA)
import qualified Database.PostgreSQL.Simple as PGS
import qualified Opaleye as O
import Opaleye (Column, PGBool, PGInt4, PGText, PGTimestamptz
, Table(Table), Query
, QueryRunnerColumnDefault, queryRunnerColumnDefault
, fieldQueryRunnerColumn
, (.==), (.>)
, required, optional
)
import Data.Gargantext.Database.Private (infoGargandb)
import Data.Gargantext.Database.Instances
-- Functions only
import Data.List (find)
data UserLight = UserLight { userLight_id :: Int
, userLight_username :: Text
, userLight_email :: Text
} deriving (Show)
toUserLight :: User -> UserLight
toUserLight (User id _ _ _ u _ _ e _ _ _ ) = UserLight id u e
data UserPoly id pass llogin suser
uname fname lname
mail staff active djoined = User { user_id :: id
, user_password :: pass
, user_lastLogin :: llogin
, user_isSuperUser :: suser
, user_username :: uname
, user_firstName :: fname
, user_lastName :: lname
, user_email :: mail
, user_isStaff :: staff
, user_isActive :: active
, user_dateJoined :: djoined
} deriving (Show)
type UserWrite = UserPoly (Maybe (Column PGInt4)) (Column PGText)
(Maybe (Column PGTimestamptz)) (Column PGBool)
(Column PGText) (Column PGText)
(Column PGText) (Column PGText)
(Column PGBool) (Column PGBool)
(Column PGTimestamptz)
type UserRead = UserPoly (Column PGInt4) (Column PGText)
(Column PGTimestamptz) (Column PGBool)
(Column PGText) (Column PGText)
(Column PGText) (Column PGText)
(Column PGBool) (Column PGBool)
(Column PGTimestamptz)
type User = UserPoly Int Text (Maybe UTCTime) Bool Text Text Text Text Bool Bool UTCTime
$(makeAdaptorAndInstance "pUser" ''UserPoly)
$(makeLensesWith abbreviatedFields ''UserPoly)
userTable :: O.Table UserWrite UserRead
userTable = O.Table "auth_user" (pUser User { user_id = optional "id"
, user_password = required "password"
, user_lastLogin = optional "last_login"
, user_isSuperUser = required "is_superuser"
, user_username = required "username"
, user_firstName = required "first_name"
, user_lastName = required "last_name"
, user_email = required "email"
, user_isStaff = required "is_staff"
, user_isActive = required "is_active"
, user_dateJoined = required "date_joined"
}
)
queryUserTable :: Query UserRead
queryUserTable = O.queryTable userTable
selectUsersLight :: Query UserRead
selectUsersLight = proc () -> do
row@(User i p ll is un fn ln m iff ive dj) <- queryUserTable -< ()
O.restrict -< i .== 1
--returnA -< User i p ll is un fn ln m iff ive dj
returnA -< row
userWith :: (Eq a1, Foldable t) => (a -> a1) -> a1 -> t a -> Maybe a
userWith f t xs = find (\x -> f x == t) xs
userWithUsername :: Text -> [User] -> Maybe User
userWithUsername t xs = userWith user_username t xs
userWithId :: Int -> [User] -> Maybe User
userWithId t xs = userWith user_id t xs
users :: IO [User]
users = do
conn <- PGS.connect infoGargandb
O.runQuery conn queryUserTable
usersLight :: IO [UserLight]
usersLight = do
conn <- PGS.connect infoGargandb
pm toUserLight <$> O.runQuery conn queryUserTable
module Data.Gargantext.Ngrams (
module Data.Gargantext.Ngrams.TextMining,
module Data.Gargantext.Ngrams.Words,
module Data.Gargantext.Ngrams.Hetero,
module Data.Gargantext.Ngrams.Count
) where
import Data.Gargantext.Ngrams.TextMining
import Data.Gargantext.Ngrams.Words
import Data.Gargantext.Ngrams.Hetero
import Data.Gargantext.Ngrams.Count
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Ngrams.Count where
import System.Environment (getArgs)
import Data.List (foldl', take)
import Data.Foldable as F
import Data.Map (Map)
import qualified Data.Map as M
import qualified Data.Text.Lazy.IO as DTLIO
import qualified Data.Text.Lazy as DTL
-- | /O(n)/ Breaks a 'Text' up into each Text list of chars.
-- from slower to faster:
letters :: DTL.Text -> [DTL.Text]
letters text = DTL.chunksOf 1 text
letters' :: DTL.Text -> [DTL.Text]
letters' text = DTL.splitOn "#" $ DTL.intersperse '#' text
letters'' :: DTL.Text -> [DTL.Text]
letters'' = DTL.foldr (\ch xs -> DTL.singleton ch : xs) []
-- words
-- lines
-- words between punctuation
-- number of punctuation
occurrences :: Ord a => [a] -> Map a Int
occurrences xs = foldl' (\x y -> M.insertWith' (+) y 1 x) M.empty xs
-- for optimization :
--occurrences' :: Ord a => [a] -> Map a Integer
--occurrences' xs = DTL.foldl (\x y -> M.insertWith' (+) y 1 x) M.empty xs
countMain = do
(fichier:rest) <- getArgs
c <- DTLIO.readFile fichier
--print $ occurrences $ DTL.chunksOf 1 c
print $ occurrences $ letters'' c
--print $ occurrences $ DTL.words $ DTL.toLower c
module Data.Gargantext.Ngrams.Hetero where
import GHC.Real as R
import Data.Set as S
import Data.Map as M
import Data.List.Split as S
import Database.PostgreSQL.Simple as PGS
import Opaleye.PGTypes (PGInt4)
import Opaleye.Internal.Column (Column)
import Data.Gargantext.Database.Gargandb
import Data.Gargantext.Database.Private
import Data.Gargantext.Utils.Chronos
import Data.Gargantext.Ngrams.Words (cleanText)
import Data.Gargantext.Ngrams.Count (occurrences)
import Data.Gargantext.Database.Simple
--main = do
-- t <- getTextquery
-- print (Prelude.map (heterogeinity . concat) $ S.chunksOf 3 t)
-- heterogeinity sur concat texts
heterogeinity' :: Int -> Int -> Int -> IO [Integer]
heterogeinity' corpus_id limit x = do
t <- getAbstract corpus_id limit
Prelude.mapM (dicoStruct . occurrences) $ (S.chunksOf x) . cleanText $ concat t
heterogeinity'' :: Int -> Int -> Int -> IO [Integer]
heterogeinity'' corpus_id limit size = do
t <- getAbstract corpus_id limit
Prelude.mapM (dicoStruct . occurrences) $ (S.chunksOf size) . cleanText $ concat t
dicoStruct :: (Integral r, Monad m) => M.Map t r -> m r
dicoStruct dict_occ = do
let keys_size = toInteger $ length $ M.keys dict_occ
let total_occ = sum $ Prelude.map (\(x, y) -> y) $ M.toList dict_occ
return $ div total_occ (fromIntegral keys_size)
-- heterogeinity sur UCT (Unité de Context Textuel)
heterogeinity :: [Char] -> IO Integer
heterogeinity string = do
let dict_occ = occurrences $ cleanText string
let keys_size = toInteger $ length $ M.keys dict_occ
let total_occ = sum $ Prelude.map (\(x, y) -> y) $ M.toList dict_occ
return $ div total_occ (fromIntegral keys_size)
--computeHeterogeinity
-- :: Fractional t =>
-- Opaleye.Internal.Column.Column Opaleye.PGTypes.PGInt4
-- -> IO (t, Integer, Integer)
computeHeterogeinity corpus_id = do
c <- PGS.connect infoGargandb
t <- getText c (nodeHyperdataText corpus_id)
heterogeinity $ Prelude.concat t
main2 = do
let corpus_ids = [
("ALL", 272927) -- 73
,("Histoire", 1387736) -- 28
,("Sciences Po", 1296892) -- 37
,("Phylosophie", 1170004) -- 20
,("Psychologie", 1345852) -- 37
,("Sociologie", 1246452) -- 42
]
r <- Prelude.map computeHeterogeinity $ Prelude.map (\(t,id) -> id) corpus_ids
return r
module Data.Gargantext.Ngrams.TextMining where
import Data.Map (empty, Map, insertWith, toList)
import Data.List (foldl, foldl')
import qualified Data.List as L
sortGT (a1, b1) (a2, b2)
| a1 < a2 = GT
| a1 > a2 = LT
| a1 == a2 = compare b1 b2
--histogram :: Ord a => [a] -> [(a, Int)]
--histogram = map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
--histogram = sortGT Prelude.. $ map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
countElem :: (Ord k) => Data.Map.Map k Int -> k -> Data.Map.Map k Int
countElem m e = Data.Map.insertWith (\n o -> n + o) e 1 m
freqList :: (Ord k) => [k] -> Data.Map.Map k Int
freqList = foldl countElem Data.Map.empty
--getMaxFromMap :: Data.Map.Map -> Maybe -> [a] -> [a]
getMaxFromMap m = go [] Nothing (toList m)
where
go ks _ [] = ks
go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
go ks (Just u) ((k,v):rest)
| v < u = go ks (Just u) rest
| v > u = go [k] (Just v) rest
| otherwise = go (k:ks) (Just v) rest
merge :: [a] -> [a] -> [a]
merge [] ys = ys
merge (x:xs) ys = x:merge ys xs
average :: [Double] -> Double
average x = L.sum x / L.genericLength x
average' :: [Int] -> Double
average' x = (L.sum y) / (L.genericLength y) where
y = map fromIntegral x
countYear :: [Integer] -> Map Integer Integer
countYear [] = empty
countYear (x:xs) = insertWith (+) x 1 (countYear xs)
countYear' :: [Integer] -> Map Integer Integer
countYear' (xs) = foldl' (\x y -> insertWith (+) y 1 x) empty xs
textMiningMain :: IO ()
textMiningMain = do
print $ merge ["abc"] ["bcd"]
-- module Data.Gargantext.Ngrams.Utils where
-- calculate levenshtein distance between two strings
levenshtein::[Char] -> [Char] -> Int
levenshtein "" "" = 0
levenshtein "" s2 = length s2
levenshtein s1 "" = length s1
levenshtein s1 s2
| last s1 == last s2 = levenshtein (init s1) (init s2)
| otherwise = minimum [
1 + levenshtein (init s1) s2,
1 + levenshtein s1 (init s2),
1 + levenshtein (init s1) (init s2)
]
-- calculate levenshtein distance between two strings
levenshtein::[Char] -> [Char] -> Int
-- this part is mostly a speed optimiziation
levenshtein' s1 s2
| length s1 > length s2 = levenshtein s2 s1
| length s1 < length s2 =
let d = length s2 - length s1
in d + levenshtein s1 (take (length s2 - d) s2)
-- the meat of the algorithm
levenshtein' "" "" = 0
levenshtein' s1 s2
| last s1 == last s2 = levenshtein (init s1) (init s2)
| otherwise = minimum [1 + levenshtein (init s1) s2,
1 + levenshtein s1 (init s2),
1 + levenshtein (init s1) (init s2)]
module Data.Gargantext.Ngrams.Words where
import Data.List (partition)
import Data.Set (fromList, notMember, member)
import Data.Char (isPunctuation, toLower, isAlpha, isSpace)
import NLP.Stemmer (stem, Stemmer(..))
import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
import Language.Aspell.Options (ACOption(..))
--import Data.Either.Utils (fromRight)
import Data.ByteString.Internal (packChars)
get_lang x = do
let lang = Lang (packChars x)
spell_lang <- spellCheckerWithOptions [lang]
return spell_lang
check' lang x = check lang (packChars x)
suggest' lang x = suggest lang (packChars x)
--spell_lang <- spellChecker
--lang = fromRight s
--suggest' lang x
-- stem French "naturelles"
-- paragraphes
-- lines
-- sentences
-- Prelude.map (\x -> stem French x) $ cleanText "Les hirondelles s envolent dans les cieux."
repl :: Char -> Char
repl x
| x == '\'' = ' '
| x == '/' = ' '
-- | x == '\t' = ' '
-- | x == '\n' = ' '
| otherwise = x
cleanText text = do
-- pb avec \'
--words $ filter (not . isPunctuation) $ Prelude.map toLower text
words $ filter (\x -> isAlpha x || isSpace x) $ Prelude.map (repl . toLower) text
isMiamWord word = do
let miamWord_set = fromList ["salut", "phrase"]
member word miamWord_set
isStopWord word = do
let stopWord_set = fromList ["de", "la", "une", "avec"]
member word stopWord_set
wordsMain = do
let text = "Salut, ceci est une phrase \n\n avec de la ponctuation !"
print $ partition (not . isStopWord) $ cleanText text
print $ filter (not . isStopWord) $ cleanText text
--print $ filter isStopWord $ words $ filter (not . isPunctuation) text
module Data.Gargantext.Parsers (module Data.Gargantext.Parsers.Occurrences)
where
import Data.Gargantext.Parsers.Occurrences
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Parsers.Occurrences where
import Data.Gargantext.Prelude
import Data.Attoparsec.Text
import Data.Text (Text)
import Data.Either.Extra(Either(..))
import qualified Data.Text as T
import Control.Applicative
occurrenceParser :: Text -> Parser Bool
occurrenceParser txt = manyTill anyChar (string txt) >> pure True
occurrencesParser :: Text -> Parser Int
occurrencesParser txt = case txt of
"" -> pure 0
_ -> many (occurrenceParser txt') >>= \matches -> pure (length matches)
where
txt' = T.toLower txt
parseOccurrences :: Text -> Text -> Either String Int
parseOccurrences x = parseOnly (occurrencesParser x)
{-# LANGUAGE NoImplicitPrelude #-}
module Data.Gargantext.Prelude where
import Protolude
-- TODO import functions optimized in Utils.Count
-- import Protolude hiding (head, last, all, any, sum, product, length)
-- import Data.Gargantext.Utils.Count
import qualified Data.List as L hiding (head, sum)
import qualified Control.Monad as M
import qualified Data.Map as Map
import qualified Data.Vector as V
pf = filter
pr = reverse
pm = map
pm2 :: (t -> b) -> [[t]] -> [[b]]
pm2 fun = pm (pm fun)
pz = zip
pd = drop
ptk = take
pzw = zipWith
-- Exponential Average
eavg :: [Double] -> Double
eavg (x:xs) = a*x + (1-a)*(eavg xs)
where a = 0.70
eavg [] = 0
-- Simple Average
mean :: Fractional a => [a] -> a
mean xs = if L.null xs then 0.0
else sum xs / fromIntegral (length xs)
sumMaybe :: Num a => [Maybe a] -> Maybe a
sumMaybe = fmap sum . M.sequence
variance :: Floating a => [a] -> a
variance xs = mean $ pm (\x -> (x - m) ** 2) xs where
m = mean xs
deviation :: [Double] -> Double
deviation = sqrt . variance
movingAverage :: Fractional b => Int -> [b] -> [b]
movingAverage steps xs = pm mean $ chunkAlong steps 1 xs
ma :: [Double] -> [Double]
ma = movingAverage 3
-- | Function to split a range into chunks
chunkAlong :: Int -> Int -> [a] -> [[a]]
chunkAlong a b l = only (while dropAlong)
where
only = pm (take a)
while = takeWhile (\x -> length x >= a)
dropAlong = L.scanl (\x y -> drop b x) l [1..]
-- | Optimized version (Vector)
chunkAlong' :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
chunkAlong' a b l = only (while dropAlong)
where
only = V.map (V.take a)
while = V.takeWhile (\x -> V.length x >= a)
dropAlong = V.scanl (\x y -> V.drop b x) l (V.fromList [1..])
-- | TODO Inverse of chunk ? unchunkAlong ?
unchunkAlong :: Int -> Int -> [[a]] -> [a]
unchunkAlong = undefined
-- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
splitAlong :: [Int] -> [Char] -> [[Char]]
splitAlong _ [] = [] -- No list? done
splitAlong [] xs = [xs] -- No place to split at? Return the remainder
splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys) -- take until our split spot, recurse with next split spot and list remainder
takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
takeWhileM _ [] = return []
takeWhileM p (a:as) = do
v <- a
if p v
then do
vs <- takeWhileM p as
return (v:vs)
else return []
-- SUMS
-- To select the right algorithme according to the type:
-- https://github.com/mikeizbicki/ifcxt
sumSimple :: Num a => [a] -> a
sumSimple = L.foldl' (+) 0
-- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
sumKahan :: Num a => [a] -> a
sumKahan = snd . L.foldl' go (0,0)
where
go (c,t) i = ((t'-t)-y,t')
where
y = i-c
t' = t+y
-- | compute part of the dict
count2map :: (Ord k, Foldable t) => t k -> Map.Map k Double
count2map xs = Map.map (/ (fromIntegral (length xs))) (count2map' xs)
-- | insert in a dict
count2map' :: (Ord k, Foldable t) => t k -> Map.Map k Double
count2map' xs = L.foldl' (\x y -> Map.insertWith' (+) y 1 x) Map.empty xs
trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
trunc n = truncate . (* 10^n)
trunc' :: Int -> Double -> Double
trunc' n x = fromIntegral $ truncate $ (x * 10^n)
bool2int :: Num a => Bool -> a
bool2int bool = case bool of
True -> 1
False -> 0
bool2double :: Bool -> Double
bool2double bool = case bool of
True -> 1.0
False -> 0.0
-- Normalizing && scaling data
scale = scaleMinMax
scaleMinMax :: [Double] -> [Double]
scaleMinMax xs = pm (\x -> (x - mi / (ma - mi + 1) )) xs'
where
ma = maximum xs'
mi = minimum xs'
xs' = pm abs xs
scaleNormalize :: [Double] -> [Double]
scaleNormalize xs = pm (\x -> (x - v / (m + 1))) xs'
where
v = variance xs'
m = mean xs'
xs' = pm abs xs
normalize :: [Double] -> [Double]
normalize as = normalizeWith identity as
normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
normalizeWith extract bs = pm (\x -> x/(sum bs')) bs'
where
bs' = pm extract bs
-- Zip functions to add
zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
zipFst f xs = zip (f xs) xs
zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
zipSnd f xs = zip xs (f xs)
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Data.Gargantext.Server
-- ( startApp
-- , app
-- )
where
import Control.Concurrent
import Control.Monad
import Control.Monad.IO.Class
import Data.Aeson
import Data.Aeson.TH
import Data.Gargantext.Types
import Network.HTTP.Client.MultipartFormData
import Network.Wai
import Network.Wai.Handler.Warp
import Servant
import Servant.Multipart
-- | TODO, use MOCK feature of Servant to generate fake data (for tests)
data FakeNode = FakeNode
{ fakeNodeId :: Int
, fakeNodeName :: String
} deriving (Eq, Show)
$(deriveJSON defaultOptions ''FakeNode)
type API = "nodes" :> Get '[JSON] [FakeNode]
:<|> "node" :> Capture "id" Int :> Get '[JSON] FakeNode
:<|> "echo" :> Capture "string" String :> Get '[JSON] String
:<|> "upload" :> MultipartForm MultipartData :> Post '[JSON] String
-- :<|> "node" :> Capture "id" Int :> Get '[JSON] Node
server :: Server API
server = pure fakeNodes
:<|> fakeNode
:<|> echo
:<|> upload
where
echo s = pure s
startGargantext :: IO ()
startGargantext = print ("Starting server on port " ++ show port) >> run port app
where
port = 8008
-- | TODO App type, the main monad in which the bot code is written with.
-- Provide config, state, logs and IO
-- type App m a = ( MonadState AppState m
-- , MonadReader Conf m
-- , MonadLog (WithSeverity Doc) m
-- , MonadIO m) => m a
-- Thanks @yannEsposito for this.
app :: Application
app = serve api server
api :: Proxy API
api = Proxy
fakeNode :: Monad m => Int -> m FakeNode
fakeNode id = pure (fakeNodes !! id)
fakeNodes :: [FakeNode]
fakeNodes = [ FakeNode 1 "Poincare"
, FakeNode 2 "Grothendieck"
]
-- | Upload files
-- TODO Is it possible to adapt the function according to iValue input ?
upload :: MultipartData -> Handler String
upload multipartData = do
liftIO $ do
putStrLn "Inputs:"
forM_ (inputs multipartData) $ \input ->
putStrLn $ " " ++ show (iName input)
++ " -> " ++ show (iValue input)
forM_ (files multipartData) $ \file -> do
content <- readFile (fdFilePath file)
putStrLn $ "Content of " ++ show (fdFileName file)
++ " at " ++ fdFilePath file
putStrLn content
pure "Data loaded"
module Data.Gargantext.Types ( module Data.Gargantext.Types.Main
, module Data.Gargantext.Types.Node
) where
import Data.Gargantext.Types.Main
import Data.Gargantext.Types.Node
-- | CNRS Copyrights
-- Licence: https://gitlab.iscpif.fr/humanities/gargantext/blob/stable/LICENSE
-- Author: Alexandre Delanoë (alexandre.delanoe@iscpif.fr)
module Data.Gargantext.Types.Main where
import Protolude (fromMaybe)
import Data.Text (Text)
import Data.Time (UTCTime)
import Data.Gargantext.Prelude
import Data.Gargantext.Types.Node ( NodePoly
, HyperdataFolder , HyperdataCorpus , HyperdataDocument
, HyperdataFavorites, HyperdataResource
, HyperdataList , HyperdataScore
, HyperdataGraph
, HyperdataPhylo
, HyperdataNotebook
)
-- | TODO add Symbolic Node / Document
-- TODO make instances of Nodes
-- All the Database is structred like a hierachical Tree
-- Where a is a NodeType:
data Tree a = Empty | Node' a (Tree a) (Tree a) deriving (Show)
--gargTree :: Tree NodeType
--gargTree = Node' NodeUser Empty
-- (Node' Empty
-- (Project Empty Empty)
-- )
--
data NodeType = NodeUser
| Folder | Project | Corpus | Document
| Favorites
| NodeSwap
| List | StopList | MainList | MapList | GroupList
| Score | Occurrences | Cooccurrences | Specclusion | Genclusion | Cvalue
| Tficf | TfidfCorpus | TfidfGlobal | TirankLocal | TirankGlobal
deriving (Show, Eq)
-- | NodePoly indicates that Node has a Polymorphism Type
type Node json = NodePoly Integer NodeTypeId Integer Integer Text UTCTime json
-- type Node json = NodePoly NodeId NodeTypeId UserId ParentId NodeName UTCTime json
type NodeTypeId = Int
--type NodeUser = Node HyperdataUser
-- | Then a Node can be either a Folder or a Corpus or a Document
type Folder = Node HyperdataFolder
type Project = Folder
type Corpus = Node HyperdataCorpus
type Document = Node HyperdataDocument
-- | Community Manager Use Case
type Annuaire = Corpus
type Individu = Document
-- | Favorites Node enable Node categorization
type Favorites = Node HyperdataFavorites
-- | Favorites Node enable Swap Node with some synonyms for clarity
type NodeSwap = Node HyperdataResource
-- | Then a Node can be a List which as some synonyms
type List = Node HyperdataList
type StopList = List
type MainList = List
type MapList = List
type GroupList = List
-- | Then a Node can be a Score which as some synonyms
type Score = Node HyperdataScore
type Occurrences = Score
type Cooccurrences = Score
type Specclusion = Score
type Genclusion = Score
type Cvalue = Score
type Tficf = Score
-- TODO All these Tfidf* will be replaced with TFICF
type TfidfCorpus = Tficf
type TfidfGlobal = Tficf
type TirankLocal = Tficf
type TirankGlobal = Tficf
-- | Then a Node can be either a Graph or a Phylo or a Notebook
type Graph = Node HyperdataGraph
type Phylo = Node HyperdataPhylo
type Notebook = Node HyperdataNotebook
nodeTypes :: [(NodeType, NodeTypeId)]
nodeTypes = [
--(NodeUser , 1)
--
(Project , 2)
, (NodeSwap , 19)
, (Corpus , 3)
, (Document , 4)
------ Lists
, (StopList , 5)
, (GroupList , 6)
, (MainList , 7)
, (MapList ,  8)
-- Scores
, (Occurrences , 10)
, (Cooccurrences , 9)
, (Specclusion , 11)
, (Genclusion , 18)
, (Cvalue , 12)
, (TfidfCorpus , 13)
, (TfidfGlobal , 14)
, (TirankLocal , 16)
, (TirankGlobal , 17)
-- Node management
, (Favorites , 15)
]
--
nodeTypeId :: NodeType -> NodeTypeId
nodeTypeId tn = fromMaybe (error ("Typename " ++ show tn ++ " does not exist")) (lookup tn nodeTypes)
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DeriveGeneric #-}
module Data.Gargantext.Types.Node where
import Data.Text (Text)
import Data.List (lookup)
import GHC.Generics (Generic)
import Data.Time (UTCTime)
import Data.Gargantext.Utils.Prefix
import Data.Aeson.TH
data NodePoly id typename userId parentId name date hyperdata = Node { node_id :: id
, node_typename :: typename
, node_userId:: userId
-- , nodeHashId :: hashId
, node_parentId :: parentId
, node_name :: name
, node_date :: date
, node_hyperdata :: hyperdata
} deriving (Show)
data Statut = Statut { statut_Date :: Maybe UTCTime
, statut_Error :: Maybe Text
, statut_Action :: Maybe Text
, statut_Complete :: Maybe Bool
, statut_Progress :: Maybe Int
} deriving (Show, Generic)
$(deriveJSON (unPrefix "statut_") ''Statut)
data HyperdataDocument = HyperdataDocument { hyperdataDocument_Bdd :: Maybe Text
, hyperdataDocument_Doi :: Maybe Text
, hyperdataDocument_Url :: Maybe Text
, hyperdataDocument_Page :: Maybe Int
, hyperdataDocument_Title :: Maybe Text
, hyperdataDocument_Authors :: Maybe Text
, hyperdataDocument_Abstract :: Maybe Text
, hyperdataDocument_Statuses :: Maybe [Statut]
, hyperdataDocument_Publication_date :: Maybe Text
, hyperdataDocument_Publication_year :: Maybe Text
, hyperdataDocument_Publication_month :: Maybe Text
, hyperdataDocument_Publication_hour :: Maybe Text
, hyperdataDocument_Publication_minute :: Maybe Text
, hyperdataDocument_Publication_second :: Maybe Text
, hyperdataDocument_LanguageIso2 :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataDocument_") ''HyperdataDocument)
data LanguageNodes = LanguageNodes { languageNodes___unknown__ :: [Int]}
deriving (Show, Generic)
$(deriveJSON (unPrefix "languageNodes_") ''LanguageNodes)
data Resource = Resource { resource_Url :: Maybe Text
, resource_Path :: Maybe Text
, resource_Type :: Maybe Int
, resource_Extracted :: Maybe Bool
} deriving (Show, Generic)
$(deriveJSON (unPrefix "resource_") ''Resource)
data HyperdataCorpus = HyperdataCorpus { hyperdataCorpus_Action :: Maybe Text
, hyperdataCorpus_Statuses :: Maybe [Statut]
, hyperdataCorpus_Languages :: Maybe LanguageNodes
, hyperdataCorpus_Resources :: Maybe [Resource]
, hyperdataCorpus_Language_id :: Maybe Text
, hyperdataCorpus_Skipped_docs :: Maybe [Int]
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataCorpus_") ''HyperdataCorpus)
data HyperdataFolder = HyperdataFolder { hyperdataFolder_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataFolder_") ''HyperdataFolder)
data HyperdataList = HyperdataList { hyperdataList_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataList_") ''HyperdataList)
data HyperdataScore = HyperdataScore { hyperdataScore_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataScore_") ''HyperdataScore)
data HyperdataFavorites = HyperdataFavorites { hyperdataFavorites_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataFavorites_") ''HyperdataFavorites)
data HyperdataResource = HyperdataResource { hyperdataResource_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataResource_") ''HyperdataResource)
-- TODO add the Graph Structure here
data HyperdataGraph = HyperdataGraph { hyperdataGraph_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataGraph_") ''HyperdataGraph)
-- TODO add the Graph Structure here
data HyperdataPhylo = HyperdataPhylo { hyperdataPhylo_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataPhylo_") ''HyperdataPhylo)
-- | TODO FEATURE: Notebook saved in the node (to work with Python or Haskell)
data HyperdataNotebook = HyperdataNotebook { hyperdataNotebook_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataNotebook_") ''HyperdataNotebook)
module Data.Gargantext.Utils ( module Data.Gargantext.Utils.Chronos
, module Data.Gargantext.Utils.Prefix
) where
import Data.Gargantext.Utils.Chronos
module Data.Gargantext.Utils.Prefix
module Data.Gargantext.Utils.Chronos where
import Data.Maybe (fromJust, isJust, isNothing)
import qualified Data.Time as DT
import qualified Data.UTC as DU
import Data.Time
import Data.Time.Clock.POSIX
import Text.Regex
parseDate :: String -> Maybe [String]
parseDate d = matchRegex (mkRegex "(.*)/(.*)/(.*)") d
getDate' :: Maybe [String] -> (Integer, Int, Int)
getDate' d
| isJust d == True = toGregorian $ fromGregorian (read year) (read month) (read day)
| otherwise = toGregorian $ fromGregorian 2015 1 1
where
Just [day, month, year] = d
getDate :: String -> (Integer, Int, Int)
getDate = getDate' . parseDate
--getDateDay :: Maybe [String] -> Day
--getDateDay d = fromGregorian (read year) (read month) (read day)
-- where Just [day, month, year] = matchRegex (mkRegex "(.*)/(.*)/(.*)") d
getDateDay' :: Maybe [String] -> Day
getDateDay' d
| isJust d == True = fromGregorian (read year) (read month) (read day)
| otherwise = fromGregorian 2015 1 1
where Just [day, month, year] = d
getDateDay :: String -> Day
getDateDay = getDateDay' . parseDate
getDateUTC :: String -> String
getDateUTC d = show $ DT.UTCTime (getDateDay d) (DT.timeOfDayToTime $ DT.TimeOfDay 0 0 0)
getYear :: String -> String
getYear date = s where
(y, m, d) = getDate date
s = show y
getMonth :: String -> String
getMonth date = s where
(y, m, d) = getDate date
s = show m
getDay :: String -> String
getDay date = s where
(y, m, d) = getDate date
s = show d
--for Dates exported via xls2csv tool
type MT = Maybe (DU.Local DU.DateTime)
type MS = Maybe String
--getDate'' :: String -> String
--getDate'' gd = d where
-- start = "1900-01-01T00:00:00Z"
-- da = (DU.parseRfc3339 start :: MT) >>= DU.addDays ( (read gd :: Integer) -2) >>= DU.renderRfc3339 :: MS
-- d = fromJust da
--
--getDate''' :: String -> String
--getDate''' gd = d where
-- start = "1900-01-01T00:00:00Z"
-- da = (DU.parseRfc3339 start :: MT) >>= DU.addDays ( (read gd :: Integer) -2) >>= DU.renderIso8601CalendarDate :: MS
-- d = fromJust da
--
--date2greg :: String ->
date2greg date = (y, m, d) where
(y, m, d) = DT.toGregorian $ DT.addDays ((read date :: Integer) -2) $ DT.utctDay (read "1900-01-01 00:00:00" :: DT.UTCTime)
getYear' :: String -> String
getYear' date = s where
(y, m, d) = date2greg date
s = show y
getMonth' :: String -> String
getMonth' date = s where
(y, m, d) = date2greg date
s = show m
getDay' :: String -> String
getDay' date = s where
(y, m, d) = date2greg date
s = show d
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RankNTypes #-}
module Data.Gargantext.Utils.Count (head, last, all, any, sum, product, length)
where
import Data.Monoid
import Protolude hiding ((<>), head, last, all, any, sum, product, length)
import qualified Data.Foldable
import Control.Lens (Getting, foldMapOf)
data Fold i o = forall m . Monoid m => Fold (i -> m) (m -> o)
instance Functor (Fold i) where
fmap k (Fold tally summarize) = Fold tally (k . summarize)
instance Applicative (Fold i) where
pure o = Fold (\_ -> ()) (\_ -> o)
Fold tallyF summarizeF <*> Fold tallyX summarizeX = Fold tally summarize
where
tally i = (tallyF i, tallyX i)
summarize (nF, nX) = summarizeF nF (summarizeX nX)
focus :: (forall m . Monoid m => Getting m b a) -> Fold a o -> Fold b o
focus lens (Fold tally summarize) = Fold (foldMapOf lens tally) summarize
fold :: Fold i o -> [i] -> o
fold (Fold tally summarize) is = summarize (reduce (map tally is))
where
reduce = Data.Foldable.foldl' (<>) mempty
--
head :: Fold a (Maybe a)
head = Fold (First . Just) getFirst
last :: Fold a (Maybe a)
last = Fold (Last . Just) getLast
--
all :: (a -> Bool) -> Fold a Bool
all predicate = Fold (All . predicate) getAll
any :: (a -> Bool) -> Fold a Bool
any predicate = Fold (Any . predicate) getAny
--
sum :: Num n => Fold n n
sum = Fold Sum getSum
product :: Num n => Fold n n
product = Fold Product getProduct
length :: Num n => Fold i n
length = Fold (\_ -> Sum 1) getSum
-- | Average function optimized (/!\ need to test it)
data Average a = Average { numerator :: !a, denominator :: !Int }
instance Num a => Monoid (Average a) where
mempty = Average 0 0
mappend (Average xL nL) (Average xR nR) = Average (xL + xR) (nL + nR)
average :: Fractional a => Fold a a
average = Fold tally summarize
where
tally x = Average x 1
summarize (Average numerator denominator) =
numerator / fromIntegral denominator
module Data.Gargantext.Utils.DateUtils where
import Data.Time
--import Data.Dates
--
--readInt :: IO [Char] -> IO Int
--readInt = readLn
--
--readBool :: IO [Char] -> IO Bool
--readBool = readLn
utc2gregorian :: UTCTime -> (Integer, Int, Int)
utc2gregorian date = toGregorian $ utctDay date
gregorian2year :: (Integer, Int, Int) -> Integer
gregorian2year (y, m, d) = y
utc2year :: UTCTime -> Integer
utc2year date = gregorian2year $ utc2gregorian date
averageLength :: Fractional a => [[a1]] -> a
averageLength l = fromIntegral (sum (map length l)) / fromIntegral (length l)
--main :: IO ()
--main = do
-- c <- getCurrentTime
-- print c -- $ toYear $ toGregorian $ utctDay c
charToString :: Char -> String
charToString = (:[])
-- DEFINITIONS as SPECS
-- (Engineering axioms for Gargantext)
------------------------------------------------------------------------
-- From file to corpus
------------------------------------------------------------------------
-- > A Corpus is a list of Documents
data Corpus = [Document]
-- > A Document should have a date, some text and a maybe a language.
-- > Remarks :
-- > If no date then force one ?
-- > Analyze either text or numbers
-- > only one language per document
data Document = Document { date :: UTCTime
, uce :: Map Text $ Either (Maybe Text) (Maybe Double)
, lang :: Maybe Language
}
parseFiles :: Maybe ParserType -> [File] -> Corpus
parseFiles = undefined
-- This function exists already (in Python)
parseFile' :: ParserType -> File -> Maybe [Document]
parseFile' = undefined
-- This function does not exist yet
parseFile :: Maybe ParserType -> File -> Maybe [Document]
parseFile parserType file = documents
where
documents = case parserType of
Nothing -> case guessParserType file of
Nothing -> askUser "Answer to the question with link to $doc"
Just parserType' -> parseFile (Just parserType') file
Just parserType'' -> case parserType'' of
UnsupportedYet -> askUser "Not supported yet, which priority ?"
otherwise -> parseFile' parserType'' file
data ParserType = RIS | ISI | XML | CSV | Europresse | Book | UnsupportedYet
guessParserType :: File -> Maybe ParserType
guessParserType = undefined
------------------------------------------------------------------------
-- What kind of interactions with our users ?
------------------------------------------------------------------------
-- Question is Text only
type Question = Text
-- Possible Answers:
data Answer = ClosedAnswer | NumAnswer | OpenAnswer
-- Definitions of the Answers
type ClosedAnswer = Bool
type OpenAnswer = Text
type NumAnswer = Int
-- Un formulaire est un mapping entre question et peut-être une réponse
-- Un formulaire vide a Nothing au champs (Maybe Answer)
-- Une question répondue a la valeur (Just Response)
type Formular = Map Question (Maybe Answer)
askUser :: Question -> ClosedAnswer
askUser = undefined
data Advice = BugReport | WishList
askUser' :: Question -> Advice
askUser' question = case askUser question of
True -> BugReport
False -> WishList
------------------------------------------------------------------------
-- Specs for Lang Detection
------------------------------------------------------------------------
data Language = English | French
tagDoc :: Document -> Ngrams
tagDoc doc = ngrams
where
ngrams = case lang doc of
Nothing -> case guessLang doc of
Nothing -> tag
------------------------------------------------------------------------
-- Specs for ngrams Worflow
------------------------------------------------------------------------
module Data.Gargantext.Utils.Prefix where
import Control.Monad as X (mzero)
import Data.Aeson as X
import Data.Aeson.TH as X
import Data.Aeson.Types as X
import Data.Char as X (toLower)
-- import Data.Decimal as X
import Data.Maybe as X (catMaybes)
import Data.Monoid as X ((<>))
-- import Data.Scientific as X
import Data.String as X (IsString (..), fromString)
import Data.Text as X (Text, unpack, pack)
import Data.Text.Encoding as X
import Text.Read (readMaybe)
-- | Aeson Options that remove the prefix from fields
unPrefix :: String -> Options
unPrefix prefix = defaultOptions
{ fieldLabelModifier = unCapitalize . dropPrefix prefix
, omitNothingFields = True
}
-- | Lower case leading character
unCapitalize :: String -> String
unCapitalize [] = []
unCapitalize (c:cs) = toLower c : cs
-- | Remove given prefix
dropPrefix :: String -> String -> String
dropPrefix prefix input = go prefix input
where
go pre [] = error $ contextual $ "prefix leftover: " <> pre
go [] (c:cs) = c : cs
go (p:preRest) (c:cRest)
| p == c = go preRest cRest
| otherwise = error $ contextual $ "not equal: " <> (p:preRest) <> " " <> (c:cRest)
contextual msg = "dropPrefix: " <> msg <> ". " <> prefix <> " " <> input
parseJSONFromString :: (Read a) => Value -> Parser a
parseJSONFromString v = do
numString <- parseJSON v
case readMaybe (numString :: String) of
Nothing -> fail $ "Invalid number for TransactionID: " ++ show v
Just n -> return n
module Data.Gargantext.Utils.SaveGetHash where
import System.FilePath (addExtension, joinPath)
import Data.ByteString.Lazy.Internal (ByteString)
import Data.List (elem, intersperse, insert)
import Data.List.Extra (chunksOf)
import Data.Digest.Pure.MD5 (md5)
import System.Directory (getDirectoryContents, createDirectory, findFile, createDirectoryIfMissing)
import Control.Monad (foldM)
import Data.List (splitAt)
import Data.ByteString.Lazy.Internal (packChars)
import qualified Data.ByteString.Lazy as BL
import Codec.Compression.Zlib (compress, decompress)
data Config = Config {
root :: String
, chunkSize :: Int
, compression :: Bool
} deriving Show
conf = Config {
root="/tmp/robot"
, chunkSize=2
, compression = True
}
chunkUrl :: Int -> ByteString -> [[Char]]
chunkUrl a url = chunksOf a $ show $ md5 url
-- replace it with createDirectoryIfMissing
existOrCreate :: [[Char]] -> FilePath -> IO [[Char]]
existOrCreate path_ dir = do
let path = joinPath path_
let returnPath = return $ path_ ++ [dir]
is <- elem dir <$> getDirectoryContents path -- ?
case is of
True -> do
returnPath
False -> do
createDirectory $ path ++ "/" ++ dir
returnPath
doPath :: [[Char]] -> [FilePath] -> IO [[Char]]
doPath root path = foldM (\x y -> existOrCreate x y) root path
splitAt' :: Int -> Int -> [Char] -> ([Char], [Char], [Char])
splitAt' i1 i2 x = (a, b, c) where
(a, a') = splitAt i1 x
(b, c) = splitAt i2 a'
-- ne pas écraser le fichier s'il existe
-- spliter l'url proprement
saveFile :: ByteString -> String -> IO String
saveFile url'' file = do
let url' = chunkUrl (chunkSize conf) url''
let url = init url'
-- add extension according to the filetype
let filename = Prelude.foldl addExtension (last url') ["html", "zlib"]
doPath [(root conf)] url
let path = (root conf) ++ "/" ++ joinPath url ++ "/" ++ filename
--case (findFile ["/tmp/sdfs"] "file.hmtl.zib"
-- Nothing -> create
-- _ -> change name
case (compression conf) of
True -> BL.writeFile path (compress $ packChars file)
False -> writeFile path file
return path
getFile :: FilePath -> IO ByteString
getFile path = do
case (compression conf) of
True -> decompress <$> BL.readFile path
False -> packChars <$> Prelude.readFile path
-- resources
-- add Resource
-- levensthein distance...
flags: {}
extra-package-dbs: []
packages:
- '.'
extra-deps:
- servant-0.11
- servant-multipart-0.10.0.1
- servant-server-0.11
#- utc-0.2.0.1
resolver: lts-8.21
{-# LANGUAGE OverloadedStrings #-}
import Test.Hspec
import Test.QuickCheck
import Control.Exception (evaluate)
import Data.Text (Text)
import Data.Gargantext.Parsers.Occurrences (parseOccurrences)
-- import Data.Gargantext.Analysis (occOfCorpus)
parsersTest = hspec $ do
describe "Parser for occurrences" $ do
let txt = "internet"
it "returns the result of one parsing" $ do
parseOccurrences "internet" "internet" `shouldBe` Right 1
-- | Context of Text should be toLower
it "returns the result of one parsing not case sensitive" $ do
let txtCase = "Internet"
parseOccurrences txtCase "internet" `shouldBe` Right 1
it "returns the result of one parsing after space" $ do
parseOccurrences txt " internet"
`shouldBe` Right 1
it "returns the result of one parsing after chars" $ do
parseOccurrences txt "l'internet"
`shouldBe` Right 1
it "returns the result of multiple parsing" $ do
parseOccurrences txt "internet internet of things"
`shouldBe` Right 2
it "returns the result of multiple parsing separated by text" $ do
parseOccurrences txt "internet in the internet of things"
`shouldBe` Right 2
it "returns the result of multiple parsing separated by punctuation" $ do
parseOccurrences txt "internet. In the internet of things, internet like; internet?"
`shouldBe` Right 4
-- describe "Parser for nodes" $ do
-- it "returns the result of one parsing after space" $ do
-- occOfCorpus 249509 "sciences" `shouldReturn` 7
main :: IO ()
main = do
parsersTest
{-# LANGUAGE OverloadedStrings #-}
import Test.Hspec
import Test.QuickCheck
import Control.Exception (evaluate)
import Data.Text (Text)
import Data.Hastext.Parsers.Occurrences (parse)
main = print "hspec $ do
describe "Parser for occurrences" $ do
let txt = "internet"
it "returns the result of one parsing" $ do
parse "internet" "internet" `shouldBe` Right ((txt, 1) :: (Text, Int))
-- | Context of Text should be toLower
it "returns the result of one parsing not case sensitive" $ do
let txtCase = "Internet"
parse txtCase "internet" `shouldBe` Right ((txtCase, 1) :: (Text, Int))
it "returns the result of one parsing after space" $ do
parse txt " internet"
`shouldBe` Right ((txt, 1) :: (Text, Int))
it "returns the result of one parsing after chars" $ do
parse txt "l'internet"
`shouldBe` (Right ((txt, 1) :: (Text, Int)))
it "returns the result of multiple parsing" $ do
parse txt "internet internet of things"
`shouldBe` (Right ((txt, 2) :: (Text, Int)))
it "returns the result of multiple parsing separated by text" $ do
parse txt "internet in the internet of things"
`shouldBe` (Right ((txt, 2) :: (Text, Int)))
it "returns the result of multiple parsing separated by punctuation" $ do
parse txt "internet. In the internet of things, internet like; internet?"
`shouldBe` (Right ((txt, 4) :: (Text, Int)))
main :: IO ()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment