Commit da0acc7e authored by Alexandre Delanoë's avatar Alexandre Delanoë

Removing files.

parent 29e682f3
module Data.Gargantext (
module Data.Gargantext.Database,
-- module Data.Gargantext.Ngrams,
-- module Data.Gargantext.Utils,
) where
import Data.Gargantext.Database
-- import Data.Gargantext.Ngrams
-- import Data.Gargantext.Utils
module Data.Gargantext.Analysis where
-- import qualified Data.Text.Lazy as DTL
import Data.Text
import Opaleye (Column, PGInt4)
--import Data.Map as DM
--import Data.Vector as DV
-- | Simple function to count Occurrences in a context of text.
occOfDocument :: Column PGInt4 -> Text -> IO Int
occOfDocument = undefined
--occOfDocument c_id txt = do
-- docs <- pm (hyperdataDocument_Abstract . node_hyperdata) <$> getCorpusDocument c_id
-- let occs = pm (\x -> maybe "" identity x) docs
-- let result = case sequence $ pm (parseOccurrences txt) occs of
-- -- TODO find a way to produce nice error messages (file, function, line)
-- Left str -> error $ "[ERROR] at file/function/line" ++ str
-- Right xs -> xs
-- pure (sum result)
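-- A runnable variant of the sketch above (kept commented, like the
-- original; assumes a Connection argument, since getCorpusDocument in
-- Data.Gargantext.Database.Node takes one):
--
-- occOfDocument' :: Connection -> Int -> Text -> IO Int
-- occOfDocument' conn corpusId txt = do
--   docs <- pm (hyperdataDocument_Abstract . node_hyperdata) <$> getCorpusDocument conn corpusId
--   let occs = pm (maybe "" identity) docs
--   case sequence (pm (parseOccurrences txt) occs) of
--     Left str -> error ("[ERROR] occOfDocument: " ++ str)
--     Right xs -> pure (sum xs)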
data Occurrences a b = Map a b
module Data.Gargantext.DSL where
import Data.Text
type Username = Text
type Password = Text
--user :: Username -> Maybe User
--user username = undefined
--
--
--getNode :: Int -> IO Node
--getNode = undefined
--
--saveNode :: Node -> IO ()
--saveNode = undefined
--
--updateNode :: Node -> IO ()
--updateNode = undefined
--
--
--
--
--parents :: Node -> [Node]
--parents = undefined
--
--children :: Node -> [Node]
--children = undefined
--
--
--
-- projects :: User -> [Project]
-- projects u = undefined
module Data.Gargantext.Database (
module Data.Gargantext.Database.Private
-- , module Data.Gargantext.Database.Instances
, module Data.Gargantext.Database.User
, module Data.Gargantext.Database.Node
, module Data.Gargantext.Database.NodeNode
, module Data.Gargantext.Database.Ngram
, module Data.Gargantext.Database.NodeNgram
, module Data.Gargantext.Database.NodeNodeNgram
, module Data.Gargantext.Database.NodeNgramNgram
-- , module Data.Gargantext.Database.Gargandb
-- , module Data.Gargantext.Database.Simple
-- , module Data.Gargantext.Database.InsertNode
-- , module Data.Gargantext.Database.NodeType
) where
import Data.Gargantext.Database.Private
--import Data.Gargantext.Database.Gargandb
import Data.Gargantext.Database.User
import Data.Gargantext.Database.Node
import Data.Gargantext.Database.NodeNode
import Data.Gargantext.Database.Ngram
import Data.Gargantext.Database.NodeNgram
import Data.Gargantext.Database.NodeNodeNgram
import Data.Gargantext.Database.NodeNgramNgram
--import Data.Gargantext.Database.Simple
--import Data.Gargantext.Database.NodeType
--import Data.Gargantext.Database.InsertNode
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.Gargantext.Database.Instances where
import Data.Time (UTCTime)
import Opaleye (PGInt4, PGTimestamptz, PGFloat8
, QueryRunnerColumnDefault
, queryRunnerColumnDefault
, fieldQueryRunnerColumn
)
instance QueryRunnerColumnDefault PGInt4 Integer where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGTimestamptz (Maybe UTCTime) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
module Data.Gargantext.Database.Ngram where
import Prelude
import Data.Text (Text)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import qualified Database.PostgreSQL.Simple as PGS
import Opaleye
import Data.Gargantext.Database.Private (infoGargandb)
-- Functions only
import Data.List (find)
data NgramPoly id terms n = Ngram { ngram_id :: id
, ngram_terms :: terms
, ngram_n :: n
} deriving (Show)
type NgramWrite = NgramPoly (Maybe (Column PGInt4)) (Column PGText) (Column PGInt4)
type NgramRead = NgramPoly (Column PGInt4) (Column PGText) (Column PGInt4)
type Ngram = NgramPoly Int Text Int
$(makeAdaptorAndInstance "pNgram" ''NgramPoly)
$(makeLensesWith abbreviatedFields ''NgramPoly)
ngramTable :: Table NgramWrite NgramRead
ngramTable = Table "ngrams" (pNgram Ngram { ngram_id = optional "id"
, ngram_terms = required "terms"
, ngram_n = required "n"
}
)
queryNgramTable :: Query NgramRead
queryNgramTable = queryTable ngramTable
--selectUsers :: Query UserRead
--selectUsers = proc () -> do
-- --user@(i, p, ll, is, un, fn, ln, m, iff, ive, dj) <- queryUserTable -< ()
-- row@(User i p ll is un fn ln m iff ive dj) <- queryUserTable -< ()
-- O.restrict -< i .== 1
-- --returnA -< User i p ll is un fn ln m iff ive dj
-- returnA -< row
--
findWith :: (Eq a1, Foldable t) => (a -> a1) -> a1 -> t a -> Maybe a
findWith f t = find (\x -> f x == t)
--userWithUsername :: Text -> [User] -> Maybe User
--userWithUsername t xs = userWith userUsername t xs
--
--userWithId :: Integer -> [User] -> Maybe User
--userWithId t xs = userWith userUserId t xs
-- | not optimized (get all ngrams without filters)
ngrams :: IO [Ngram]
ngrams = do
conn <- PGS.connect infoGargandb
runQuery conn queryNgramTable
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.Gargantext.Database.Node where
import Database.PostgreSQL.Simple.FromField ( Conversion
, ResultError(ConversionFailed)
, FromField
, fromField
, returnError
)
import Prelude hiding (null, id)
import Database.PostgreSQL.Simple.Internal (Field)
import Control.Arrow (returnA)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Data.Aeson
import Data.Gargantext.Types
import Data.Gargantext.Prelude
import Data.Maybe (Maybe, fromMaybe)
import Data.Text (Text)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Typeable (Typeable)
import qualified Data.ByteString.Internal as DBI
import Database.PostgreSQL.Simple (Connection)
import Opaleye
-- | Types for Node Database Management
data PGTSVector
type NodeWrite = NodePoly (Maybe (Column PGInt4)) (Column PGInt4)
(Column PGInt4) (Column (Nullable PGInt4))
(Column (PGText)) (Maybe (Column PGTimestamptz))
(Column PGJsonb) -- (Maybe (Column PGTSVector))
type NodeRead = NodePoly (Column PGInt4) (Column PGInt4)
(Column PGInt4) (Column (Nullable PGInt4))
(Column (PGText)) (Column PGTimestamptz)
(Column PGJsonb) -- (Column PGTSVector)
instance FromField HyperdataCorpus where
fromField = fromField'
instance FromField HyperdataDocument where
fromField = fromField'
instance FromField HyperdataProject where
fromField = fromField'
instance FromField HyperdataUser where
fromField = fromField'
fromField' :: (Typeable b, FromJSON b) => Field -> Maybe DBI.ByteString -> Conversion b
fromField' field mb = do
v <- fromField field mb
valueToHyperdata v
where
valueToHyperdata v = case fromJSON v of
Success a -> pure a
Error _err -> returnError ConversionFailed field "cannot parse hyperdata"
instance QueryRunnerColumnDefault PGJsonb HyperdataDocument where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataCorpus where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataProject where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGJsonb HyperdataUser where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault (Nullable PGInt4) Int where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault (Nullable PGText) Text where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGInt4 Integer where
queryRunnerColumnDefault = fieldQueryRunnerColumn
$(makeAdaptorAndInstance "pNode" ''NodePoly)
$(makeLensesWith abbreviatedFields ''NodePoly)
nodeTable :: Table NodeWrite NodeRead
nodeTable = Table "nodes" (pNode Node { node_id = optional "id"
, node_typename = required "typename"
, node_userId = required "user_id"
, node_parentId = required "parent_id"
, node_name = required "name"
, node_date = optional "date"
, node_hyperdata = required "hyperdata"
-- , node_titleAbstract = optional "title_abstract"
}
)
selectNodes :: Column PGInt4 -> Query NodeRead
selectNodes id = proc () -> do
row <- queryNodeTable -< ()
restrict -< node_id row .== id
returnA -< row
runGetNodes :: Connection -> Query NodeRead -> IO [Document]
runGetNodes = runQuery
-- NP check type
getNodesWithParentId :: Connection -> Int -> IO [Node Value]
getNodesWithParentId conn n = runQuery conn $ selectNodesWithParentID n
selectNodesWithParentID :: Int -> Query NodeRead
selectNodesWithParentID n = proc () -> do
row@(Node _ _ _ parent_id _ _ _) <- queryNodeTable -< ()
restrict -< if n > 0
then
parent_id .== (toNullable $ pgInt4 n)
else
isNull parent_id
returnA -< row
queryNodeTable :: Query NodeRead
queryNodeTable = queryTable nodeTable
selectNodesWithType :: Column PGInt4 -> Query NodeRead
selectNodesWithType type_id = proc () -> do
row@(Node _ tn _ _ _ _ _) <- queryNodeTable -< ()
restrict -< tn .== type_id
returnA -< row
getNode :: Connection -> Column PGInt4 -> IO (Node Value)
getNode conn id = do
fromMaybe (error "TODO: 404") . headMay <$> runQuery conn (limit 1 $ selectNodes id)
getNodesWithType :: Connection -> Column PGInt4 -> IO [Node Value]
getNodesWithType conn type_id = do
runQuery conn $ selectNodesWithType type_id
-- NP check type
getCorpusDocument :: Connection -> Int -> IO [Document]
getCorpusDocument conn n = runQuery conn (selectNodesWithParentID n)
-- NP check type
getProjectCorpora :: Connection -> Int -> IO [Corpus]
getProjectCorpora conn node_id = do
runQuery conn $ selectNodesWithParentID node_id
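-- Usage sketch (hedged: assumes the gargandb connection info from
-- Data.Gargantext.Database.Private, which is not imported here):
--
-- printChildren :: Int -> IO ()
-- printChildren parentId = do
--   conn  <- PGS.connect infoGargandb
--   nodes <- getNodesWithParentId conn parentId
--   mapM_ (print . node_name) nodes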
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.Gargantext.Database.NodeNgram where
import Prelude
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import qualified Database.PostgreSQL.Simple as PGS
import Opaleye
import Data.Gargantext.Database.Private (infoGargandb)
data NodeNgramPoly id node_id ngram_id weight
= NodeNgram { nodeNgram_NodeNgramId :: id
, nodeNgram_NodeNgramNodeId :: node_id
, nodeNgram_NodeNgramNgramId :: ngram_id
, nodeNgram_NodeNgramWeight :: weight
} deriving (Show)
type NodeNgramWrite = NodeNgramPoly (Maybe (Column PGInt4)) (Column PGInt4) (Column PGInt4) (Maybe (Column PGFloat8))
type NodeNgramRead = NodeNgramPoly (Column PGInt4) (Column PGInt4) (Column PGInt4) ((Column PGFloat8))
type NodeNgram = NodeNgramPoly (Maybe Int) Int Int (Maybe Double)
$(makeAdaptorAndInstance "pNodeNgram" ''NodeNgramPoly)
$(makeLensesWith abbreviatedFields ''NodeNgramPoly)
nodeNgramTable :: Table NodeNgramWrite NodeNgramRead
nodeNgramTable = Table "nodes_ngrams" (pNodeNgram NodeNgram { nodeNgram_NodeNgramId = optional "id"
, nodeNgram_NodeNgramNodeId = required "node_id"
, nodeNgram_NodeNgramNgramId = required "ngram_id"
, nodeNgram_NodeNgramWeight = optional "weight"
}
)
queryNodeNgramTable :: Query NodeNgramRead
queryNodeNgramTable = queryTable nodeNgramTable
instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
-- | not optimized (get all ngrams without filters)
nodeNgrams :: IO [NodeNgram]
nodeNgrams = do
conn <- PGS.connect infoGargandb
runQuery conn queryNodeNgramTable
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.Gargantext.Database.NodeNgramNgram where
import Prelude
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import qualified Database.PostgreSQL.Simple as PGS
import Opaleye
import Data.Gargantext.Database.Private (infoGargandb)
data NodeNgramNgramPoly node_id ngram1_id ngram2_id weight
= NodeNgramNgram { nodeNgramNgram_NodeNgramNgram_NodeId :: node_id
, nodeNgramNgram_NodeNgramNgram_Ngram1Id :: ngram1_id
, nodeNgramNgram_NodeNgramNgram_Ngram2Id :: ngram2_id
, nodeNgramNgram_NodeNgramNgram_Weight :: weight
} deriving (Show)
type NodeNgramNgramWrite = NodeNgramNgramPoly (Maybe (Column PGInt4)) (Column PGInt4) (Column PGInt4) (Maybe (Column PGFloat8))
type NodeNgramNgramRead = NodeNgramNgramPoly (Column PGInt4) (Column PGInt4) (Column PGInt4) (Column PGFloat8)
type NodeNgramNgram = NodeNgramNgramPoly (Maybe Int) Int Int (Maybe Double)
$(makeAdaptorAndInstance "pNodeNgramNgram" ''NodeNgramNgramPoly)
$(makeLensesWith abbreviatedFields ''NodeNgramNgramPoly)
nodeNgramNgramTable :: Table NodeNgramNgramWrite NodeNgramNgramRead
nodeNgramNgramTable = Table "nodes_ngrams_ngrams" ( pNodeNgramNgram NodeNgramNgram
{ nodeNgramNgram_NodeNgramNgram_NodeId = optional "node_id"
, nodeNgramNgram_NodeNgramNgram_Ngram1Id = required "ngram1_id"
, nodeNgramNgram_NodeNgramNgram_Ngram2Id = required "ngram2_id"
, nodeNgramNgram_NodeNgramNgram_Weight = optional "weight"
}
)
queryNodeNgramNgramTable :: Query NodeNgramNgramRead
queryNodeNgramNgramTable = queryTable nodeNgramNgramTable
-- | not optimized (get all ngrams without filters)
nodeNgramNgrams :: IO [NodeNgramNgram]
nodeNgramNgrams = do
conn <- PGS.connect infoGargandb
runQuery conn queryNodeNgramNgramTable
instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.Gargantext.Database.NodeNode where
import Prelude
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import qualified Database.PostgreSQL.Simple as PGS
import Opaleye
import Data.Gargantext.Database.Private (infoGargandb)
data NodeNodePoly node1_id node2_id score
= NodeNode { nodeNode_node1_id :: node1_id
, nodeNode_node2_id :: node2_id
, nodeNode_score :: score
} deriving (Show)
type NodeNodeWrite = NodeNodePoly (Column PGInt4) (Column PGInt4) (Maybe (Column PGFloat8))
type NodeNodeRead = NodeNodePoly (Column PGInt4) (Column PGInt4) (Column PGFloat8)
type NodeNode = NodeNodePoly Int Int (Maybe Double)
$(makeAdaptorAndInstance "pNodeNode" ''NodeNodePoly)
$(makeLensesWith abbreviatedFields ''NodeNodePoly)
nodeNodeTable :: Table NodeNodeWrite NodeNodeRead
nodeNodeTable = Table "nodes_nodes" (pNodeNode NodeNode { nodeNode_node1_id = required "node1_id"
, nodeNode_node2_id = required "node2_id"
, nodeNode_score = optional "score"
}
)
queryNodeNodeTable :: Query NodeNodeRead
queryNodeNodeTable = queryTable nodeNodeTable
-- | not optimized (get all ngrams without filters)
nodeNodes :: IO [NodeNode]
nodeNodes = do
conn <- PGS.connect infoGargandb
runQuery conn queryNodeNodeTable
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.Gargantext.Database.NodeNodeNgram where
import Prelude
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import qualified Database.PostgreSQL.Simple as PGS
import Opaleye
import Data.Gargantext.Database.Private (infoGargandb)
data NodeNodeNgramPoly node1_id node2_id ngram_id score
= NodeNodeNgram { nodeNodeNgram_node1_id :: node1_id
, nodeNodeNgram_node2_id :: node2_id
, nodeNodeNgram_ngram_id :: ngram_id
, nodeNodeNgram_score :: score
} deriving (Show)
type NodeNodeNgramWrite = NodeNodeNgramPoly (Column PGInt4) (Column PGInt4) (Column PGInt4) (Maybe (Column PGFloat8))
type NodeNodeNgramRead = NodeNodeNgramPoly (Column PGInt4) (Column PGInt4) (Column PGInt4) (Column PGFloat8)
type NodeNodeNgram = NodeNodeNgramPoly Int Int Int (Maybe Double)
$(makeAdaptorAndInstance "pNodeNodeNgram" ''NodeNodeNgramPoly)
$(makeLensesWith abbreviatedFields ''NodeNodeNgramPoly)
nodeNodeNgramTable :: Table NodeNodeNgramWrite NodeNodeNgramRead
nodeNodeNgramTable = Table "nodes_nodes_ngrams" ( pNodeNodeNgram NodeNodeNgram
{ nodeNodeNgram_node1_id = required "node1_id"
, nodeNodeNgram_node2_id = required "node2_id"
, nodeNodeNgram_ngram_id = required "ngram_id"
, nodeNodeNgram_score = optional "score"
}
)
queryNodeNodeNgramTable :: Query NodeNodeNgramRead
queryNodeNodeNgramTable = queryTable nodeNodeNgramTable
-- | not optimized (get all ngrams without filters)
nodeNodeNgrams :: IO [NodeNodeNgram]
nodeNodeNgrams = do
conn <- PGS.connect infoGargandb
runQuery conn queryNodeNodeNgramTable
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Database.Private where
import qualified Database.PostgreSQL.Simple as PGS
-- TODO add a reader Monad here
-- read this in the init file
infoGargandb :: PGS.ConnectInfo
infoGargandb = PGS.ConnectInfo { PGS.connectHost = "127.0.0.1"
, PGS.connectPort = 5432
, PGS.connectUser = "gargantua"
, PGS.connectPassword = "C8kdcUrAQy66U"
, PGS.connectDatabase = "gargandb" }
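-- A sketch of the Reader-monad TODO above (hedged: the GargDB name and
-- wiring are hypothetical; assumes the mtl package):
--
-- import Control.Monad.Reader (ReaderT, runReaderT)
--
-- type GargDB = ReaderT PGS.Connection IO
--
-- runGargDB :: GargDB a -> IO a
-- runGargDB action = do
--   conn <- PGS.connect infoGargandb
--   runReaderT action conn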
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Data.Gargantext.Database.User where
import Prelude
import Data.Gargantext.Prelude
import Data.Time (UTCTime)
import Data.Text (Text)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Control.Arrow (returnA)
import qualified Database.PostgreSQL.Simple as PGS
import Opaleye
import Data.Gargantext.Database.Private (infoGargandb)
-- Functions only
import Data.List (find)
data UserLight = UserLight { userLight_id :: Int
, userLight_username :: Text
, userLight_email :: Text
} deriving (Show)
toUserLight :: User -> UserLight
toUserLight (User id _ _ _ u _ _ e _ _ _ ) = UserLight id u e
data UserPoly id pass llogin suser
uname fname lname
mail staff active djoined = User { user_id :: id
, user_password :: pass
, user_lastLogin :: llogin
, user_isSuperUser :: suser
, user_username :: uname
, user_firstName :: fname
, user_lastName :: lname
, user_email :: mail
, user_isStaff :: staff
, user_isActive :: active
, user_dateJoined :: djoined
} deriving (Show)
type UserWrite = UserPoly (Maybe (Column PGInt4)) (Column PGText)
(Maybe (Column PGTimestamptz)) (Column PGBool)
(Column PGText) (Column PGText)
(Column PGText) (Column PGText)
(Column PGBool) (Column PGBool)
(Column PGTimestamptz)
type UserRead = UserPoly (Column PGInt4) (Column PGText)
(Column PGTimestamptz) (Column PGBool)
(Column PGText) (Column PGText)
(Column PGText) (Column PGText)
(Column PGBool) (Column PGBool)
(Column PGTimestamptz)
type User = UserPoly Int Text (Maybe UTCTime) Bool Text Text Text Text Bool Bool UTCTime
$(makeAdaptorAndInstance "pUser" ''UserPoly)
$(makeLensesWith abbreviatedFields ''UserPoly)
userTable :: Table UserWrite UserRead
userTable = Table "auth_user" (pUser User { user_id = optional "id"
, user_password = required "password"
, user_lastLogin = optional "last_login"
, user_isSuperUser = required "is_superuser"
, user_username = required "username"
, user_firstName = required "first_name"
, user_lastName = required "last_name"
, user_email = required "email"
, user_isStaff = required "is_staff"
, user_isActive = required "is_active"
, user_dateJoined = required "date_joined"
}
)
queryUserTable :: Query UserRead
queryUserTable = queryTable userTable
selectUsersLight :: Query UserRead
selectUsersLight = proc () -> do
row@(User i _p _ll _is _un _fn _ln _m _iff _ive _dj) <- queryUserTable -< ()
restrict -< i .== 1
--returnA -< User i p ll is un fn ln m iff ive dj
returnA -< row
userWith :: (Eq a1, Foldable t) => (a -> a1) -> a1 -> t a -> Maybe a
userWith f t xs = find (\x -> f x == t) xs
userWithUsername :: Text -> [User] -> Maybe User
userWithUsername t xs = userWith user_username t xs
userWithId :: Int -> [User] -> Maybe User
userWithId t xs = userWith user_id t xs
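-- Usage sketch: load all users once, then look one up in memory
-- (for illustration only; not efficient on a large table):
--
-- findUser :: Text -> IO (Maybe User)
-- findUser name = userWithUsername name <$> users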
instance QueryRunnerColumnDefault PGTimestamptz (Maybe UTCTime) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
users :: IO [User]
users = do
conn <- PGS.connect infoGargandb
runQuery conn queryUserTable
usersLight :: IO [UserLight]
usersLight = do
conn <- PGS.connect infoGargandb
pm toUserLight <$> runQuery conn queryUserTable
{-# LANGUAGE DeriveGeneric #-}
module Data.Gargantext.Error where
import Control.Exception (Exception)
import Data.Typeable (Typeable)
import GHC.Generics (Generic)
import Network.HTTP.Client (HttpException)
import System.Exit (ExitCode (..), exitWith)
import System.IO (stderr, hPutStrLn)
--import Text.Parsec.Error
--import Text.Parsec.Pos hiding (Line)
data GargError = GargIOError String IOError
| GargHttpError String HttpException
| GargParseError String
| GargNgramsError String
| GargDatabaseError String
deriving (Show, Typeable, Generic)
instance Exception GargError
-- | Handle GargError by exiting with an error message.
handleError :: Either GargError a -> IO a
handleError (Right r) = pure r
handleError (Left e) =
case e of
GargIOError _ err' -> ioError err'
GargHttpError u err' -> err 61 $
"Could not fetch " ++ u ++ "\n" ++ show err'
GargParseError s -> err 64 s
GargNgramsError s -> err 64 s
GargDatabaseError s -> err 64 s
err :: Int -> String -> IO a
err exitCode msg = do
hPutStrLn stderr msg
exitWith $ ExitFailure exitCode
module Data.Gargantext.Network where
import Data.Gargantext.Prelude
import Data.Map as DM
import Data.Vector as DV
type Measure a b c = DM.Map a (DM.Map b c)
-- UTCTime Paire Granularity [Candle]
-- GargVector Paire Granularity [Candle]
type GargVector a b c = DM.Map a ( DM.Map b c)
-- GargMatrix Granularity (Paire Paire) [Candle]
type GargMatrix a b c d = DM.Map a (GargVector b c d)
-- GargMatrix Granularity (Paire Paire) [Candle]
type GargTensor a b c d e = DM.Map a (GargMatrix b c d e)
--data PortGarg = PortGarg { _portFolioParameters :: Parameters
-- , _portGargData :: Garg
--}
toMeasure :: Granularity -> Paire -> [Candle]
          -> Measure Granularity Paire [Candle]
-- assumption: a singleton measure built from one granularity, one pair,
-- and its candles
toMeasure g p cs = DM.fromList [(g, DM.fromList [(p, cs)])]
module Data.Gargantext.Ngrams ( module Data.Gargantext.Ngrams.Count
--, module Data.Gargantext.Ngrams.Hetero
, module Data.Gargantext.Ngrams.CoreNLP
, module Data.Gargantext.Ngrams.Parser
, module Data.Gargantext.Ngrams.Occurrences
, module Data.Gargantext.Ngrams.TextMining
, module Data.Gargantext.Ngrams.Metrics
--, module Data.Gargantext.Ngrams.Words
) where
import Data.Gargantext.Ngrams.Count
--import Data.Gargantext.Ngrams.Hetero
import Data.Gargantext.Ngrams.CoreNLP
import Data.Gargantext.Ngrams.Parser
import Data.Gargantext.Ngrams.Occurrences
import Data.Gargantext.Ngrams.TextMining
--import Data.Gargantext.Ngrams.Words
import Data.Gargantext.Ngrams.Metrics
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TemplateHaskell #-}
module Data.Gargantext.Ngrams.CoreNLP where
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import GHC.Generics
import Data.Monoid ((<>))
import Data.Gargantext.Types.Main (Language(..))
import Data.Gargantext.Prelude
import Data.Gargantext.Utils.Prefix (unPrefix)
import Data.Text (Text)
import qualified Data.ByteString.Char8 as S8
import qualified Data.Yaml as Yaml
import Network.HTTP.Simple
data Token = Token { _tokenIndex :: Int
, _tokenWord :: Text
, _tokenOriginalText :: Text
, _tokenLemma :: Text
, _tokenCharacterOffsetBegin :: Int
, _tokenCharacterOffsetEnd :: Int
, _tokenPos :: Text
, _tokenNer :: Text
, _tokenBefore :: Maybe Text
, _tokenAfter :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_token") ''Token)
token2text :: Token -> (Text, Text, Text)
token2text (Token _ w _ _ _ _ p n _ _) = (w,p,n)
data Sentence = Sentence { _sentenceIndex :: Int
, _sentenceTokens :: [Token]
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_sentence") ''Sentence)
data Properties = Properties { _propertiesAnnotators :: Text
, _propertiesOutputFormat :: Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_properties") ''Properties)
data Sentences = Sentences { sentences :: [Sentence]}
deriving (Show, Generic)
instance ToJSON Sentences
instance FromJSON Sentences
-- request =
-- "fr" : {
-- "tokenize.language" : "fr",
-- "pos.model" : "edu/stanford/nlp/models/pos-tagger/french/french.tagger",
-- "parse.model" : "edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz",
-- // dependency parser
-- "depparse.model" : "edu/stanford/nlp/models/parser/nndep/UD_French.gz",
-- "depparse.language" : "french",
-- "ner.model": DATA_ROOT+"/eunews.fr.crf.gz",
-- "ssplit.newlineIsSentenceBreak": "always"
-- },
--
corenlpPretty :: String -> IO ()
corenlpPretty txt = do
url <- parseRequest "POST http://localhost:9000/?properties={\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
let request = setRequestBodyJSON txt url
response <- httpJSON request
-- putStrLn $ "The status code was: " ++
-- show (getResponseStatusCode response)
-- print $ getResponseHeader "Content-Type" response
S8.putStrLn $ Yaml.encode (getResponseBody response :: Sentences)
corenlp :: Language -> String -> IO Sentences
corenlp lang txt = do
let properties = case lang of
EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
let request = setRequestBodyJSON txt url
response <- httpJSON request
pure (getResponseBody response :: Sentences)
-- | tokenWith
-- Part-of-speech example:
-- tokenWith _tokenPos "Hello world."
-- == [[("``","``"),("Hello","UH"),("world","NN"),(".","."),("''","''")]]
-- Named-entity recognition example:
-- tokenWith _tokenNer "Hello world of Peter."
-- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
tokenWith :: (Token -> t) -> Language -> String -> IO [[(Text, t)]]
tokenWith f lang s = pm (pm (\t -> (_tokenWord t, f t))) <$> pm _sentenceTokens <$> sentences <$> corenlp lang s
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Ngrams.Count where
import System.Environment (getArgs)
import Data.Foldable as F
import Data.Map (Map)
import qualified Data.Map as M
import qualified Data.Text.Lazy.IO as DTLIO
import qualified Data.Text.Lazy as DTL
-- | /O(n)/ Breaks a 'Text' up into a list of one-character 'Text' chunks.
-- Implementations ordered from slower to faster:
letters :: DTL.Text -> [DTL.Text]
letters text = DTL.chunksOf 1 text
letters' :: DTL.Text -> [DTL.Text]
letters' text = DTL.splitOn "#" $ DTL.intersperse '#' text
letters'' :: DTL.Text -> [DTL.Text]
letters'' = DTL.foldr (\ch xs -> DTL.singleton ch : xs) []
-- words
-- lines
-- words between punctuation
-- number of punctuation
occurrences :: Ord a => [a] -> Map a Int
occurrences xs = foldl' (\x y -> M.insertWith' (+) y 1 x) M.empty xs
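-- Example (a String is a [Char]):
-- >>> occurrences "hello"
-- fromList [('e',1),('h',1),('l',2),('o',1)]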
-- for optimization :
--occurrences' :: Ord a => [a] -> Map a Integer
--occurrences' xs = DTL.foldl (\x y -> M.insertWith' (+) y 1 x) M.empty xs
countMain :: IO ()
countMain = do
(fichier:_) <- getArgs
c <- DTLIO.readFile fichier
--print $ occurrences $ DTL.chunksOf 1 c
print $ occurrences $ letters'' c
--print $ occurrences $ DTL.words $ DTL.toLower c
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Ngrams.Lang.En (selectNgrams, groupNgrams, textTest) where
import Data.Gargantext.Prelude
import Data.Text (Text)
import Data.Monoid ((<>))
selectNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
selectNgrams xs = pf selectNgrams' xs
where
selectNgrams' (_,"NN" ,_ ) = True
selectNgrams' (_,"NNS" ,_ ) = True
selectNgrams' (_,"NNP" ,_ ) = True
selectNgrams' (_,"NN+CC",_ ) = True
selectNgrams' (_,_ ,"PERSON" ) = True
selectNgrams' (_,_ ,"ORGANIZATION") = True
selectNgrams' (_,_ ,"LOCATION" ) = True
selectNgrams' (_,_ ,_ ) = False
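-- Example: nouns and named entities are kept, everything else is dropped:
-- >>> selectNgrams [("Peter","NNP","PERSON"),("runs","VBZ","O"),("fast","RB","O")]
-- [("Peter","NNP","PERSON")]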
groupNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
groupNgrams [] = []
groupNgrams ((j1,"JJ",j1'):(c1,"CC",c1'):(j2,"JJ",j2'):(j3,"JJ",_):xs) = groupNgrams (jn1:cc:jn2:xs)
where
jn j' j'' jn' = (j' <> " " <> j'', "JJ", jn')
cc = (c1,"CC",c1')
jn1 = (j1, "JJ", j1')
jn2 = jn j2 j3 j2'
groupNgrams ((j1,"JJ",_):(_,"CC",_):(j2,"JJ",_):(n,"NN",nn):xs) = groupNgrams (jn1:jn2:xs)
where
jn j m mm p = (j <> " " <> m, p, mm)
jn1 = jn j1 n nn ("NN+CC" :: Text)
jn2 = jn j2 n nn ("NN+CC" :: Text)
groupNgrams ((j1,"JJ",_):(_,"CC",_):(j2,"JJ",_):(n,"NNS",nn):xs) = groupNgrams (jn1:jn2:xs)
where
jn j m mm p = (j <> " " <> m, p, mm)
jn1 = jn j1 n nn ("NN+CC" :: Text)
jn2 = jn j2 n nn ("NN+CC" :: Text)
groupNgrams ((x,"JJ",_):(y,"JJ",yy):xs) = groupNgrams ((x <> " " <> y, "JJ", yy):xs)
groupNgrams ((x,"JJ",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"JJ",_):(y,"NNS",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NNP",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(y,"NP",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(y,"NNS",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NP",_):(y,"NP",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> y, "NN", yy):xs)
-- extractNgrams "Test the antiinflammatory or analgesic activity?"
-- [[("``","``","O"),("Test","VB","O"),("the","DT","O"),("antiinflammatory activity analgesic activity","NN","O"),("?",".","O"),("''","''","O")]]
-- > should be (antiinflammatory activity) <> (analgesic activity)
groupNgrams ((x,"NN",_):(o,"IN",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> o <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(o,"IN",_):(y,"NNP",yy):xs) = groupNgrams ((x <> " " <> o <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(o,"IN",_):(det,"DT",_):(y,"NN",yy):xs) = groupNgrams ((x <> " " <> o <> " " <> det <> " " <> y, "NN", yy):xs)
groupNgrams ((x,"NN",_):(o,"IN",_):(det,"DT",_):(y,"NNP",yy):xs) = groupNgrams ((x <> " " <> o <> " " <> det <> " " <> y, "NN", yy):xs)
groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs) = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
groupNgrams (x:xs) = (x:(groupNgrams xs))
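-- Example: an adjective merges into the following noun:
-- >>> groupNgrams [("big","JJ","O"),("cat","NN","O"),("runs","VBZ","O")]
-- [("big cat","NN","O"),("runs","VBZ","O")]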
textTest :: [String]
textTest = [ "Alcoholic extract of Kaempferia galanga was tested for analgesic and antiinflammatory activities in animal models. "
, "Three doses, 300 mg/kg, 600 mg/kg and 1200 mg/kg of the plant extract prepared as a suspension in 2 ml of 2% gum acacia were used. "
, " Acute and sub acute inflammatory activities were studied in rats by carrageenan induced paw edema and cotton pellet induced granuloma models respectively. "
, "In both models, the standard drug used was aspirin 100 mg/kg. "
, "Two doses 600 mg/kg and 1200 mg/kg of plant extract exhibited significant (P<0.001) antiinflammatory activity in carrageenan model and cotton pellet granuloma model in comparison to control. "
, "Analgesic activity was studied in rats using hot plate and tail-flick models. "
, "Codeine 5 mg/kg and vehicle served as standard and control respectively. "
, "The two doses of plant extract exhibited significant analgesic activity in tail flick model (P<0.001) and hot plate model (P<0.001) in comparison to control. "
, "In conclusion K. galanga possesses antiinflammatory and analgesic activities. "]
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Ngrams.Lang.Fr (selectNgrams, groupNgrams, textTest)
where
import Data.Gargantext.Prelude
import Data.Text (Text)
import Data.Monoid ((<>))
selectNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
selectNgrams xs = pf selectNgrams' xs
where
selectNgrams' (_,"N" ,_ ) = True
selectNgrams' (_,"NC" ,_ ) = True
selectNgrams' (_,"NN+CC",_ ) = True
selectNgrams' (_,_ ,"PERSON" ) = True
selectNgrams' (_,_ ,"ORGANIZATION") = True
selectNgrams' (_,_ ,"LOCATION" ) = True
selectNgrams' (_,_ ,_ ) = False
groupNgrams :: [(Text, Text, Text)] -> [(Text, Text, Text)]
groupNgrams [] = []
--groupNgrams ((_,"DET",_):xs) = groupNgrams xs
-- "Groupe : nom commun et adjectifs avec conjonction"
groupNgrams ((n,"NC",n'):(j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",_):xs) = groupNgrams (n1:n2:xs)
where
n1 = (n <> " " <> j1, "NC", n')
n2 = (n <> " " <> j2, "NC", n')
-- /!\ sometimes N instead of NC (why?)
groupNgrams ((n,"N",n'):(j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",_):xs) = groupNgrams (n1:n2:xs)
where
n1 = (n <> " " <> j1, "N", n')
n2 = (n <> " " <> j2, "N", n')
-- Group: adjective + coordinating conjunction + adjective
-- groupNgrams ((j1,"ADJ",_):(_,"CC",_):(j2,"ADJ",j2'):xs) = groupNgrams ((j1 <> " " <> j2, "ADJ", j2'):xs)
-- Group: common noun + preposition + common noun
groupNgrams ((n1,"NC",_):(p,"P",_):(n2,"NC",n2'):xs) = groupNgrams ((n1 <> " " <> p <> " " <> n2, "NC", n2'):xs)
groupNgrams ((n1,"NC",_):(p,"P",_):(n2,"NPP",n2'):xs) = groupNgrams ((n1 <> " " <> p <> " " <> n2, "NC", n2'):xs)
groupNgrams ((n1,"NC",_):(prep,"P",_):(det,"DET",_):(n2,"NPP",n2'):xs) = groupNgrams ((n1 <> " " <> prep <> " " <> det <> " " <> n2, "NC", n2'):xs)
-- Group: several successive adjectives
groupNgrams ((x,"ADJ",_):(y,"ADJ",yy):xs) = groupNgrams ((x <> " " <> y, "ADJ", yy):xs)
-- Group: common noun and adjective
groupNgrams ((x,"NC",_):(y,"ADJ",yy):xs) = groupNgrams ((x <> " " <> y, "NC", yy):xs)
-- /!\ sometimes N instead of NC (why?)
groupNgrams ((x,"N",_):(y,"ADJ",yy):xs) = groupNgrams ((x <> " " <> y, "NC", yy):xs)
-- Group: adjective and common noun
groupNgrams ((x,"ADJ",_):(y,"NC",yy):xs) = groupNgrams ((x <> " " <> y, "NC", yy):xs)
-- /!\ sometimes N instead of NC (why?)
groupNgrams ((x,"ADJ",_):(y,"N",yy):xs) = groupNgrams ((x <> " " <> y, "NC", yy):xs)
groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
groupNgrams ((x,_,"ORGANIZATION"):(y,yy,"ORGANIZATION"):xs) = groupNgrams ((x <> " " <> y,yy,"ORGANIZATION"):xs)
-- If none of the previous rules match
groupNgrams (x:xs) = (x:(groupNgrams xs))
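-- Example: a common noun merges with the following adjective:
-- >>> groupNgrams [("chat","NC","O"),("noir","ADJ","O")]
-- [("chat noir","NC","O")]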
textTest :: [String]
textTest = [ "L'heure d'arrivée des coureurs dépend de la météo du jour."]
{-|
Module : Data.Gargantext.Ngrams.Metrics
Description : Short description
Copyright : (c) Some Guy, 2013
Someone Else, 2014
License : GPL-3
Maintainer : sample@email.com
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Data.Gargantext.Ngrams.Metrics (levenshtein
, levenshteinNorm
, damerauLevenshtein
, damerauLevenshteinNorm
, overlap
, jaccard
, hamming
) where
import Data.Text (Text)
import GHC.Real (Ratio)
import qualified Data.Text.Metrics as DTM
-- | This module provides metrics to compare 'Text' values,
-- starting as an API re-exporting the main functions of the great
-- text-metrics library by Mark Karpov.
-- | Levenshtein Distance
-- In information theory, linguistics, and computer science,
-- the Levenshtein distance is a string metric for measuring
-- the difference between two sequences.
-- See: https://en.wikipedia.org/wiki/Levenshtein_distance
--
levenshtein :: Text -> Text -> Int
levenshtein = DTM.levenshtein
-- | Return normalized Levenshtein distance between two 'Text' values.
-- Result is a non-negative rational number (represented here as
-- @'Ratio' 'Int'@), where 0 signifies no similarity between the
-- strings, while 1 means exact match.
--
levenshteinNorm :: Text -> Text -> Ratio Int
levenshteinNorm = DTM.levenshteinNorm
-- | Return Damerau-Levenshtein distance between two 'Text' values. The
-- function works like 'levenshtein', but the collection of allowed
-- operations also includes transposition of two /adjacent/ characters.
-- See also:
-- <https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance>
--
damerauLevenshtein :: Text -> Text -> Int
damerauLevenshtein = DTM.damerauLevenshtein
-- | Normalized Damerau-Levenshtein distance.
--
damerauLevenshteinNorm :: Text -> Text -> Ratio Int
damerauLevenshteinNorm = DTM.damerauLevenshteinNorm
-- Treating inputs like sets
-- | Return overlap coefficient for two 'Text' values. Returned value
-- is in the range from 0 (no similarity) to 1 (exact match). Return 1
-- if both 'Text' values are empty.
--
-- See also: <https://en.wikipedia.org/wiki/Overlap_coefficient>.
overlap :: Text -> Text -> Ratio Int
overlap = DTM.overlap
-- | Jaccard similarity coefficient
-- (measures similarity between sample sets)
jaccard :: Text -> Text -> Ratio Int
jaccard = DTM.jaccard
-- | Hamming Distance
-- In information theory, the Hamming distance between two strings of
-- equal length is the number of positions at which the corresponding
-- symbols are different. In other words, it measures the minimum number of
-- substitutions required to change one string into the other
-- See: https://en.wikipedia.org/wiki/Hamming_distance
hamming :: Text -> Text -> Maybe Int
hamming = DTM.hamming
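-- Examples (with OverloadedStrings):
-- >>> levenshtein "kitten" "sitting"
-- 3
-- >>> hamming "karolin" "kathrin"
-- Just 3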
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Ngrams.Occurrences where
import Data.Attoparsec.Text
import Data.Text (Text)
import Data.Either.Extra(Either(..))
import qualified Data.Text as T
import Control.Applicative
occurrenceParser :: Text -> Parser Bool
occurrenceParser txt = manyTill anyChar (string txt) >> pure True
occurrencesParser :: Text -> Parser Int
occurrencesParser txt = case txt of
"" -> pure 0
_ -> many (occurrenceParser txt') >>= \matches -> pure (length matches)
where
txt' = T.toLower txt
parseOccurrences :: Text -> Text -> Either String Int
parseOccurrences x = parseOnly (occurrencesParser x)
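-- Example:
-- >>> parseOccurrences "cat" "the cat sat on the cat"
-- Right 2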
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Gargantext.Ngrams.Parser where
import Data.Gargantext.Prelude
import Data.Gargantext.Ngrams.CoreNLP
import Data.Gargantext.Types.Main (Language(..), Ngrams)
import qualified Data.Gargantext.Ngrams.Lang.En as En
import qualified Data.Gargantext.Ngrams.Lang.Fr as Fr
-- | Ngrams selection algorithms
-- A form is a list of characters separated by one or more spaces in a sentence.
-- A word is a form.
-- type Form = [Char]
-- For performance reasons, the Text type is used instead:
-- type Form = Text
-- Consider a form together with its associated forms in the contexts of a sentence.
-- Forms and subforms can be represented as a Tree whose top is the minimal form,
-- i.e. a monogram, whose occurrences are counted.
-- P.S.: common word functions in Haskell do not take the sentence into account.
-- TODO for scientific papers: add measures
-- TODO add the p-score regex
extractNgrams :: Language -> String -> IO [[Ngrams]]
extractNgrams lang s = pm (groupNgrams lang) <$> extractNgrams' lang s
extractNgrams' :: Language -> String -> IO [[Ngrams]]
extractNgrams' lang t = pm (pm token2text)
<$> pm _sentenceTokens
<$> sentences
<$> corenlp lang t
-- | This function selects ngrams according to language-specific
-- grammars.
-- For instance, the English tag JJ corresponds to ADJ in French.
selectNgrams :: Language -> [Ngrams] -> [Ngrams]
selectNgrams EN = En.selectNgrams
selectNgrams FR = Fr.selectNgrams
-- | This function analyzes and (possibly) groups ngrams according to
-- language-specific grammars.
groupNgrams :: Language -> [Ngrams] -> [Ngrams]
groupNgrams EN = En.groupNgrams
groupNgrams FR = Fr.groupNgrams
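-- End-to-end sketch (hedged: requires a CoreNLP server reachable on
-- localhost:9000, as assumed by Data.Gargantext.Ngrams.CoreNLP):
--
-- keptNgrams :: Language -> String -> IO [[Ngrams]]
-- keptNgrams lang s = pm (selectNgrams lang) <$> extractNgrams lang s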
{-# LANGUAGE DeriveGeneric #-}
module Data.Gargantext.Ngrams.TFICF where
import Data.Text (Text)
import GHC.Generics (Generic)
-- | Context of a TFICF score (placeholder type, assumed from the usage
-- in 'tfidf' below).
data Context = Document | Corpus deriving (Read, Show, Generic)
data TFICF = TFICF { _tficfTerms :: Text
                   , _tficfContext1 :: Context
                   , _tficfContext2 :: Context
                   , _tficfScore :: Maybe Double
                   } deriving (Read, Show, Generic)
tfidf :: Text -> TFICF
tfidf txt = TFICF txt Document Corpus score
  where
    score = Nothing
module Data.Gargantext.Ngrams.TextMining where
import Data.Map (empty, Map, insertWith, toList)
import Data.List (foldl, foldl')
import qualified Data.List as L
sortGT :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering
sortGT (a1, b1) (a2, b2)
| a1 < a2 = GT
| a1 > a2 = LT
| a1 == a2 = compare b1 b2
sortGT (_, _) (_, _) = error "What is this case ?"
--histogram :: Ord a => [a] -> [(a, Int)]
--histogram = map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
--histogram = sortGT Prelude.. $ map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
countElem :: (Ord k) => Data.Map.Map k Int -> k -> Data.Map.Map k Int
countElem m e = Data.Map.insertWith (\n o -> n + o) e 1 m
freqList :: (Ord k) => [k] -> Data.Map.Map k Int
freqList = foldl countElem Data.Map.empty
getMaxFromMap :: Ord a => Map a1 a -> [a1]
getMaxFromMap m = go [] Nothing (toList m)
where
go ks _ [] = ks
go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
go ks (Just u) ((k,v):rest)
| v < u = go ks (Just u) rest
| v > u = go [k] (Just v) rest
| otherwise = go (k:ks) (Just v) rest
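-- Example: all keys bound to the maximal value are returned:
-- >>> getMaxFromMap (Data.Map.fromList [("a",3),("b",1),("c",3)])
-- ["c","a"]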
merge :: [a] -> [a] -> [a]
merge [] ys = ys
merge (x:xs) ys = x:merge ys xs
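-- Example: 'merge' interleaves its two arguments:
-- >>> merge [1,3,5] [2,4,6]
-- [1,2,3,4,5,6]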
average :: [Double] -> Double
average x = L.sum x / L.genericLength x
average' :: [Int] -> Double
average' x = (L.sum y) / (L.genericLength y) where
y = map fromIntegral x
countYear :: [Integer] -> Map Integer Integer
countYear [] = empty
countYear (x:xs) = insertWith (+) x 1 (countYear xs)
countYear' :: [Integer] -> Map Integer Integer
countYear' (xs) = foldl' (\x y -> insertWith (+) y 1 x) empty xs
textMiningMain :: IO ()
textMiningMain = do
print $ merge ["abc"::String] ["bcd" :: String]
-- Word2Vec
-- Word Vector in a Field
module Data.Gargantext.Ngrams.Words where
import Data.List (partition)
import Data.Set (fromList, notMember, member)
import Data.Char (isPunctuation, toLower, isAlpha, isSpace)
import NLP.Stemmer (stem, Stemmer(..))
import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
import Language.Aspell.Options (ACOption(..))
--import Data.Either.Utils (fromRight)
import Data.ByteString.Internal (packChars)
get_lang x = do
let lang = Lang (packChars x)
spell_lang <- spellCheckerWithOptions [lang]
return spell_lang
check' lang x = check lang (packChars x)
suggest' lang x = suggest lang (packChars x)
--spell_lang <- spellChecker
--lang = fromRight s
--suggest' lang x
-- stem French "naturelles"
-- paragraphes
-- lines
-- sentences
-- Prelude.map (\x -> stem French x) $ cleanText "Les hirondelles s envolent dans les cieux."
repl :: Char -> Char
repl x
| x == '\'' = ' '
| x == '/' = ' '
-- | x == '\t' = ' '
-- | x == '\n' = ' '
| otherwise = x
cleanText text = do
-- issue with \'
--words $ filter (not . isPunctuation) $ Prelude.map toLower text
words $ filter (\x -> isAlpha x || isSpace x) $ Prelude.map (repl . toLower) text
isMiamWord word = do
let miamWord_set = fromList ["salut", "phrase"]
member word miamWord_set
isStopWord word = do
let stopWord_set = fromList ["de", "la", "une", "avec"]
member word stopWord_set
wordsMain = do
let text = "Salut, ceci est une phrase \n\n avec de la ponctuation !"
print $ partition (not . isStopWord) $ cleanText text
print $ filter (not . isStopWord) $ cleanText text
--print $ filter isStopWord $ words $ filter (not . isPunctuation) text
{-|
Module : Data.Gargantext.Parsers
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Gargantext analyzes semi-structured text, which must first be parsed.
The parsers assume we know the format of the Text (TextFormat data
type), according to which the right parser is chosen among the list of
available parsers.
This module mainly describes how to add a new parser to Gargantext;
please follow the types.
-}
module Data.Gargantext.Parsers -- (parse, FileFormat(..))
where
import System.FilePath (takeExtension)
import Data.Attoparsec.ByteString (parseOnly, Parser)
import Data.ByteString as DB
import Data.Map as DM
----import Data.Either.Extra(Either(..))
----
--import Control.Monad (join)
import Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Path.IO (resolveFile')
------ import qualified Data.ByteString.Lazy as B
--import Control.Applicative ( (<$>) )
import Control.Concurrent.Async as CCA (mapConcurrently)
import Data.Gargantext.Parsers.WOS (wosParser)
---- import Data.Gargantext.Parsers.XML (xmlParser)
---- import Data.Gargantext.Parsers.DOC (docParser)
---- import Data.Gargantext.Parsers.ODT (odtParser)
--import Data.Gargantext.Prelude (pm)
--import Data.Gargantext.Types.Main (ErrorMessage(), Corpus)
-- | According to the format of the input file,
-- different parsers are available.
data FileFormat = WOS -- Implemented (ISI Format)
-- | DOC -- Not Implemented / import Pandoc
-- | ODT -- Not Implemented / import Pandoc
-- | PDF -- Not Implemented / pdftotext and import Pandoc ?
-- | XML -- Not Implemented / see :
-- -- > http://chrisdone.com/posts/fast-haskell-c-parsing-xml
parse :: FileFormat -> FilePath
-> IO [Either String [[(DB.ByteString, DB.ByteString)]]]
parse format path = do
files <- case takeExtension path of
".zip" -> openZip path
_ -> pure <$> DB.readFile path
mapConcurrently (runParser format) files
-- | withParser:
-- According to the format of the text, choose the right parser.
-- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = wosParser
--withParser DOC = docParser
--withParser ODT = odtParser
--withParser XML = xmlParser
--withParser _ = error "[ERROR] Parser not implemented yet"
runParser :: FileFormat -> DB.ByteString
-> IO (Either String [[(DB.ByteString, DB.ByteString)]])
runParser format text = pure $ parseOnly (withParser format) text
openZip :: FilePath -> IO [DB.ByteString]
openZip fp = do
path <- resolveFile' fp
entries <- withArchive path (DM.keys <$> getEntries)
bs <- mapConcurrently (\s -> withArchive path (getEntry s)) entries
pure bs
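-- Usage sketch (hedged: the file path is hypothetical):
--
-- main :: IO ()
-- main = do
--   docs <- parse WOS "corpus/wos.zip"
--   print (length docs)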
{-|
Module : Data.Gargantext.Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017-present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
According to the language of the text, parseDate1 returns the date as Text:
TODO : Add some tests
import Data.Gargantext.Parsers.Date as DGP
DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
-}
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Parsers.Date (parseDate1, Lang(FR, EN), parseDate) where
import Data.Gargantext.Prelude
--import Data.Gargantext.Types.Main as G
import Data.Time.Clock (UTCTime, getCurrentTime)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Data.Time.LocalTime (utc)
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale)
, DucklingTime(DucklingTime)
)
import Duckling.Core (makeLocale, Lang(FR,EN), Some(This), Dimension(Time))
import Duckling.Types (jsonValue, Entity)
import Duckling.Api (analyze, parse)
import qualified Data.HashSet as HashSet
import qualified Data.Aeson as Json
import Data.HashMap.Strict as HM
import Data.Text (Text)
-- import Duckling.Engine (parseAndResolve)
-- import Duckling.Rules (rulesFor)
-- import Duckling.Debug as DB
import Duckling.Types (ResolvedToken)
import Safe (headMay)
-- TODO add Paris at Duckling.Locale Region datatype
-- | To get homogeneity of the languages
-- TODO : put this in a more generic place in the source code
--parserLang :: G.Language -> Lang
--parserLang G.FR = FR
--parserLang G.EN = EN
-- | Final Date parser API
-- IO can be avoided here:
-- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseDate1 :: Context -> Text -> SomeErrorHandling Text
parseDate1 :: Lang -> Text -> IO Text
parseDate1 lang text = do
maybeJson <- pm jsonValue <$> parseDateWithDuckling lang text
case headMay maybeJson of
Just (Json.Object object) -> case HM.lookup "value" object of
Just (Json.String date) -> pure date
Just _ -> error "ParseDate ERROR: should be a json String"
Nothing -> error "ParseDate ERROR: no date found"
_ -> error "ParseDate ERROR: type error"
-- | Current Time in DucklingTime format
-- TODO : get local Time in a more generic way
utcToDucklingTime :: UTCTime -> DucklingTime
utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
-- | Local Context which depends on Lang and Time
localContext :: Lang -> DucklingTime -> Context
localContext lang dt = Context {referenceTime = dt, locale = makeLocale lang Nothing}
-- | Date parser with Duckling
parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
parseDateWithDuckling lang input = do
contxt <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
pure $ analyze input contxt $ HashSet.fromList [(This Time)]
parseDate :: Lang -> Text -> IO [Entity]
parseDate lang input = do
context <- localContext lang <$> utcToDucklingTime <$> getCurrentTime
pure $ parse input context [(This Time)]
module Data.Gargantext.Parsers.Utils where
-- use Duckling here
parseDate = undefined
{-# LANGUAGE OverloadedStrings #-}
module Data.Gargantext.Parsers.WOS (wosParser) where
-- TOFIX : Should import Data.Gargantext.Prelude here
import Prelude hiding (takeWhile, take, concat, readFile, lines)
import Data.Monoid ((<>))
import Data.Attoparsec.ByteString (Parser, try, string
, takeTill, take
, manyTill, many1)
import Data.Attoparsec.ByteString.Char8 (anyChar, isEndOfLine)
import Data.ByteString (ByteString, concat)
import Data.ByteString.Char8 (pack)
import Control.Applicative
--import Data.Gargantext.Types
-- | wosParser parses ISI format from
-- Web Of Science Database
wosParser :: Parser [[(ByteString, ByteString)]]
wosParser = do
-- TODO Warning if version /= 1.0
-- FIXME anyChar (string ..) /= exact string "\nVR 1.0" ?
_ <- manyTill anyChar (string $ pack "\nVR 1.0")
ns <- many1 notice <* (string $ pack "\nEF" )
pure ns
notice :: Parser [(ByteString, ByteString)]
notice = start *> fields <* end
where
start :: Parser ByteString
start = "\nPT " *> takeTill isEndOfLine
end :: Parser [Char]
end = manyTill anyChar (string $ pack "\nER\n")
fields :: Parser [(ByteString, ByteString)]
fields = many field
where
field :: Parser (ByteString, ByteString)
field = do
name <- "\n" *> take 2 <* " "
txt <- takeTill isEndOfLine
txts <- try lines
pure (translate name, concat ([txt] <> txts))
lines :: Parser [ByteString]
lines = many line
where
line :: Parser ByteString
line = "\n " *> takeTill isEndOfLine
translate :: ByteString -> ByteString
translate champs
| champs == "AU" = "author"
| champs == "TI" = "title"
| champs == "SO" = "source"
| champs == "DI" = "doi"
| champs == "PD" = "publication_date"
| champs == "AB" = "abstract"
| otherwise = champs
-- http://chrisdone.com/posts/fast-haskell-c-parsing-xml
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# OPTIONS_GHC -fno-warn-type-defaults #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-
TODO: import head impossible from Protolude: why ?
-}
module Data.Gargantext.Prelude
( module Data.Gargantext.Prelude
, module Protolude
, headMay
)
where
import Protolude ( Bool(True, False), Int, Double, Integer
, Fractional, Num, Maybe, Floating, Char
, Ord, Integral, Foldable, RealFrac, Monad, filter
, reverse, map, zip, drop, take, zipWith
, sum, fromIntegral, length, fmap
, takeWhile, sqrt, undefined, identity
, abs, maximum, minimum, return, snd, truncate
, (+), (*), (/), (-), (.), (>=), ($), (**), (^)
)
-- TODO import functions optimized in Utils.Count
-- import Protolude hiding (head, last, all, any, sum, product, length)
-- import Data.Gargantext.Utils.Count
import qualified Data.List as L hiding (head, sum)
import qualified Control.Monad as M
import qualified Data.Map as Map
import qualified Data.Vector as V
import Safe (headMay)
-- | Short aliases for common Prelude list functions
-- (pf = filter, pr = reverse, pm = map, pz = zip, pd = drop, ptk = take, pzw = zipWith):
pf :: (a -> Bool) -> [a] -> [a]
pf = filter
pr :: [a] -> [a]
pr = reverse
pm :: (a -> b) -> [a] -> [b]
pm = map
pm2 :: (t -> b) -> [[t]] -> [[b]]
pm2 fun = pm (pm fun)
pz :: [a] -> [b] -> [(a, b)]
pz = zip
pd :: Int -> [a] -> [a]
pd = drop
ptk :: Int -> [a] -> [a]
ptk = take
pzw :: (a -> b -> c) -> [a] -> [b] -> [c]
pzw = zipWith
-- Exponential Average
eavg :: [Double] -> Double
eavg (x:xs) = a*x + (1-a)*(eavg xs)
where a = 0.70
eavg [] = 0
-- Simple Average
mean :: Fractional a => [a] -> a
mean xs = if L.null xs then 0.0
else sum xs / fromIntegral (length xs)
sumMaybe :: Num a => [Maybe a] -> Maybe a
sumMaybe = fmap sum . M.sequence
variance :: Floating a => [a] -> a
variance xs = mean $ pm (\x -> (x - m) ** 2) xs where
m = mean xs
deviation :: [Double] -> Double
deviation = sqrt . variance
movingAverage :: Fractional b => Int -> [b] -> [b]
movingAverage steps xs = pm mean $ chunkAlong steps 1 xs
ma :: [Double] -> [Double]
ma = movingAverage 3
-- | Split a list into chunks of length @a@, advancing by @b@ at each step
chunkAlong :: Int -> Int -> [a] -> [[a]]
chunkAlong a b l = only (while dropAlong)
where
only = pm (take a)
while = takeWhile (\x -> length x >= a)
dropAlong = L.scanl (\x _y -> drop b x) l ([1..] :: [Integer])
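-- Example: chunks of length 3, advancing by 1 at each step:
-- >>> chunkAlong 3 1 [1..5]
-- [[1,2,3],[2,3,4],[3,4,5]]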
-- | Optimized version (Vector)
chunkAlong' :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
chunkAlong' a b l = only (while dropAlong)
  where
    only      = V.map (V.take a)
    while     = V.takeWhile (\x -> V.length x >= a)
    -- Vectors are strict, so the enumeration driving scanl must be finite
    dropAlong = V.scanl (\x _y -> V.drop b x) l (V.enumFromN (1 :: Int) (V.length l))
-- | TODO Inverse of chunk ? unchunkAlong ?
unchunkAlong :: Int -> Int -> [[a]] -> [a]
unchunkAlong = undefined
-- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
splitAlong :: [Int] -> [Char] -> [[Char]]
splitAlong _ [] = [] -- No list? done
splitAlong [] xs = [xs] -- No place to split at? Return the remainder
splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys) -- take until our split spot, recurse with next split spot and list remainder
takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
takeWhileM _ [] = return []
takeWhileM p (a:as) = do
v <- a
if p v
then do
vs <- takeWhileM p as
return (v:vs)
else return []
-- SUMS
-- To select the right algorithm according to the type:
-- https://github.com/mikeizbicki/ifcxt
sumSimple :: Num a => [a] -> a
sumSimple = L.foldl' (+) 0
-- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
sumKahan :: Num a => [a] -> a
sumKahan = snd . L.foldl' go (0,0)
where
go (c,t) i = ((t'-t)-y,t')
where
y = i-c
t' = t+y
-- | Frequency map: counts normalized by the total number of elements
count2map :: (Ord k, Foldable t) => t k -> Map.Map k Double
count2map xs = Map.map (/ (fromIntegral (length xs))) (count2map' xs)
-- | Raw counts, accumulated into a dict
count2map' :: (Ord k, Foldable t) => t k -> Map.Map k Double
count2map' xs = L.foldl' (\x y -> Map.insertWith' (+) y 1 x) Map.empty xs
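-- Example (frequencies sum to 1):
-- >>> count2map ("aab" :: String)
-- fromList [('a',0.6666666666666666),('b',0.3333333333333333)]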
trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
trunc n = truncate . (* 10^n)
trunc' :: Int -> Double -> Double
trunc' n x = fromIntegral $ truncate $ (x * 10^n)
bool2int :: Num a => Bool -> a
bool2int b = case b of
True -> 1
False -> 0
bool2double :: Bool -> Double
bool2double bool = case bool of
True -> 1.0
False -> 0.0
-- Normalizing && scaling data
scale :: [Double] -> [Double]
scale = scaleMinMax
scaleMinMax :: [Double] -> [Double]
scaleMinMax xs = pm (\x -> (x - mi) / (ma - mi + 1)) xs'
  where
    ma  = maximum xs'
    mi  = minimum xs'
    xs' = pm abs xs
scaleNormalize :: [Double] -> [Double]
-- center by the mean, scale by the variance (assumed intent of the
-- original expression)
scaleNormalize xs = pm (\x -> (x - m) / (v + 1)) xs'
  where
    v   = variance xs'
    m   = mean xs'
    xs' = pm abs xs
normalize :: [Double] -> [Double]
normalize as = normalizeWith identity as
normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
normalizeWith extract bs = pm (\x -> x/(sum bs')) bs'
where
bs' = pm extract bs
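-- Example: values are scaled so that they sum to 1:
-- >>> normalize [1,1,2]
-- [0.25,0.25,0.5]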
-- Zip functions to add
zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
zipFst f xs = zip (f xs) xs
zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
zipSnd f xs = zip xs (f xs)
module Data.Gargantext.RCT where
foo :: Int
foo = undefined
--import Data.Text (Text, words)
--import Data.Attoparsec.Text (anyChar, isEndOfLine, Parser, takeTill, many1, endOfLine, space, manyTill)
--import Control.Applicative (many)
-- RCT is the acronym for Referential ConText (of Text)
-- in the beginning there was a byte
-- then a char
-- Char -> RCT [Char]
-- then a list of chars called a string; we call it a Form
-- (removing all weird characters which are not alphanumeric)
-- Form -> RCT Sentence
-- These forms compose the RCT Sentence
-- an ngram is composed of multiple forms
-- Paragraph = [Sentence]
-- type Title = Paragraph
-- data Block = [Paragraph]
-- Block is taken from Pandoc
-- data Document = [Block]
-- Set of databases
-- Database
-- Set of Articles
-- Article
-- Paragraph (abstract + title)
-- Sentence - Ngrams - Forms
--separateurs :: Parser Text
--separateurs = dropWhile isEndOfLine
--paragraphs :: Parser [Text]
--paragraphs = many paragraph
--
--paragraph :: Parser Text
--paragraph = takeTill isEndOfLine <* many1 endOfLine
--
-- forms :: Text -> [Text]
-- forms = words
-- Right Management
-----------------------------------------------------------------
-- data Management = RolesRights | NodesRights | OperationsRights
-----------------------------------------------------------------
-----------------------------------------------------------------
-- Role Rights Management
-- rights to create roles (group)
-- Node Rights Management
-- rights to read/write Node
-- Operation Rights Management
-- rights for which operations
-----------------------------------------------------------------
-- Roles Rights Management
-----------------------------------------------------------------
-- 2 main roles
-- admin : can create group and assign Node Rights to it
-- user : cannot create a group, but can assign Node rights inside his group (if he has the rights)
-- Use cases:
-- if all users are in the public group and have read/write permissions: everything is free inside the public group
-- else:
-- in an institution X, an admin can create a gx or gy group for each department and assign users to it
-- a user x can share with a user y within the group if he has the rights for it
-- an admin can grant the admin role on a group to a user
-- Roles Rights Management are stored in "User Node"
-- right to read on group called "x" == can share permissions inside group x
-- right to write on group called "x" == can modify group x itself
-- Question: how to manage the hierarchy of roles/groups ?
-- Example: a user can create a group inside a group but not outside of it
-----------------------------------------------------------------
-- Node Rights Management
-----------------------------------------------------------------
-- Actions on a Node (if /= Graph) depend on the rights of its parent
-- | rightsOf:
-- technically: get the rights (ACL) column of the Node row (in the nodes table)
rightsOf :: Node -> Rights
rightsOf n = undefined
rightsOfNode :: User -> Node -> Rights
rightsOfNode u n = case typeOf n of -- sketch, assuming a typeOf accessor on Node
  UserNode    -> rightsOf n
  ProjectNode -> rightsOf n
  CorpusNode  -> rightsOf n
  GraphNode   -> rightsOf n
  _           -> rightsOf (parentOf n)
rightsOfNodeNgram :: User -> NodeNgram -> Rights
rightsOfNodeNgram u n = rightsOf n'
where
n' = nodeOf n
rightsOfNodeNgramNgram :: User -> NodeNgramNgram -> Rights
rightsOfNodeNgramNgram u n = rightsOf n'
where
n' = nodeOf n
rightsOfNodeNodeNgram :: User -> NodeNodeNgram -> Rights
rightsOfNodeNodeNgram u n = rightsOf (nodeOf n) -- sketch, assuming nodeOf as above
rightsOfNodeNode :: User -> NodeNode -> Rights
rightsOfNodeNode u n = rightsOf (nodeOf n) -- sketch, assuming nodeOf as above
-----------------------------------------------------------------
-- Operation Rights Management
-----------------------------------------------------------------
data Operation = Read | Write
-- Starting with simple case:
-- type ModifyRights = Write
-- type Exec = Write
data Rights = Rights { _rightsRead  :: Bool
                     , _rightsWrite :: Bool
                     }
deriving (Show, Read, Eq)
data LogRightsMessage = RightsSuccess | RightsError
deriving (Show, Read, Eq)
-- Bool aliases for chmod' (renamed so they do not clash with the Read class
-- used in the deriving clauses above)
type ReadRight = Bool
type WriteRight = Bool
-----------------------------------------------------------------
-- | TODO
-- find the tables where there is the relation Node / User / Rights
getRightsOfNodeWithUser :: Node -> User -> IO Rights
getRightsOfNodeWithUser n u = undefined
userCan :: Operation -> User -> Node -> IO Bool
userCan op u n = do
  rights <- getRightsOfNodeWithUser n u
  let r = case op of
            Read  -> _rightsRead  rights
            Write -> _rightsWrite rights
  pure r
-- | User can (or can not) give/change rights of the Node
userCanModifyRights :: User -> Node -> IO Bool
userCanModifyRights u n = userCan Write u n
-- | User can see who has access to the Node
userCanReadRights :: User -> Node -> IO Bool
userCanReadRights u n = userCan Read u n
chmod :: Rights -> User -> Node -> IO LogRightsMessage
chmod r u n = undefined
chmod' :: ReadRight -> WriteRight -> User -> Node -> IO LogRightsMessage
chmod' r w u n = chmod rights u n
where
rights = Rights r w
readAccessOnly :: User -> Node -> IO LogRightsMessage
readAccessOnly u n = chmod r u n
where
r = Rights True False
stopAccess :: User -> Node -> IO LogRightsMessage
stopAccess u n = chmod r u n
  where
    r = Rights False False
chmodAll :: Rights -> User -> [Node] -> IO [LogRightsMessage]
chmodAll r u ns = mapM (\n -> chmod r u n) ns
chmodChildren :: Rights -> User -> Node -> IO [LogRightsMessage]
chmodChildren r u n = mapM (\n' -> chmod r u n') ns'
  where
    ns' = childrenOf n
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Data.Gargantext.Server
-- ( startApp
-- , app
-- )
where
import Prelude hiding (null)
import Control.Monad
import Control.Monad.IO.Class
import Data.Aeson
import Network.Wai
import Network.Wai.Handler.Warp
import Servant
import Servant.Multipart
import Database.PostgreSQL.Simple (Connection, connect)
import Opaleye
import Data.Gargantext.Types.Main (Node, NodeId)
import Data.Gargantext.Database.Node (getNodesWithParentId, getNode)
import Data.Gargantext.Database.Private (infoGargandb)
-- | TODO, use MOCK feature of Servant to generate fake data (for tests)
type NodeAPI = Get '[JSON] (Node Value)
:<|> "children" :> Get '[JSON] [Node Value]
type API = "roots" :> Get '[JSON] [Node Value]
:<|> "node" :> Capture "id" Int :> NodeAPI
:<|> "echo" :> Capture "string" String :> Get '[JSON] String
:<|> "upload" :> MultipartForm MultipartData :> Post '[JSON] String
-- :<|> "node" :> Capture "id" Int :> Get '[JSON] Node
server :: Connection -> Server API
server conn
= liftIO (getNodesWithParentId conn 0)
:<|> nodeAPI conn
:<|> echo
:<|> upload
where
echo s = pure s
connectGargandb :: IO Connection
connectGargandb = connect infoGargandb
startGargantext :: IO ()
startGargantext = do
print ("Starting server on port " ++ show port)
conn <- connectGargandb
run port $ app conn
where
port = 8008
-- | TODO App type, the main monad in which the bot code is written with.
-- Provide config, state, logs and IO
-- type App m a = ( MonadState AppState m
-- , MonadReader Conf m
-- , MonadLog (WithSeverity Doc) m
-- , MonadIO m) => m a
-- Thanks @yannEsposito for this.
app :: Connection -> Application
app = serve api . server
api :: Proxy API
api = Proxy
nodeAPI :: Connection -> NodeId -> Server NodeAPI
nodeAPI conn id
= liftIO (getNode conn id')
:<|> liftIO (getNodesWithParentId conn id)
where
id' = pgInt4 id
-- | Upload files
-- TODO Is it possible to adapt the function according to iValue input ?
upload :: MultipartData -> Handler String
upload multipartData = do
liftIO $ do
putStrLn "Inputs:"
forM_ (inputs multipartData) $ \input ->
putStrLn $ " " ++ show (iName input)
++ " -> " ++ show (iValue input)
forM_ (files multipartData) $ \file -> do
content <- readFile (fdFilePath file)
putStrLn $ "Content of " ++ show (fdFileName file)
++ " at " ++ fdFilePath file
putStrLn content
pure "Data loaded"
module Data.Gargantext.Types ( module Data.Gargantext.Types.Main
, module Data.Gargantext.Types.Node
) where
import Data.Gargantext.Types.Main
import Data.Gargantext.Types.Node
-- | CNRS Copyrights
-- Licence: https://gitlab.iscpif.fr/humanities/gargantext/blob/stable/LICENSE
-- Author: Alexandre Delanoë (alexandre.delanoe@iscpif.fr)
module Data.Gargantext.Types.Main where
import Data.Monoid ((<>))
import Protolude (fromMaybe)
--import Data.ByteString (ByteString())
import Data.Text (Text)
import Data.Time (UTCTime)
import Data.Gargantext.Types.Node ( NodePoly, HyperdataUser
, HyperdataFolder , HyperdataCorpus , HyperdataDocument
, HyperdataFavorites, HyperdataResource
, HyperdataList , HyperdataScore
, HyperdataGraph
, HyperdataPhylo
, HyperdataNotebook
)
-- | Language of a Text
-- For simplicity, we suppose a text has a homogeneous language
data Language = EN | FR -- | DE | IT | SP
-- > EN == english
-- > FR == french
-- > DE == german (not implemented yet)
-- > IT == italian (not implemented yet)
-- > SP == spanish (not implemented yet)
-- > ... add your language and help us to implement it (:
-- The whole Database is structured like a hierarchical Tree
data Tree a = NodeT a [Tree a]
deriving (Show, Read, Eq)
-- data Tree a = NodeT a [Tree a]
-- same as Data.Tree
leafT :: a -> Tree a
leafT x = NodeT x []
-- Garg Network is a network of all Garg nodes
--gargNetwork = undefined
-- | Garg Node is the Database Schema, typed as a specification
-- gargNode gathers all the Nodes of all users on one Node
gargNode :: [Tree NodeType]
gargNode = [userTree]
-- | User Tree simplified
userTree :: Tree NodeType
userTree = NodeT NodeUser [projectTree]
-- | Project Tree
projectTree :: Tree NodeType
projectTree = NodeT Project [corpusTree]
-- | Corpus Tree
corpusTree :: Tree NodeType
corpusTree = NodeT Corpus ( [ leafT Document ]
<> [ leafT Lists ]
<> [ leafT Metrics ]
<> [ leafT Classification]
)
-- TODO make instances of Nodes
-- NP
-- * why NodeUser and not just User ?
-- * is this supposed to hold data ?
data NodeType = NodeUser | Project | Corpus | Document | DocumentCopy
| Classification
| Lists
| Metrics
deriving (Show, Read, Eq)
data Classification = Favorites | MyClassification
data Lists = StopList | MainList | MapList | GroupList
data Metrics = Occurrences | Cooccurrences | Specclusion | Genclusion | Cvalue
| TfidfCorpus | TfidfGlobal | TirankLocal | TirankGlobal
-- | NodePoly indicates that the Node type is polymorphic
type Node json = NodePoly NodeId NodeTypeId NodeUserId (Maybe NodeParentId) NodeName UTCTime json -- NodeVector
-- type Node json = NodePoly NodeId NodeTypeId UserId ParentId NodeName UTCTime json
type NodeTypeId = Int
type NodeId = Int
type NodeParentId = Int
type NodeUserId = Int
type NodeName = Text
--type NodeVector = Vector
--type NodeUser = Node HyperdataUser
-- | Then a Node can be either a Folder or a Corpus or a Document
type NodeUser = Node HyperdataUser
type Folder = Node HyperdataFolder
type Project = Folder -- NP Node HyperdataProject ?
type Corpus = Node HyperdataCorpus
type Document = Node HyperdataDocument
-- | Community Manager Use Case
type Annuaire = Corpus
type Individu = Document
-- | Favorites Node enables Node categorization
type Favorites = Node HyperdataFavorites
-- | Swap Node enables swapping a Node, with some synonyms for clarity
type NodeSwap = Node HyperdataResource
-- | Then a Node can be a List which has some synonyms
type List = Node HyperdataList
type StopList = List
type MainList = List
type MapList = List
type GroupList = List
-- | Then a Node can be a Score which has some synonyms
type Score = Node HyperdataScore
type Occurrences = Score
type Cooccurrences = Score
type Specclusion = Score
type Genclusion = Score
type Cvalue = Score
type Tficf = Score
---- TODO All these Tfidf* will be replaced with TFICF
type TfidfCorpus = Tficf
type TfidfGlobal = Tficf
type TirankLocal = Tficf
type TirankGlobal = Tficf
--
---- | Then a Node can be either a Graph or a Phylo or a Notebook
type Graph = Node HyperdataGraph
type Phylo = Node HyperdataPhylo
type Notebook = Node HyperdataNotebook
nodeTypes :: [(NodeType, NodeTypeId)]
nodeTypes = [ (NodeUser , 1)
, (Project , 2)
, (Corpus , 3)
, (Document , 4)
--, (NodeSwap , 19)
------ Lists
-- , (StopList , 5)
-- , (GroupList , 6)
-- , (MainList , 7)
-- , (MapList , 8)
---- Scores
-- , (Occurrences , 10)
-- , (Cooccurrences , 9)
--
-- , (Specclusion , 11)
-- , (Genclusion , 18)
-- , (Cvalue , 12)
--
-- , (TfidfCorpus , 13)
-- , (TfidfGlobal , 14)
--
-- , (TirankLocal , 16)
-- , (TirankGlobal , 17)
--
---- Node management
-- , (Favorites , 15)
--
]
--
nodeTypeId :: NodeType -> NodeTypeId
nodeTypeId tn = fromMaybe (error ("Typename " ++ show tn ++ " does not exist")) (lookup tn nodeTypes)
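-- Example (illustrative): nodeTypeId Corpus == 3, while a NodeType missing
-- from nodeTypes (e.g. Lists) raises the error above.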
-- Temporary types to be removed
type Ngrams = (Text, Text, Text)
type ErrorMessage = String
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DeriveGeneric #-}
-- {-# LANGUAGE DuplicateRecordFields #-}
module Data.Gargantext.Types.Node where
import Data.Text (Text)
import GHC.Generics (Generic)
import Data.Time (UTCTime)
import Data.Gargantext.Utils.Prefix (unPrefix)
import Data.Aeson.TH (deriveJSON)
-- node_Id... ?
data NodePoly id typename userId parentId name date hyperdata = Node { node_id :: id
, node_typename :: typename
, node_userId :: userId
-- , nodeHashId :: hashId
, node_parentId :: parentId
, node_name :: name
, node_date :: date
, node_hyperdata :: hyperdata
-- , node_titleAbstract :: titleAbstract
} deriving (Show)
$(deriveJSON (unPrefix "node_") ''NodePoly)
data Status = Status { status_Date :: Maybe UTCTime
, status_Error :: Maybe Text
, status_Action :: Maybe Text
, status_Complete :: Maybe Bool
, status_Progress :: Maybe Int
} deriving (Show, Generic)
$(deriveJSON (unPrefix "status_") ''Status)
data HyperdataDocument = HyperdataDocument { hyperdataDocument_Bdd :: Maybe Text
, hyperdataDocument_Doi :: Maybe Text
, hyperdataDocument_Url :: Maybe Text
, hyperdataDocument_Page :: Maybe Int
, hyperdataDocument_Title :: Maybe Text
, hyperdataDocument_Authors :: Maybe Text
, hyperdataDocument_Abstract :: Maybe Text
, hyperdataDocument_Statuses :: Maybe [Status]
, hyperdataDocument_Publication_date :: Maybe Text
, hyperdataDocument_Publication_year :: Maybe Text
, hyperdataDocument_Publication_month :: Maybe Text
, hyperdataDocument_Publication_hour :: Maybe Text
, hyperdataDocument_Publication_minute :: Maybe Text
, hyperdataDocument_Publication_second :: Maybe Text
, hyperdataDocument_LanguageIso2 :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataDocument_") ''HyperdataDocument)
data LanguageNodes = LanguageNodes { languageNodes___unknown__ :: [Int]}
deriving (Show, Generic)
$(deriveJSON (unPrefix "languageNodes_") ''LanguageNodes)
data Resource = Resource { resource_Url :: Maybe Text
, resource_Path :: Maybe Text
, resource_Type :: Maybe Int
, resource_Extracted :: Maybe Bool
} deriving (Show, Generic)
$(deriveJSON (unPrefix "resource_") ''Resource)
data HyperdataCorpus = HyperdataCorpus { hyperdataCorpus_Action :: Maybe Text
, hyperdataCorpus_Statuses :: Maybe [Status]
, hyperdataCorpus_Languages :: Maybe LanguageNodes
, hyperdataCorpus_Resources :: Maybe [Resource]
, hyperdataCorpus_Language_id :: Maybe Text
, hyperdataCorpus_Skipped_docs :: Maybe [Int]
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataCorpus_") ''HyperdataCorpus)
data HyperdataUser = HyperdataUser { hyperdataUser_language :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataUser_") ''HyperdataUser)
-- Preferences ?
data HyperdataFolder = HyperdataFolder { hyperdataFolder_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataFolder_") ''HyperdataFolder)
data HyperdataProject = HyperdataProject { hyperdataProject_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataProject_") ''HyperdataProject)
data HyperdataList = HyperdataList { hyperdataList_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataList_") ''HyperdataList)
data HyperdataScore = HyperdataScore { hyperdataScore_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataScore_") ''HyperdataScore)
data HyperdataFavorites = HyperdataFavorites { hyperdataFavorites_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataFavorites_") ''HyperdataFavorites)
data HyperdataResource = HyperdataResource { hyperdataResource_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataResource_") ''HyperdataResource)
-- TODO add the Graph Structure here
data HyperdataGraph = HyperdataGraph { hyperdataGraph_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataGraph_") ''HyperdataGraph)
-- TODO add the Graph Structure here
data HyperdataPhylo = HyperdataPhylo { hyperdataPhylo_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataPhylo_") ''HyperdataPhylo)
-- | TODO FEATURE: Notebook saved in the node (to work with Python or Haskell)
data HyperdataNotebook = HyperdataNotebook { hyperdataNotebook_Preferences :: Maybe Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "hyperdataNotebook_") ''HyperdataNotebook)
module Data.Gargantext.Utils (
-- module Data.Gargantext.Utils.Chronos
module Data.Gargantext.Utils.Prefix
) where
-- import Data.Gargantext.Utils.Chronos
import Data.Gargantext.Utils.Prefix
module Data.Gargantext.Utils.Chronos where
import Data.Maybe (fromJust, isJust, isNothing)
import qualified Data.Time as DT
import qualified Data.UTC as DU
import Data.Time
import Data.Time.Clock.POSIX
import Text.Regex
parseDate :: String -> Maybe [String]
parseDate d = matchRegex (mkRegex "(.*)/(.*)/(.*)") d
getDate' :: Maybe [String] -> (Integer, Int, Int)
getDate' d
  | isJust d = toGregorian $ fromGregorian (read year) (read month) (read day)
| otherwise = toGregorian $ fromGregorian 2015 1 1
where
Just [day, month, year] = d
getDate :: String -> (Integer, Int, Int)
getDate = getDate' . parseDate
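-- Example (illustrative): getDate "25/12/2017" == (2017,12,25);
-- unparsable input falls back to (2015,1,1).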
--getDateDay :: Maybe [String] -> Day
--getDateDay d = fromGregorian (read year) (read month) (read day)
-- where Just [day, month, year] = matchRegex (mkRegex "(.*)/(.*)/(.*)") d
getDateDay' :: Maybe [String] -> Day
getDateDay' d
  | isJust d = fromGregorian (read year) (read month) (read day)
| otherwise = fromGregorian 2015 1 1
where Just [day, month, year] = d
getDateDay :: String -> Day
getDateDay = getDateDay' . parseDate
getDateUTC :: String -> String
getDateUTC d = show $ DT.UTCTime (getDateDay d) (DT.timeOfDayToTime $ DT.TimeOfDay 0 0 0)
getYear :: String -> String
getYear date = s where
(y, m, d) = getDate date
s = show y
getMonth :: String -> String
getMonth date = s where
(y, m, d) = getDate date
s = show m
getDay :: String -> String
getDay date = s where
(y, m, d) = getDate date
s = show d
--for Dates exported via xls2csv tool
type MT = Maybe (DU.Local DU.DateTime)
type MS = Maybe String
--getDate'' :: String -> String
--getDate'' gd = d where
-- start = "1900-01-01T00:00:00Z"
-- da = (DU.parseRfc3339 start :: MT) >>= DU.addDays ( (read gd :: Integer) -2) >>= DU.renderRfc3339 :: MS
-- d = fromJust da
--
--getDate''' :: String -> String
--getDate''' gd = d where
-- start = "1900-01-01T00:00:00Z"
-- da = (DU.parseRfc3339 start :: MT) >>= DU.addDays ( (read gd :: Integer) -2) >>= DU.renderIso8601CalendarDate :: MS
-- d = fromJust da
--
date2greg :: String -> (Integer, Int, Int)
date2greg date = (y, m, d) where
  (y, m, d) = DT.toGregorian $ DT.addDays ((read date :: Integer) - 2) $ DT.utctDay (read "1900-01-01 00:00:00" :: DT.UTCTime)
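-- Example (illustrative): for an xls serial date, date2greg "43831" == (2020,1,1):
-- 43831 - 2 days after 1900-01-01; the -2 compensates for the spreadsheet's
-- fictitious 1900-02-29 and its 1-based day count.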
getYear' :: String -> String
getYear' date = s where
(y, m, d) = date2greg date
s = show y
getMonth' :: String -> String
getMonth' date = s where
(y, m, d) = date2greg date
s = show m
getDay' :: String -> String
getDay' date = s where
(y, m, d) = date2greg date
s = show d
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RankNTypes #-}
module Data.Gargantext.Utils.Count (head, last, all, any, sum, product, length)
where
import Data.Monoid
import Protolude hiding ((<>), head, last, all, any, sum, product, length)
import qualified Data.Foldable
import Control.Lens (Getting, foldMapOf)
data Fold i o = forall m . Monoid m => Fold (i -> m) (m -> o)
instance Functor (Fold i) where
fmap k (Fold tally summarize) = Fold tally (k . summarize)
instance Applicative (Fold i) where
pure o = Fold (\_ -> ()) (\_ -> o)
Fold tallyF summarizeF <*> Fold tallyX summarizeX = Fold tally summarize
where
tally i = (tallyF i, tallyX i)
summarize (nF, nX) = summarizeF nF (summarizeX nX)
focus :: (forall m . Monoid m => Getting m b a) -> Fold a o -> Fold b o
focus lens (Fold tally summarize) = Fold (foldMapOf lens tally) summarize
fold :: Fold i o -> [i] -> o
fold (Fold tally summarize) is = summarize (reduce (map tally is))
where
reduce = Data.Foldable.foldl' (<>) mempty
--
head :: Fold a (Maybe a)
head = Fold (First . Just) getFirst
last :: Fold a (Maybe a)
last = Fold (Last . Just) getLast
--
all :: (a -> Bool) -> Fold a Bool
all predicate = Fold (All . predicate) getAll
any :: (a -> Bool) -> Fold a Bool
any predicate = Fold (Any . predicate) getAny
--
sum :: Num n => Fold n n
sum = Fold Sum getSum
product :: Num n => Fold n n
product = Fold Product getProduct
length :: Num n => Fold i n
length = Fold (\_ -> Sum 1) getSum
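-- Example (illustrative): the Applicative instance lets several folds share
-- a single traversal: fold ((,) <$> sum <*> length) [1,2,3] == (6,3)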
-- | Average function optimized (/!\ need to test it)
data Average a = Average { numerator :: !a, denominator :: !Int }
instance Num a => Monoid (Average a) where
mempty = Average 0 0
mappend (Average xL nL) (Average xR nR) = Average (xL + xR) (nL + nR)
average :: Fractional a => Fold a a
average = Fold tally summarize
where
tally x = Average x 1
summarize (Average numerator denominator) =
numerator / fromIntegral denominator
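-- Example (illustrative): fold average [1,2,3,4] == 2.5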
module Data.Gargantext.Utils.DateUtils where
import Data.Time (UTCTime, toGregorian, utctDay)
--
--readInt :: IO [Char] -> IO Int
--readInt = readLn
--
--readBool :: IO [Char] -> IO Bool
--readBool = readLn
utc2gregorian :: UTCTime -> (Integer, Int, Int)
utc2gregorian date = toGregorian $ utctDay date
gregorian2year :: (Integer, Int, Int) -> Integer
gregorian2year (y, _m, _d) = y
utc2year :: UTCTime -> Integer
utc2year date = gregorian2year $ utc2gregorian date
averageLength :: Fractional a => [[a1]] -> a
averageLength l = fromIntegral (sum (map length l)) / fromIntegral (length l)
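-- Example (illustrative): averageLength ["ab","cdef"] == 3.0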
--main :: IO ()
--main = do
-- c <- getCurrentTime
-- print c -- $ toYear $ toGregorian $ utctDay c
charToString :: Char -> String
charToString = (:[])
-- DEFINITIONS as SPECS
-- (Engineering axioms for Gargantext)
------------------------------------------------------------------------
-- From file to corpus
------------------------------------------------------------------------
-- > A Corpus is a list of Documents
type Corpus = [Document]
-- > A Document should have a date, some text, and maybe a language.
-- > Remarks :
-- > If no date then force one ?
-- > Analyze either text or numbers
-- > only one language per document
data Document = Document { date :: UTCTime
                         , uce  :: Map Text (Either (Maybe Text) (Maybe Double))
                         , lang :: Maybe Language
                         }
parseFiles :: Maybe ParserType -> [File] -> Corpus
parseFiles = undefined
-- This function exists already (in Python)
parseFile' :: ParserType -> File -> Maybe [Document]
parseFile' = undefined
-- This function does not exist yet
parseFile :: Maybe ParserType -> File -> Maybe [Document]
parseFile parserType file = documents
where
documents = case parserType of
Nothing -> case guessParserType file of
Nothing -> askUser "Answer to the question with link to $doc"
Just parserType' -> parseFile (Just parserType') file
Just parserType'' -> case parserType'' of
UnsupportedYet -> askUser "Not supported yet, which priority ?"
                    _ -> parseFile' parserType'' file
data ParserType = RIS | ISI | XML | CSV | Europresse | Book | UnsupportedYet
guessParserType :: File -> Maybe ParserType
guessParserType = undefined
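-- A minimal sketch (assumption: guessing from the file extension, with a
-- hypothetical extensionOf helper):
-- guessParserType f = case extensionOf f of
--   "ris" -> Just RIS
--   "csv" -> Just CSV
--   _     -> Nothing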
------------------------------------------------------------------------
-- What kind of interactions with our users ?
------------------------------------------------------------------------
-- Question is Text only
type Question = Text
-- Possible Answers:
data Answer = ClosedAnswer | NumAnswer | OpenAnswer
-- Definitions of the Answers
type ClosedAnswer = Bool
type OpenAnswer = Text
type NumAnswer = Int
-- A form is a mapping between a question and maybe an answer
-- An empty form has Nothing in the (Maybe Answer) field
-- An answered question holds the value (Just Answer)
type Formular = Map Question (Maybe Answer)
askUser :: Question -> ClosedAnswer
askUser = undefined
data Advice = BugReport | WishList
askUser' :: Question -> Advice
askUser' question = case askUser question of
True -> BugReport
False -> WishList
------------------------------------------------------------------------
-- Specs for Lang Detection
------------------------------------------------------------------------
data Language = English | French
tagDoc :: Document -> Ngrams
tagDoc doc = case lang doc of
    Nothing -> case guessLang doc of
                 Nothing -> tagWith English doc -- assumed fallback language
                 Just l  -> tagWith l doc
    Just l  -> tagWith l doc
  where
    -- hypothetical helpers, to be implemented
    guessLang = undefined
    tagWith   = undefined
------------------------------------------------------------------------
-- Specs for ngrams Workflow
------------------------------------------------------------------------
module Data.Gargantext.Utils.Prefix where
import Data.Aeson (Value, defaultOptions, parseJSON)
import Data.Aeson.TH (Options, fieldLabelModifier, omitNothingFields)
import Data.Aeson.Types (Parser)
import Data.Char (toLower)
import Data.Monoid ((<>))
import Text.Read (readMaybe)
-- | Aeson Options that remove the prefix from fields
unPrefix :: String -> Options
unPrefix prefix = defaultOptions
{ fieldLabelModifier = unCapitalize . dropPrefix prefix
, omitNothingFields = True
}
-- | Lower case leading character
unCapitalize :: String -> String
unCapitalize [] = []
unCapitalize (c:cs) = toLower c : cs
-- | Remove given prefix
dropPrefix :: String -> String -> String
dropPrefix prefix input = go prefix input
where
go pre [] = error $ contextual $ "prefix leftover: " <> pre
go [] (c:cs) = c : cs
go (p:preRest) (c:cRest)
| p == c = go preRest cRest
| otherwise = error $ contextual $ "not equal: " <> (p:preRest) <> " " <> (c:cRest)
contextual msg = "dropPrefix: " <> msg <> ". " <> prefix <> " " <> input
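-- Examples (illustrative):
-- dropPrefix "node_" "node_name" == "name"
-- unCapitalize "Name" == "name"
-- so unPrefix "node_" maps the field node_Name to the JSON key "name".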
parseJSONFromString :: (Read a) => Value -> Parser a
parseJSONFromString v = do
numString <- parseJSON v
case readMaybe (numString :: String) of
Nothing -> fail $ "Invalid number for TransactionID: " ++ show v
Just n -> return n
module Data.Gargantext.Utils.SaveGetHash where
import System.FilePath (addExtension, joinPath)
import Data.ByteString.Lazy.Internal (ByteString)
import Data.List (elem, intersperse, insert)
import Data.List.Extra (chunksOf)
import Data.Digest.Pure.MD5 (md5)
import System.Directory (getDirectoryContents, createDirectory, findFile, createDirectoryIfMissing)
import Control.Monad (foldM)
import Data.List (splitAt)
import Data.ByteString.Lazy.Internal (packChars)
import qualified Data.ByteString.Lazy as BL
import Codec.Compression.Zlib (compress, decompress)
data Config = Config {
root :: String
, chunkSize :: Int
, compression :: Bool
} deriving Show
conf :: Config
conf = Config {
    root = "/tmp/robot"
  , chunkSize = 2
  , compression = True
  }
chunkUrl :: Int -> ByteString -> [[Char]]
chunkUrl a url = chunksOf a $ show $ md5 url
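-- Example (illustrative): the md5 hex digest is 32 chars long, so
-- chunkUrl 2 url yields 16 two-char path segments.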
-- replace it with createDirectoryIfMissing
existOrCreate :: [[Char]] -> FilePath -> IO [[Char]]
existOrCreate path_ dir = do
let path = joinPath path_
let returnPath = return $ path_ ++ [dir]
is <- elem dir <$> getDirectoryContents path -- ?
case is of
True -> do
returnPath
False -> do
createDirectory $ path ++ "/" ++ dir
returnPath
doPath :: [[Char]] -> [FilePath] -> IO [[Char]]
doPath root path = foldM (\x y -> existOrCreate x y) root path
splitAt' :: Int -> Int -> [Char] -> ([Char], [Char], [Char])
splitAt' i1 i2 x = (a, b, c) where
(a, a') = splitAt i1 x
(b, c) = splitAt i2 a'
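-- Example (illustrative): splitAt' 2 3 "abcdefgh" == ("ab","cde","fgh")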
-- do not overwrite the file if it already exists
-- split the url cleanly
saveFile :: ByteString -> String -> IO String
saveFile url'' file = do
let url' = chunkUrl (chunkSize conf) url''
let url = init url'
-- add extension according to the filetype
let filename = Prelude.foldl addExtension (last url') ["html", "zlib"]
  _ <- doPath [root conf] url
let path = (root conf) ++ "/" ++ joinPath url ++ "/" ++ filename
--case (findFile ["/tmp/sdfs"] "file.hmtl.zib"
-- Nothing -> create
-- _ -> change name
case (compression conf) of
True -> BL.writeFile path (compress $ packChars file)
False -> writeFile path file
return path
getFile :: FilePath -> IO ByteString
getFile path = do
case (compression conf) of
True -> decompress <$> BL.readFile path
False -> packChars <$> Prelude.readFile path
-- resources
-- add Resource
-- levenshtein distance...
module Data.Gargantext.Ngrams.Hetero where
import GHC.Real as R
import Data.Set as S
import Data.Map as M
import Data.List.Split as S
import Database.PostgreSQL.Simple as PGS
import Opaleye.PGTypes (PGInt4)
import Opaleye.Internal.Column (Column)
import Data.Gargantext.Database.Gargandb
import Data.Gargantext.Database.Private
--import Data.Gargantext.Utils.Chronos
import Data.Gargantext.Ngrams.Words (cleanText)
import Data.Gargantext.Ngrams.Count (occurrences)
import Data.Gargantext.Database.Simple
--main = do
-- t <- getTextquery
-- print (Prelude.map (heterogeinity . concat) $ S.chunksOf 3 t)
-- heterogeinity over concatenated texts
heterogeinity' :: Int -> Int -> Int -> IO [Integer]
heterogeinity' corpus_id limit x = do
t <- getAbstract corpus_id limit
Prelude.mapM (dicoStruct . occurrences) $ (S.chunksOf x) . cleanText $ concat t
heterogeinity'' :: Int -> Int -> Int -> IO [Integer]
heterogeinity'' corpus_id limit size = do
t <- getAbstract corpus_id limit
Prelude.mapM (dicoStruct . occurrences) $ (S.chunksOf size) . cleanText $ concat t
dicoStruct :: (Integral r, Monad m) => M.Map t r -> m r
dicoStruct dict_occ = do
let keys_size = toInteger $ length $ M.keys dict_occ
  let total_occ = sum $ Prelude.map snd $ M.toList dict_occ
return $ div total_occ (fromIntegral keys_size)
-- heterogeinity over a UCT (Unité de Contexte Textuel: a textual context unit)
heterogeinity :: [Char] -> IO Integer
heterogeinity string = do
let dict_occ = occurrences $ cleanText string
let keys_size = toInteger $ length $ M.keys dict_occ
  let total_occ = sum $ Prelude.map snd $ M.toList dict_occ
return $ div total_occ (fromIntegral keys_size)
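-- Example (illustrative): assuming cleanText tokenizes "aa bb aa bb" into
-- 4 occurrences over 2 distinct forms, heterogeinity returns 4 `div` 2 == 2.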
--computeHeterogeinity
-- :: Fractional t =>
-- Opaleye.Internal.Column.Column Opaleye.PGTypes.PGInt4
-- -> IO (t, Integer, Integer)
computeHeterogeinity corpus_id = do
c <- PGS.connect infoGargandb
t <- getText c (nodeHyperdataText corpus_id)
heterogeinity $ Prelude.concat t
main2 = do
let corpus_ids = [
("ALL", 272927) -- 73
,("Histoire", 1387736) -- 28
,("Sciences Po", 1296892) -- 37
,("Phylosophie", 1170004) -- 20
,("Psychologie", 1345852) -- 37
,("Sociologie", 1246452) -- 42
]
  r <- Prelude.mapM computeHeterogeinity (Prelude.map snd corpus_ids)
  return r