Commit de549c2f authored by Alexandre Delanoë

[MERGE] bashql + pipeline.

parents 5c858e52 37c3a450
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-|
Module : Data.ByteString.Extended
Description : Short description
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Example showing how to extend existing base libraries.
-}
module Data.ByteString.Extended ( module Data.ByteString
, replace
) where
import Data.ByteString
replace :: ByteString -> ByteString -> ByteString -> ByteString
replace = undefined
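-- A possible implementation sketch (not part of this commit), using
-- breakSubstring from Data.ByteString; `replace pat sub bs` substitutes every
-- non-overlapping occurrence of pat by sub (assumes a non-empty pattern):
--
-- replace pat sub bs = case breakSubstring pat bs of
--   (pre, rest) | null rest -> pre
--               | otherwise -> pre `append` sub
--                                  `append` replace pat sub (drop (length pat) rest)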
-- instance (Binary k, Binary v) => Binary (HaskMap k v) where
-- ...
@@ -42,18 +42,16 @@ main = do
  let q = ["gratuit", "gratuité", "culture", "culturel"]
  (h,csvDocs) <- readCsv rPath
  putStrLn $ "Number of documents before:" <> show (V.length csvDocs)
  putStrLn $ "Mean size of docs:" <> show (docsSize csvDocs)
  let docs = toDocs csvDocs
  let engine = insertDocs docs initialDocSearchEngine
  let docIds = S.query engine (map pack q)
  let docs' = fromDocs $ filterDocs docIds (V.fromList docs)
  putStrLn $ "Number of documents after:" <> show (V.length docs')
  putStrLn $ "Mean size of docs:" <> show (docsSize docs')
  writeCsv wPath (h, docs')
@@ -24,6 +24,7 @@ library:
# - -Werror
exposed-modules:
- Gargantext
- Gargantext.TextFlow
- Gargantext.Prelude
- Gargantext.Core
- Gargantext.Core.Types
@@ -68,6 +69,7 @@ library:
- hlcm
- ini
- jose-jwt
- kmeans-vector
- lens
- logging-effect
- matrix
@@ -121,25 +123,38 @@ library:
- zlib
# - utc
executables:
  gargantext:
    main: Main.hs
    source-dirs: app
    ghc-options:
    - -threaded
    - -rtsopts
    - -with-rtsopts=-N
    - -O2
    - -Wmissing-signatures
    dependencies:
    - base
    - containers
    - gargantext
    - vector
    - cassava
    - ini
    - optparse-generic
    - unordered-containers
    - full-text-search

  gargantext-workflow:
    main: Main.hs
    source-dirs: app-workflow
    ghc-options:
    - -threaded
    - -rtsopts
    - -with-rtsopts=-N
    - -O2
    - -Wmissing-signatures
    dependencies:
    - base
    - gargantext
tests:
  garg-test:
@@ -265,6 +265,9 @@ data NodePoly id typename userId parentId name date hyperdata = Node { node_id
  } deriving (Show, Generic)
$(deriveJSON (unPrefix "node_") ''NodePoly)

instance Arbitrary (NodePoly NodeId NodeTypeId (Maybe NodeUserId) NodeParentId NodeName UTCTime Value) where
  arbitrary = elements [Node 1 1 (Just 1) 1 "name" (jour 2018 01 01) (toJSON ("{}"::Text))]
{-|
Module : Gargantext.Database
Description : Main commands of BASHQL, a domain-specific language to deal with the Gargantext database.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX

* BASHQL = functional (Bash * SQL)

* Which language to choose when working with a database? To keep it
simple: instead of the Object-Relational Mapping (ORM) [1] strategies
common nowadays, which are inspired more by object logic than by
functional logic, the semantics of BASHQL put the function first.

* BASHQL focuses on functions: it uses Bash function names and
implements them with SQL behind the scenes. BASHQL is therefore
inspired more by the Bash language [2] than by SQL, and it follows
Bash's main commands as its specification and documentation.

* Main arguments:
  1. Theoretical: databases and file systems can each be thought of as
     a single category, an assumption based on theoretical work on
     databases by David Spivak [0].
  2. Practical: basic Bash commands are a daily practice in the
     developer community.

* How to help?
  1. Choose a command you like in Bash
  2. Implement it in Haskell-SQL according to the Gargantext schema
     (a tree-like file system)
  3. Translate it into BASHQL (follow previous implementations)
  4. Make a pull request (enjoy the community)

* Implementation strategy: functional adaptations are made to the
Gargantext language options, and SQL optimizations are carried out
continuously during the project. For the Haskell part, you may take
inspiration from the Turtle library written by Gabriel Gonzalez [3],
which shows how to write Bash translations in Haskell.

* Semantics
- The file system is now a node system, where each file is a node in a
directed graph (DG).

* References
[0] MIT Press has published "Category theory for the sciences". The
book can also be purchased on Amazon. Here are reviews by the MAA, by
the AMS, and by SIAM.
[1] https://en.wikipedia.org/wiki/Object-relational_mapping
[2] https://en.wikipedia.org/wiki/Bash_(Unix_shell)
[3] https://github.com/Gabriel439/Haskell-Turtle-Library
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Database ( module Gargantext.Database.Utils
                           , get
                           , ls  , ls'
                           , home, home'
                           , post, post', postR'
                           , del , del'
                           , tree, tree'
                           )
    where

import Gargantext.Core.Types
import Gargantext.Core.Types.Node
import Gargantext.Database.Utils (connectGargandb)
import Gargantext.Database.Node
import Gargantext.Prelude
import Database.PostgreSQL.Simple (Connection)
import Data.Text (Text)
import Opaleye hiding (FromField)
import Data.Aeson
import Data.ByteString (ByteString)
import Data.List (last, concat)

type UserId = Int
--type NodeId = Int
-- List of NodeId
-- type PWD a = PWD UserId [a]
type PWD = [NodeId]
--data PWD' a = a | PWD' [a]
-- | TODO get Children or Node
get :: Connection -> PWD -> IO [Node Value]
get _ [] = pure []
get conn pwd = runQuery conn $ selectNodesWithParentID (last pwd)
-- | Home, need to filter with UserId
home :: Connection -> IO PWD
home c = map node_id <$> getNodesWithParentId c 0 Nothing
-- | ls == get Children
ls :: Connection -> PWD -> IO [Node Value]
ls = get
tree :: Connection -> PWD -> IO [Node Value]
tree c p = do
ns <- get c p
cs <- mapM (\p' -> get c [p']) $ map node_id ns
pure $ ns <> (concat cs)
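-- e.g. (sketch): `tree c h` returns the nodes of `h` together with all of
-- their direct children, issuing one `get` per node.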
-- | TODO
post :: Connection -> PWD -> [NodeWrite'] -> IO Int64
post _ [] _ = pure 0
post _ _ [] = pure 0
post c pth ns = mkNode c (last pth) ns
postR :: Connection -> PWD -> [NodeWrite'] -> IO [Int]
postR _ [] _ = pure [0]
postR _ _ [] = pure [0]
postR c pth ns = mkNodeR c (last pth) ns
rm :: Connection -> PWD -> [NodeId] -> IO Int
rm = del
del :: Connection -> PWD -> [NodeId] -> IO Int
del _ [] _ = pure 0
del _ _ [] = pure 0
del c pth ns = deleteNodes c ns
put :: Connection -> PWD -> [a] -> IO Int64
put = undefined
-- | TODO
-- cd (Home UserId) | (Node NodeId)
-- cd Path
-- jump NodeId
-- touch Dir
--------------------------------------------------------------
-- Tests
--------------------------------------------------------------
home' :: IO PWD
home' = do
c <- connectGargandb "gargantext.ini"
home c
ls' :: IO [Node Value]
ls' = do
c <- connectGargandb "gargantext.ini"
h <- home c
ls c h
tree' :: IO [Node Value]
tree' = do
c <- connectGargandb "gargantext.ini"
h <- home c
tree c h
post' :: IO [Int]
post' = do
c <- connectGargandb "gargantext.ini"
pid <- last <$> home c
let uid = 1
postNode c uid pid (Node' Corpus "Premier corpus" "{}" [ Node' Document "Doc1" "{}" []
, Node' Document "Doc2" "{}" []
, Node' Document "Doc3" "{}" []
]
)
del' :: [NodeId] -> IO Int
del' ns = do
c <- connectGargandb "gargantext.ini"
h <- home c
del c h ns
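-- Usage sketch of the BASHQL commands above, e.g. from GHCi
-- (hypothetical session; the returned ids depend on the database state):
--
-- >>> h   <- home'     -- like `cd ~ && pwd`
-- >>> ns  <- ls'       -- children of the home nodes
-- >>> ids <- post'     -- insert a corpus with three documents
-- >>> del' ids         -- and remove them again, like `rm`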
@@ -21,6 +21,10 @@ Portability : POSIX
module Gargantext.Database.Node where
import GHC.Int (Int64)
import Data.Maybe
import Data.Time (UTCTime)
import Database.PostgreSQL.Simple.FromField ( Conversion
                                            , ResultError(ConversionFailed)
                                            , FromField
@@ -28,6 +32,7 @@ import Database.PostgreSQL.Simple.FromField ( Conversion
                                            , returnError
                                            )
import Prelude hiding (null, id, map, sum)
import Data.Time.Segment (jour, timesAfter, Granularity(D))
import Gargantext.Core.Types
import Gargantext.Core.Types.Node (NodeType)
@@ -43,10 +48,15 @@ import Data.Maybe (Maybe, fromMaybe)
import Data.Text (Text)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Typeable (Typeable)
import qualified Data.ByteString.Internal as DBI
import qualified Data.ByteString as DB
import qualified Data.ByteString.Lazy as DBL
import Data.ByteString (ByteString)
import Database.PostgreSQL.Simple (Connection)
import Opaleye hiding (FromField)
import Opaleye.Internal.QueryArr (Query(..))
import qualified Data.Profunctor.Product as PP
-- | Types for Node Database Management
data PGTSVector
@@ -78,7 +88,7 @@ instance QueryRunnerColumnDefault PGJsonb HyperdataUser where
fromField' :: (Typeable b, FromJSON b) => Field -> Maybe DB.ByteString -> Conversion b
fromField' field mb = do
  v <- fromField field mb
  valueToHyperdata v
@@ -89,7 +99,7 @@ fromField' field mb = do
$(makeAdaptorAndInstance "pNode" ''NodePoly)
$(makeLensesWith abbreviatedFields ''NodePoly)

nodeTable :: Table NodeWrite NodeRead
@@ -105,12 +115,40 @@ nodeTable = Table "nodes" (pNode Node { node_id = optional "id"
)
nodeTable' :: Table ( Maybe (Column PGInt4)
                    , Column PGInt4
                    , Column PGInt4
                    , Column PGInt4
                    , Column PGText
                    , Maybe (Column PGTimestamptz)
                    , Column PGJsonb
                    )
                    ( Column PGInt4
                    , Column PGInt4
                    , Column PGInt4
                    , Column PGInt4
                    , Column PGText
                    , Column PGTimestamptz
                    , Column PGJsonb
                    )
nodeTable' = Table "nodes" (PP.p7 ( optional "id"
, required "typename"
, required "user_id"
, required "parent_id"
, required "name"
, optional "date"
, required "hyperdata"
)
)
queryNodeTable :: Query NodeRead
queryNodeTable = queryTable nodeTable

selectNode :: Column PGInt4 -> Query NodeRead
selectNode id = proc () -> do
  row <- queryNodeTable -< ()
  restrict -< node_id row .== id
  returnA -< row
@@ -142,13 +180,11 @@ selectNodesWith' parentId maybeNodeType = proc () -> do
deleteNode :: Connection -> Int -> IO Int
deleteNode conn n = fromIntegral <$> runDelete conn nodeTable
                    (\(Node n_id _ _ _ _ _ _) -> n_id .== pgInt4 n)

deleteNodes :: Connection -> [Int] -> IO Int
deleteNodes conn ns = fromIntegral <$> runDelete conn nodeTable
                      (\(Node n_id _ _ _ _ _ _) -> in_ (map pgInt4 ns) n_id)
@@ -164,6 +200,11 @@ getNodesWithParentId :: Connection -> Int
                     -> Maybe Text -> IO [Node HyperdataDocument]
getNodesWithParentId conn n _ = runQuery conn $ selectNodesWithParentID n
getNodesWithParentId' :: Connection -> Int
-> Maybe Text -> IO [Node Value]
getNodesWithParentId' conn n _ = runQuery conn $ selectNodesWithParentID n
selectNodesWithParentID :: Int -> Query NodeRead
selectNodesWithParentID n = proc () -> do
  row@(Node _ _ _ parent_id _ _ _) <- queryNodeTable -< ()
@@ -181,12 +222,103 @@ selectNodesWithType type_id = proc () -> do
  restrict -< tn .== type_id
  returnA -< row
getNode' :: Connection -> Int -> IO (Node Value)
getNode' c id = do
fromMaybe (error "TODO: 404") . headMay <$> runQuery c (limit 1 $ selectNode (pgInt4 id))
getNode :: Connection -> Int -> IO (Node HyperdataDocument)
getNode conn id = do
  fromMaybe (error "TODO: 404") . headMay <$> runQuery conn (limit 1 $ selectNode (pgInt4 id))

getNodesWithType :: Connection -> Column PGInt4 -> IO [Node HyperdataDocument]
getNodesWithType conn type_id = do
  runQuery conn $ selectNodesWithType type_id
type UserId = NodeId
type TypeId = Int
------------------------------------------------------------------------
-- Quick and dirty
------------------------------------------------------------------------
type NodeWrite' = NodePoly (Maybe Int) Int Int (ParentId) Text (Maybe UTCTime) ByteString
--node :: UserId -> ParentId -> NodeType -> Text -> Value -> NodeWrite'
node :: UserId -> ParentId -> NodeType -> Text -> Value -> NodeWrite'
node userId parentId nodeType name nodeData = Node Nothing typeId userId parentId name Nothing byteData
where
typeId = nodeTypeId nodeType
byteData = DB.pack $ DBL.unpack $ encode nodeData
node2write pid (Node id tn ud _ nm dt hp) = ((pgInt4 <$> id)
,(pgInt4 tn)
,(pgInt4 ud)
,(pgInt4 pid)
,(pgStrictText nm)
,(pgUTCTime <$> dt)
,(pgStrictJSONB hp)
)
mkNode :: Connection -> ParentId -> [NodeWrite'] -> IO Int64
mkNode conn pid ns = runInsertMany conn nodeTable' $ map (node2write pid) ns
mkNodeR :: Connection -> ParentId -> [NodeWrite'] -> IO [Int]
mkNodeR conn pid ns = runInsertManyReturning conn nodeTable' (map (node2write pid) ns) (\(i,_,_,_,_,_,_) -> i)
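-- Usage sketch (hypothetical values, not part of this commit):
--
-- ids <- mkNodeR conn parentId [node 1 parentId Corpus "corpus" (toJSON ("{}" :: Text))]
--
-- would insert one Corpus row under parentId and return its fresh id.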
------------------------------------------------------------------------
-- TODO Hierarchy of Nodes
-- post and get same types Node' and update if changes
{- TODO semantic to achieve
post c uid pid [ Node' Corpus "name" "{}" []
, Node' Folder "name" "{}" [Node' Corpus "test 2" "" [ Node' Document "title" "metaData" []
, Node' Document "title" "jsonData" []
]
]
]
-}
------------------------------------------------------------------------
-- TODO
-- currently this function removes the child relation
-- needs a Temporary type between Node' and NodeWriteT
node2table :: UserId -> ParentId -> Node' -> [NodeWriteT]
node2table uid pid (Node' nt txt v []) = [( Nothing, (pgInt4$ nodeTypeId nt), (pgInt4 uid), (pgInt4 pid)
, pgStrictText txt, Nothing, pgStrictJSONB $ DB.pack $ DBL.unpack $ encode v)]
node2table _ _ (Node' _ _ _ _) = panic "node2table: should not happen, Tree insert not implemented yet"
data Node' = Node' { _n_type :: NodeType
, _n_name :: Text
, _n_data :: Value
, _n_children :: [Node']
} deriving (Show)
type NodeWriteT = ( Maybe (Column PGInt4)
, Column PGInt4, Column PGInt4
, Column PGInt4, Column PGText
, Maybe (Column PGTimestamptz)
, Column PGJsonb
)
mkNode' :: Connection -> [NodeWriteT] -> IO Int64
mkNode' conn ns = runInsertMany conn nodeTable' ns
mkNodeR' :: Connection -> [NodeWriteT] -> IO [Int]
mkNodeR' conn ns = runInsertManyReturning conn nodeTable' ns (\(i,_,_,_,_,_,_) -> i)
postNode :: Connection -> UserId -> ParentId -> Node' -> IO [Int]
postNode c uid pid (Node' nt txt v []) = mkNodeR' c (node2table uid pid (Node' nt txt v []))
postNode c uid pid (Node' Corpus txt v ns) = do
[pid'] <- postNode c uid pid (Node' Corpus txt v [])
pids <- mkNodeR' c $ concat $ (map (\(Node' Document txt v _) -> node2table uid pid' $ Node' Document txt v []) ns)
pure (pids)
postNode c uid pid (Node' _ _ _ _) = panic "postNode for this type not implemented yet"
{-|
Module : Gargantext.Pipeline
Description : Text analysis pipeline
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Pipeline
where
import Data.Text.IO (readFile)
import Control.Arrow ((***))
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import qualified Data.List as L
import Data.Tuple.Extra (both)
----------------------------------------------
import Gargantext.Core (Lang(FR))
import Gargantext.Prelude
import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Text.Metrics.Count (cooc, removeApax)
import Gargantext.Text.Metrics
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
{-
____ _ _
/ ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
| | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
\____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
|___/
-}
pipeline path = do
-- Text <- IO Text <- FilePath
text <- readFile path
let contexts = splitBy (Sentences 5) text
myterms <- extractTerms Multi FR contexts
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList)
let myCooc = removeApax $ cooc myterms
--let (ti, fi) = createIndices myCooc
pure True
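-- Usage sketch (hypothetical path): `pipeline "data/corpus.txt"` currently
-- only builds the filtered cooccurrence map and returns True; the clustering
-- steps below are still commented out.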
--pure $ incExcSpeGen myCooc
-- Cooc -> Matrix
-- -- filter by spec/gen (dynamic programming)
-- let theScores = M.filter (>0) $ score conditional myCoocFiltered
----
------ -- Matrix -> Clustering
------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
-- partitions <- cLouvain theScores
-- pure partitions
---- | Building : -> Graph -> JSON
@@ -19,20 +19,23 @@ commentary with @some markup@.
module Gargantext.Prelude
  ( module Gargantext.Prelude
  , module Protolude
  , headMay, lastMay
  , module Text.Show
  , module Text.Read
  , cs
  , module Data.Maybe
  , sortWith
  )
  where
import GHC.Exts (sortWith)
import Data.Maybe (isJust, fromJust, maybe)
import Protolude ( Bool(True, False), Int, Int64, Double, Integer
                 , Fractional, Num, Maybe(Just,Nothing)
                 , Enum, Bounded, Float
                 , Floating, Char, IO
                 , pure, (>>=), (=<<), (<*>), (<$>), panic
                 , putStrLn
                 , head, flip
                 , Ord, Integral, Foldable, RealFrac, Monad, filter
@@ -50,6 +53,7 @@ import Protolude ( Bool(True, False), Int, Double, Integer
                 , undefined
                 , IO()
                 , compare
, on
                 )

-- TODO import functions optimized in Utils.Count
@@ -63,7 +67,7 @@ import qualified Data.Map as M
import Data.Map.Strict (insertWith)
import qualified Data.Vector as V
import Safe (headMay, lastMay)
import Text.Show (Show(), show)
import Text.Read (Read())
import Data.String.Conversions (cs)
@@ -109,7 +113,7 @@ ma = movingAverage 3
-- | splitEvery n == chunkAlong n n
splitEvery :: Int -> [a] -> [[a]]
splitEvery _ [] = []
splitEvery n xs =
  let (h,t) = L.splitAt n xs
  in  h : splitEvery n t
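-- e.g. splitEvery 2 [1,2,3,4,5] == [[1,2],[3,4],[5]]
-- (with the new base case the recursion now terminates on [])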
@@ -235,5 +239,5 @@ unMaybe :: [Maybe a] -> [a]
unMaybe = map fromJust . L.filter isJust

-- maximumWith
maximumWith f = L.maximumBy (compare `on` f)
@@ -37,9 +37,6 @@ type Context = Text -> [Text]
data Viz = Graph | Phylo | Chart
pipeline :: Config -> Text -> Viz
pipeline = undefined
-----------------------------------------------------------------
-------------------------------------------------------------------
@@ -16,6 +16,7 @@ noApax m = M.filter (>1) m
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
@@ -23,16 +24,19 @@ module Gargantext.Text.Metrics
  where

import Data.Text (Text, pack)
import Data.Ord (comparing, Down(..))
import Data.Map (Map)
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import Data.Tuple.Extra (both)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
import Data.Array.Accelerate (toList)
import Math.KMeans (kmeans, euclidSq, elements)
import Gargantext.Prelude
@@ -46,26 +50,99 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
import qualified Data.Array.Accelerate.Interpreter as DAA
import qualified Data.Array.Accelerate as DAA
-- import Data.Array.Accelerate ((:.)(..), Z(..))
import GHC.Real (round)
import Debug.Trace
import Prelude (seq)
data MapListSize   = MapListSize Int
data InclusionSize = InclusionSize Int
data SampleBins    = SampleBins Double
data Clusters      = Clusters Int
data DefaultValue  = DefaultValue Int
data FilterConfig = FilterConfig { fc_mapListSize :: MapListSize
, fc_inclusionSize :: InclusionSize
, fc_sampleBins :: SampleBins
, fc_clusters :: Clusters
, fc_defaultValue :: DefaultValue
}
filterCooc :: Ord t => FilterConfig -> Map (t, t) Int -> Map (t, t) Int
filterCooc fc cc = (filterCooc' fc) ts cc
where
ts = map _scored_terms $ takeSome fc $ coocScored cc
filterCooc' :: Ord t => FilterConfig -> [t] -> Map (t, t) Int -> Map (t, t) Int
filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = -- trace ("coocScored " <> show (length ts)) $
foldl' (\m' k -> M.insert k (maybe dv identity $ M.lookup k m) m')
M.empty selection
where
selection = [(x,y) | x <- ts
, y <- ts
-- , x >= y
]
-- | Map list creation
-- Kmeans split into (Clusters::Int) main clusters with Inclusion/Exclusion (relevance score)
-- Sample the main cluster ordered by specificity/genericity in (SampleBins::Double) parts
-- each part is then ordered by Inclusion/Exclusion
-- take n scored terms in each part where n * SampleBins = MapListSize.
takeSome :: Ord t => FilterConfig -> [Scored t] -> [Scored t]
takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters k) _) scores = L.take l
$ takeSample n m
$ L.take l' $ sortWith (Down . _scored_incExc) scores
-- $ splitKmeans k scores
  where
    -- TODO: benchmark with accelerate-example kmeans version
    splitKmeans x xs = L.concat $ map elements
                     $ V.take (k-1)
                     $ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)]))
                              euclidSq x xs
    n = round ((fromIntegral l)/s)
    m = round $ (fromIntegral $ length scores) / (s)
takeSample n m xs = -- trace ("splitKmeans " <> show (length xs)) $
L.concat $ map (L.take n)
$ map (sortWith (Down . _scored_incExc))
-- TODO use kmeans s instead of splitEvery
-- in order to split in s heterogeneous parts
-- without homogeneous order hypothesis
$ splitEvery m
$ sortWith (Down . _scored_speGen) xs
data Scored t = Scored { _scored_terms :: !t
, _scored_incExc :: !InclusionExclusion
, _scored_speGen :: !SpecificityGenericity
} deriving (Show)
coocScored :: Ord t => Map (t,t) Int -> [Scored t]
coocScored m = zipWith (\(i,t) (inc,spe) -> Scored t inc spe) (M.toList fi) scores
where
(ti,fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat ti m
scores = DAA.toList $ DAA.run $ DAA.zip (DAA.use is) (DAA.use ss)
@@ -73,8 +150,7 @@ incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
  where
    (ti,fi) = createIndices m
    ordonne x = sortWith (Down . snd) $ zip (map snd $ M.toList fi) (toList x)
@@ -106,7 +182,7 @@ metrics_sentences_Test = metrics_sentences == metrics_sentences'
-}
metrics_terms :: IO [[Terms]]
metrics_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) metrics_text

-- | Occurrences
{-
@@ -75,14 +75,15 @@ type Grouped = Stems
type Occs = Int
type Coocs = Int
type Threshold = Int
removeApax :: Threshold -> Map (Label, Label) Int -> Map (Label, Label) Int
removeApax t = DMS.filter (> t)

cooc :: [[Terms]] -> Map (Label, Label) Int
cooc tss = coocOnWithLabel _terms_stem (useLabelPolicy label_policy) tss
  where
    terms_occs   = occurrencesOn _terms_stem (List.concat tss)
    label_policy = mkLabelPolicy terms_occs
coocOnWithLabel :: (Ord label, Ord b) => (a -> b) -> (b -> label)
@@ -93,10 +94,21 @@ coocOnWithLabel on policy tss =
    delta f = f *** f
mkLabelPolicy :: Map Grouped (Map Terms Occs) -> Map Grouped Label
mkLabelPolicy = DMS.map f where
f = _terms_label . fst . maximumWith snd . DMS.toList
-- TODO use the Foldable instance of Map instead of building a list
useLabelPolicy :: Map Grouped Label -> Grouped -> Label
useLabelPolicy m g = case DMS.lookup g m of
Just label -> label
Nothing -> panic $ "Label of Grouped not found: " <> (pack $ show g)
{-
labelPolicy :: Map Grouped (Map Terms Occs) -> Grouped -> Label
labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList <$> lookup g m of
  Just label -> label
  Nothing -> panic $ "Label of Grouped not found: " <> (pack $ show g)
-}

coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Coocs
coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
@@ -25,7 +25,7 @@ import Control.Applicative
import Data.Char (ord)
import Data.Csv
import Data.Either (Either(Left, Right))
import Data.Text (Text, pack, length, intercalate)
import qualified Data.ByteString.Lazy as BL
import Data.Vector (Vector)
@@ -68,9 +68,8 @@ fromDocs docs = V.map fromDocs' docs
-- | Split a document in its context
-- TODO adapt the size of the paragraph according to the corpus average
splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc m splt doc = let docSize = (length $ csv_abstract doc) in
  if docSize > 1000
    then
      if (mod (round m) docSize) >= 10
@@ -101,18 +100,18 @@ type Mean = Double
docsSize :: Vector CsvDoc -> Mean
docsSize csvDoc = mean ls
  where
    ls = V.toList $ V.map (fromIntegral . length . csv_abstract) csvDoc

---------------------------------------------------------------
data CsvDoc = CsvDoc
  { csv_title             :: !Text
  , csv_source            :: !Text
  , csv_publication_year  :: !Int
  , csv_publication_month :: !Int
  , csv_publication_day   :: !Int
  , csv_abstract          :: !Text
  , csv_authors           :: !Text
  }
  deriving (Show)
@@ -147,12 +146,19 @@ csvEncodeOptions = ( defaultEncodeOptions
  {encDelimiter = fromIntegral $ ord '\t'}
  )
------------------------------------------------------------------------
------------------------------------------------------------------------
readCsvOn :: [CsvDoc -> Text] -> FilePath -> IO [Text]
readCsvOn fields fp = V.toList <$> V.map (\l -> intercalate (pack " ") $ map (\field -> field l) fields)
<$> snd
<$> readCsv fp
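-- Usage sketch (hypothetical path): `readCsvOn [csv_title, csv_abstract] "corpus.csv"`
-- yields one Text per row, the selected fields joined by a space.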
------------------------------------------------------------------------
readCsv :: FilePath -> IO (Header, Vector CsvDoc)
readCsv fp = do
  csvData <- BL.readFile fp
  case decodeByNameWith csvDecodeOptions csvData of
    Left e -> panic (pack e)
    Right csvDocs -> pure csvDocs
@@ -42,23 +42,23 @@ import Gargantext.Core.Types
import Gargantext.Text.Terms.Multi (multiterms)
import Gargantext.Text.Terms.Mono (monoterms')
data TermType lang = Mono lang | Multi lang | MonoMulti lang
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------
-- | Sugar to extract terms from text (hiding mapM from the end user).
extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms termTypeLang = mapM (terms termTypeLang)
------------------------------------------------------------------------
-- | Terms from Text
-- Mono : mono terms
-- Multi : multi terms
-- MonoMulti : mono and multi
-- TODO : multi terms should exclude mono (intersection is not empty yet)
terms :: TermType Lang -> Text -> IO [Terms]
terms (Mono lang) txt = pure $ monoterms' lang txt
terms (Multi lang) txt = multiterms lang txt
terms (MonoMulti lang) txt = terms (Multi lang) txt
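-- Usage sketch (hypothetical input, not part of this commit):
--
-- >>> terms (Mono FR) "La culture est gratuite"
--
-- runs the mono-term extractor for French, while (Multi FR) would call the
-- multi-term extractor on the same text.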
------------------------------------------------------------------------
{-|
Module : Gargantext.TextFlow
Description : From text to viz
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
From text to viz, all the flow of texts in Gargantext.
-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.TextFlow
where
import qualified Data.Text as T
import Data.Text.IO (readFile)
import Control.Arrow ((***))
import Data.Map.Strict (Map)
import qualified Data.Array.Accelerate as A
import qualified Data.Map.Strict as M
import qualified Data.List as L
import Data.Tuple.Extra (both)
----------------------------------------------
import Gargantext.Core (Lang(FR))
import Gargantext.Core.Types (Label)
import Gargantext.Prelude
import Prelude (print, seq)
import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, map2mat, mat2map)
import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional, distributional)
import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Viz.Graph (Graph(..), Node(..), Edge(..), Attributes(..), TypeNode(..))
import Gargantext.Text.Metrics.Count (cooc)
import Gargantext.Text.Metrics
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Parsers.CSV
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain, LouvainNode(..))
{-
____ _ _
/ ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
| | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
\____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
|___/
-}
data TextFlow = CSV | FullText
-- workflow :: Lang (EN|FR) -> FilePath -> Graph
textflow termsLang workType path = do
-- Text <- IO Text <- FilePath
contexts <- case workType of
FullText -> splitBy (Sentences 5) <$> readFile path
CSV -> readCsvOn [csv_title, csv_abstract] path
-- Context :: Text -> [Text]
-- Contexts = Paragraphs n | Sentences n | Chars n
myterms <- extractTerms (Mono FR) contexts
-- TermsType = Mono | Multi | MonoMulti
-- myterms # filter (\t -> not . elem t stopList)
-- # groupBy (Stem|GroupList|Ontology)
printDebug "myterms" (sum $ map length myterms)
-- Building the map list
-- compute copresences of terms
-- Cooc = Map (Term, Term) Int
let myCooc1 = cooc myterms
printDebug "myCooc1" (M.size myCooc1)
-- Remove Apax: appears one time only => lightening the matrix
let myCooc2 = M.filter (>1) myCooc1
printDebug "myCooc2" (M.size myCooc2)
-- Filtering terms with Inclusion/Exclusion and Specificity/Genericity scores
let myCooc3 = filterCooc ( FilterConfig (MapListSize 1000 )
(InclusionSize 4000 )
(SampleBins 10 )
(Clusters 3 )
(DefaultValue 0 )
) myCooc2
printDebug "myCooc3" $ M.size myCooc3
-- Cooc -> Matrix
let (ti, fi) = createIndices myCooc3
printDebug "ti" $ M.size ti
let myCooc4 = toIndex ti myCooc3
printDebug "myCooc4" $ M.size myCooc4
let matCooc = map2mat (0) (M.size ti) myCooc4
--printDebug "matCooc" matCooc
-- Matrix -> Clustering
let distanceMat = conditional matCooc
-- let distanceMat = distributional matCooc
printDebug "distanceMat" $ A.arrayShape distanceMat
--printDebug "distanceMat" distanceMat
--
let distanceMap = mat2map distanceMat
printDebug "distanceMap" $ M.size distanceMap
--{-
-- let distance = fromIndex fi distanceMap
-- printDebug "distance" $ M.size distance
---}
partitions <- cLouvain distanceMap
------ | Building : -> Graph -> JSON
printDebug "partitions" $ length partitions
--printDebug "partitions" partitions
pure $ data2graph (M.toList ti) myCooc4 distanceMap partitions
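-- Usage sketch (hypothetical path): `textflow FR CSV "corpus.csv"` reads
-- titles and abstracts, extracts mono-terms, filters the cooccurrence map,
-- and returns a Graph of Louvain communities.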
-----------------------------------------------------------
-- distance should not be a map since we just "toList" it (same as cLouvain)
data2graph :: [(Label, Int)] -> Map (Int, Int) Int
-> Map (Int, Int) Double
-> [LouvainNode]
-> Graph
data2graph labels coocs distance partitions = Graph nodes edges
where
community_id_by_node_id = M.fromList [ (n, c) | LouvainNode n c <- partitions ]
nodes = [ Node { n_size = maybe 0 identity (M.lookup (n,n) coocs)
, n_type = Terms -- or Unknown
, n_id = cs (show n)
, n_label = T.unwords l
, n_attributes =
Attributes { clust_default = maybe 0 identity
(M.lookup n community_id_by_node_id) } }
| (l, n) <- labels ]
edges = [ Edge { e_source = s
, e_target = t
, e_weight = w
, e_id = i }
| (i, ((s,t), w)) <- zip [0..] (M.toList distance) ]
-----------------------------------------------------------
printDebug msg x = putStrLn $ msg <> " " <> show x
--printDebug _ _ = pure ()
@@ -19,11 +19,14 @@ module Gargantext.Viz.Graph
import GHC.Generics (Generic)
import Data.Aeson.TH (deriveJSON)
import Data.Text (Text)
import Data.Map (Map)

import Gargantext.Prelude
import Gargantext.Core.Utils.Prefix (unPrefix)
import Data.Graph.Clustering.Louvain.CplusPlus (LouvainNode)

data TypeNode = Terms | Unknown
  deriving (Show, Generic)
@@ -55,8 +58,6 @@ data Graph = Graph { g_nodes :: [Node]
  }
  deriving (Show, Generic)
$(deriveJSON (unPrefix "g_") ''Graph)
-----------------------------------------------------------
@@ -109,7 +109,6 @@ conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
  where
    ie :: Acc (Matrix Double) -> Acc (Matrix Double)
    ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
    sg :: Acc (Matrix Double) -> Acc (Matrix Double)
@@ -149,8 +148,6 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
    crossProduct m = zipWith (*) (cross m) (cross (transpose m))
    cross mat = zipWith (-) (mkSum n mat) (mat)
-----------------------------------------------------------------------
-----------------------------------------------------------------------
@@ -24,6 +24,8 @@ extra-deps:
- fullstop-0.1.4
- haskell-src-exts-1.18.2
- http-types-0.12.1
- kmeans-vector-0.3.2
- probable-0.1.3
- protolude-0.2
- servant-0.13
- servant-auth-0.3.0.1