Commit 99d4f1f3 authored by Alexandre Delanoë

[FEAT] Adding External module for IMT community manager

[CLEAN] adding signatures, removing unused imports, fixing shadowing...
parent e54de3ad
......@@ -6,3 +6,4 @@ doc
bin
deps
profiling
_darcs
......@@ -18,10 +18,10 @@ library:
ghc-options:
- -Wincomplete-uni-patterns
- -Wincomplete-record-updates
#- -Wmissing-signatures
#- -Wunused-binds
#- -Wunused-imports
# - -Werror
- -Wmissing-signatures
- -Wunused-binds
- -Wunused-imports
- -Werror
exposed-modules:
- Gargantext
- Gargantext.TextFlow
......
......@@ -56,7 +56,7 @@ import Network.Wai.Handler.Warp hiding (defaultSettings)
import Servant
import Servant.Mock (mock)
import Servant.Job.Server (WithCallbacks)
--import Servant.Job.Server (WithCallbacks)
import Servant.Swagger
import Servant.Swagger.UI
-- import Servant.API.Stream
......@@ -70,8 +70,8 @@ import Gargantext.API.Node ( Roots , roots
)
import Gargantext.API.Count ( CountAPI, count, Query)
import Gargantext.API.Search ( SearchAPI, search, SearchQuery)
import Gargantext.API.Orchestrator
import Gargantext.API.Orchestrator.Types
--import Gargantext.API.Orchestrator
--import Gargantext.API.Orchestrator.Types
---------------------------------------------------------------------
......
......@@ -27,17 +27,12 @@ module Gargantext.API.Search
import GHC.Generics (Generic)
import Control.Monad.IO.Class (liftIO)
import Prelude (Bounded, Enum, minBound, maxBound)
import Data.Aeson hiding (Error, fieldLabelModifier)
import Data.Aeson.TH (deriveJSON)
import Data.Eq (Eq())
import Data.Either
import Data.List (repeat, permutations)
import Data.Swagger
import Data.Swagger.SchemaOptions
import Data.Text (Text, pack)
import Data.Text (Text)
import Database.PostgreSQL.Simple (Connection)
import Servant
......@@ -102,7 +97,7 @@ type SearchAPI = Post '[JSON] SearchResults
search :: Connection -> SearchQuery -> Handler SearchResults
search c (SearchQuery q pId) =
liftIO $ SearchResults <$> map (\(i, y, t, s, a, _) -> SearchResult i (cs $ encode t) (cs $ encode a))
liftIO $ SearchResults <$> map (\(i, _, t, _, a, _) -> SearchResult i (cs $ encode t) (cs $ encode a))
<$> textSearch c (toTSQuery q) pId 5 0 Desc
......@@ -45,7 +45,7 @@ data Terms = Terms { _terms_label :: Label
} deriving (Ord)
instance Show Terms where
show (Terms l s) = show l
show (Terms l _) = show l
instance Eq Terms where
(==) (Terms _ s1) (Terms _ s2) = s1 == s2
......
......@@ -68,7 +68,6 @@ module Gargantext.Database ( module Gargantext.Database.Utils
where
import Gargantext.Core.Types
import Gargantext.Core.Types.Node
import Gargantext.Database.Utils (connectGargandb)
import Gargantext.Database.Node
import Gargantext.Prelude
......@@ -76,9 +75,9 @@ import Database.PostgreSQL.Simple (Connection)
import Data.Text (Text, pack)
import Opaleye hiding (FromField)
import Data.Aeson
import Data.ByteString (ByteString)
import Data.List (last, concat)
type UserId = Int
--type UserId = Int
--type NodeId = Int
-- List of NodeId
......@@ -102,8 +101,8 @@ ls = get
tree :: Connection -> PWD -> IO [Node Value]
tree c p = do
ns <- get c p
cs <- mapM (\p' -> get c [p']) $ map node_id ns
pure $ ns <> (concat cs)
children <- mapM (\p' -> get c [p']) $ map node_id ns
pure $ ns <> (concat children)
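As an aside, a hedged usage sketch of tree built on the home helper used later in this file (the binding name is hypothetical):
-- hypothetical helper: the nodes found at the user's home plus their children
treeFromHome :: Connection -> IO [Node Value]
treeFromHome c = do
  h <- home c
  tree c h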
-- | TODO
......@@ -112,22 +111,22 @@ post _ [] _ = pure 0
post _ _ [] = pure 0
post c pth ns = mkNode c (last pth) ns
postR :: Connection -> PWD -> [NodeWrite'] -> IO [Int]
postR _ [] _ = pure [0]
postR _ _ [] = pure [0]
postR c pth ns = mkNodeR c (last pth) ns
--postR :: Connection -> PWD -> [NodeWrite'] -> IO [Int]
--postR _ [] _ = pure [0]
--postR _ _ [] = pure [0]
--postR c pth ns = mkNodeR c (last pth) ns
--
rm :: Connection -> PWD -> [NodeId] -> IO Int
rm = del
--rm :: Connection -> PWD -> [NodeId] -> IO Int
--rm = del
del :: Connection -> PWD -> [NodeId] -> IO Int
del _ [] _ = pure 0
del _ _ [] = pure 0
del c pth ns = deleteNodes c ns
del :: Connection -> [NodeId] -> IO Int
del _ [] = pure 0
del c ns = deleteNodes c ns
put :: Connection -> PWD -> [a] -> IO Int64
put = undefined
-- | TODO
--put :: Connection -> PWD -> [a] -> IO Int64
--put = undefined
-- | TODO
-- cd (Home UserId) | (Node NodeId)
......@@ -196,11 +195,9 @@ postAnnuaire corpusName title ns = do
)
del' :: [NodeId] -> IO Int
del' ns = do
c <- connectGargandb "gargantext.ini"
h <- home c
del c h ns
del c ns
......@@ -217,7 +217,7 @@ leftJoin3''' = leftJoin3 queryNodeNodeTable queryNodeTable queryNodeTable cond12
-- | Building the facet
selectDocFacet' :: NodeType -> ParentId -> Maybe NodeType -> Query FacetDocRead
selectDocFacet' pt pId _ = proc () -> do
selectDocFacet' _ pId _ = proc () -> do
(n1,(nn,n2)) <- leftJoin3''' -< ()
restrict -< (.&&) (node_parentId n1 .== (toNullable $ pgInt4 pId))
(node_typename n1 .== (pgInt4 $ nodeTypeId Document))
......
......@@ -32,7 +32,6 @@ import Database.PostgreSQL.Simple.FromField ( Conversion
, returnError
)
import Prelude hiding (null, id, map, sum)
import Data.Time.Segment (jour, timesAfter, Granularity(D))
import Gargantext.Core.Types
import Gargantext.Core.Types.Node (NodeType)
......@@ -55,7 +54,7 @@ import Data.ByteString (ByteString)
import Database.PostgreSQL.Simple (Connection)
import Opaleye hiding (FromField)
import Opaleye.Internal.QueryArr (Query(..))
import Opaleye.Internal.QueryArr (Query)
import qualified Data.Profunctor.Product as PP
-- | Types for Node Database Management
data PGTSVector
......@@ -251,6 +250,14 @@ node userId parentId nodeType name nodeData = Node Nothing typeId userId parentI
typeId = nodeTypeId nodeType
byteData = DB.pack $ DBL.unpack $ encode nodeData
node2write :: (Functor f2, Functor f1) =>
Int
-> NodePoly (f1 Int) Int Int parentId Text (f2 UTCTime) ByteString
-> (f1 (Column PGInt4), Column PGInt4, Column PGInt4,
Column PGInt4, Column PGText, f2 (Column PGTimestamptz),
Column PGJsonb)
node2write pid (Node id tn ud _ nm dt hp) = ((pgInt4 <$> id)
,(pgInt4 tn)
,(pgInt4 ud)
......@@ -312,17 +319,24 @@ mkNode' conn ns = runInsertMany conn nodeTable' ns
mkNodeR' :: Connection -> [NodeWriteT] -> IO [Int]
mkNodeR' conn ns = runInsertManyReturning conn nodeTable' ns (\(i,_,_,_,_,_,_) -> i)
-- | postNode
postNode :: Connection -> UserId -> ParentId -> Node' -> IO [Int]
postNode c uid pid (Node' nt txt v []) = mkNodeR' c (node2table uid pid (Node' nt txt v []))
postNode c uid pid (Node' Corpus txt v ns) = do
[pid'] <- postNode c uid pid (Node' Corpus txt v [])
pids <- mkNodeR' c $ concat $ (map (\(Node' Document txt v _) -> node2table uid pid' $ Node' Document txt v []) ns)
pids <- mkNodeR' c $ concat $ map (\n -> childWith uid pid' n) ns
pure (pids)
postNode c uid pid (Node' Annuaire txt v ns) = do
[pid'] <- postNode c uid pid (Node' Annuaire txt v [])
pids <- mkNodeR' c $ concat $ (map (\(Node' UserPage txt v _) -> node2table uid pid' $ Node' UserPage txt v []) ns)
pids <- mkNodeR' c $ concat $ map (\n -> childWith uid pid' n) ns
pure (pids)
postNode c uid pid (Node' _ _ _ _) = panic $ pack "postNode for this type not implemented yet"
postNode _ _ _ (Node' _ _ _ _) = panic $ pack "postNode for this type not implemented yet"
childWith :: UserId -> ParentId -> Node' -> [NodeWriteT]
childWith uId pId (Node' Document txt v []) = node2table uId pId (Node' Document txt v [])
childWith uId pId (Node' UserPage txt v []) = node2table uId pId (Node' UserPage txt v [])
childWith _ _ (Node' _ _ _ _) = panic $ pack "This NodeType can not be a child"
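For illustration, a hedged sketch of calling the refactored postNode; the names are invented and the hyperdata payload is assumed to be the Aeson Value carried by Node', so it is taken as an argument rather than constructed here:
-- hypothetical usage: a corpus node with two document children, all sharing
-- the same payload v (assumed to be an Aeson Value, per Node')
postCorpusExample :: Connection -> UserId -> ParentId -> Value -> IO [Int]
postCorpusExample c uid pid v =
  postNode c uid pid $
    Node' Corpus (pack "IMT corpus") v
      [ Node' Document (pack "Doc 1") v []
      , Node' Document (pack "Doc 2") v []
      ]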
{-|
Module      : Gargantext.Ext.IMT
Description : External module for the IMT community manager
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ext.IMT where
import Gargantext.Prelude
import Data.Text (Text, pack, splitOn)
import Data.Map (Map)
import qualified Data.Set as S
import qualified Data.List as DL
import qualified Data.Vector as DV
import qualified Data.Map as M
import Gargantext.Text.Metrics.Freq as F
import Gargantext.Text.Parsers.CSV as CSV
data School = School { school_shortName :: Text
, school_longName :: Text
, school_id :: Text
} deriving (Show, Read, Eq)
schools :: [School]
schools = [ School
(pack "Mines Albi-Carmaux")
(pack "Mines Albi-Carmaux - École nationale supérieure des Mines d'Albi‐Carmaux")
(pack "469216")
, School
(pack "Mines Alès")
(pack "EMA - École des Mines d'Alès")
(pack "6279")
, School
(pack "Mines Douai")
(pack "Mines Douai EMD - École des Mines de Douai")
(pack "224096")
, School
(pack "Mines Nantes")
(pack "Mines Nantes - Mines Nantes")
(pack "84538")
-- , School
-- (pack "Mines ParisTech")
-- (pack "MINES ParisTech - École nationale supérieure des mines de Paris")
-- (pack "301492")
--
, School
(pack "Mines Saint-Étienne")
(pack "Mines Saint-Étienne MSE - École des Mines de Saint-Étienne")
(pack "29212")
, School
(pack "Télécom Bretagne")
(pack "Télécom Bretagne")
(pack "301262")
, School
(pack "Télécom École de Management")
(pack "TEM - Télécom Ecole de Management")
(pack "301442")
, School
(pack "Télécom ParisTech")
(pack "Télécom ParisTech")
(pack "300362")
, School
(pack "Télécom SudParis")
(pack "TSP - Télécom SudParis")
(pack "352124")
, School
(pack "IMT Atlantique")
(pack "IMT Atlantique - IMT Atlantique Bretagne-Pays de la Loire")
(pack "481355")
]
mapIdSchool :: Map Text Text
mapIdSchool = M.fromList $ Gargantext.Prelude.map (\(School n _ i) -> (i,n)) schools
hal_data :: IO (DV.Vector CsvHal)
hal_data = snd <$> CSV.readHal "doc/corpus_imt/Gargantext_Corpus.csv"
names :: S.Set Text
names = S.fromList $ Gargantext.Prelude.map (\s -> school_id s) schools
publisBySchool :: DV.Vector CsvHal -> [(Maybe Text, Int)]
publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSchool, n))
$ DL.filter (\i -> S.member (fst i) names)
$ DL.reverse
$ DL.sortOn snd
$ M.toList
$ F.freq
$ DL.concat
$ DV.toList
$ DV.map (\n -> splitOn (pack ", ") (csvHal_instStructId_i n) )
$ DV.filter (\n -> csvHal_publication_year n == 2017) hal_data'
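A minimal usage sketch for the new module (the binding name below is hypothetical): it plugs hal_data into publisBySchool to rank the schools by their 2017 publications.
-- hypothetical helper: rank IMT schools by number of 2017 HAL publications
publisBySchool2017 :: IO [(Maybe Text, Int)]
publisBySchool2017 = publisBySchool <$> hal_data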
......@@ -238,6 +238,7 @@ zipSnd f xs = zip xs (f xs)
unMaybe :: [Maybe a] -> [a]
unMaybe = map fromJust . L.filter isJust
-- maximumWith
-- | maximumWith
maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
maximumWith f = L.maximumBy (compare `on` f)
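A small hedged example of maximumWith (assuming the usual tuple helpers are re-exported by this prelude):
-- pick the pair whose second component is largest
maximumWithExample :: (Char, Int)
maximumWithExample = maximumWith snd [('a', 1), ('b', 3), ('c', 2)]  -- ('b',3)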
......@@ -17,27 +17,19 @@ Text gathers terms in unit of contexts.
module Gargantext.Text
where
import Data.Maybe
import qualified Data.Text as DT
import qualified Data.Set as S
import Data.Text (Text, split)
import qualified Data.Text as DT
import NLP.FullStop (segment)
-----------------------------------------------------------------
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Text.Metrics.Count (Occ, occurrences, cooc)
import Gargantext.Prelude hiding (filter)
-----------------------------------------------------------------
type Config = Lang -> Context
type Context = Text -> [Text]
data Viz = Graph | Phylo | Chart
-----------------------------------------------------------------
-------------------------------------------------------------------
-- Contexts of text
......
......@@ -17,7 +17,7 @@ Context of text management tool
module Gargantext.Text.Context
where
import Data.Text (Text, pack, unpack, length)
import Data.Text (Text, pack, unpack)
import Data.String (IsString)
import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
......@@ -27,7 +27,9 @@ import Gargantext.Prelude hiding (length)
data SplitContext = Chars Int | Sentences Int | Paragraphs Int
tag :: Text -> [Tag Text]
tag = parseTags
-- | splitBy contexts of Chars or Sentences or Paragraphs
-- >>> splitBy (Chars 0) "abcde"
-- ["a","b","c","d","e"]
......
......@@ -23,30 +23,28 @@ noApax m = M.filter (>1) m
module Gargantext.Text.Metrics
where
import Data.Text (Text, pack)
import Data.Ord (comparing, Down(..))
import Data.Map (Map)
import Data.Ord (Down(..))
import qualified Data.List as L
import Data.Map (Map)
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as VU
import Data.Tuple.Extra (both)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
import Data.Array.Accelerate (toList)
import Math.KMeans (kmeans, euclidSq, elements)
import Data.Array.Accelerate (toList, Matrix)
--import Math.KMeans (kmeans, euclidSq, elements)
import Gargantext.Prelude
import Gargantext.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core (Lang(EN))
import Gargantext.Core.Types (Terms(..))
import Gargantext.Core.Types (Terms(..), Label)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Metrics.Count (Grouped)
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
......@@ -56,8 +54,7 @@ import qualified Data.Array.Accelerate as DAA
import GHC.Real (round)
import Debug.Trace
import Prelude (seq)
--import Debug.Trace
data MapListSize = MapListSize Int
data InclusionSize = InclusionSize Int
......@@ -95,25 +92,25 @@ filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = -- trace ("coocScore
-- each parts is then ordered by Inclusion/Exclusion
-- take n scored terms in each parts where n * SampleBins = MapListSize.
takeSome :: Ord t => FilterConfig -> [Scored t] -> [Scored t]
takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters k) _) scores = L.take l
takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters _) _) scores = L.take l
$ takeSample n m
$ L.take l' $ sortWith (Down . _scored_incExc) scores
-- $ splitKmeans k scores
where
-- TODO: benchmark with accelerate-example kmeans version
splitKmeans x xs = L.concat $ map elements
$ V.take (k-1)
$ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)]))
euclidSq x xs
--splitKmeans x xs = L.concat $ map elements
-- $ V.take (k-1)
-- $ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)]))
-- euclidSq x xs
n = round ((fromIntegral l)/s)
m = round $ (fromIntegral $ length scores) / (s)
takeSample n m xs = -- trace ("splitKmeans " <> show (length xs)) $
L.concat $ map (L.take n)
takeSample n' m' xs = -- trace ("splitKmeans " <> show (length xs)) $
L.concat $ map (L.take n')
$ map (sortWith (Down . _scored_incExc))
-- TODO use kmeans s instead of splitEvery
-- in order to split in s heteregenous parts
-- without homogeneous order hypothesis
$ splitEvery m
$ splitEvery m'
$ sortWith (Down . _scored_speGen) xs
......@@ -125,7 +122,7 @@ data Scored t = Scored { _scored_terms :: !t
-- TODO in the textflow we end up needing these indices, it might be better
-- to compute them earlier and pass them around.
coocScored :: Ord t => Map (t,t) Int -> [Scored t]
coocScored m = zipWith (\(i,t) (inc,spe) -> Scored t inc spe) (M.toList fi) scores
coocScored m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (M.toList fi) scores
where
(ti,fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat ti m
......@@ -171,6 +168,7 @@ metrics_sentences = [ "There is a table with a glass of wine and a spoon."
, "I wish the glass did not contain wine."
]
metrics_sentences_Test :: Bool
metrics_sentences_Test = metrics_sentences == metrics_sentences'
-- | Terms reordered to visually check occurrences
......@@ -193,6 +191,7 @@ fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-}
metrics_occ :: IO (Map Grouped (Map Terms Int))
metrics_occ = occurrences <$> L.concat <$> metrics_terms
{-
......@@ -201,8 +200,10 @@ metrics_occ = occurrences <$> L.concat <$> metrics_terms
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-}
metrics_cooc :: IO (Map (Label, Label) Int)
metrics_cooc = cooc <$> metrics_terms
metrics_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
metrics_cooc_mat = do
m <- metrics_cooc
let (ti,_) = createIndices m
......@@ -213,5 +214,6 @@ metrics_cooc_mat = do
, incExcSpeGen mat_cooc
)
metrics_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
......@@ -29,21 +29,17 @@ module Gargantext.Text.Metrics.Count
where
import Control.Arrow ((***))
import Control.Arrow (Arrow(..), (***))
import qualified Data.List as List
import Data.Map.Strict (Map
, empty, singleton
, insertWith, insertWithKey, unionWith
, toList, lookup, mapKeys
import qualified Data.Map.Strict as DMS
import Data.Map.Strict ( Map, empty, singleton
, insertWith, unionWith
, mapKeys
)
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (pack)
import qualified Data.Map.Strict as DMS
import Control.Monad ((>>),(>>=))
import Data.String (String())
import Data.Attoparsec.Text
------------------------------------------------------------------------
import Gargantext.Prelude
......@@ -76,6 +72,7 @@ type Grouped = Stems
type Occs = Int
type Coocs = Int
type Threshold = Int
removeApax :: Threshold -> Map (Label, Label) Int -> Map (Label, Label) Int
removeApax t = DMS.filter (> t)
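A hedged example of the removeApax helper (the binding name is illustrative):
-- keep only co-occurrence pairs counted strictly more than twice
removeUpToTwice :: Map (Label, Label) Int -> Map (Label, Label) Int
removeUpToTwice = removeApax 2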
......@@ -88,9 +85,9 @@ cooc tss = coocOnWithLabel _terms_stem (useLabelPolicy label_policy) tss
coocOnWithLabel :: (Ord label, Ord b) => (a -> b) -> (b -> label)
-> [[a]] -> Map (label, label) Coocs
coocOnWithLabel on policy tss =
mapKeys (delta policy) $ coocOn on tss
coocOnWithLabel on' policy tss = mapKeys (delta policy) $ coocOn on' tss
where
delta :: Arrow a => a b' c' -> a (b', b') (c', c')
delta f = f *** f
......@@ -114,9 +111,9 @@ coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Coocs
coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
where
coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs
coocOn' f ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
coocOn' fun ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
where
ts' = List.nub $ map f ts
ts' = List.nub $ map fun ts
xs = [ ((x, y), 1)
| x <- ts'
, y <- ts'
......
{-|
Module : Gargantext.Text.Metrics.Freq
Description : Frequency counting helpers
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Basic frequency helpers: counting occurrences into a Map, extracting the most
frequent keys, and computing averages.
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Metrics.Freq where
import Gargantext.Prelude
import Data.Bool (otherwise)
import Data.Map (empty, Map, insertWith, toList)
import qualified Data.List as L
countElem :: (Ord k) => Data.Map.Map k Int -> k -> Data.Map.Map k Int
countElem m e = Data.Map.insertWith (+) e 1 m
freq :: (Ord k) => [k] -> Data.Map.Map k Int
freq = foldl countElem Data.Map.empty
getMaxFromMap :: Ord a => Map a1 a -> [a1]
getMaxFromMap m = go [] Nothing (toList m)
where
go ks _ [] = ks
go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
go ks (Just u) ((k,v):rest)
| v < u = go ks (Just u) rest
| v > u = go [k] (Just v) rest
| otherwise = go (k:ks) (Just v) rest
average :: [Double] -> Double
average x = L.sum x / L.genericLength x
average' :: [Int] -> Double
average' x = (L.sum y) / (L.genericLength y) where
y = L.map fromIntegral x
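Two hedged examples of these helpers (binding names are illustrative; the type annotations assume the Int/Double re-exports of Gargantext.Prelude):
-- frequency of each element: fromList [(1,1),(2,2),(3,3)]
freqExample :: Map Int Int
freqExample = freq [1, 2, 2, 3, 3, 3]

-- mean of the same list: 2.0
averageExample :: Double
averageExample = average' [1, 2, 2, 3, 3, 3]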
......@@ -32,7 +32,6 @@ import Data.Map.Strict (Map)
import qualified Data.Set as Set
import Data.Set (Set)
import qualified Data.Vector as V
import Data.Vector (Vector)
import Data.List (filter, concat)
import Data.Maybe (catMaybes)
......
......@@ -25,34 +25,18 @@ module Gargantext.Text.Parsers -- (parse, FileFormat(..))
import Gargantext.Prelude
import System.FilePath (takeExtension, FilePath())
import Data.Attoparsec.ByteString (parseOnly, Parser)
import qualified Data.ByteString as DB
import System.FilePath (FilePath())
import qualified Data.Map as DM
import Data.Either.Extra (partitionEithers)
import Data.Ord()
import Data.Foldable (concat)
import Data.String()
import Data.Either.Extra(Either())
import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8)
import qualified Data.Text as DT
----
--import Control.Monad (join)
import Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Path.IO (resolveFile')
------ import qualified Data.ByteString.Lazy as B
--import Control.Applicative ( (<$>) )
import Control.Concurrent.Async as CCA (mapConcurrently)
-- | Activate Async to parse in parallel
--import Control.Concurrent.Async as CCA (mapConcurrently)
import Data.String (String())
import Gargantext.Text.Parsers.WOS (wosParser)
---- import Gargantext.Parsers.XML (xmlParser)
---- import Gargantext.Parsers.DOC (docParser)
---- import Gargantext.Parsers.ODT (odtParser)
--import Gargantext.Types.Main (ErrorMessage(), Corpus)
type ParseError = String
type Field = Text
......
......@@ -147,7 +147,7 @@ instance FromNamedRecord CsvDoc where
<*> r .: "authors"
instance ToNamedRecord CsvDoc where
toNamedRecord (CsvDoc t s py pm pd abst aut) =
toNamedRecord (CsvDoc t s py pm pd abst aut) =
namedRecord [ "title" .= t
, "source" .= s
, "publication_year" .= py
......@@ -155,7 +155,7 @@ instance ToNamedRecord CsvDoc where
, "publication_day" .= pd
, "abstract" .= abst
, "authors" .= aut
]
]
csvDecodeOptions :: DecodeOptions
......@@ -184,7 +184,94 @@ readCsv fp = do
Right csvDocs -> pure csvDocs
readHal :: FilePath -> IO (Header, Vector CsvHal)
readHal fp = do
csvData <- BL.readFile fp
case decodeByNameWith csvDecodeOptions csvData of
Left e -> panic (pack e)
Right csvDocs -> pure csvDocs
------------------------------------------------------------------------
writeCsv :: FilePath -> (Header, Vector CsvDoc) -> IO ()
writeCsv fp (h, vs) = BL.writeFile fp $
encodeByNameWith csvEncodeOptions h (V.toList vs)
------------------------------------------------------------------------
-- Hal Format
data CsvHal = CsvHal
{ csvHal_title :: !Text
, csvHal_source :: !Text
, csvHal_publication_year :: !Int
, csvHal_publication_month :: !Int
, csvHal_publication_day :: !Int
, csvHal_abstract :: !Text
, csvHal_authors :: !Text
, csvHal_url :: !Text
, csvHal_isbn_s :: !Text
, csvHal_issue_s :: !Text
, csvHal_journalPublisher_s:: !Text
, csvHal_language_s :: !Text
, csvHal_doiId_s :: !Text
, csvHal_authId_i :: !Text
, csvHal_instStructId_i :: !Text
, csvHal_deptStructId_i :: !Text
, csvHal_labStructId_i :: !Text
, csvHal_rteamStructId_i :: !Text
, csvHal_docType_s :: !Text
}
deriving (Show)
instance FromNamedRecord CsvHal where
parseNamedRecord r = CsvHal <$> r .: "title"
<*> r .: "source"
<*> r .: "publication_year"
<*> r .: "publication_month"
<*> r .: "publication_day"
<*> r .: "abstract"
<*> r .: "authors"
<*> r .: "url"
<*> r .: "isbn_s"
<*> r .: "issue_s"
<*> r .: "journalPublisher_s"
<*> r .: "language_s"
<*> r .: "doiId_s"
<*> r .: "authId_i"
<*> r .: "instStructId_i"
<*> r .: "deptStructId_i"
<*> r .: "labStructId_i"
<*> r .: "rteamStructId_i"
<*> r .: "docType_s"
instance ToNamedRecord CsvHal where
toNamedRecord (CsvHal t s py pm pd abst aut url isbn iss jour lang doi auth inst dept lab team doct) =
namedRecord [ "title" .= t
, "source" .= s
, "publication_year" .= py
, "publication_month" .= pm
, "publication_day" .= pd
, "abstract" .= abst
, "authors" .= aut
, "url" .= url
, "isbn_s" .= isbn
, "issue_s" .= iss
, "journalPublisher_s" .= jour
, "language_s" .= lang
, "doiId_s" .= doi
, "authId_i" .= auth
, "instStructId_i" .= inst
, "deptStructId_i" .= dept
, "labStructId_i" .= lab
, "rteamStructId_i" .= team
, "docType_s" .= doct
]
......@@ -24,7 +24,7 @@ import Gargantext.Core.Types
import Gargantext.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import Data.Char (isAlphaNum, isSpace)
--import Data.Char (isAlphaNum, isSpace)
monoterms' :: Lang -> Text -> [Terms]
monoterms' l txt = map (text2terms l) $ monoterms txt
......@@ -40,9 +40,9 @@ text2terms lang txt = Terms label stems
label = splitOn (pack " ") txt
stems = S.fromList $ map (stem lang) label
--monograms :: Text -> [Text]
--monograms :: Text -> [Text]
--monograms xs = monograms $ toLower $ filter isGram xs
isGram :: Char -> Bool
isGram c = isAlphaNum c || isSpace c || c `elem` ['-','/','\'']
--isGram :: Char -> Bool
--isGram c = isAlphaNum c || isSpace c || c `elem` ['-','/','\'']
......@@ -23,20 +23,20 @@ module Gargantext.Text.Terms.Mono.Token (tokenize)
import Data.Text (Text)
import qualified Gargantext.Text.Terms.Mono.Token.En as En
import Gargantext.Core (Lang(..))
import Gargantext.Prelude
-- | Contexts depend on the lang
--import Gargantext.Core (Lang(..))
type Token = Text
-- >>> tokenize "A rose is a rose is a rose."
-- ["A","rose","is","a","rose","is","a","rose", "."]
--
data Context = Letter | Word | Sentence | Line | Paragraph
tokenize :: Text -> [Token]
tokenize = En.tokenize
tokenize' :: Lang -> Context -> [Token]
tokenize' = undefined
--data Context = Letter | Word | Sentence | Line | Paragraph
--
--tokenize' :: Lang -> Context -> [Token]
--tokenize' = undefined
--
......@@ -17,28 +17,26 @@ From text to viz, all the flow of texts in Gargantext.
module Gargantext.TextFlow
where
import GHC.IO (FilePath)
import qualified Data.Text as T
import Data.Text.IO (readFile)
import Control.Arrow ((***))
import Control.Monad.IO.Class (MonadIO)
import Data.Map.Strict (Map)
import qualified Data.Array.Accelerate as A
import qualified Data.Map.Strict as M
import qualified Data.List as L
import Data.Tuple.Extra (both)
----------------------------------------------
import Gargantext.Core (Lang(FR))
import Gargantext.Core.Types (Label)
import Gargantext.Prelude
import Prelude (print, seq)
import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, map2mat, mat2map)
import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional, distributional)
import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map)
import Gargantext.Viz.Graph.Distances.Matrice (conditional)
import Gargantext.Viz.Graph (Graph(..), Node(..), Edge(..), Attributes(..), TypeNode(..))
import Gargantext.Text.Metrics.Count (cooc)
import Gargantext.Text.Metrics
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Terms (TermType(Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Parsers.CSV
......@@ -56,13 +54,15 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain, LouvainNode(..))
-}
printDebug :: (Show a, MonadIO m) => [Char] -> a -> m ()
printDebug msg x = putStrLn $ msg <> " " <> show x
--printDebug _ _ = pure ()
data TextFlow = CSV | FullText
-- workflow :: Lang (EN|FR) -> FilePath -> Graph
textflow termsLang workType path = do
textflow :: Lang -> TextFlow -> FilePath -> IO Graph
textflow _ workType path = do
-- Text <- IO Text <- FilePath
contexts <- case workType of
FullText -> splitBy (Sentences 5) <$> readFile path
......@@ -97,7 +97,7 @@ textflow termsLang workType path = do
printDebug "myCooc3" $ M.size myCooc3
-- Cooc -> Matrix
let (ti, fi) = createIndices myCooc3
let (ti, _) = createIndices myCooc3
printDebug "ti" $ M.size ti
let myCooc4 = toIndex ti myCooc3
......@@ -145,6 +145,6 @@ data2graph labels coocs distance partitions = Graph nodes edges
, edge_target = cs (show t)
, edge_weight = w
, edge_id = cs (show i) }
| (i, ((s,t), w)) <- zip [0..] (M.toList distance) ]
| (i, ((s,t), w)) <- zip ([0..]::[Integer]) (M.toList distance) ]
-----------------------------------------------------------
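For reference, a minimal sketch of calling textflow with its new explicit signature; the corpus path is a placeholder:
-- hypothetical entry point: build a graph from a CSV corpus in French
testFlowCSV :: IO Graph
testFlowCSV = textflow FR CSV "data/corpus.csv"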
......@@ -27,13 +27,9 @@ import Data.Text (Text)
import qualified Text.Read as T
import qualified Data.Text as T
import Data.Map (Map)
import Gargantext.Prelude
import Gargantext.Core.Utils.Prefix (unPrefix)
import Data.Graph.Clustering.Louvain.CplusPlus (LouvainNode)
data TypeNode = Terms | Unknown
deriving (Show, Generic)
......
......@@ -18,18 +18,14 @@ module Gargantext.Viz.Graph.Distances.Conditional
where
import Data.Matrix hiding (identity)
import Data.String.Conversions (ConvertibleStrings(..))
import Data.List (concat, sortOn)
import qualified Data.List as L
import Data.List (sortOn)
import Data.Map (Map)
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import Data.Vector (Vector)
import qualified Data.Vector as V
import Gargantext.Prelude
......@@ -82,7 +78,7 @@ opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
-------------------------------------------------------
conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
conditional m = filter (threshold m') m'
conditional m = filterMat (threshold m') m'
where
------------------------------------------------------------------------
-- | Main Operations
......@@ -111,24 +107,24 @@ conditional m = filter (threshold m') m'
$ nodes_included k <> nodes_specific k
nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
nodes_specific m = take m $ sortOn snd $ toListsWithIndex sg
nodes_specific n = take n $ sortOn snd $ toListsWithIndex sg
insert as s = foldl' (\s' a -> S.insert a s') s as
k' = 2*k
k = 10
dico_nodes :: Map Int Int
dico_nodes = M.fromList $ zip [1..] nodes_kept
dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
dico_nodes = M.fromList $ zip ([1..] :: [Int]) nodes_kept
--dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
m' = matrix (length nodes_kept)
(length nodes_kept)
(\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
threshold m = V.minimum $ V.map (\cId -> V.maximum $ getCol cId m) (V.enumFromTo 1 (nOf Col m))
threshold m'' = V.minimum $ V.map (\cId -> V.maximum $ getCol cId m'') (V.enumFromTo 1 (nOf Col m''))
filter t m = mapAll (\x -> filter' t x) m
filterMat t m'' = mapAll (\x -> filter' t x) m''
where
filter' t x = case (x >= t) of
filter' t' x = case (x >= t') of
True -> x
False -> 0
......
......@@ -20,14 +20,9 @@ module Gargantext.Viz.Graph.Distances.Distributional
where
import Data.Matrix hiding (identity)
import Data.String.Conversions (ConvertibleStrings(..))
import Data.Map (Map)
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import Data.Vector (Vector)
import qualified Data.Vector as V
......@@ -64,11 +59,11 @@ mi m = matrix c r createMat
where
(c,r) = (nOf Col m, nOf Row m)
createMat (x,y) = doMi x y m
doMi x y m = if x == y then 0 else (max (log (doMi' x y m)) 0 )
doMi x y m' = if x == y then 0 else (max (log (doMi' x y m')) 0 )
doMi' x y m = (getElem x y m) / ( cross x y m / total m )
doMi' x y m' = (getElem x y m) / ( cross x y m / total m' )
cross x y m = (V.sum $ ax Col x y m) * (V.sum $ ax Row x y m)
cross x y m' = (V.sum $ ax Col x y m) * (V.sum $ ax Row x y m')
......
......@@ -34,25 +34,18 @@ Implementation use Accelerate library :
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Viz.Graph.Distances.Matrice
where
import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import Data.Array.Accelerate.Smart
import Data.Array.Accelerate.Type
import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
import Data.Maybe (Maybe(Just))
import qualified Gargantext.Prelude as P
import qualified Data.Array.Accelerate.Array.Representation as Repr
import Gargantext.Text.Metrics.Count
-----------------------------------------------------------------------
-- Test perf.
distriTest :: Matrix Double
distriTest = distributional $ myMat 100
-----------------------------------------------------------------------
......@@ -133,7 +126,7 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
where
n = dim m
filter m = zipWith (\a b -> max a b) m (transpose m)
-- filter m = zipWith (\a b -> max a b) m (transpose m)
ri mat = zipWith (/) mat1 mat2
where
......@@ -145,7 +138,7 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
crossProduct m''' = zipWith (*) (cross m''' ) (cross (transpose m'''))
cross mat = zipWith (-) (mkSum n mat) (mat)
-----------------------------------------------------------------------
......@@ -207,11 +200,11 @@ p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e
p_ij m = zipWith (/) m (n_jj m)
where
n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_jj m = backpermute (shape m)
n_jj myMat' = backpermute (shape m)
(lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
-> (Z :. j :. j)
)
) m
) myMat'
-- | P(j|i) = Nij /N(ii) Probability to get i given j
-- to test
......
......@@ -21,7 +21,7 @@ TODO:
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.Viz.Graph.Index
where
......@@ -30,7 +30,6 @@ import qualified Data.Array.Accelerate as A
import qualified Data.Array.Accelerate.Interpreter as A
import Data.Array.Accelerate (Matrix, Elt, Shape, (:.)(..), Z(..))
import qualified Data.Vector.Unboxed as DVU
import Data.Maybe (fromMaybe)
import Data.Set (Set)
......@@ -71,7 +70,7 @@ mat2map :: (Elt a, Shape (Z :. Index)) =>
A.Array (Z :. Index :. Index) a -> Map (Index, Index) a
mat2map m = M.fromList . map f . A.toList . A.run . A.indexed $ A.use m
where
Z :. _ :. n = A.arrayShape m
-- Z :. _ :. n = A.arrayShape m
f ((Z :. i :. j), x) = ((i, j), x)
-------------------------------------------------------------------------------
......@@ -88,8 +87,8 @@ indexConversion index ms = M.fromList $ map (\((k1,k2),c) -> ( ((M.!) index k1,
-------------------------------------------------------------------------------
-- TODO
fromIndex' :: Ord t => Vector t -> Map (Index, Index) a -> Map (t,t) a
fromIndex' vi ns = undefined
--fromIndex' :: Ord t => Vector t -> Map (Index, Index) a -> Map (t,t) a
--fromIndex' vi ns = undefined
-- TODO
createIndices' :: Ord t => Map (t, t) b -> (Map t Index, Vector t)
......
......@@ -21,12 +21,6 @@ module Gargantext.Viz.Graph.Utils
import Data.Matrix hiding (identity)
import Data.Map (Map)
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import Data.Vector (Vector)
import qualified Data.Vector as V
......