Commit 4b12a41d authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Index with TermList] compiles but weird behavior.

parent c37cdf4b
...@@ -24,24 +24,38 @@ module Main where ...@@ -24,24 +24,38 @@ module Main where
import qualified Data.Vector as DV import qualified Data.Vector as DV
import Gargantext.Prelude
import Data.Text (Text) import Data.Text (Text)
import System.Environment import System.Environment
--import Control.Concurrent.Async as CCA (mapConcurrently)
import Gargantext.Text.Parsers.CSV (readCsv, csv_abstract) import Gargantext.Prelude
import Gargantext.Text.List.CSV (fromCsvListFile) import Gargantext.Text.Context
import Gargantext.Text.Terms
import Gargantext.Text.Terms.WithList
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract)
import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Terms (terms)
import Gargantext.Text.Metrics.Count (cooc)
main :: IO () main :: IO ()
main = do main = do
[corpusFile, termListFile, outputFile] <- getArgs [corpusFile, termListFile, outputFile] <- getArgs
-- corpus :: [Text] -- corpus :: [Text]
corpus <- DV.toList . fmap csv_abstract . snd <$> readCsv corpusFile corpus <- DV.toList <$> map (\n -> (csv_title n) <> " " <> (csv_abstract n))
<$> snd
<$> readCsv corpusFile
putStrLn $ show $ length corpus
-- termListMap :: [Text] -- termListMap :: [Text]
termList <- termListMap <$> fromCsvListFile termListFile termList <- csvGraphTermList termListFile
putStrLn $ show $ length termList
let corpusIndexed = indexCorpusWith corpus termList corpusIndexed <- mapM (terms (WithList $ buildPatterns termList)) corpus
let cooc = cooccurrences corpusIndexed
putStrLn $ show corpusIndexed
let myCooc = cooc corpusIndexed
writeFile outputFile cooc putStrLn $ show myCooc
--writeFile outputFile cooc
...@@ -24,15 +24,19 @@ library: ...@@ -24,15 +24,19 @@ library:
- -Werror - -Werror
exposed-modules: exposed-modules:
- Gargantext - Gargantext
- Gargantext.TextFlow - Gargantext.API
- Gargantext.Prelude
- Gargantext.Core - Gargantext.Core
- Gargantext.Core.Types - Gargantext.Core.Types
- Gargantext.Prelude
- Gargantext.Text - Gargantext.Text
- Gargantext.Text.Context
- Gargantext.Text.List.CSV - Gargantext.Text.List.CSV
- Gargantext.Text.Search - Gargantext.Text.Metrics.Count
- Gargantext.Text.Parsers.CSV - Gargantext.Text.Parsers.CSV
- Gargantext.API - Gargantext.Text.Search
- Gargantext.Text.Terms
- Gargantext.Text.Terms.WithList
- Gargantext.TextFlow
- Gargantext.Viz.Graph.Distances.Matrice - Gargantext.Viz.Graph.Distances.Matrice
dependencies: dependencies:
- QuickCheck - QuickCheck
......
...@@ -126,7 +126,7 @@ getNodesWith' conn id nodeType offset limit = liftIO (getNodesWith conn id node ...@@ -126,7 +126,7 @@ getNodesWith' conn id nodeType offset limit = liftIO (getNodesWith conn id node
getFacet :: Connection -> NodeId -> Maybe Int -> Maybe Int getFacet :: Connection -> NodeId -> Maybe Int -> Maybe Int
-> Handler [FacetDoc] -> Handler [FacetDoc]
getFacet conn id offset limit = liftIO (putStrLn ( "/facet" :: Text)) >> liftIO (getDocFacet conn Corpus id (Just Document) offset limit) getFacet conn id offset limit = liftIO (putStrLn ( "/facet" :: Text)) >> liftIO (getDocFacet conn NodeCorpus id (Just Document) offset limit)
getChart :: Connection -> NodeId -> Maybe UTCTime -> Maybe UTCTime getChart :: Connection -> NodeId -> Maybe UTCTime -> Maybe UTCTime
-> Handler [FacetChart] -> Handler [FacetChart]
......
...@@ -59,7 +59,7 @@ projectTree = NodeT Project [corpusTree] ...@@ -59,7 +59,7 @@ projectTree = NodeT Project [corpusTree]
-- | Corpus Tree -- | Corpus Tree
corpusTree :: Tree NodeType corpusTree :: Tree NodeType
corpusTree = NodeT Corpus ( [ leafT Document ] corpusTree = NodeT NodeCorpus ( [ leafT Document ]
<> [ leafT Lists ] <> [ leafT Lists ]
<> [ leafT Metrics ] <> [ leafT Metrics ]
<> [ leafT Classification] <> [ leafT Classification]
...@@ -81,7 +81,7 @@ data Lists = StopList | MainList | MapList | GroupList ...@@ -81,7 +81,7 @@ data Lists = StopList | MainList | MapList | GroupList
-- | Community Manager Use Case -- | Community Manager Use Case
type Annuaire = Corpus type Annuaire = NodeCorpus
type Individu = Document type Individu = Document
-- | Favorites Node enable Node categorization -- | Favorites Node enable Node categorization
...@@ -120,7 +120,7 @@ type Notebook = Node HyperdataNotebook ...@@ -120,7 +120,7 @@ type Notebook = Node HyperdataNotebook
nodeTypes :: [(NodeType, NodeTypeId)] nodeTypes :: [(NodeType, NodeTypeId)]
nodeTypes = [ (NodeUser , 1) nodeTypes = [ (NodeUser , 1)
, (Folder , 2) , (Folder , 2)
, (Corpus , 30) , (NodeCorpus , 30)
, (Annuaire , 31) , (Annuaire , 31)
, (Document , 40) , (Document , 40)
, (UserPage , 41) , (UserPage , 41)
......
...@@ -232,11 +232,11 @@ type NodeName = Text ...@@ -232,11 +232,11 @@ type NodeName = Text
type NodeUser = Node HyperdataUser type NodeUser = Node HyperdataUser
type Folder = Node HyperdataFolder type Folder = Node HyperdataFolder
type Project = Folder -- NP Node HyperdataProject ? type Project = Folder -- NP Node HyperdataProject ?
type Corpus = Node HyperdataCorpus type NodeCorpus = Node HyperdataCorpus
type Document = Node HyperdataDocument type Document = Node HyperdataDocument
------------------------------------------------------------------------ ------------------------------------------------------------------------
data NodeType = NodeUser | Project | Folder | Corpus | Annuaire | Document | UserPage | DocumentCopy | Favorites data NodeType = NodeUser | Project | Folder | NodeCorpus | Annuaire | Document | UserPage | DocumentCopy | Favorites
| Classification | Classification
| Lists | Lists
| Metrics | Occurrences | Metrics | Occurrences
......
...@@ -160,7 +160,7 @@ post' = do ...@@ -160,7 +160,7 @@ post' = do
c <- connectGargandb "gargantext.ini" c <- connectGargandb "gargantext.ini"
pid <- last <$> home c pid <- last <$> home c
let uid = 1 let uid = 1
postNode c uid pid ( Node' Corpus (pack "Premier corpus") (toJSON (pack "{}"::Text)) [ Node' Document (pack "Doc1") (toJSON (pack "{}" :: Text)) [] postNode c uid pid ( Node' NodeCorpus (pack "Premier corpus") (toJSON (pack "{}"::Text)) [ Node' Document (pack "Doc1") (toJSON (pack "{}" :: Text)) []
, Node' Document (pack "Doc2") (toJSON (pack "{}" :: Text)) [] , Node' Document (pack "Doc2") (toJSON (pack "{}" :: Text)) []
, Node' Document (pack "Doc3") (toJSON (pack "{}" :: Text)) [] , Node' Document (pack "Doc3") (toJSON (pack "{}" :: Text)) []
] ]
...@@ -178,7 +178,7 @@ postCorpus corpusName title ns = do ...@@ -178,7 +178,7 @@ postCorpus corpusName title ns = do
c <- connectGargandb "gargantext.ini" c <- connectGargandb "gargantext.ini"
pid <- last <$> home c pid <- last <$> home c
let uid = 1 let uid = 1
postNode c uid pid ( Node' Corpus corpusName (toJSON (pack "{}"::Text)) postNode c uid pid ( Node' NodeCorpus corpusName (toJSON (pack "{}"::Text))
(map (\n -> Node' Document (title n) (toJSON n) []) ns) (map (\n -> Node' Document (title n) (toJSON n) []) ns)
) )
......
...@@ -323,8 +323,8 @@ mkNodeR' conn ns = runInsertManyReturning conn nodeTable' ns (\(i,_,_,_,_,_,_) - ...@@ -323,8 +323,8 @@ mkNodeR' conn ns = runInsertManyReturning conn nodeTable' ns (\(i,_,_,_,_,_,_) -
postNode :: Connection -> UserId -> ParentId -> Node' -> IO [Int] postNode :: Connection -> UserId -> ParentId -> Node' -> IO [Int]
postNode c uid pid (Node' nt txt v []) = mkNodeR' c (node2table uid pid (Node' nt txt v [])) postNode c uid pid (Node' nt txt v []) = mkNodeR' c (node2table uid pid (Node' nt txt v []))
postNode c uid pid (Node' Corpus txt v ns) = do postNode c uid pid (Node' NodeCorpus txt v ns) = do
[pid'] <- postNode c uid pid (Node' Corpus txt v []) [pid'] <- postNode c uid pid (Node' NodeCorpus txt v [])
pids <- mkNodeR' c $ concat $ map (\n -> childWith uid pid' n) ns pids <- mkNodeR' c $ concat $ map (\n -> childWith uid pid' n) ns
pure (pids) pure (pids)
......
...@@ -30,6 +30,8 @@ import Gargantext.Prelude hiding (length) ...@@ -30,6 +30,8 @@ import Gargantext.Prelude hiding (length)
type Term = Text type Term = Text
type Label = Term type Label = Term
type TermList = [(Label, [[Term]])]
type Sentence a = [a] -- or a nominal group type Sentence a = [a] -- or a nominal group
type Corpus a = [Sentence a] -- a list of sentences type Corpus a = [Sentence a] -- a list of sentences
......
...@@ -22,21 +22,28 @@ import GHC.IO (FilePath) ...@@ -22,21 +22,28 @@ import GHC.IO (FilePath)
import Control.Applicative import Control.Applicative
import Control.Monad (mzero) import Control.Monad (mzero)
import Data.Char (ord) import Data.Char (ord, isSpace)
import Data.Csv import Data.Csv
import Data.Either (Either(Left, Right)) import Data.Either (Either(Left, Right))
import Data.Text (Text, pack) import Data.Text (Text, pack)
import qualified Data.Text as DT
import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy as BL
import Data.Vector (Vector) import Data.Vector (Vector)
import qualified Data.Vector as V import qualified Data.Vector as V
import Gargantext.Prelude hiding (length) import Gargantext.Prelude hiding (length)
-- import Gargantext.Text.List.Types import Gargantext.Text.Context
------------------------------------------------------------------------ ------------------------------------------------------------------------
--csv2lists :: Vector CsvList -> Lists csvGraphTermList :: FilePath -> IO TermList
--csv2lists v = V.foldl' (\e (CsvList listType label forms) -> insertLists lt label forms e) emptyLists v csvGraphTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp
csv2list :: CsvListType -> Vector CsvList -> TermList
csv2list lt vs = V.toList $ V.map (\(CsvList _ label forms)
-> (label, map (DT.split isSpace) $ DT.splitOn csvListFormsDelimiter forms))
$ V.filter (\l -> csvList_status l == lt ) vs
------------------------------------------------------------------------ ------------------------------------------------------------------------
data CsvListType = CsvMap | CsvStop | CsvCandidate data CsvListType = CsvMap | CsvStop | CsvCandidate
......
...@@ -22,9 +22,7 @@ import Data.Map (Map, empty, fromList) ...@@ -22,9 +22,7 @@ import Data.Map (Map, empty, fromList)
import Gargantext.Prelude import Gargantext.Prelude
------------------------------------------------------------------- -------------------------------------------------------------------
type Label = Text data ListType = GraphList | StopList | CandidateList
data ListType = Map | Stop | Candidate
deriving (Show, Eq, Ord, Enum, Bounded) deriving (Show, Eq, Ord, Enum, Bounded)
type Lists = Map ListType (Map Text [Text]) type Lists = Map ListType (Map Text [Text])
......
...@@ -44,13 +44,15 @@ import Gargantext.Text.Terms.Multi (multiterms) ...@@ -44,13 +44,15 @@ import Gargantext.Text.Terms.Multi (multiterms)
import Gargantext.Text.Terms.Mono (monoTerms) import Gargantext.Text.Terms.Mono (monoTerms)
import Gargantext.Text.Terms.WithList (Patterns, extractTermsWithList) import Gargantext.Text.Terms.WithList (Patterns, extractTermsWithList)
data TermType lang = Mono lang | Multi lang | MonoMulti lang | WithList Patterns data TermType lang = Mono lang | Multi lang | MonoMulti lang | WithList Patterns
-- remove Stop Words -- remove Stop Words
-- map (filter (\t -> not . elem t)) $ -- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Sugar to extract terms from text (hiddeng mapM from end user). -- | Sugar to extract terms from text (hiddeng mapM from end user).
extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms]) --extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms :: TermType Lang -> [Text] -> IO [[Terms]]
extractTerms termTypeLang = mapM (terms termTypeLang) extractTerms termTypeLang = mapM (terms termTypeLang)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Terms from Text -- | Terms from Text
......
...@@ -40,7 +40,6 @@ import Gargantext.Prelude ...@@ -40,7 +40,6 @@ import Gargantext.Prelude
isSep :: Char -> Bool isSep :: Char -> Bool
isSep = (`elem` (",.:;?!(){}[]\"" :: String)) isSep = (`elem` (",.:;?!(){}[]\"" :: String))
monoTerms :: Lang -> Text -> [Terms] monoTerms :: Lang -> Text -> [Terms]
monoTerms l txt = map (monoText2term l) $ monoTexts txt monoTerms l txt = map (monoText2term l) $ monoTexts txt
...@@ -50,7 +49,6 @@ monoTexts = L.concat . monoTextsBySentence ...@@ -50,7 +49,6 @@ monoTexts = L.concat . monoTextsBySentence
monoText2term :: Lang -> Text -> Terms monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms [txt] (S.singleton $ stem lang txt) monoText2term lang txt = Terms [txt] (S.singleton $ stem lang txt)
monoTextsBySentence :: Text -> [[Text]] monoTextsBySentence :: Text -> [[Text]]
monoTextsBySentence = map (T.split isSpace) monoTextsBySentence = map (T.split isSpace)
. T.split isSep . T.split isSep
...@@ -58,4 +56,3 @@ monoTextsBySentence = map (T.split isSpace) ...@@ -58,4 +56,3 @@ monoTextsBySentence = map (T.split isSpace)
...@@ -27,11 +27,12 @@ import Gargantext.Prelude ...@@ -27,11 +27,12 @@ import Gargantext.Prelude
import Data.List (concatMap) import Data.List (concatMap)
------------------------------------------------------------------------
type Pattern = KMP.Table Term type Pattern = KMP.Table Term
type TermList = [(Label, [[Term]])]
type Patterns = [(Pattern, Int, Label)] type Patterns = [(Pattern, Int, Label)]
------------------------------------------------------------------------
replaceTerms :: Patterns -> Sentence Term -> Sentence Label replaceTerms :: Patterns -> Sentence Term -> Sentence Label
replaceTerms pats terms = go 0 terms replaceTerms pats terms = go 0 terms
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment