Commit 4b12a41d authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Index with TermList] compiles but weird behavior.

parent c37cdf4b
......@@ -24,24 +24,38 @@ module Main where
import qualified Data.Vector as DV
import Gargantext.Prelude
import Data.Text (Text)
import System.Environment
--import Control.Concurrent.Async as CCA (mapConcurrently)
import Gargantext.Text.Parsers.CSV (readCsv, csv_abstract)
import Gargantext.Text.List.CSV (fromCsvListFile)
import Gargantext.Prelude
import Gargantext.Text.Context
import Gargantext.Text.Terms
import Gargantext.Text.Terms.WithList
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract)
import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Terms (terms)
import Gargantext.Text.Metrics.Count (cooc)
main :: IO ()
main = do
[corpusFile, termListFile, outputFile] <- getArgs
-- corpus :: [Text]
corpus <- DV.toList . fmap csv_abstract . snd <$> readCsv corpusFile
corpus <- DV.toList <$> map (\n -> (csv_title n) <> " " <> (csv_abstract n))
<$> snd
<$> readCsv corpusFile
putStrLn $ show $ length corpus
-- termListMap :: [Text]
termList <- termListMap <$> fromCsvListFile termListFile
termList <- csvGraphTermList termListFile
putStrLn $ show $ length termList
corpusIndexed <- mapM (terms (WithList $ buildPatterns termList)) corpus
let corpusIndexed = indexCorpusWith corpus termList
let cooc = cooccurrences corpusIndexed
putStrLn $ show corpusIndexed
let myCooc = cooc corpusIndexed
writeFile outputFile cooc
putStrLn $ show myCooc
--writeFile outputFile cooc
......@@ -24,15 +24,19 @@ library:
- -Werror
exposed-modules:
- Gargantext
- Gargantext.TextFlow
- Gargantext.Prelude
- Gargantext.API
- Gargantext.Core
- Gargantext.Core.Types
- Gargantext.Prelude
- Gargantext.Text
- Gargantext.Text.Context
- Gargantext.Text.List.CSV
- Gargantext.Text.Search
- Gargantext.Text.Metrics.Count
- Gargantext.Text.Parsers.CSV
- Gargantext.API
- Gargantext.Text.Search
- Gargantext.Text.Terms
- Gargantext.Text.Terms.WithList
- Gargantext.TextFlow
- Gargantext.Viz.Graph.Distances.Matrice
dependencies:
- QuickCheck
......
......@@ -126,7 +126,7 @@ getNodesWith' conn id nodeType offset limit = liftIO (getNodesWith conn id node
getFacet :: Connection -> NodeId -> Maybe Int -> Maybe Int
-> Handler [FacetDoc]
getFacet conn id offset limit = liftIO (putStrLn ( "/facet" :: Text)) >> liftIO (getDocFacet conn Corpus id (Just Document) offset limit)
getFacet conn id offset limit = liftIO (putStrLn ( "/facet" :: Text)) >> liftIO (getDocFacet conn NodeCorpus id (Just Document) offset limit)
getChart :: Connection -> NodeId -> Maybe UTCTime -> Maybe UTCTime
-> Handler [FacetChart]
......
......@@ -59,7 +59,7 @@ projectTree = NodeT Project [corpusTree]
-- | Corpus Tree
corpusTree :: Tree NodeType
corpusTree = NodeT Corpus ( [ leafT Document ]
corpusTree = NodeT NodeCorpus ( [ leafT Document ]
<> [ leafT Lists ]
<> [ leafT Metrics ]
<> [ leafT Classification]
......@@ -81,7 +81,7 @@ data Lists = StopList | MainList | MapList | GroupList
-- | Community Manager Use Case
type Annuaire = Corpus
type Annuaire = NodeCorpus
type Individu = Document
-- | Favorites Node enable Node categorization
......@@ -120,7 +120,7 @@ type Notebook = Node HyperdataNotebook
nodeTypes :: [(NodeType, NodeTypeId)]
nodeTypes = [ (NodeUser , 1)
, (Folder , 2)
, (Corpus , 30)
, (NodeCorpus , 30)
, (Annuaire , 31)
, (Document , 40)
, (UserPage , 41)
......
......@@ -232,11 +232,11 @@ type NodeName = Text
type NodeUser = Node HyperdataUser
type Folder = Node HyperdataFolder
type Project = Folder -- NP Node HyperdataProject ?
type Corpus = Node HyperdataCorpus
type NodeCorpus = Node HyperdataCorpus
type Document = Node HyperdataDocument
------------------------------------------------------------------------
data NodeType = NodeUser | Project | Folder | Corpus | Annuaire | Document | UserPage | DocumentCopy | Favorites
data NodeType = NodeUser | Project | Folder | NodeCorpus | Annuaire | Document | UserPage | DocumentCopy | Favorites
| Classification
| Lists
| Metrics | Occurrences
......
......@@ -160,7 +160,7 @@ post' = do
c <- connectGargandb "gargantext.ini"
pid <- last <$> home c
let uid = 1
postNode c uid pid ( Node' Corpus (pack "Premier corpus") (toJSON (pack "{}"::Text)) [ Node' Document (pack "Doc1") (toJSON (pack "{}" :: Text)) []
postNode c uid pid ( Node' NodeCorpus (pack "Premier corpus") (toJSON (pack "{}"::Text)) [ Node' Document (pack "Doc1") (toJSON (pack "{}" :: Text)) []
, Node' Document (pack "Doc2") (toJSON (pack "{}" :: Text)) []
, Node' Document (pack "Doc3") (toJSON (pack "{}" :: Text)) []
]
......@@ -178,7 +178,7 @@ postCorpus corpusName title ns = do
c <- connectGargandb "gargantext.ini"
pid <- last <$> home c
let uid = 1
postNode c uid pid ( Node' Corpus corpusName (toJSON (pack "{}"::Text))
postNode c uid pid ( Node' NodeCorpus corpusName (toJSON (pack "{}"::Text))
(map (\n -> Node' Document (title n) (toJSON n) []) ns)
)
......
......@@ -323,8 +323,8 @@ mkNodeR' conn ns = runInsertManyReturning conn nodeTable' ns (\(i,_,_,_,_,_,_) -
postNode :: Connection -> UserId -> ParentId -> Node' -> IO [Int]
postNode c uid pid (Node' nt txt v []) = mkNodeR' c (node2table uid pid (Node' nt txt v []))
postNode c uid pid (Node' Corpus txt v ns) = do
[pid'] <- postNode c uid pid (Node' Corpus txt v [])
postNode c uid pid (Node' NodeCorpus txt v ns) = do
[pid'] <- postNode c uid pid (Node' NodeCorpus txt v [])
pids <- mkNodeR' c $ concat $ map (\n -> childWith uid pid' n) ns
pure (pids)
......
......@@ -30,6 +30,8 @@ import Gargantext.Prelude hiding (length)
type Term = Text
type Label = Term
type TermList = [(Label, [[Term]])]
type Sentence a = [a] -- or a nominal group
type Corpus a = [Sentence a] -- a list of sentences
......
......@@ -22,21 +22,28 @@ import GHC.IO (FilePath)
import Control.Applicative
import Control.Monad (mzero)
import Data.Char (ord)
import Data.Char (ord, isSpace)
import Data.Csv
import Data.Either (Either(Left, Right))
import Data.Text (Text, pack)
import qualified Data.Text as DT
import qualified Data.ByteString.Lazy as BL
import Data.Vector (Vector)
import qualified Data.Vector as V
import Gargantext.Prelude hiding (length)
-- import Gargantext.Text.List.Types
import Gargantext.Text.Context
------------------------------------------------------------------------
--csv2lists :: Vector CsvList -> Lists
--csv2lists v = V.foldl' (\e (CsvList listType label forms) -> insertLists lt label forms e) emptyLists v
csvGraphTermList :: FilePath -> IO TermList
csvGraphTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp
csv2list :: CsvListType -> Vector CsvList -> TermList
csv2list lt vs = V.toList $ V.map (\(CsvList _ label forms)
-> (label, map (DT.split isSpace) $ DT.splitOn csvListFormsDelimiter forms))
$ V.filter (\l -> csvList_status l == lt ) vs
------------------------------------------------------------------------
data CsvListType = CsvMap | CsvStop | CsvCandidate
......
......@@ -22,9 +22,7 @@ import Data.Map (Map, empty, fromList)
import Gargantext.Prelude
-------------------------------------------------------------------
type Label = Text
data ListType = Map | Stop | Candidate
data ListType = GraphList | StopList | CandidateList
deriving (Show, Eq, Ord, Enum, Bounded)
type Lists = Map ListType (Map Text [Text])
......
......@@ -44,13 +44,15 @@ import Gargantext.Text.Terms.Multi (multiterms)
import Gargantext.Text.Terms.Mono (monoTerms)
import Gargantext.Text.Terms.WithList (Patterns, extractTermsWithList)
data TermType lang = Mono lang | Multi lang | MonoMulti lang | WithList Patterns
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------
-- | Sugar to extract terms from text (hiddeng mapM from end user).
extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms :: TermType Lang -> [Text] -> IO [[Terms]]
extractTerms termTypeLang = mapM (terms termTypeLang)
------------------------------------------------------------------------
-- | Terms from Text
......
......@@ -40,7 +40,6 @@ import Gargantext.Prelude
isSep :: Char -> Bool
isSep = (`elem` (",.:;?!(){}[]\"" :: String))
monoTerms :: Lang -> Text -> [Terms]
monoTerms l txt = map (monoText2term l) $ monoTexts txt
......@@ -50,7 +49,6 @@ monoTexts = L.concat . monoTextsBySentence
monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms [txt] (S.singleton $ stem lang txt)
monoTextsBySentence :: Text -> [[Text]]
monoTextsBySentence = map (T.split isSpace)
. T.split isSep
......@@ -58,4 +56,3 @@ monoTextsBySentence = map (T.split isSpace)
......@@ -27,11 +27,12 @@ import Gargantext.Prelude
import Data.List (concatMap)
------------------------------------------------------------------------
type Pattern = KMP.Table Term
type TermList = [(Label, [[Term]])]
type Patterns = [(Pattern, Int, Label)]
------------------------------------------------------------------------
replaceTerms :: Patterns -> Sentence Term -> Sentence Label
replaceTerms pats terms = go 0 terms
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment