Commit 762b3416 authored by Quentin Lobbé

phylo from wos in progress

parent 2f9f9de6
...@@ -29,6 +29,7 @@ import GHC.IO (FilePath) ...@@ -29,6 +29,7 @@ import GHC.IO (FilePath)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.List.CSV (csvGraphTermList) import Gargantext.Text.List.CSV (csvGraphTermList)
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year) import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.Parsers (FileFormat(..),parseDocs)
import Gargantext.Text.Terms.WithList import Gargantext.Text.Terms.WithList
import Gargantext.Text.Context (TermList) import Gargantext.Text.Context (TermList)
...@@ -52,24 +53,32 @@ import qualified Data.ByteString.Lazy as L ...@@ -52,24 +53,32 @@ import qualified Data.ByteString.Lazy as L
-- | Conf | -- -- | Conf | --
-------------- --------------
type ListPath = FilePath type ListPath = FilePath
type CorpusPath = FilePath type CorpusPath = FilePath
data CorpusType = Wos | Csv deriving (Show,Generic)
type Limit = Int type Limit = Int
data Conf = data Conf =
Conf { corpusPath :: CorpusPath Conf { corpusPath :: CorpusPath
, corpusType :: CorpusType
, listPath :: ListPath , listPath :: ListPath
, outputPath :: FilePath , outputPath :: FilePath
, phyloName :: Text
, limit :: Limit , limit :: Limit
} deriving (Show,Generic) } deriving (Show,Generic)
instance FromJSON Conf instance FromJSON Conf
instance ToJSON Conf instance ToJSON Conf
instance FromJSON CorpusType
instance ToJSON CorpusType
-- | Get the conf from a Json file -- | Get the conf from a Json file
getJson :: FilePath -> IO L.ByteString getJson :: FilePath -> IO L.ByteString
getJson path = L.readFile path getJson path = L.readFile path
--------------- ---------------
-- | Parse | -- -- | Parse | --
--------------- ---------------
...@@ -82,12 +91,23 @@ filterTerms patterns (year', doc) = (year',termsInText patterns doc) ...@@ -82,12 +91,23 @@ filterTerms patterns (year', doc) = (year',termsInText patterns doc)
termsInText pats txt = DL.nub $ DL.concat $ map (map unwords) $ extractTermsWithList pats txt termsInText pats txt = DL.nub $ DL.concat $ map (map unwords) $ extractTermsWithList pats txt
csvToCorpus :: Int -> FilePath -> IO ([(Int,Text)]) csvToCorpus :: Int -> CorpusPath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList csvToCorpus limit csv = DV.toList
. DV.take limit . DV.take limit
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n))) . DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> readCsv csv . snd <$> readCsv csv
-- | WOS counterpart of 'csvToCorpus'. Still a stub: the body is
-- 'undefined', so running the resulting action fails at runtime.
-- Both arguments (document limit and corpus path) are currently ignored.
wosToCorpus :: Int -> CorpusPath -> IO ([(Int,Text)])
wosToCorpus _limit _path = undefined
-- | Read a corpus file with the reader matching its declared 'CorpusType'.
-- The document limit and the corpus path are passed through unchanged to
-- 'wosToCorpus' or 'csvToCorpus'.
fileToCorpus :: CorpusType -> Int -> CorpusPath -> IO ([(Int,Text)])
fileToCorpus Wos maxDocs src = wosToCorpus maxDocs src
fileToCorpus Csv maxDocs src = csvToCorpus maxDocs src
parse :: Limit -> CorpusPath -> TermList -> IO [Document] parse :: Limit -> CorpusPath -> TermList -> IO [Document]
parse limit corpus lst = do parse limit corpus lst = do
corpus' <- csvToCorpus limit corpus corpus' <- csvToCorpus limit corpus
...@@ -123,7 +143,7 @@ main = do ...@@ -123,7 +143,7 @@ main = do
putStrLn $ show "--| Build the phylo |--" putStrLn $ show "--| Build the phylo |--"
let query = PhyloQueryBuild "cultural_evolution" "" 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.00001 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.5 10) let query = PhyloQueryBuild (phyloName conf) "" 5 3 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.00001 10) 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.5 10)
let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True let queryView = PhyloQueryView 2 Merge False 1 [BranchAge] [defaultSmallBranch] [BranchPeakFreq,GroupLabelCooc] (Just (ByBranchAge,Asc)) Json Flat True
...@@ -133,4 +153,6 @@ main = do ...@@ -133,4 +153,6 @@ main = do
putStrLn $ show "--| Export the phylo as a dot graph |--" putStrLn $ show "--| Export the phylo as a dot graph |--"
P.writeFile (outputPath conf) $ dotToString $ viewToDot view let outputFile = (outputPath conf) P.++ (DT.unpack $ phyloName conf) P.++ ".dot"
P.writeFile outputFile $ dotToString $ viewToDot view
...@@ -71,7 +71,7 @@ type ParseError = String ...@@ -71,7 +71,7 @@ type ParseError = String
-- | According to the format of Input file, -- | According to the format of Input file,
-- different parser are available. -- different parser are available.
data FileFormat = WOS | CsvHalFormat -- | CsvGargV3 data FileFormat = WOS | CsvHalFormat-- | CsvGargV3
deriving (Show) deriving (Show)
-- Implemented (ISI Format) -- Implemented (ISI Format)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment