Commit 27d180af authored by Alexandre Delanoë's avatar Alexandre Delanoë

[WIP] How to graph my writings ?

parent 0427d73c
Pipeline #3618 failed with stage
in 52 minutes and 7 seconds
cabal-version: 1.12 cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.34.7. -- This file has been generated from package.yaml by hpack version 0.35.1.
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.6.9.3 version: 0.0.6.9.3
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
......
...@@ -16,12 +16,14 @@ Portability : POSIX ...@@ -16,12 +16,14 @@ Portability : POSIX
module Gargantext.API.Node.DocumentsFromWriteNodes module Gargantext.API.Node.DocumentsFromWriteNodes
where where
-- import Data.Maybe (fromMaybe)
import Conduit import Conduit
import Control.Lens ((^.)) import Control.Lens ((^.))
import Data.Aeson import Data.Aeson
import Data.Either (Either(..), rights) import Data.Either (Either(..), rights)
-- import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Swagger import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..)) import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
...@@ -30,6 +32,7 @@ import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage) ...@@ -30,6 +32,7 @@ import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
import Gargantext.API.Prelude (GargM, GargError) import Gargantext.API.Prelude (GargM, GargError)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.FrameWrite import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import Gargantext.Core.Text.List.Social (FlowSocialListWith)
import Gargantext.Core.Text.Terms (TermType(..)) import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText, DataText(..)) import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
...@@ -38,19 +41,26 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..) ...@@ -38,19 +41,26 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)
import Gargantext.Database.Admin.Types.Hyperdata.Frame import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith) import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata, node_name) import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI) import Gargantext.Utils.Jobs (serveJobsAPI)
import Gargantext.Core.Text.Corpus.Parsers.Date (split')
import Servant import Servant
import Text.Read (readMaybe)
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Text as T import qualified Data.Text as T
import qualified Gargantext.Defaults as Defaults -- import qualified Gargantext.Defaults as Defaults
------------------------------------------------------------------------ ------------------------------------------------------------------------
type API = Summary " Documents from Write nodes." type API = Summary " Documents from Write nodes."
:> AsyncJobs JobLog '[JSON] Params JobLog :> AsyncJobs JobLog '[JSON] Params JobLog
------------------------------------------------------------------------ ------------------------------------------------------------------------
newtype Params = Params { id :: Int } data Params = Params
{ id :: Int
, paragraphs :: Text
, lang :: Lang
, selection :: FlowSocialListWith
}
deriving (Generic, Show) deriving (Generic, Show)
instance FromJSON Params where instance FromJSON Params where
parseJSON = genericParseJSON defaultOptions parseJSON = genericParseJSON defaultOptions
...@@ -72,7 +82,7 @@ documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m) ...@@ -72,7 +82,7 @@ documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
-> Params -> Params
-> (JobLog -> m ()) -> (JobLog -> m ())
-> m JobLog -> m JobLog
documentsFromWriteNodes uId nId _p logStatus = do documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
let jobLog = JobLog { _scst_succeeded = Just 1 let jobLog = JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 1 , _scst_remaining = Just 1
...@@ -99,31 +109,48 @@ documentsFromWriteNodes uId nId _p logStatus = do ...@@ -99,31 +109,48 @@ documentsFromWriteNodes uId nId _p logStatus = do
pure (node, contents) pure (node, contents)
) frameWrites ) frameWrites
let parsedE = map (\(node, contents) -> hyperdataDocumentFromFrameWrite 7 (node, contents)) frameWritesWithContents let paragraphs' = readMaybe $ T.unpack paragraphs :: Maybe Int
-- TODO hard coded param should be taken from user
let parsedE = (\(node, contents)
-> hyperdataDocumentFromFrameWrite lang (fromMaybe 7 paragraphs') (node, contents)) <$> frameWritesWithContents
let parsed = List.concat $ rights parsedE let parsed = List.concat $ rights parsedE
_ <- flowDataText (RootId (NodeId uId)) _ <- flowDataText (RootId (NodeId uId))
(DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed)) (DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
(Multi EN) cId Nothing logStatus (Multi lang)
cId
(Just selection)
logStatus
pure $ jobLogSuccess jobLog pure $ jobLogSuccess jobLog
------------------------------------------------------------------------ ------------------------------------------------------------------------
hyperdataDocumentFromFrameWrite :: Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument] hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
hyperdataDocumentFromFrameWrite paragraphSize (node, contents) = hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
case parseLines contents of case parseLines contents of
Left _ -> Left "Error parsing node" Left _ -> Left "Error parsing node"
Right (Parsed { authors, contents = ctxts, date }) -> Right (Parsed { authors, contents = ctxts}) ->
let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ] authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
authors' = T.concat $ authorJoinSingle <$> authors authors' = T.concat $ authorJoinSingle <$> authors
date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
, T.pack $ show month, "-" --{-
, T.pack $ show day ]) <$> date (year',month',day') = split' (node^. node_date)
date' = Just $ T.concat [ T.pack $ show year', "-"
, T.pack $ show month', "-"
, T.pack $ show day'
]
--}
{-
date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year', "-"
, T.pack $ show month', "-"
, T.pack $ show day' ]) <$> date
year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date in day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
--}
in
Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite" Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
...@@ -142,6 +169,6 @@ hyperdataDocumentFromFrameWrite paragraphSize (node, contents) = ...@@ -142,6 +169,6 @@ hyperdataDocumentFromFrameWrite paragraphSize (node, contents) =
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ T.pack $ show EN } , _hd_language_iso2 = Just $ T.pack $ show lang }
) (text2titleParagraphs paragraphSize ctxts) ) (text2titleParagraphs paragraphSize ctxts)
) )
...@@ -94,7 +94,8 @@ emptyParsed = ...@@ -94,7 +94,8 @@ emptyParsed =
data Date = data Date =
Date { year :: Integer Date { year :: Integer
, month :: Integer , month :: Integer
, day :: Integer } , day :: Integer
}
deriving (Show) deriving (Show)
data Line = data Line =
...@@ -109,17 +110,17 @@ parseLines :: Text -> Either ParseError Parsed ...@@ -109,17 +110,17 @@ parseLines :: Text -> Either ParseError Parsed
parseLines text = foldl f emptyParsed <$> lst parseLines text = foldl f emptyParsed <$> lst
where where
lst = parse documentLines "" (unpack text) lst = parse documentLines "" (unpack text)
f (Parsed { .. }) (LAuthors as) = Parsed { authors = as, .. } f (Parsed { .. }) (LAuthors as) = Parsed { authors = as , .. }
f (Parsed { .. }) (LContents c) = Parsed { contents = concat [contents, c], .. } f (Parsed { .. }) (LContents c) = Parsed { contents = DT.unlines [contents, c], .. }
f (Parsed { .. }) (LDate d ) = Parsed { date = Just d, .. } f (Parsed { .. }) (LDate d ) = Parsed { date = Just d , .. }
f (Parsed { .. }) (LSource s ) = Parsed { source = Just s, .. } f (Parsed { .. }) (LSource s ) = Parsed { source = Just s , .. }
f (Parsed { .. }) (LTitle t ) = Parsed { title = t, .. } f (Parsed { .. }) (LTitle t ) = Parsed { title = t , .. }
-- Source should be the name of the node -- Source should be the name of the node
-- First line of each Context should be the title. -- First line of each Context should be the title.
documentLinesP :: Parser [Line] documentLinesP :: Parser [Line]
documentLinesP = do documentLinesP = do
t <- titleP t <- titleP
ls <- lineP `sepBy` newline ls <- lineP `sepBy` newline
pure $ [LTitle $ pack t] ++ ls pure $ [LTitle $ pack t] ++ ls
...@@ -128,8 +129,6 @@ documentLines = do ...@@ -128,8 +129,6 @@ documentLines = do
ls <- lineP `sepBy` newline ls <- lineP `sepBy` newline
pure ls pure ls
lineP :: Parser Line lineP :: Parser Line
lineP = do lineP = do
choice [ try authorsLineP choice [ try authorsLineP
...@@ -235,15 +234,20 @@ tokenEnd = void (char '\n') <|> eof ...@@ -235,15 +234,20 @@ tokenEnd = void (char '\n') <|> eof
--- MISC Tools --- MISC Tools
text2titleParagraphs :: Int -> Text -> [(Text, Text)] text2titleParagraphs :: Int -> Text -> [(Text, Text)]
text2titleParagraphs n = catMaybes . List.map doTitle text2titleParagraphs n = catMaybes
. splitEvery n . List.map clean . List.map doTitle
. sentences . DT.concat . DT.lines . (splitEvery n)
. sentences
. DT.intercalate ". "
. List.filter (/= "")
. DT.lines
doTitle :: [Text] -> Maybe (Text, Text) doTitle :: [Text] -> Maybe (Text, Text)
doTitle (t:ts) = Just (t, DT.concat ts) doTitle (t:ts) = Just (t, DT.concat ts)
doTitle [] = Nothing doTitle [] = Nothing
clean :: Text -> Text clean :: Text -> Text
clean = DT.unwords . List.filter (\w -> DT.length w < 25) . DT.words clean = DT.unwords . List.filter (\w -> DT.length w < 25) . DT.words
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment