Commit 27d180af authored by Alexandre Delanoë's avatar Alexandre Delanoë

[WIP] How to graph my writings ?

parent 0427d73c
Pipeline #3618 failed with stage
in 52 minutes and 7 seconds
cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.34.7.
-- This file has been generated from package.yaml by hpack version 0.35.1.
--
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.9.3
version: 0.0.6.9.3
synopsis: Search, map, share
description: Please see README.md
category: Data
......
......@@ -16,12 +16,14 @@ Portability : POSIX
module Gargantext.API.Node.DocumentsFromWriteNodes
where
-- import Data.Maybe (fromMaybe)
import Conduit
import Control.Lens ((^.))
import Data.Aeson
import Data.Either (Either(..), rights)
-- import Data.Maybe (fromMaybe)
import Data.Maybe (fromMaybe)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
......@@ -30,6 +32,7 @@ import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
import Gargantext.API.Prelude (GargM, GargError)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import Gargantext.Core.Text.List.Social (FlowSocialListWith)
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
......@@ -38,19 +41,26 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata, node_name)
import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI)
import Gargantext.Core.Text.Corpus.Parsers.Date (split')
import Servant
import Text.Read (readMaybe)
import qualified Data.List as List
import qualified Data.Text as T
import qualified Gargantext.Defaults as Defaults
-- import qualified Gargantext.Defaults as Defaults
------------------------------------------------------------------------
type API = Summary " Documents from Write nodes."
:> AsyncJobs JobLog '[JSON] Params JobLog
------------------------------------------------------------------------
newtype Params = Params { id :: Int }
data Params = Params
{ id :: Int
, paragraphs :: Text
, lang :: Lang
, selection :: FlowSocialListWith
}
deriving (Generic, Show)
instance FromJSON Params where
parseJSON = genericParseJSON defaultOptions
......@@ -72,7 +82,7 @@ documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
-> Params
-> (JobLog -> m ())
-> m JobLog
documentsFromWriteNodes uId nId _p logStatus = do
documentsFromWriteNodes uId nId Params { selection, lang, paragraphs } logStatus = do
let jobLog = JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
......@@ -99,31 +109,48 @@ documentsFromWriteNodes uId nId _p logStatus = do
pure (node, contents)
) frameWrites
let parsedE = map (\(node, contents) -> hyperdataDocumentFromFrameWrite 7 (node, contents)) frameWritesWithContents
-- TODO hard coded param should be taken from user
let paragraphs' = readMaybe $ T.unpack paragraphs :: Maybe Int
let parsedE = (\(node, contents)
-> hyperdataDocumentFromFrameWrite lang (fromMaybe 7 paragraphs') (node, contents)) <$> frameWritesWithContents
let parsed = List.concat $ rights parsedE
_ <- flowDataText (RootId (NodeId uId))
(DataNew (Just $ fromIntegral $ length parsed, yieldMany parsed))
(Multi EN) cId Nothing logStatus
(Multi lang)
cId
(Just selection)
logStatus
pure $ jobLogSuccess jobLog
------------------------------------------------------------------------
hyperdataDocumentFromFrameWrite :: Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
hyperdataDocumentFromFrameWrite paragraphSize (node, contents) =
hyperdataDocumentFromFrameWrite :: Lang -> Int -> (Node HyperdataFrame, T.Text) -> Either T.Text [HyperdataDocument]
hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
case parseLines contents of
Left _ -> Left "Error parsing node"
Right (Parsed { authors, contents = ctxts, date }) ->
Right (Parsed { authors, contents = ctxts}) ->
let HyperdataFrame { _hf_base, _hf_frame_id } = node ^. node_hyperdata
authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
authors' = T.concat $ authorJoinSingle <$> authors
date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
, T.pack $ show month, "-"
, T.pack $ show day ]) <$> date
authors' = T.concat $ authorJoinSingle <$> authors
--{-
(year',month',day') = split' (node^. node_date)
date' = Just $ T.concat [ T.pack $ show year', "-"
, T.pack $ show month', "-"
, T.pack $ show day'
]
--}
{-
date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year', "-"
, T.pack $ show month', "-"
, T.pack $ show day' ]) <$> date
year' = fromIntegral $ maybe Defaults.year (\(Date { year }) -> year) date
month' = maybe Defaults.month (\(Date { month }) -> fromIntegral month) date
day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date in
day' = maybe Defaults.day (\(Date { day }) -> fromIntegral day) date
--}
in
Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just "FrameWrite"
, _hd_doi = Nothing
, _hd_url = Nothing
......@@ -142,6 +169,6 @@ hyperdataDocumentFromFrameWrite paragraphSize (node, contents) =
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ T.pack $ show EN }
, _hd_language_iso2 = Just $ T.pack $ show lang }
) (text2titleParagraphs paragraphSize ctxts)
)
......@@ -94,7 +94,8 @@ emptyParsed =
data Date =
Date { year :: Integer
, month :: Integer
, day :: Integer }
, day :: Integer
}
deriving (Show)
data Line =
......@@ -109,17 +110,17 @@ parseLines :: Text -> Either ParseError Parsed
parseLines text = foldl f emptyParsed <$> lst
where
lst = parse documentLines "" (unpack text)
f (Parsed { .. }) (LAuthors as) = Parsed { authors = as, .. }
f (Parsed { .. }) (LContents c) = Parsed { contents = concat [contents, c], .. }
f (Parsed { .. }) (LDate d ) = Parsed { date = Just d, .. }
f (Parsed { .. }) (LSource s ) = Parsed { source = Just s, .. }
f (Parsed { .. }) (LTitle t ) = Parsed { title = t, .. }
f (Parsed { .. }) (LAuthors as) = Parsed { authors = as , .. }
f (Parsed { .. }) (LContents c) = Parsed { contents = DT.unlines [contents, c], .. }
f (Parsed { .. }) (LDate d ) = Parsed { date = Just d , .. }
f (Parsed { .. }) (LSource s ) = Parsed { source = Just s , .. }
f (Parsed { .. }) (LTitle t ) = Parsed { title = t , .. }
-- Source should be the name of the node
-- First line of each Context should be the title.
documentLinesP :: Parser [Line]
documentLinesP = do
t <- titleP
t <- titleP
ls <- lineP `sepBy` newline
pure $ [LTitle $ pack t] ++ ls
......@@ -128,8 +129,6 @@ documentLines = do
ls <- lineP `sepBy` newline
pure ls
lineP :: Parser Line
lineP = do
choice [ try authorsLineP
......@@ -235,15 +234,20 @@ tokenEnd = void (char '\n') <|> eof
--- MISC Tools
text2titleParagraphs :: Int -> Text -> [(Text, Text)]
text2titleParagraphs n = catMaybes . List.map doTitle
. splitEvery n . List.map clean
. sentences . DT.concat . DT.lines
text2titleParagraphs n = catMaybes
. List.map doTitle
. (splitEvery n)
. sentences
. DT.intercalate ". "
. List.filter (/= "")
. DT.lines
doTitle :: [Text] -> Maybe (Text, Text)
doTitle (t:ts) = Just (t, DT.concat ts)
doTitle [] = Nothing
clean :: Text -> Text
clean = DT.unwords . List.filter (\w -> DT.length w < 25) . DT.words
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment