Commit c42e28fa authored by Przemyslaw Kaminski

Merge branch 'dev' into 86-dev-graphql

parents 2dc0600b 072009e8
@@ -134,7 +134,7 @@ For Docker env, first create the appropriate image:
 ``` sh
 cd devops/docker
-docker build -t cgenie/stack-build:lts-17.13-garg .
+docker build -t cgenie/stack-build:lts-18.12-garg .
 ```
 then run:
...
-FROM fpco/stack-build:lts-17.13
-RUN apt-key adv --keyserver hkp://pool.sks-keyservers.net:80 --recv-keys 8B1DA6120C2BF624
+FROM fpco/stack-build:lts-18.12
+#RUN apt-key adv --keyserver hkp://pool.sks-keyservers.net:80 --recv-keys 8B1DA6120C2BF624
 RUN apt-get update && \
     apt-get install -y git libigraph0-dev && \
     rm -rf /var/lib/apt/lists/*
...
@@ -54,7 +54,7 @@ git clone https://gitlab.iscpif.fr/gargantext/purescript-gargantext
 ../install-deps $(pwd)
 pushd devops/docker
-docker build --pull -t fpco/stack-build:lts-17.13-garg .
+docker build --pull -t fpco/stack-build:lts-18.12-garg .
 popd
 #stack docker pull
...
@@ -38,10 +38,6 @@ CREATE TABLE public.nodes (
     FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
 );
 ALTER TABLE public.nodes OWNER TO gargantua;
-ALTER TABLE nodes ADD COLUMN IF NOT EXISTS search_title tsvector;
-UPDATE nodes SET search_title = to_tsvector('english', coalesce("hyperdata"->>'title', '') || ' ' || coalesce("hyperdata"->>'abstract', ''));
-CREATE INDEX IF NOT EXISTS search_title_idx ON nodes USING GIN (search_title);
 --------------------------------------------------------------
 -- | Ngrams
 CREATE TABLE public.ngrams (
...
+ALTER TABLE nodes
+  DROP COLUMN IF EXISTS search_title;
 name: gargantext
-version: '0.0.4.2'
+version: '0.0.4.3'
 synopsis: Search, map, share
 description: Please see README.md
 category: Data
...
@@ -257,6 +257,7 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
         CSV       -> Parser.parseFormat Parser.CsvGargV3
         WOS       -> Parser.parseFormat Parser.WOS
         PresseRIS -> Parser.parseFormat Parser.RisPresse
+        ZIP       -> Parser.parseFormat Parser.ZIP
   -- TODO granularity of the logStatus
   eDocs <- liftBase $ parse $ cs d
@@ -370,3 +371,4 @@ addToCorpusWithFile user cid nwf@(NewWithFile _d _l fName) logStatus = do
       , _scst_remaining = Just 0
       , _scst_events    = Just []
       }
@@ -45,25 +45,24 @@ data FileType = CSV
               | CSV_HAL
               | PresseRIS
               | WOS
+              | ZIP
   deriving (Eq, Show, Generic)
 instance ToSchema FileType
-instance Arbitrary FileType
-  where
-    arbitrary = elements [CSV, PresseRIS]
+instance Arbitrary FileType where arbitrary = elements [CSV, PresseRIS]
 instance ToParamSchema FileType
 instance FromJSON FileType
 instance ToJSON FileType
-instance ToParamSchema (MultipartData Mem) where
-  toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
+instance ToParamSchema (MultipartData Mem) where toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
 instance FromHttpApiData FileType
   where
     parseUrlPiece "CSV"       = pure CSV
     parseUrlPiece "CSV_HAL"   = pure CSV_HAL
     parseUrlPiece "PresseRis" = pure PresseRIS
+    parseUrlPiece "ZIP"       = pure ZIP
     parseUrlPiece _           = pure CSV -- TODO error here
...
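For context, a standalone sketch of the round-trip the new `ZIP` constructor gets from the `FromHttpApiData` instance above. It assumes only the http-api-data package; the local `FileType` mirrors (rather than imports) the project's type, and the permissive fallback is kept as in the diff:

```haskell
{-# LANGUAGE OverloadedStrings #-}
import Data.Text (Text)
import Web.HttpApiData (FromHttpApiData (..))

-- Local copy of the project's FileType for illustration only.
data FileType = CSV | CSV_HAL | PresseRIS | WOS | ZIP
  deriving (Eq, Show)

instance FromHttpApiData FileType where
  parseUrlPiece "CSV"       = pure CSV
  parseUrlPiece "CSV_HAL"   = pure CSV_HAL
  parseUrlPiece "PresseRis" = pure PresseRIS
  parseUrlPiece "ZIP"       = pure ZIP
  parseUrlPiece _           = pure CSV -- TODO error here (as in the diff)

main :: IO ()
main = print (parseUrlPiece "ZIP" :: Either Text FileType)
-- prints: Right ZIP
```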
@@ -34,7 +34,7 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
 import Gargantext.Database.Admin.Types.Hyperdata.Frame
 import Gargantext.Database.Admin.Types.Node
-import Gargantext.Database.Query.Table.Node (getChildrenByType, getNodeWith)
+import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith)
 import Gargantext.Database.Schema.Node (node_hyperdata)
 import Gargantext.Prelude
 import GHC.Generics (Generic)
@@ -60,7 +60,6 @@ api uId nId =
   JobFunction (\p log'' ->
     let
       log' x = do
-        printDebug "documents from write nodes" x
         liftBase $ log'' x
     in documentsFromWriteNodes uId nId p (liftBase . log')
   )
@@ -71,7 +70,7 @@ documentsFromWriteNodes :: (HasSettings env, FlowCmdM env err m)
   -> Params
   -> (JobLog -> m ())
   -> m JobLog
-documentsFromWriteNodes uId nId p logStatus = do
+documentsFromWriteNodes uId nId _p logStatus = do
   logStatus JobLog { _scst_succeeded = Just 1
                    , _scst_failed    = Just 0
@@ -79,12 +78,10 @@ documentsFromWriteNodes uId nId p logStatus = do
                    , _scst_events    = Just []
                    }
-  _ <- printDebug "[documentsFromWriteNodes] inside job, uId" uId
-  _ <- printDebug "[documentsFromWriteNodes] inside job, nId" nId
-  _ <- printDebug "[documentsFromWriteNodes] inside job, p" p
+  mcId <- getClosestParentIdByType' nId NodeCorpus
+  let cId = maybe (panic "[G.A.N.DFWN] Node has no parent") identity mcId
   frameWriteIds <- getChildrenByType nId NodeFrameWrite
-  _ <- printDebug "[documentsFromWriteNodes] children" frameWriteIds
   -- https://write.frame.gargantext.org/<frame_id>/download
   frameWrites <- mapM (\id -> getNodeWith id (Proxy :: Proxy HyperdataFrame)) frameWriteIds
@@ -94,13 +91,11 @@ documentsFromWriteNodes uId nId p logStatus = do
       contents <- getHyperdataFrameContents (node ^. node_hyperdata)
       pure (node, contents)
     ) frameWrites
-  _ <- printDebug "[documentsFromWriteNodes] frameWritesWithContents" frameWritesWithContents
   let parsedE = (\(node, contents) -> hyperdataDocumentFromFrameWrite (node ^. node_hyperdata, contents)) <$> frameWritesWithContents
   let parsed = rights parsedE
-  _ <- printDebug "[documentsFromWriteNodes] parsed" parsed
-  _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) nId Nothing
+  _ <- flowDataText (RootId (NodeId uId)) (DataNew [parsed]) (Multi EN) cId Nothing
   pure JobLog { _scst_succeeded = Just 2
               , _scst_failed    = Just 0
@@ -113,8 +108,14 @@ hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, cont
   case parseLines contents of
     Left _ -> Left "Error parsing node"
     Right (Parsed { authors, contents = c, date, source, title = t }) ->
-      let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ] in
-      let authors' = T.concat $ authorJoinSingle <$> authors in
+      let authorJoinSingle (Author { firstName, lastName }) = T.concat [ lastName, ", ", firstName ]
+          authors' = T.concat $ authorJoinSingle <$> authors
+          date' = (\(Date { year, month, day }) -> T.concat [ T.pack $ show year, "-"
+                                                            , T.pack $ show month, "-"
+                                                            , T.pack $ show day ]) <$> date
+          year' = fromIntegral $ maybe 2021 (\(Date { year }) -> year) date
+          month' = fromIntegral $ maybe 10 (\(Date { month }) -> month) date
+          day' = fromIntegral $ maybe 4 (\(Date { day }) -> day) date in
      Right HyperdataDocument { _hd_bdd = Just "FrameWrite"
                              , _hd_doi = Nothing
                              , _hd_url = Nothing
@@ -126,10 +127,10 @@ hyperdataDocumentFromFrameWrite (HyperdataFrame { _hf_base, _hf_frame_id }, cont
                              , _hd_institutes = Nothing
                              , _hd_source = source
                              , _hd_abstract = Just c
-                             , _hd_publication_date = date
-                             , _hd_publication_year = Nothing -- TODO
-                             , _hd_publication_month = Nothing -- TODO
-                             , _hd_publication_day = Nothing -- TODO
+                             , _hd_publication_date = date'
+                             , _hd_publication_year = Just year'
+                             , _hd_publication_month = Just month'
+                             , _hd_publication_day = Just day'
                              , _hd_publication_hour = Nothing
                              , _hd_publication_minute = Nothing
                              , _hd_publication_second = Nothing
...
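One property of the `date'` formatting above worth noting: `T.pack . show` does not zero-pad, so single-digit months and days render as e.g. "2021-1-4" rather than ISO-style "2021-01-04". A standalone check, assuming only the text package:

```haskell
{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Text as T
import qualified Data.Text.IO as T

-- Mirrors the show-based formatting used for date' in the hunk above.
formatDate :: Integer -> Integer -> Integer -> T.Text
formatDate year month day =
  T.concat [ T.pack $ show year, "-"
           , T.pack $ show month, "-"
           , T.pack $ show day ]

main :: IO ()
main = T.putStrLn $ formatDate 2021 1 4
-- prints: 2021-1-4  (no zero padding)
```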
@@ -68,6 +68,7 @@ type ParseError = String
 -- different parser are available.
 data FileFormat = WOS | RIS | RisPresse
                 | CsvGargV3 | CsvHal
+                | ZIP
   deriving (Show)
 -- Implemented (ISI Format)
@@ -94,6 +95,9 @@ parseFormat WOS bs = do
     $ partitionEithers
     $ [runParser' WOS bs]
   pure $ Right docs
+parseFormat ZIP _bs = do
+  printDebug "[parseFormat]" ZIP
+  pure $ Left "Not implemented for ZIP"
 parseFormat _ _ = undefined
 -- | Parse file into documents
...
@@ -6,7 +6,7 @@ import Data.Either
 import Data.Maybe
 import Data.Text hiding (foldl)
 import Gargantext.Prelude
-import Prelude ((++))
+import Prelude ((++), read)
 import Text.Parsec hiding (Line)
 import Text.Parsec.String
@@ -57,14 +57,14 @@ parseLinesSampleUnordered = parseLines sampleUnordered
 data Author =
   Author { firstName :: Text
          , lastName  :: Text }
   deriving (Show)
 data Parsed =
   Parsed { title    :: Text
          , authors  :: [Author]
-         , date     :: Maybe Text
+         , date     :: Maybe Date
          , source   :: Maybe Text
          , contents :: Text }
   deriving (Show)
@@ -76,10 +76,16 @@ emptyParsed =
          , source   = Nothing
          , contents = "" }
+data Date =
+  Date { year  :: Integer
+       , month :: Integer
+       , day   :: Integer }
+  deriving (Show)
 data Line =
   LAuthors [Author]
   | LContents Text
-  | LDate Text
+  | LDate Date
   | LSource Text
   | LTitle Text
   deriving (Show)
@@ -115,7 +121,7 @@ authorsLineP = do
 dateLineP :: Parser Line
 dateLineP = do
   date <- dateP
-  pure $ LDate $ pack date
+  pure $ LDate date
 sourceLineP :: Parser Line
 sourceLineP = do
@@ -169,9 +175,23 @@ datePrefixP :: Parser [Char]
 datePrefixP = do
   _ <- string "^@@date:"
   many (char ' ')
-dateP :: Parser [Char]
+dateP :: Parser Date
 dateP = try datePrefixP
-        *> many (noneOf "\n")
+        *> dateISOP
+        -- *> many (noneOf "\n")
+dateISOP :: Parser Date
+dateISOP = do
+  year <- rd <$> number
+  _ <- char '-'
+  month <- rd <$> number
+  _ <- char '-'
+  day <- rd <$> number
+  _ <- many (noneOf "\n")
+  pure $ Date { year, month, day }
+  where
+    rd = read :: [Char] -> Integer
+    number = many1 digit
 sourcePrefixP :: Parser [Char]
 sourcePrefixP = do
...
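For reference, the new ISO date parser can be exercised standalone. A minimal sketch assuming only the parsec package, with `Date` and the parser body copied from the hunk above:

```haskell
{-# LANGUAGE NamedFieldPuns #-}
import Text.Parsec
import Text.Parsec.String (Parser)

data Date = Date { year :: Integer, month :: Integer, day :: Integer }
  deriving (Show)

-- Same body as dateISOP in the diff: year-month-day, digits only,
-- trailing junk up to the newline is consumed and ignored.
dateISOP :: Parser Date
dateISOP = do
  year  <- rd <$> number
  _     <- char '-'
  month <- rd <$> number
  _     <- char '-'
  day   <- rd <$> number
  _     <- many (noneOf "\n")
  pure $ Date { year, month, day }
  where
    rd     = read :: [Char] -> Integer
    number = many1 digit

main :: IO ()
main = print $ parse dateISOP "" "2021-10-04"
-- prints: Right (Date {year = 2021, month = 10, day = 4})
```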
@@ -48,7 +48,7 @@ import GHC.Generics (Generic)
 import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
 import Gargantext.Database.Prelude (fromField')
 import Gargantext.Prelude
-import Opaleye (DefaultFromField(..), PGJsonb, defaultFromField, fieldQueryRunnerColumn, Nullable)
+import Opaleye (DefaultFromField, defaultFromField, PGJsonb, fieldQueryRunnerColumn, Nullable)
 import Test.QuickCheck (elements)
 import Test.QuickCheck.Arbitrary hiding (vector)
...
@@ -141,7 +141,6 @@ instance (Arbitrary hyperdata
     <*> arbitrary
     <*> arbitrary
     <*> arbitrary
-    <*> arbitrary
 ------------------------------------------------------------------------
 pgNodeId :: NodeId -> O.Column O.PGInt4
...
@@ -329,7 +329,7 @@ runViewDocuments cId t o l order query = do
 --   WHERE nn.node1_id = ? -- corpusId
 --     AND n.typename = ? -- NodeTypeId
 --     AND nn.category = ? -- isTrash or not
--     AND (n.search_title @@ to_tsquery(?) OR ? = '') -- query with an OR hack for empty to_tsquery('') results
+--     AND (n.search @@ to_tsquery(?) OR ? = '') -- query with an OR hack for empty to_tsquery('') results
 -- |]
 runCountDocuments :: HasDBid NodeType => CorpusId -> IsTrash -> Maybe Text -> Cmd err Int
@@ -359,8 +359,8 @@ viewDocuments cId t ntId mQuery = proc () -> do
   -- restrict -< (n^.node_name) `ilike` (sqlStrictText iLikeQuery)
   restrict -< if query == ""
     then pgBool True
-    --else (n^.ns_search_title) @@ (pgTSQuery (T.unpack query))
-    else (n^.ns_search_title) @@ (toTSQuery $ T.unpack query)
+    --else (n^.ns_search) @@ (pgTSQuery (T.unpack query))
+    else (n^.ns_search) @@ (toTSQuery $ T.unpack query)
   returnA -< FacetDoc (_ns_id n)
              (_ns_date n)
...
@@ -142,6 +142,29 @@ getClosestParentIdByType nId nType = do
       WHERE n1.id = ? AND 0 = ?;
     |]
+-- | Similar to `getClosestParentIdByType` but includes current node
+-- in search too
+getClosestParentIdByType' :: HasDBid NodeType
+                          => NodeId
+                          -> NodeType
+                          -> Cmd err (Maybe NodeId)
+getClosestParentIdByType' nId nType = do
+  result <- runPGSQuery query (nId, 0 :: Int)
+  case result of
+    [(NodeId id, pTypename)] -> do
+      if toDBid nType == pTypename then
+        pure $ Just $ NodeId id
+      else
+        getClosestParentIdByType nId nType
+    _ -> pure Nothing
+  where
+    query :: DPS.Query
+    query = [sql|
+      SELECT n.id, n.typename
+      FROM nodes n
+      WHERE n.id = ? AND 0 = ?;
+    |]
 -- | Given a node id, find all it's children (no matter how deep) of
 -- given node type.
 getChildrenByType :: HasDBid NodeType
...
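As a reading aid, here is a pure sketch of the intended difference between the two lookups, over an in-memory parent map instead of the nodes table. The recursive ancestor walk in `closestParentByType` is an assumption about `getClosestParentIdByType`'s behavior (its full body is not shown in this diff); the primed variant merely checks the starting node first:

```haskell
import qualified Data.Map.Strict as Map

type NodeId   = Int
type TypeName = Int

data Node = Node { parent :: Maybe NodeId, typename :: TypeName }

-- Like getClosestParentIdByType (assumed): strictly ancestors.
closestParentByType :: Map.Map NodeId Node -> NodeId -> TypeName -> Maybe NodeId
closestParentByType nodes nId t = do
  n   <- Map.lookup nId nodes
  pId <- parent n
  p   <- Map.lookup pId nodes
  if typename p == t then Just pId else closestParentByType nodes pId t

-- Like getClosestParentIdByType': the starting node itself counts too.
closestParentByType' :: Map.Map NodeId Node -> NodeId -> TypeName -> Maybe NodeId
closestParentByType' nodes nId t = do
  n <- Map.lookup nId nodes
  if typename n == t then Just nId else closestParentByType nodes nId t

main :: IO ()
main = print (closestParentByType' nodes 3 30, closestParentByType' nodes 3 10)
  where
    nodes = Map.fromList
      [ (1, Node Nothing  10)   -- corpus
      , (2, Node (Just 1) 20)   -- folder
      , (3, Node (Just 2) 30) ] -- write node
-- prints: (Just 3,Just 1)
```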
@@ -154,7 +154,6 @@ data NodePolySearch id
   , _ns_hyperdata :: hyperdata
   , _ns_search    :: search
-  , _ns_search_title :: search
   } deriving (Show, Generic)
 $(makeAdaptorAndInstance "pNodeSearch" ''NodePolySearch)
@@ -174,7 +173,6 @@ nodeTableSearch = Table "nodes" ( pNodeSearch
   , _ns_hyperdata = requiredTableField "hyperdata"
   , _ns_search    = optionalTableField "search"
-  , _ns_search_title = optionalTableField "search_title"
   }
 )
 ------------------------------------------------------------------------
 resolver:
-  url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/4.yaml
+  url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/12.yaml
 flags: {}
 extra-package-dbs: []
 packages:
@@ -11,7 +11,7 @@ packages:
 docker:
   enable: false
-  repo: 'cgenie/stack-build:lts-17.13-garg'
+  repo: 'cgenie/stack-build:lts-18.12-garg'
 run-args:
   - '--publish=8008:8008'
...