[refactor] this compiles fine with stack now...

parent 5f6cce5b
{-|
Module : Main
Description : Phylomemy profile Main module
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE OverloadedStrings #-}
module Main where
import Common
import Data.Aeson
import Data.List (nub)
import Data.Text qualified as T
import GHC.IO.Encoding
import GHC.Stack
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig, toPeriods, getTimePeriod, getTimeStep)
import GHC.IO.Encoding
import GHC.Stack
import Paths_gargantext
import Prelude
import qualified Data.Text as T
import Shelly
import System.Directory
......
{-|
Module : Common
Description : Phylomemy common module
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE OverloadedStrings #-}
module Common where
import Control.Concurrent.Async (mapConcurrently)
import Crypto.Hash.SHA256 (hash)
import Data.Aeson
import Data.Aeson ( eitherDecode )
import Data.ByteString.Char8 qualified as C8
import Data.List (nub, tail)
import Data.List.Split
import Data.List.Split ( splitOn )
import Data.Maybe (fromJust)
import Data.Text (unpack, replace, pack)
import Data.Text qualified as T
import Data.Vector qualified as Vector
import Gargantext.API.Ngrams.Prelude (toTermList)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Ngrams.Types ( NgramsList )
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseFile)
import Gargantext.Core.Text.Corpus.Parsers (parseFile)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv
import Gargantext.Core.Text.Corpus.Parsers.Types (FileFormat(..), FileType(..))
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.API.Tools ( toPhyloDate, toPhyloDate', readJson )
import Gargantext.Core.Viz.Phylo.PhyloTools (toPeriods, getTimePeriod, getTimeStep)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash, replace)
import Prelude qualified
import System.Directory (listDirectory)
-- | Two-constructor tag: 'BackupPhyloWithoutLink' or 'BackupPhylo'.
--
-- NOTE(review): presumably selects which phylo stage a backup refers to
-- (before or after linking) -- confirm against the callers in this module.
data Backup
  = BackupPhyloWithoutLink
  | BackupPhylo
  deriving (Show)
---------------
......
......@@ -131,7 +131,7 @@ source-repository-package
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
tag: dd23048307f2cef0655cced2005cd0eaf69f8b6a
tag: dc0a1be8f2c706fb67b5eb7fb1c667eea911a0ba
source-repository-package
type: git
......@@ -215,6 +215,7 @@ allow-newer: *
package gargantext
ghc-options: -fwrite-ide-info -hiedir=".stack-work/hiedb"
optimization: 2
package hmatrix
ghc-options: -O2 -fsimpl-tick-factor=10000 -fdicts-cheap -fdicts-strict -flate-dmd-anal -fno-state-hack
......
......@@ -65,7 +65,6 @@ common defaults
base >=4.7 && <5
, aeson ^>= 2.2.1.0
optimization: 2
common optimized
ghc-options:
-O2
......@@ -470,7 +469,6 @@ library
, bytestring ^>= 0.10.12.0
, case-insensitive ^>= 1.2.1.0
, cassava ^>= 0.5.2.0
, cborg ^>= 0.2.6.0
, cereal ^>= 0.5.8.2
, conduit ^>= 1.3.4.2
, conduit-extra ^>= 1.3.5
......@@ -601,8 +599,8 @@ library
, simple-reflect ^>= 0.3.3
, singletons ^>= 2.7
, singletons-th >= 3.1
, snowball ^>= 1.0.0.1
, split ^>= 0.2.3.4
, stemmer ^>= 0.5.2
, stm ^>= 2.5.0.1
, swagger2 ^>= 2.6
, taggy-lens ^>= 0.1.2
......@@ -818,7 +816,6 @@ executable gargantext-server
, text ^>= 1.2.4.1
, unordered-containers ^>= 0.2.16.0
, vector ^>= 0.7.3
optimization: 2
executable gargantext-upgrade
import:
......
......@@ -31,7 +31,7 @@ module Gargantext.API.Ngrams
, TableNgramsApiPut
, commitStatePatch
, searchTableNgrams
, getTableNgrams
, getTableNgramsCorpus
......@@ -164,7 +164,7 @@ commitStatePatch listId (Versioned _p_version p) = do
-- )
let newA = Versioned (a' ^. a_version) q'
-- NOTE Now is the only good time to save the archive history. We
-- have the handle to the MVar and we need to save its exact
-- snapshot. Node Story archive is a linear table, so it's only
......@@ -380,11 +380,11 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
-- | For each input root, extends its occurrence count with
-- the information found in the subitems.
withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement
withInners tblMap roots = Set.map addSubitemsOccurrences roots
withInners tblMap = Set.map addSubitemsOccurrences
where
addSubitemsOccurrences :: NgramsElement -> NgramsElement
addSubitemsOccurrences e =
e & ne_occurrences .~ (foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children))
e & ne_occurrences .~ foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_nre . nre_children)
alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId
alterOccurrences occs t = case Map.lookup t tblMap of
......
......@@ -9,18 +9,18 @@ Portability : POSIX
-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Ngrams.List.Types where
import Data.Aeson
import Data.Text.Encoding qualified as E
import Data.Aeson ( FromJSON(parseJSON), ToJSON(toJSON), eitherDecode', genericParseJSON, genericToJSON )
import Data.Swagger (ToSchema(..))
import Data.Text qualified as T
import Gargantext.API.Node.Corpus.New.Types (FileType(..))
import Gargantext.Core.Ngrams.Types (NgramsList)
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Protolude
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded (FromForm(..), ToForm, parseUnique)
import Data.Swagger.Schema (genericDeclareNamedSchema)
......@@ -51,7 +51,7 @@ instance FromForm WithJsonFile where
fromForm f = do
d' <- parseUnique "_wjf_data" f :: Either Text Text
d <- case eitherDecode' (toUtf8Lazy d') of
Left s -> Left $ pack s
Left s -> Left $ T.pack s
Right v -> Right v
n <- parseUnique "_wjf_name" f
pure $ WithJsonFile { _wjf_data = d
......
......@@ -47,7 +47,7 @@ import Gargantext.API.Node.Types (NewWithFile(..), NewWithForm, wf_data, wf_file
import Gargantext.Core (withDefaultLanguage, defaultLanguage)
import Gargantext.Core.Ngrams.Types (Versioned(..))
import Gargantext.Core.NodeStory (HasNodeStoryImmediateSaver, HasNodeArchiveStoryImmediateSaver, currentVersion, NgramsStatePatch')
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Corpus.Query qualified as API
import Gargantext.Core.Text.Corpus.Parsers qualified as Parser (parseFormatC)
import Gargantext.Core.Text.Corpus.Parsers.Types qualified as Parser (FileFormat(..))
import Gargantext.Core.Text.Terms (TermType(..))
......@@ -58,7 +58,8 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Mail (sendMail)
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataFile, hff_name, hff_path, toHyperdataDocument)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( toHyperdataDocument )
import Gargantext.Database.Admin.Types.Hyperdata.File ( HyperdataFile, hff_name, hff_path )
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), ParentId)
import Gargantext.Database.GargDB qualified as GargDB
import Gargantext.Database.Prelude (hasConfig)
......@@ -68,7 +69,7 @@ import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_max_docs_parsers)
import Gargantext.System.Logging (logLocM, LogLevel(..))
import Gargantext.Utils.Jobs (JobHandle, MonadJobStatus(..))
import Gargantext.Utils.Jobs.Monad ( JobHandle, MonadJobStatus(..) )
import Servant ((:>)(), Capture, FormUrlEncoded, JSON, Summary)
import Test.QuickCheck.Arbitrary (Arbitrary(..))
......@@ -265,7 +266,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
let ff = unFileFormat $ nwf ^. wf_fileformat
let data' = case ff of
Parser.Plain -> cs (nwf ^. wf_data)
Parser.ZIP -> case BSB64.decode $ TE.encodeUtf8 (nwf ^. wf_data) of
Parser.ZIP -> case BSB64.decode $ encodeUtf8 (nwf ^. wf_data) of
Left err -> panicTrace $ T.pack "[addToCorpusWithForm] error decoding base64: " <> T.pack err
Right decoded -> decoded
eDocsC <- liftBase $ parseC ff data'
......
......@@ -14,11 +14,10 @@ Portability : POSIX
module Gargantext.API.Node.Document.Export.Types where
import Data.Csv (DefaultOrdered(..), ToNamedRecord(..), (.=), header, namedRecord)
import Data.Swagger
import Data.Text.Encoding qualified as TE
import Gargantext.Core.Types
import Data.Swagger ( genericDeclareNamedSchema, ToParamSchema(..), ToSchema(..) )
import Gargantext.Core.Types ( Node, TODO )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude
import Servant
......
......@@ -21,28 +21,29 @@ import Data.Aeson (Options(..), defaultOptions, genericParseJSON, genericToJSON)
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Swagger hiding (title, url)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.API.Node.File
import Gargantext.API.Prelude
import Data.Swagger ( genericDeclareNamedSchema, ToSchema(..) )
import Gargantext.API.Node.File ( FileApi, fileApi )
import Gargantext.API.Prelude ( serverError, GargServer, GargServerC )
import Gargantext.Core (Lang)
import Gargantext.Core.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Text.Terms.Tokenize qualified as Tokenize
import Gargantext.Core.Text.Terms.Tokenize.Types qualified as Tokenize
import Gargantext.Core.Types (TokenTag(..))
import Gargantext.Core.Utils.DateUtils (utc2year)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Hyperdata.CorpusField
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( hc_fields )
import Gargantext.Database.Admin.Types.Hyperdata.Folder ( HyperdataFolder )
import Gargantext.Database.Admin.Types.Hyperdata.CorpusField ( CodeType(JSON), cf_authors, cf_desc, cf_query, cf_title, HyperdataField(HyperdataField), hf_data )
import Gargantext.Database.Admin.Types.Node (NodeId(UnsafeMkNodeId), Node, unNodeId)
import Gargantext.Database.Prelude (Cmd, DBCmd)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeNode (selectPublicNodes)
import Gargantext.Database.Schema.Node -- (NodePoly(..))
import Gargantext.Database.Schema.Node ( NodePoly(_node_id), node_date, node_hyperdata ) -- (NodePoly(..))
import Gargantext.Prelude
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Utils.Aeson qualified as GUA
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
------------------------------------------------------------------------
type API = API_Home
......
......@@ -35,13 +35,9 @@ module Gargantext.Core.Ngrams.Types
, NgramsElement(..)
, mkNgramsElement
, ne_children
, ne_list
, ne_ngrams
, ne_nre
, ne_occurrences
, ne_parent
, ne_root
, ne_size
, NgramsList
......@@ -122,7 +118,8 @@ import Codec.Serialise (Serialise())
import Control.Category ((>>>))
import Control.Lens (makePrisms, Iso', iso, from, (.=), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^?), (%~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~), over)
import Control.Monad.State
import Data.Aeson (FromJSONKey(..), FromJSONKeyFunction(..), ToJSONKey(..), decode, encode, genericFromJSONKey, defaultJSONKeyOptions, genericToJSONKey, defaultJSONKeyOptions, genericParseJSON, genericToEncoding, genericToJSON)
import Data.Aeson qualified as Aeson
import Data.Aeson (FromJSONKey(..), FromJSONKeyFunction(..), ToJSONKey(..), decode, encode, genericFromJSONKey, defaultJSONKeyOptions, genericToJSONKey, defaultJSONKeyOptions, genericParseJSON, genericToEncoding, genericToJSON, (.:), (.:?), withObject, object)
import Data.Foldable
import Data.HashMap.Strict.InsOrd qualified as InsOrdHashMap
import Data.Map.Strict qualified as Map
......
......@@ -15,7 +15,7 @@ module Gargantext.Core.Text.Corpus.Parsers.FrameWrite
import Data.Either
import Data.List qualified as List
import Data.Maybe
import Data.Text hiding (foldl)
import Data.Text hiding (foldl')
import Data.Text qualified as DT
import Gargantext.Core.Text (sentences)
import Gargantext.Prelude hiding (ByteString, (<|>), many, try)
......
......@@ -10,7 +10,6 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.Terms.Tokenize
where
......@@ -22,11 +21,11 @@ import Data.Interval qualified as I
import Data.IntervalSet qualified as IS
import Data.Set qualified as Set
import Data.Text qualified as T
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core (Lang, NLPServerConfig(..)) --, PosTagAlgo(CoreNLP))
import Gargantext.Core.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Terms.Multi (tokenTagsNoGroup)
import Gargantext.Core.Text.Terms.Tokenize.Types
import Gargantext.Core.Text.Terms.Tokenize.Types ( HighlightResult(..), HighlightedTerm(..), HasTokenizer, htToInterval, intervalToNt, hrToInterval )
import Gargantext.Core.Types (TokenTag(..), POS(..)) --, my_token_offset_end)
import Gargantext.Prelude
import Gargantext.Utils.Array (window)
......
......@@ -16,7 +16,7 @@ module Gargantext.Core.Text.Terms.Tokenize.Types
where
import Control.Monad.Trans.Control (MonadBaseControl)
import Data.Aeson.TH (defaultOptions, deriveJSON)
import Data.Aeson.TH (defaultOptions)
import Data.ExtendedReal (Extended(..))
import Data.Interval ((<=..<=))
import Data.Interval qualified as I
......
......@@ -24,7 +24,7 @@ import Data.Map.Strict.Patch qualified as PM
import Gargantext.Core.Ngrams.Tools (getNodeStory)
import Gargantext.Core.Ngrams.Types
( NgramsTerm(NgramsTerm),
NgramsRepoElement,
NgramsRepoElement(..),
NgramsElement(..),
ne_ngrams,
NgramsPatch(NgramsReplace),
......
......@@ -14,14 +14,13 @@ import qualified Data.ByteString.Lazy.Char8 as BSC
import Data.Csv (defaultEncodeOptions, encodeByNameWith, encodeDefaultOrderedByName, header, namedRecord, (.=), DefaultOrdered, EncodeOptions(..), NamedRecord, Quoting(QuoteNone), ToNamedRecord)
import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Gargantext.Core.Ngrams.Types (mSetToList, NgramsRepoElement(..), NgramsTableMap, NgramsTerm(..), unNgramsTerm)
import Gargantext.Core.Types.Main (ListType(..))
import Network.HTTP.Media ((//), (/:))
import qualified Prelude
import Protolude
import Protolude.Partial (read)
import Servant
import Servant ( Accept(contentType), MimeRender(..), MimeUnrender(mimeUnrender) )
data CSV = CSV
......
......@@ -13,82 +13,24 @@ Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Utils.SpacyNLP (
module Gargantext.Utils.SpacyNLP.Types
, spacyRequest
spacyRequest
, spacyTagsToToken
, spacyDataToPosSentences
, nlp
, spacyDataToTokenTags
, nlpTokenTags
) where
import Data.Aeson (encode)
import Data.Aeson.TH (deriveJSON)
import Data.Set qualified as Set
import Data.Text hiding (map, group, filter, concat, zip)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import Gargantext.Core.Types (POS(..), NER(..), TokenTag(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Data.Text ( splitOn, toLower )
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types ( Token(Token) )
import Gargantext.Core.Types (TokenTag(..))
import Gargantext.Prelude
import Gargantext.Utils.SpacyNLP.Types
import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
import Network.URI (URI(..))
-- | Wrapper record holding a strict list of 'SpacyText' values in
-- '_spacy_data'.
data SpacyData = SpacyData
  { _spacy_data :: ![SpacyText]
  }
  deriving (Show)
-- | A text ('_spacy_text') together with its list of 'SpacyTags'
-- ('_spacy_tags'). Both fields are strict.
data SpacyText = SpacyText
  { _spacy_text :: !Text
  , _spacy_tags :: ![SpacyTags]
  }
  deriving (Show)
-- | Attribute record; see https://spacy.io/api/token/#attributes
--
-- Fields marked with @!@ are strict; the remaining ones (the index, the
-- boolean flags, and the two 'POS' fields) are left lazy, exactly as declared.
--
-- NOTE(review): presumably one value per token returned by the spaCy
-- server -- confirm against the JSON instances and the server output.
data SpacyTags =
  SpacyTags
    { _spacyTags_text           :: !Text
    , _spacyTags_text_with_ws   :: !Text
    , _spacyTags_whitespace     :: !Text
    , _spacyTags_head           :: !Text
    , _spacyTags_head_index     :: !Int
    , _spacyTags_left_edge      :: !Text
    , _spacyTags_right_edge     :: !Text
    , _spacyTags_index          :: Int
    , _spacyTags_ent_type       :: !NER
    , _spacyTags_ent_iob        :: !Text
    , _spacyTags_lemma          :: !Text
    , _spacyTags_normalized     :: !Text
    , _spacyTags_shape          :: !Text
    , _spacyTags_prefix         :: !Text
    , _spacyTags_suffix         :: !Text
    , _spacyTags_is_alpha       :: Bool
    , _spacyTags_is_ascii       :: Bool
    , _spacyTags_is_digit       :: Bool
    , _spacyTags_is_title       :: Bool
    , _spacyTags_is_punct       :: Bool
    , _spacyTags_is_left_punct  :: Bool
    , _spacyTags_is_right_punct :: Bool
    , _spacyTags_is_space       :: Bool
    , _spacyTags_is_bracket     :: Bool
    , _spacyTags_is_quote       :: Bool
    , _spacyTags_is_currency    :: Bool
    , _spacyTags_like_url       :: Bool
    , _spacyTags_like_num       :: Bool
    , _spacyTags_like_email     :: Bool
    , _spacyTags_is_oov         :: Bool
    , _spacyTags_is_stop        :: Bool
    , _spacyTags_pos            :: POS
    , _spacyTags_tag            :: POS
    , _spacyTags_dep            :: !Text
    , _spacyTags_lang           :: !Text
    , _spacyTags_prob           :: !Int
    , _spacyTags_char_offset    :: !Int
    }
  deriving (Show)
-- | Record with a single strict 'Text' field, '_spacyRequest_text'.
--
-- NOTE(review): presumably the request payload sent to the spaCy server --
-- the code that uses it is not visible here; confirm.
data SpacyRequest = SpacyRequest
  { _spacyRequest_text :: !Text
  }
  deriving (Show)
spacyRequest :: URI -> Text -> IO SpacyData
spacyRequest uri txt = do
req <- parseRequest $ "POST " <> show (uri { uriPath = "/pos" })
......
......@@ -29,6 +29,7 @@ data SpacyData = SpacyData { _spacy_data :: ![SpacyText]}
-- | A text ('_spacy_text') together with its list of 'SpacyTags'
-- ('_spacy_tags'). Both fields are strict.
data SpacyText = SpacyText
  { _spacy_text :: !Text
  , _spacy_tags :: ![SpacyTags]
  }
  deriving (Show)
-- | https://spacy.io/api/token/#attributes
data SpacyTags =
SpacyTags { _spacyTags_text :: !Text
, _spacyTags_text_with_ws :: !Text
......
"allow-newer": true
"extra-deps":
- corenlp-types-0.1.0.0@sha256:96538c47dddc96dff11922f8bfa1fd27586fbcce42f6ca2a7fc06cd652a36850,3557
- "HSvm-0.1.1.3.22"
- "KMP-0.2.0.0"
- "MissingH-1.4.3.0"
......@@ -40,7 +41,7 @@
- "servant-flatten-0.2"
- "servant-server-0.20"
- "snap-server-1.1.2.1"
- "stemmer-0.5.2"
- "snowball-1.0.0.1"
- "taggy-0.2.1"
- "taggy-lens-0.1.2"
- "tomland-1.3.3.2"
......@@ -247,8 +248,6 @@ flags:
"bytestring--lt-0_10_4": false
"cassava-conduit":
small_base: true
cborg:
"optimize-gmp": true
cereal:
"bytestring-builder": false
"cipher-aes":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment