Commit 3edf5357 authored by Mael NICOLAS's avatar Mael NICOLAS

Initial commit

parents
.stack-work/
grobid-binding.cabal
*~
\ No newline at end of file
# Changelog for grobid-binding
## Unreleased changes
Copyright Author name here (c) 2018
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Author name here nor the names of other
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# grobid-binding
import Distribution.Simple
-- | Cabal @Setup.hs@ entry point: hands control straight to 'defaultMain'
-- from "Distribution.Simple".
main = defaultMain
module Main where
import Lib
import Type
{-
GROBID REST endpoints this binding is meant to cover.
"P,Pu" presumably marks an endpoint that accepts both POST and PUT;
"->" gives the response format.

P,Pu: /api/processHeaderDocument -> xml
    input                 (required) PDF file to be processed
    consolidateHeader     0 or 1

P,Pu: /api/processFulltextDocument -> xml
    input                 (required) PDF file to be processed
    consolidateHeader     0 or 1
    consolidateCitations  0 or 1
    teiCoordinates        list of element names for which coordinates in the
                          PDF document have to be added

P,Pu: /api/processReferences -> xml
    input                 (required) PDF file to be processed
    consolidateCitations  0 or 1
-}
-- | Executable entry point; simply runs 'someFunc' (exported by "Lib").
main :: IO ()
main = someFunc
File added
name: grobid-binding
version: 0.1.0.0
github: "githubuser/grobid-binding"
license: BSD3
author: "Author name here"
maintainer: "example@example.com"
copyright: "2018 Author name here"
extra-source-files:
- README.md
- ChangeLog.md
# Metadata used when publishing your package
# synopsis: Short description of your package
# category: Web
# To avoid duplicated efforts in documentation and dealing with the
# complications of embedding Haddock markup inside cabal files, it is
# common to point users to the README.md file.
description: Please see the README on Github at <https://github.com/githubuser/grobid-binding#readme>
dependencies:
- base >= 4.7 && < 5
- servant
- servant-client
- servant-client-core
- servant-multipart
- text
- filepath
- wai
- warp
- aeson
- either
- http-client
- http-types
- http-media
- containers
- mime-types
- bytestring
- case-insensitive
library:
source-dirs: src
executables:
grobid-binding-exe:
main: Main.hs
source-dirs: app
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- grobid-binding
tests:
grobid-binding-test:
main: Spec.hs
source-dirs: test
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- grobid-binding
module Lib
( someFunc
) where
-- | Placeholder action: writes the literal line @someFunc@ to standard output.
someFunc :: IO ()
someFunc = putStrLn "someFunc"
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-|
Copyright: 2017 (C) AlphaSheets, Inc
Description: Client support for multipart.
-}
module Multipart where
import Control.Monad
import Control.Monad.IO.Class (MonadIO(..))
import Data.Monoid
import Data.Sequence as Seq
import Network.HTTP.Client.Internal
import Network.HTTP.Client.MultipartFormData (webkitBoundary)
import Network.HTTP.Media.MediaType
import Network.HTTP.Types
import Servant.API
import Servant.Client
import Servant.Multipart
import System.FilePath
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.CaseInsensitive as CI
import qualified Data.Foldable
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Network.HTTP.Types as HTTP
import qualified Network.Mime as Mime
-- | One section of a @multipart/form-data@ message.
--
-- Modelled on @Network.HTTP.Client.MultipartFormData.Part@.
data Part = Part
  { partName        :: T.Text
    -- ^ Name of the corresponding \<input\>
  , partFilename    :: Maybe String
    -- ^ File name, present when this part is an attached file
  , partContentType :: Maybe Mime.MimeType
    -- ^ Content type, if one should be sent
  , partHeaders     :: [HTTP.Header]
    -- ^ Extra headers to emit for this part
  , partGetBody     :: IO BS.ByteString
    -- ^ 'IO' action that yields the body of the part.
    -- XXX: This is servant's request body
  }
-- | Types that can be serialised as a list of multipart 'Part's.
--
-- Individual parts are usually built with 'partInput' (key-value field)
-- or 'partFile' (local file); see also 'Part'.
class ToMultipart a where
  -- | Break a value down into the parts that make up its form body.
  toMultipart :: a -> [Part]
-- | A somewhat meaningful instance of 'HasClient' for multipart data: the
-- generated client function takes a 'ToMultipart' value and sends it as a
-- @multipart/form-data@ request body.
--
-- Note that the whole body is rendered in memory before the request is
-- handed on, so when a file 'Part' is encountered the whole file will be
-- loaded up in memory. As such,
--
-- !!!!THIS SHOULD BE USED WITH CAUTION!!!!
--
-- See https://github.com/haskell-servant/servant/issues/886
--
instance (ToMultipart ty, Client m sub ~ m a, MonadIO m, HasClient m sub) =>
    HasClient m (MultipartForm Tmp ty :> sub) where

  -- The client for this route takes the form payload as an extra argument.
  type Client m (MultipartForm Tmp ty :> sub) = ty -> Client m sub

  clientWithRoute
    :: Proxy m
    -> Proxy (MultipartForm Tmp ty :> sub)
    -> Request
    -> ty
    -> Client m sub
  clientWithRoute pMonad _p2 req dfi = do
    -- Adapted from @Network.HTTP.Client.MultipartFormData.formDataBody@
    boundary <- liftIO webkitBoundary
    body <- liftIO $ renderParts boundary $ toMultipart dfi
    let req' = req
          { -- Drop any Content-Type header already present; the media type
            -- attached to the body below carries the boundary.
            requestHeaders =
              Seq.filter (\(x, _) -> x /= hContentType) (requestHeaders req)
          , requestBody = Just
              ( -- 'renderParts' yields a strict ByteString while
                -- 'RequestBodyLBS' wants a lazy one. The original code put
                -- 'undefined' here, which crashed as soon as the body was
                -- forced.
                RequestBodyLBS (BSL.fromStrict body)
              , "multipart" // "form-data" /: ("boundary", boundary)
              )
          }
    clientWithRoute pMonad (Proxy @sub) req'
-------------------------------------------------------------------------------
-- XXX: The functions 'renderParts' and 'renderPart' were adapted from
-- @Network.HTTP.Client.MultipartFormData@.
-------------------------------------------------------------------------------
-- | Render every 'Part' in order and join the results into a single
-- multipart/form-data body, finished off with the closing boundary line.
renderParts
  :: BS.ByteString  -- ^ Boundary between parts.
  -> [Part]
  -> IO BS.ByteString
renderParts boundary parts = do
  rendered <- mapM (renderPart boundary) parts
  return (mconcat rendered <> closingDelimiter)
  where
    -- The final delimiter carries an extra trailing "--".
    closingDelimiter = "--" <> boundary <> "--\r\n"
-- | Render one 'Part': the opening boundary line, its Content-Disposition
-- (with an optional filename), an optional Content-Type, any additional
-- headers, a blank line, and finally the body obtained by running the
-- part's body action.
renderPart
  :: BS.ByteString  -- ^ Boundary between parts.
  -> Part
  -> IO BS.ByteString
renderPart boundary (Part name mfilename mcontenttype hdrs get) =
  assemble <$> get
  where
    assemble payload = mconcat
      [ "--", boundary, "\r\n"
      , "Content-Disposition: form-data; name=\""
      , TE.encodeUtf8 name
      , filenameAttr
      , "\""
      , contentTypeLine
      , extraHeaders
      , "\r\n\r\n"
      , payload
      , "\r\n"
      ]

    -- Closes the name's quote and opens the filename's; the shared closing
    -- quote is emitted by 'assemble'. Only the last path component is sent.
    filenameAttr =
      maybe mempty
            (\f -> "\"; filename=\"" <> TE.encodeUtf8 (T.pack (takeFileName f)))
            mfilename

    contentTypeLine =
      maybe mempty (\ct -> "\r\n" <> "Content-Type: " <> ct) mcontenttype

    extraHeaders = Data.Foldable.foldMap headerLine hdrs

    headerLine (k, v) = "\r\n" <> CI.original k <> ": " <> v
-- | Build a plain key-value 'Part': no filename, no content type, no extra
-- headers, and a body that is just the given bytes.
partInput :: T.Text -> BS.ByteString -> Part
partInput n b =
  Part
    n         -- partName
    Nothing   -- partFilename
    Nothing   -- partContentType
    []        -- partHeaders
    (pure b)  -- partGetBody
-- | Build a 'Part' backed by a local file. The mime type is never guessed
-- from the file; it is sent only when the caller supplies one. The file is
-- read by the part's body action, not when the 'Part' is constructed.
partFile
  :: T.Text
  -- ^ The part name
  -> FilePath
  -- ^ The path to the local file
  -> Maybe Mime.MimeType
  -- ^ Optional mime type
  -> Part
partFile n fp mmt =
  -- Start from an empty key-value part (which already has no extra headers)
  -- and override everything that is file-specific.
  (partInput n BS.empty)
    { partFilename    = Just fp
    , partContentType = mmt
    , partGetBody     = BS.readFile fp
    }
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
module Type where
import Data.Text as T
import Servant.API
import Servant.API.ContentTypes
import Servant.Multipart
import Servant.Client
import Data.Proxy
-- | Shape shared by both endpoints of 'GrobidAPI': a multipart form upload
-- answered by a POST response decoded from plain text into 'T.Text'.
type GrobidUpload =
  MultipartForm Tmp (MultipartData Mem) :> Post '[PlainText] T.Text

-- | Servant description of the GROBID routes bound so far:
-- @processHeaderDocument@ and @process@.
type GrobidAPI =
       "processHeaderDocument" :> GrobidUpload
  :<|> "process" :> GrobidUpload
-- | Proxy that pins down the API type for 'client'.
grobidAPI :: Proxy GrobidAPI
grobidAPI = Proxy

-- | Address of the GROBID service: plain HTTP on @localhost:8080@ with an
-- empty base path. 'client' does not take the base URL; it has to be supplied
-- through the client environment when a 'ClientM' action is run.
-- NOTE(review): if the service is mounted under a path prefix, that prefix
-- belongs in the last field — confirm against the deployment.
grobidBaseUrl :: BaseUrl
grobidBaseUrl = BaseUrl Http "localhost" 8080 ""

-- | Client functions derived from 'GrobidAPI', one per route, in the order
-- the routes appear in the API type.
-- NOTE(review): this signature takes a 'FilePath', whereas the multipart
-- payload declared in 'GrobidAPI' is @MultipartData Mem@; one of the two
-- still has to be brought in line with the other — confirm the intended
-- argument type.
processHeaderDocument :: FilePath -> ClientM T.Text
processHeaderDocument :<|> process = client grobidAPI
-- | Hard-coded path used as the argument of 'processHeaderDocument' in
-- 'queries'.
-- NOTE(review): this is an absolute path and looks like it points at a
-- directory (@files@) inside one developer's home rather than at a single
-- file — confirm before relying on it.
fileTest :: FilePath
fileTest = "/home/mudada/Code/Haskell/grobid-binding/files"
-- | Sample client action: calls 'processHeaderDocument' on 'fileTest'.
queries :: ClientM T.Text
queries = processHeaderDocument fileTest
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# https://docs.haskellstack.org/en/stable/yaml_configuration/
# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies. For example:
#
# resolver: lts-3.5
# resolver: nightly-2015-09-21
# resolver: ghc-7.10.2
# resolver: ghcjs-0.1.0_ghc-7.10.2
# resolver:
# name: custom-snapshot
# location: "./custom-snapshot.yaml"
resolver: lts-10.4
# User packages to be built.
# Various formats can be used as shown in the example below.
#
# packages:
# - some-directory
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
# - location:
# git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# extra-dep: true
# subdirs:
# - auto-update
# - wai
#
# A package marked 'extra-dep: true' will only be built if demanded by a
# non-dependency (i.e. a user package), and its test suites and benchmarks
# will not be run. This is useful for tweaking upstream packages.
packages:
- .
# Dependency packages to be pulled from upstream that are not in the resolver
# (e.g., acme-missiles-0.3)
extra-deps:
- servant-multipart-0.11.1
- servant-client-core-0.13
- http-types-0.12
- servant-0.13
- text-1.2.3.0
allow-newer: true
# Override default flag values for local packages and extra-deps
# flags: {}
# Extra package databases containing global packages
# extra-package-dbs: []
# Control whether we use the GHC we find on the path
# system-ghc: true
#
# Require a specific version of stack, using version ranges
# require-stack-version: -any # Default
# require-stack-version: ">=1.6"
#
# Override the architecture used by stack, especially useful on Windows
# arch: i386
# arch: x86_64
#
# Extra directories used by stack for building
# extra-include-dirs: [/path/to/dir]
# extra-lib-dirs: [/path/to/dir]
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor
-- | Placeholder test entry point: only prints a notice that no test suite
-- has been written yet.
main :: IO ()
main = putStrLn "Test suite not yet implemented"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment