Fix #28 (#32)

056c0dfc · Neel Kamath · GitHub · 8237b938 · 056c0dfc · 056c0dfc
Unverified Commit 056c0dfc authored Dec 28, 2019 by Neel Kamath Committed by GitHub Dec 28, 2019
13 changed files
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -30,12 +30,12 @@ test-server:
 test-spec:
  stage: test
  image: node
-  script: npx @stoplight/spectral lint docs/openapi.yaml
+  script: npx @stoplight/spectral lint docs/spec/openapi.yaml

 build-docs:
  stage: build
  image: node
-  script: npx redoc-cli bundle docs/openapi.yaml --title 'spaCy Server'
+  script: npx redoc-cli bundle docs/spec/openapi.yaml --title 'spaCy Server'
  artifacts:
    paths: [redoc-static.html]
  only: [master]

--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ Install [Docker](https://hub.docker.com/search/?type=edition&offering=community)

 ### Generating an SDK

-You can generate a wrapper for the HTTP API using [OpenAPI Generator](https://openapi-generator.tech/) on the file [`https://raw.githubusercontent.com/neelkamath/spacy-server/master/docs/openapi.yaml`](https://raw.githubusercontent.com/neelkamath/spacy-server/master/docs/openapi.yaml).
+You can generate a wrapper for the HTTP API using [OpenAPI Generator](https://openapi-generator.tech/) on the file [`https://raw.githubusercontent.com/neelkamath/spacy-server/master/docs/spec/openapi.yaml`](https://raw.githubusercontent.com/neelkamath/spacy-server/master/docs/spec/openapi.yaml).

 ## [Usage](https://hub.docker.com/r/neelkamath/spacy-server)


--- a/docs/developing.md
+++ b/docs/developing.md
@@ -33,22 +33,26 @@ The container `EXPOSE`s port `8000`. Run using `docker run --rm -p 8000:8000 spa

 ## Specification

-`docs/openapi.yaml` is the [OpenAPI specification](https://swagger.io/specification/) for the HTTP API. Use `$ref` instead of inlining `schema`s so that OpenAPI Generator will name give usable names to the models. Validate the schema by running `npx @stoplight/spectral lint docs/openapi.yaml`.
+`docs/spec/` contains the [OpenAPI specification](https://swagger.io/specification/) for the HTTP API. Use `$ref` instead of inlining `schema`s so that OpenAPI Generator will give usable names to the models.

-## Documentation
+### Testing
+
+```
+npx @stoplight/spectral lint docs/spec/openapi.yaml
+```

 ### Developing

 ``` 
-npx redoc-cli serve docs/openapi.yaml -w
+npx redoc-cli serve docs/spec/openapi.yaml -w
 ```

-Open `http://127.0.0.1:8080` in your browser. The documentation will automatically rebuild whenever you save a change to `docs/openapi.yaml`. Refresh the page whenever you want to view the updated documentation.
+Open `http://127.0.0.1:8080` in your browser. The documentation will automatically rebuild whenever you save a change to `docs/spec/openapi.yaml`. Refresh the page whenever you want to view the updated documentation.

 ### Production

 ``` 
-npx redoc-cli bundle docs/openapi.yaml -o redoc-static.html --title 'spaCy Server'
+npx redoc-cli bundle docs/spec/openapi.yaml -o redoc-static.html --title 'spaCy Server'
 ```

 Open `redoc-static.html` in your browser.
@@ -57,7 +61,7 @@ Open `redoc-static.html` in your browser.

 - If you haven't updated the HTTP API functionality, skip this step.
    1. If you haven't bumped the version in the OpenAPI spec, delete the corresponding GitHub release and git tag.
-    1. Generate  `redoc-static.html`: `npx redoc-cli bundle docs/openapi.yaml -o redoc-static.html --title 'spaCy Server'`
+    1. Generate `redoc-static.html`: `npx redoc-cli bundle docs/spec/openapi.yaml -o redoc-static.html --title 'spaCy Server'`
    1. Create a GitHub release. The release's body should be ```Download and open the release asset, `redoc-static.html`, in your browser to view the HTTP API documentation.```. Upload `redoc-static.html` as an asset.
 - If required, update the [Docker Hub repository](https://hub.docker.com/r/neelkamath/spacy-server)'s **Overview**.
 - For every commit to the `master` branch in which the tests have passed, the following will automatically be done.

--- a/docs/openapi.yaml
+++ b/docs/openapi.yaml
--- a/docs/spec/components.yaml
+++ b/docs/spec/components.yaml
--- a/docs/spec/health_check.yaml
+++ b/docs/spec/health_check.yaml
+get:
+  tags: [status]
+  description: Check if all systems are operational
+  operationId: healthCheck
+  responses:
+    '204':
+      description: All systems are operational
\ No newline at end of file
--- a/docs/spec/ner.yaml
+++ b/docs/spec/ner.yaml
+post:
+  tags: [nlp]
+  description: Named entity recognition. The pretrained model must have the `ner` and `parser` pipeline components
+    to use this endpoint. If a sense2vec model was bundled with the service, similar phrases can also be provided.
+  operationId: ner
+  requestBody:
+    required: true
+    description: Text to process
+    content:
+      application/json:
+        example:
+          sections:
+            - Net income was $9.4 million compared to the prior year of $2.7 million. Google is a big company.
+            - Revenue exceeded twelve billion dollars, with a loss of $1b.
+          sense2vec: true
+        schema:
+          $ref: components.yaml#/components/schemas/NERRequest
+  responses:
+    '200':
+      description: Labeled text, with phrases similar to each entity
+      content:
+        application/json:
+          example:
+            data:
+              - text: Net income was $9.4 million compared to the prior year of $2.7 million.
+                entities:
+                  - text: "$9.4 million"
+                    label: MONEY
+                    start_char: 15
+                    end_char: 27
+                    lemma: "$ 9.4 million"
+                    start: 3
+                    end: 6
+                    text_with_ws: "$9.4 million "
+                    sense2vec: []
+                  - text: the prior year
+                    label: DATE
+                    start_char: 40
+                    end_char: 54
+                    lemma: the prior year
+                    start: 8
+                    end: 11
+                    text_with_ws: 'the prior year '
+                    sense2vec:
+                      - phrase: the previous year
+                        similarity: 0.7810999751091003
+                      - phrase: that year
+                        similarity: 0.7346000075340271
+                      - phrase: the year prior
+                        similarity: 0.7343000173568726
+                      - phrase: the first year
+                        similarity: 0.7110000252723694
+                      - phrase: previous year
+                        similarity: 0.7109000086784363
+                      - phrase: the year before
+                        similarity: 0.7056999802589417
+                      - phrase: the following year
+                        similarity: 0.6967999935150146
+                      - phrase: prior years
+                        similarity: 0.695900022983551
+                      - phrase: the next year
+                        similarity: 0.6915000081062317
+                      - phrase: the entire year
+                        similarity: 0.6915000081062317
+                  - text: "$2.7 million"
+                    label: MONEY
+                    start_char: 58
+                    end_char: 70
+                    lemma: "$ 2.7 million"
+                    start: 12
+                    end: 15
+                    text_with_ws: "$2.7 million"
+                    sense2vec:
+                      - phrase: "$1 million"
+                        similarity: 0.7616000175476074
+                      - phrase: "$2.4 million"
+                        similarity: 0.7502999901771545
+                      - phrase: "$1.5 million"
+                        similarity: 0.7490000128746033
+                      - phrase: "$5 million"
+                        similarity: 0.7437999844551086
+                      - phrase: "$3 million"
+                        similarity: 0.741599977016449
+                      - phrase: more than $50,000
+                        similarity: 0.739799976348877
+                      - phrase: "$1.4 million"
+                        similarity: 0.7390999794006348
+                      - phrase: '750,000'
+                        similarity: 0.7386000156402588
+                      - phrase: '480,000'
+                        similarity: 0.7379999756813049
+                      - phrase: "$2 million"
+                        similarity: 0.7371000051498413
+              - text: Google is a big company.
+                entities:
+                  - text: Google
+                    label: ORG
+                    start_char: 72
+                    end_char: 78
+                    lemma: Google
+                    start: 16
+                    end: 17
+                    text_with_ws: 'Google '
+                    sense2vec:
+                      - phrase: " Google"
+                        similarity: 0.8996000289916992
+                      - phrase: search engine
+                        similarity: 0.8485999703407288
+                      - phrase: Bing
+                        similarity: 0.8435999751091003
+                      - phrase: even Google
+                        similarity: 0.840399980545044
+                      - phrase: google
+                        similarity: 0.8317999839782715
+                      - phrase: Google Search
+                        similarity: 0.8291000127792358
+                      - phrase: Googles
+                        similarity: 0.8234000205993652
+                      - phrase: "&gt;Google"
+                        similarity: 0.8137999773025513
+                      - phrase: DuckDuckGo
+                        similarity: 0.8126999735832214
+                      - phrase: Yahoo
+                        similarity: 0.8037999868392944
+              - text: Revenue exceeded twelve billion dollars, with a loss of $1b.
+                entities:
+                  - text: twelve billion dollars
+                    label: MONEY
+                    start_char: 17
+                    end_char: 39
+                    lemma: twelve billion dollar
+                    start: 2
+                    end: 5
+                    text_with_ws: twelve billion dollars
+                    sense2vec: []
+                  - text: 1b
+                    label: MONEY
+                    start_char: 57
+                    end_char: 59
+                    lemma: 1b
+                    start: 11
+                    end: 12
+                    text_with_ws: 1b
+                    sense2vec:
+                      - phrase: 100m
+                        similarity: 0.878000020980835
+                      - phrase: 1B
+                        similarity: 0.8756999969482422
+                      - phrase: 100M
+                        similarity: 0.8744999766349792
+                      - phrase: 200M
+                        similarity: 0.8553000092506409
+                      - phrase: "$100 million"
+                        similarity: 0.8299000263214111
+                      - phrase: 50M
+                        similarity: 0.8292999863624573
+                      - phrase: "$50 million"
+                        similarity: 0.8273000121116638
+                      - phrase: 70m
+                        similarity: 0.8263000249862671
+                      - phrase: 300m
+                        similarity: 0.8253999948501587
+                      - phrase: 100B
+                        similarity: 0.8209999799728394
+          schema:
+            $ref: components.yaml#/components/schemas/NERResponse
+    '400':
+      description: The pretrained model lacks the `ner` or `parser` pipeline components, or similar phrases were
+        requested but a sense2vec model wasn't bundled with the service.
+      content:
+        application/json:
+          examples:
+            invalid_model:
+              summary: The spaCy model lacks the required pipeline components.
+              value:
+                detail: The pretrained model (en_trf_bertbaseuncased_lg) doesn't support named entity recognition.
+            sense2vec_disabled:
+              summary: Similar phrases via sense2vec were requested, but a sense2vec model wasn't bundled with the
+                service.
+              value:
+                detail: There is no sense2vec model bundled with this service.
+          schema:
+            $ref: components.yaml#/components/schemas/InvalidModel
\ No newline at end of file
--- a/docs/spec/openapi.yaml
+++ b/docs/spec/openapi.yaml
+openapi: 3.0.2
+info:
+  title: spaCy Server
+  version: '2'
+  description: |
+    Industrial-strength NLP via [spaCy](https://spacy.io) and [sense2vec](https://github.com/explosion/sense2vec). No
+    knowledge of spaCy or sense2vec is required to use this service.
+
+    You can use any spaCy-compatible model with this service. Different models offer different labels. In certain parts
+    of the docs, you will be asked to refer to your choice of model's "label scheme". The label scheme can be found at
+    https://spacy.io/models (e.g., https://spacy.io/models/en#en_core_web_sm-labels is the label scheme for the
+    `en_core_web_sm` model).
+
+    You can view the documentation for previous versions [here](https://github.com/neelkamath/spacy-server/releases).
+  contact:
+    name: Neel Kamath
+    email: neelkamathonline@gmail.com
+    url: https://github.com/neelkamath/spacy-server
+  license:
+    name: MIT
+    url: https://opensource.org/licenses/MIT
+servers:
+  - url: http://localhost:8000
+    description: You'll have to [run the server](https://github.com/neelkamath/spacy-server#installation) yourself.
+paths:
+  /ner:
+    $ref: ner.yaml
+  /sense2vec:
+    $ref: sense2vec.yaml
+  /pos:
+    $ref: pos.yaml
+  /tokenizer:
+    $ref: tokenizer.yaml
+  /sentencizer:
+    $ref: sentencizer.yaml
+  /health_check:
+    $ref: health_check.yaml
\ No newline at end of file
--- a/docs/spec/pos.yaml
+++ b/docs/spec/pos.yaml
--- a/docs/spec/sense2vec.yaml
+++ b/docs/spec/sense2vec.yaml
+post:
+  tags: [nlp]
+  description: Compute phrases similar to a phrase in a sentence. sense2vec must be bundled with the service, and
+    the pretrained model must have the `ner` and `parser` pipeline components.
+  operationId: sense2vec
+  requestBody:
+    required: true
+    description: The phrase in the sentence
+    content:
+      application/json:
+        example:
+          sentence: Bill Gates founded Microsoft on April 4, 1975.
+          phrase: Bill Gates
+        schema:
+          $ref: components.yaml#/components/schemas/SentenceWithPhrase
+  responses:
+    '200':
+      description: Computed phrases
+      content:
+        application/json:
+          example:
+            sense2vec:
+              - phrase: Mark Zuckerberg
+                similarity: 0.850600004196167
+              - phrase: Warren Buffet
+                similarity: 0.8501999974250793
+              - phrase: Warren Buffett
+                similarity: 0.8375999927520752
+              - phrase: bill gates
+                similarity: 0.8215000033378601
+              - phrase: Steve Jobs
+                similarity: 0.8180999755859375
+              - phrase: Zuckerberg
+                similarity: 0.8163999915122986
+              - phrase: Elon Musk
+                similarity: 0.8140000104904175
+              - phrase: Bill gates
+                similarity: 0.8119999766349792
+              - phrase: billionaire
+                similarity: 0.8116999864578247
+              - phrase: Elon Musk
+                similarity: 0.8011999726295471
+          schema:
+            $ref: components.yaml#/components/schemas/Sense2vecPhrases
+    '400':
+      description: sense2vec is disabled, or the pretrained model lacks the `ner` or `parser` pipeline components.
+      content:
+        application/json:
+          examples:
+            invalid_model:
+              summary: The spaCy model lacks the required pipeline components.
+              value:
+                detail: The pretrained model (en_trf_bertbaseuncased_lg) doesn't support named entity recognition.
+            sense2vec_disabled:
+              summary: Similar phrases via sense2vec were requested, but a sense2vec model wasn't bundled with the
+                service.
+              value:
+                detail: There is no sense2vec model bundled with this service.
+            phrase_nonexistent:
+              summary: The phrase isn't present in the sentence.
+              value:
+                detail: phrase must be in sentence
+          schema:
+            $ref: components.yaml#/components/schemas/InvalidModel
\ No newline at end of file
--- a/docs/spec/sentencizer.yaml
+++ b/docs/spec/sentencizer.yaml
+post:
+  tags: [nlp]
+  description: Sentence segmentation. The pretrained model must have the `parser` pipeline component for this
+    endpoint to be available.
+  operationId: sentencizer
+  requestBody:
+    required: true
+    description: Text to segment into sentences
+    content:
+      application/json:
+        example:
+          text: Apple is looking at buying U.K. startup for $1 billion. Another sentence.
+        schema:
+          $ref: components.yaml#/components/schemas/Text
+  responses:
+    '200':
+      description: Sentences
+      content:
+        application/json:
+          example:
+            sentences: [Apple is looking at buying U.K. startup for $1 billion., Another sentence.]
+          schema:
+            $ref: components.yaml#/components/schemas/Sentences
+    '400':
+      description: The pretrained model lacks the `parser` pipeline component.
+      content:
+        application/json:
+          example:
+            detail: The pretrained model (en_trf_bertbaseuncased_lg) doesn't support sentence segmentation.
+          schema:
+            $ref: components.yaml#/components/schemas/InvalidModel
\ No newline at end of file
--- a/docs/spec/tokenizer.yaml
+++ b/docs/spec/tokenizer.yaml
+post:
+  tags: [nlp]
+  description: Tokenization
+  operationId: tokenizer
+  requestBody:
+    required: true
+    description: Text to tokenize
+    content:
+      application/json:
+        example:
+          text: Apple is looking at buying U.K. startup for $1 billion
+        schema:
+          $ref: components.yaml#/components/schemas/Text
+  responses:
+    '200':
+      description: Tokenized text
+      content:
+        application/json:
+          example:
+            tokens: [Apple, is, looking, at, buying, U.K., startup, for, $, '1', billion]
+          schema:
+            $ref: components.yaml#/components/schemas/Tokens
\ No newline at end of file
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -16,7 +16,7 @@ deploy () {
 }

 # Get the HTTP API version.
-version=$(grep version docs/openapi.yaml -m 1)
+version=$(grep version docs/spec/openapi.yaml -m 1)
 version=${version#*: }
 version=$(echo "$version" | cut -d "'" -f 2)