Text Generate REST API schema (#18)

* Create generate_rest.yaml Propose generate rest api endpoints Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> --------- Signed-off-by: Gavrish Prabhu <[email protected]>
kserve · Feb 6, 2024 · 52528cf · 52528cf
1 parent 853da9f
commit 52528cf
Showing 1 changed file with 255 additions and 0 deletions.
diff --git a/specification/protocol/generate_rest.yaml b/specification/protocol/generate_rest.yaml
@@ -0,0 +1,255 @@
+# OpenAPI 3.1 document for the text generation REST endpoints.
+# Version identifiers are quoted so they are always read as strings.
+openapi: '3.1.0'
+info:
+  title: Open Inference API for text generation
+  description: Open Inference API for text generation
+  version: '1.0.0'
+components:
+  schemas:
+    # Detail object referenced by GenerateResponse.details; both listed
+    # fields are mandatory, and extra properties are permitted.
+    Details:
+      type: object
+      additionalProperties: {}
+      required: [finish_reason, logprobs]
+      properties:
+        finish_reason:
+          $ref: '#/components/schemas/Finish_Reason'
+        logprobs:
+          $ref: '#/components/schemas/Logprobs'
+    # Closed set of values describing why generation ended.
+    Finish_Reason:
+      description: >-
+        The reason the model stopped generating tokens. `length` if number of
+        generated tokens == `max_tokens`. `eos_token` if the model generated
+        its end of sequence token and `stop_sequence` if the model generated a
+        text included in `stop` array
+      type: string
+      enum: [length, eos_token, stop_sequence]
+    # Error body: an object carrying a single mandatory `error` string.
+    GenerateErrorResponse:
+      type: object
+      required: [error]
+      properties:
+        error:
+          type: string
+    # Generation controls referenced by GenerateRequest.parameters. No
+    # property is required, and additional properties are permitted.
+    GenerateParameters:
+      type: object
+      additionalProperties: {}
+      properties:
+        temperature:
+          description: >-
+            What sampling temperature to use, higher values like 0.8 will make
+            the output more random, while lower values like 0.2 will make it
+            more focused and deterministic.
+          type: number
+          format: float
+          minimum: 0
+          default: 1
+        top_p:
+          description: >-
+            An alternative to sampling with temperature, called nucleus
+            sampling, where the model considers the results of the tokens with
+            top_p probability mass. So 0.1 means only the tokens comprising the
+            top 10% probability mass are considered.
+          type: number
+          format: float
+          minimum: 0
+          maximum: 1
+        max_tokens:
+          description: The maximum number of tokens to generate in the completion.
+          type: integer
+          format: int32
+          minimum: 1
+          default: 20
+        stop:
+          description: Sequences where the API will stop generating further tokens.
+          type: array
+          items:
+            type: string
+        details:
+          description: >-
+            Flag to request for detailed response body that would include
+            finish_reason and logprobs.
+          type: boolean
+    # Request body used by both the generate and generate_stream operations;
+    # only `text_input` is mandatory.
+    GenerateRequest:
+      type: object
+      required: [text_input]
+      properties:
+        text_input:
+          type: string
+        parameters:
+          allOf:
+            - $ref: '#/components/schemas/GenerateParameters'
+    # Success body of the non-streaming generate operation; `model_version`
+    # and `details` are optional.
+    GenerateResponse:
+      type: object
+      required: [text_output, model_name]
+      properties:
+        text_output:
+          type: string
+        model_name:
+          type: string
+        model_version:
+          type: string
+        details:
+          $ref: '#/components/schemas/Details'
+    # Success body of the generate_stream operation; same fields as the
+    # non-streaming response, but `details` refers to StreamDetails.
+    GenerateStreamResponse:
+      type: object
+      required: [text_output, model_name]
+      properties:
+        text_output:
+          type: string
+        model_name:
+          type: string
+        model_version:
+          type: string
+        details:
+          $ref: '#/components/schemas/StreamDetails'
+    # Array of Token objects.
+    Logprobs:
+      description: Log probability information for the tokens.
+      type: array
+      items:
+        $ref: '#/components/schemas/Token'
+    # Detail object referenced by GenerateStreamResponse.details; carries a
+    # single Token rather than a Logprobs array.
+    StreamDetails:
+      type: object
+      additionalProperties: {}
+      required: [finish_reason, token]
+      properties:
+        finish_reason:
+          $ref: '#/components/schemas/Finish_Reason'
+        token:
+          $ref: '#/components/schemas/Token'
+    # One token entry; all four fields are mandatory.
+    Token:
+      type: object
+      required: [id, text, logprob, special]
+      properties:
+        id:
+          description: Id of the token.
+          type: integer
+          format: int32
+          minimum: 0
+        text:
+          description: The token text value.
+          type: string
+        logprob:
+          description: The log probability of this token.
+          type: number
+          format: float
+        special:
+          description: >-
+            Describes if the token is a special token. Can be used to ignore
+            tokens when concatenating
+          type: boolean
+paths:
+  # Non-streaming text generation.
+  # OpenAPI path templates are written as bare `{name}` placeholders; a `$`
+  # in front of the braces would be a literal character of the URL, so the
+  # placeholders below carry none and match the declared path parameters.
+  /v2/models/{MODEL_NAME}/versions/{MODEL_VERSION}/generate:
+    post:
+      parameters:
+        - name: MODEL_NAME
+          required: true
+          in: path
+          schema:
+            type: string
+        - name: MODEL_VERSION
+          required: true
+          in: path
+          schema:
+            type: string
+      requestBody:
+        # Request bodies are optional unless marked required; the request
+        # schema has a mandatory `text_input`, so a body must be sent.
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/GenerateRequest'
+      responses:
+        '200':
+          description: generated text
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/GenerateResponse'
+        '422':
+          description: Input validation error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Input validation error
+        '424':
+          description: Generation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Request failed during generation
+        '429':
+          description: Model is overloaded
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Model is overloaded
+        '500':
+          description: Incomplete generation
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Incomplete generation
+
+  # Streaming text generation; every response is declared with the
+  # text/event-stream media type.
+  # OpenAPI path templates are written as bare `{name}` placeholders; a `$`
+  # in front of the braces would be a literal character of the URL, so the
+  # placeholders below carry none and match the declared path parameters.
+  /v2/models/{MODEL_NAME}/versions/{MODEL_VERSION}/generate_stream:
+    post:
+      parameters:
+        - name: MODEL_NAME
+          required: true
+          in: path
+          schema:
+            type: string
+        - name: MODEL_VERSION
+          required: true
+          in: path
+          schema:
+            type: string
+      requestBody:
+        # Request bodies are optional unless marked required; the request
+        # schema has a mandatory `text_input`, so a body must be sent.
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/GenerateRequest'
+      responses:
+        '200':
+          description: generated text stream
+          content:
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/GenerateStreamResponse'
+        '422':
+          description: Input validation error
+          content:
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Input validation error
+        '424':
+          description: Generation Error
+          content:
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Request failed during generation
+        '429':
+          description: Model is overloaded
+          content:
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Model is overloaded
+        '500':
+          description: Incomplete generation
+          content:
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Incomplete generation