Update generate_rest.yaml
Signed-off-by: Gavrish Prabhu <[email protected]>
gavrissh committed Jan 16, 2024
1 parent e6977a6 commit 24d9129
Showing 1 changed file with 75 additions and 37 deletions.
112 changes: 75 additions & 37 deletions specification/protocol/generate_rest.yaml
@@ -5,6 +5,40 @@ info:
   version: 1.0.0
 components:
   schemas:
+    GenerateErrorResponse:
+      type: object
+      required:
+        - error
+      properties:
+        error:
+          type: string
+    GenerateParameters:
+      type: object
+      additionalProperties: {}
+      properties:
+        temperature:
+          type: number
+          format: float
+          default: 1
+          minimum: 0
+          description: What sampling temperature to use, higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+        top_p:
+          type: number
+          format: float
+          maximum: 1
+          minimum: 0
+          description: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+        max_tokens:
+          type: integer
+          format: int32
+          default: 20
+          minimum: 1
+          description: The maximum number of tokens to generate in the completion.
+        stop:
+          type: array
+          items:
+            type: string
+          description: Sequences where the API will stop generating further tokens.
     GenerateRequest:
       type: object
       required:
@@ -13,36 +47,10 @@ components:
         text_input:
           type: string
         parameters:
-          $ref: '#/components/schemas/GenerateParameters'
-    GenerateParameters:
-      allOf:
-        $ref: '#/components/schemas/GenerateParameters'
-      type: object
-      additionalProperties: {}
-      properties:
-        temperature:
-          type: number
-          format: float
-          default: 1
-          minimum: 0
-          description: What sampling temperature to use, higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
-        top_p:
-          type: number
-          format: float
-          maximum: 1
-          minimum: 0
-          description: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
-        max_tokens:
-          type: integer
-          format: int32
-          default: 20
-          minimum: 1
-          description: The maximum number of tokens to generate in the completion.
-        stop:
-          type: array
-          items:
-            type: string
-          description: Sequences where the API will stop generating further tokens.
+          allOf:
+            - $ref: '#/components/schemas/GenerateParameters'
+        logprob:
+          type: boolean
     GenerateResponse:
       type: object
       required:
@@ -55,6 +63,8 @@ components:
           type: string
         model_version:
           type: string
+        logprobs:
+          $ref: '#/components/schemas/Logprobs'
     GenerateStreamResponse:
       type: object
       required:
@@ -69,18 +79,41 @@ components:
           type: string
         finish_reason:
           type: string
-    GenerateErrorResponse:
+        logprobs:
+          $ref: '#/components/schemas/Logprobs'
+    Logprobs:
+      type: array
+      items:
+        $ref: '#/components/schemas/Token'
+    Token:
       type: object
       required:
-        - error
+        - id
+        - text
+        - logprob
+        - special
       properties:
-        error:
+        id:
+          type: integer
+          format: int32
+          minimum: 0
+        logprob:
+          type: number
+          format: float
+        special:
+          type: boolean
+        text:
           type: string
 paths:
-  /v2/models/${MODEL_NAME}[/versions/${MODEL_VERSION}]/generate:
+  /v2/models/${MODEL_NAME}/versions/${MODEL_VERSION}/generate:
     post:
       parameters:
-        - name: model_name
+        - name: MODEL_NAME
           required: true
           in: path
           schema:
+            type: string
+        - name: MODEL_VERSION
+          required: true
+          in: path
+          schema:
@@ -130,10 +163,15 @@ paths:
               example:
                 error: Incomplete generation

-  /v2/models/${MODEL_NAME}[/versions/${MODEL_VERSION}]/generate_stream:
+  /v2/models/${MODEL_NAME}/versions/${MODEL_VERSION}/generate_stream:
     post:
       parameters:
-        - name: model_name
+        - name: MODEL_NAME
           required: true
           in: path
           schema:
+            type: string
+        - name: MODEL_VERSION
+          required: true
+          in: path
+          schema:
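
For illustration only, here is a minimal client sketch against the schemas and paths in this diff: it builds a GenerateRequest body (text_input plus GenerateParameters and the new logprob flag), POSTs it to the /generate path, and reads text_output and the logprobs array of Token objects from the GenerateResponse. The base URL, model name, and model version are hypothetical placeholders, not part of the spec.

# Minimal sketch of a call to the v2 generate endpoint described in this diff.
# The server URL, model name, and model version below are illustrative placeholders.
import json
import urllib.request

BASE_URL = "http://localhost:8080"   # hypothetical inference server
MODEL_NAME = "my-text-model"         # hypothetical model name
MODEL_VERSION = "1"                  # hypothetical model version

# Request body shaped like GenerateRequest: text_input is required,
# parameters follows GenerateParameters, and logprob requests token logprobs.
payload = {
    "text_input": "What is machine learning?",
    "parameters": {
        "temperature": 0.7,
        "top_p": 0.9,
        "max_tokens": 64,
        "stop": ["\n\n"],
    },
    "logprob": True,
}

url = f"{BASE_URL}/v2/models/{MODEL_NAME}/versions/{MODEL_VERSION}/generate"
request = urllib.request.Request(
    url,
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)

with urllib.request.urlopen(request) as response:
    body = json.load(response)

# GenerateResponse carries text_output plus an optional logprobs array of
# Token objects (id, text, logprob, special).
print(body["text_output"])
for token in body.get("logprobs", []):
    print(token["id"], repr(token["text"]), token["logprob"], token["special"])

The /generate_stream path takes the same MODEL_NAME and MODEL_VERSION path parameters; per the schemas above, its GenerateStreamResponse payloads additionally carry finish_reason, and errors from either path are reported as a GenerateErrorResponse with an error string (e.g. "Incomplete generation").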