# SPDX-License-Identifier: AGPL-3.0-or-later
# feder8d Model Gateway — OpenAPI 3.1 specification.
#
# OpenAI-compatible surface for the inference endpoints:
#   * POST /v1/chat/completions   (SSE streaming + non-streaming)
#   * POST /v1/embeddings
#   * POST /v1/rerank
#   * GET  /v1/models
#
# feder8d-specific extensions live under x-feder8d-* fields (citations,
# retrieval traces, per-collection model_assignment overrides) so the OpenAI
# Python SDK continues to work unchanged.
#
# Auth: bearerAuth uses the per-tenant JWT issued by the orchestrator
# (services/orchestrator/src/feder8d_orchestrator/licensing.py).
#
# Streaming protocol is Server-Sent Events (Q19.3 lock).

# OpenAPI dialect of this document; quoted so it is always read as a string.
openapi: '3.1.0'

# API metadata shown by documentation renderers and SDK generators.
info:
  title: feder8d Model Gateway
  # Quoted for the same reason as the dialect version above.
  version: '0.1.0'
  summary: OpenAI-compatible inference router for the feder8d platform.
  description: |
    The Model Gateway is the single ingress point for every model call
    (chat, embedding, rerank) made by any tenant. It validates the tenant
    JWT, checks entitlements, enforces per-tenant rate + token budgets,
    and routes to the appropriate vLLM pod or embedding worker.

    **No prompt content is persisted** by this service (architecture
    constraints rule #3). Audit log stores hashes + token counts only.

    feder8d extensions ride under `x-feder8d-*` fields. Vanilla OpenAI
    clients ignore them; feder8d clients use them for citations and
    retrieval traces.
  # `identifier` carries the SPDX expression matching the file header.
  license:
    name: AGPL-3.0-or-later
    identifier: AGPL-3.0-or-later

# Base URLs that the relative paths under `paths` are resolved against.
servers:
  # Production endpoint.
  - url: https://api.feder8d.ai
    description: Production (af-south-1 control plane → eu-central-1 model plane).
  # Staging endpoint.
  - url: https://api.staging.feder8d.ai
    description: Staging.

# Document-wide default: every operation requires the `bearerAuth` scheme
# declared under components.securitySchemes. The empty list means no scopes.
security:
  - bearerAuth: []

# Tag catalogue; each operation under `paths` lists one of these names in
# its own `tags` field.
tags:
  - name: chat
    description: Chat completions (OpenAI-compatible) with feder8d retrieval extensions.
  - name: embeddings
    description: Vector embeddings.
  - name: rerank
    description: Cross-encoder reranking (bge-reranker family).
  - name: models
    description: Model registry — what the current tenant may call.

paths:
  # Chat completion endpoint; one operation covers both the JSON and the
  # SSE response variants.
  /v1/chat/completions:
    post:
      operationId: chatCompletions
      tags:
        - chat
      summary: Create a chat completion (streaming or one-shot).
      description: |
        OpenAI-compatible. When `stream=true`, the response is a
        `text/event-stream` SSE feed of `chat.completion.chunk` events
        terminated by `data: [DONE]\n\n`.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
            examples:
              # Only the two required fields: `model` and `messages`.
              minimal:
                summary: One-shot completion
                value:
                  model: feder8d/qwen2.5-7b-instruct
                  messages:
                    - role: user
                      content: 'Summarise the latest meeting notes.'
              # Same shape plus `stream: true` to request the SSE variant.
              streaming:
                summary: SSE streaming completion
                value:
                  model: feder8d/qwen2.5-7b-instruct
                  stream: true
                  messages:
                    - role: user
                      content: 'What did finance decide last week?'
      responses:
        # A single 200 entry lists both media types.
        '200':
          description: Completion (streaming or one-shot).
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletion'
            text/event-stream:
              schema:
                $ref: '#/components/schemas/ChatCompletionChunkStream'
        # Error responses are shared definitions under components.responses.
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '429':
          $ref: '#/components/responses/RateLimited'
        '503':
          $ref: '#/components/responses/Unavailable'

  # Embedding endpoint: JSON in, JSON out (no streaming variant).
  /v1/embeddings:
    post:
      operationId: createEmbeddings
      tags: [embeddings]
      summary: Create embeddings for one or more inputs.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/EmbeddingRequest"
      responses:
        "200":
          description: Vector embeddings.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/EmbeddingResponse"
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "403":
          $ref: "#/components/responses/Forbidden"
        "429":
          $ref: "#/components/responses/RateLimited"
        # Embedding calls are routed to an upstream worker pool just like
        # chat calls, so pool unavailability is declared here as well.
        "503":
          $ref: "#/components/responses/Unavailable"

  # Rerank endpoint: scores candidate documents against a query.
  /v1/rerank:
    post:
      operationId: rerank
      tags: [rerank]
      summary: Rerank candidate documents against a query.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/RerankRequest"
      responses:
        "200":
          description: Reranked results.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/RerankResponse"
        "400":
          $ref: "#/components/responses/BadRequest"
        "401":
          $ref: "#/components/responses/Unauthorized"
        "403":
          $ref: "#/components/responses/Forbidden"
        # info.description states that per-tenant rate + token budgets are
        # enforced for every model call, rerank included.
        "429":
          $ref: "#/components/responses/RateLimited"
        # Rerank calls are routed to an upstream pool as well, so pool
        # unavailability is declared like on the chat endpoint.
        "503":
          $ref: "#/components/responses/Unavailable"

  # Model registry listing; takes no request body.
  /v1/models:
    get:
      operationId: listModels
      tags:
        - models
      summary: List models available to the calling tenant.
      responses:
        '200':
          description: Model registry response.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelList'
        '401':
          $ref: '#/components/responses/Unauthorized'

components:
  # Authentication schemes; `bearerAuth` is the one named by the top-level
  # `security` requirement.
  securitySchemes:
    bearerAuth:
      # HTTP `Authorization: Bearer <token>` authentication.
      type: http
      scheme: bearer
      # Hint for documentation and tooling about the token format.
      bearerFormat: JWT
      description: |
        Tenant license JWT (Ed25519/EdDSA) minted by the orchestrator. The
        gateway verifies the signature and reads `tenant_id`, `plan_id`, and
        entitlements from the claims — `tenant_id` is never accepted from a
        request parameter.

  responses:
    # Shared body for the 400 entries of the POST operations.
    BadRequest:
      description: Malformed request.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    # Shared body for the 401 entries; referenced by every operation.
    Unauthorized:
      description: Missing or invalid tenant JWT.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    # Shared body for the 403 entries of the POST operations.
    Forbidden:
      description: Tenant lacks the entitlement for the requested model.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    # Shared 429 response. Besides the error body it declares a Retry-After
    # header for clients that want to back off.
    RateLimited:
      description: Per-tenant rate or token budget exceeded.
      headers:
        Retry-After:
          # The schema only allows the delay-seconds form of Retry-After,
          # so the unit is spelled out for client authors.
          description: Number of seconds the client should wait before retrying.
          schema:
            type: integer
            minimum: 0
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/ErrorResponse"
    # Shared body for 503 entries.
    Unavailable:
      description: Upstream model pool unavailable.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'

  schemas:
    # Error envelope used by every shared error response: a single `error`
    # object. Both levels are closed to undeclared properties.
    ErrorResponse:
      type: object
      additionalProperties: false
      required:
        - error
      properties:
        error:
          type: object
          additionalProperties: false
          required:
            - code
            - message
          properties:
            code:
              type: string
              # Illustrative values only; `examples` does not constrain input.
              examples:
                - invalid_request
                - unauthorized
                - forbidden
                - rate_limited
            message:
              type: string
            type:
              type: string
            param:
              type: string

    # Author of a chat message; used by ChatMessage.role and the streaming
    # delta's role.
    Role:
      type: string
      enum:
        - system
        - user
        - assistant
        - tool

    # One chat message. Used both for request history
    # (ChatCompletionRequest.messages) and for the assistant reply
    # (ChatCompletionChoice.message).
    ChatMessage:
      type: object
      additionalProperties: false
      required: [role, content]
      properties:
        role:
          $ref: "#/components/schemas/Role"
        content:
          # Plain text, a list of typed content parts, or null. Null covers
          # assistant turns that carry only `tool_calls`.
          oneOf:
            - type: string
            - type: array
              items:
                type: object
                properties:
                  type:
                    type: string
                  text:
                    type: string
            - type: "null"
        name:
          type: string
        tool_call_id:
          type: string
        # The schema is closed (additionalProperties: false), so tool calls
        # must be declared here. Without this, a reply with
        # `finish_reason: tool_calls` could not carry its calls, and such a
        # turn could not be replayed in a follow-up request. Items mirror the
        # open shape used by the streaming delta's `tool_calls`.
        tool_calls:
          type: array
          items:
            type: object
            additionalProperties: true

    # Output-format selector referenced by
    # ChatCompletionRequest.response_format.
    ResponseFormat:
      type: object
      additionalProperties: false
      properties:
        type:
          type: string
          enum:
            - text
            - json_object
            - json_schema
        # Left open: any object is accepted here.
        json_schema:
          type: object
          additionalProperties: true

    # Request-side retrieval extension, attached to ChatCompletionRequest as
    # `x-feder8d-retrieval`. No property is required.
    XFeder8dRetrievalDirective:
      type: object
      description: |
        feder8d-specific retrieval extension. When present, the gateway
        delegates retrieval to the tenant-api before invoking the chat model
        and returns the assembled citations in the response.
      additionalProperties: false
      properties:
        # When supplied, the list must contain at least one id.
        collection_ids:
          type: array
          minItems: 1
          items:
            type: string
        top_k:
          type: integer
          default: 8
          minimum: 1
          maximum: 100
        model_assignment_override:
          type: string
          description: |
            Per-collection model assignment override. Requires the
            calling user to hold the `manage:model_assignment` permission.
        # Opt-in flag; false unless the caller sets it.
        include_retrieval_trace:
          type: boolean
          default: false

    # Body of POST /v1/chat/completions. The schema is closed, so only the
    # properties declared below are accepted.
    ChatCompletionRequest:
      type: object
      additionalProperties: false
      required:
        - model
        - messages
      properties:
        model:
          type: string
        # At least one message is required.
        messages:
          type: array
          minItems: 1
          items:
            $ref: '#/components/schemas/ChatMessage'
        max_tokens:
          type: integer
          minimum: 1
        temperature:
          type: number
          minimum: 0
          maximum: 2
        top_p:
          type: number
          minimum: 0
          maximum: 1
        n:
          type: integer
          minimum: 1
          maximum: 8
        # Selects the text/event-stream response variant when true.
        stream:
          type: boolean
          default: false
        # Either a single stop string or a list of up to four.
        stop:
          oneOf:
            - type: string
            - type: array
              maxItems: 4
              items:
                type: string
        response_format:
          $ref: '#/components/schemas/ResponseFormat'
        # Tool definitions are accepted as open objects.
        tools:
          type: array
          items:
            type: object
            additionalProperties: true
        tool_choice:
          oneOf:
            - type: string
            - type: object
              additionalProperties: true
        user:
          type: string
        # feder8d extension; see XFeder8dRetrievalDirective.
        x-feder8d-retrieval:
          $ref: '#/components/schemas/XFeder8dRetrievalDirective'

    # One candidate reply inside a non-streaming ChatCompletion.
    ChatCompletionChoice:
      type: object
      additionalProperties: false
      required:
        - index
        - message
        - finish_reason
      properties:
        index:
          type: integer
        message:
          $ref: '#/components/schemas/ChatMessage'
        # Always present and never null here, unlike the streaming chunk.
        finish_reason:
          type: string
          enum:
            - stop
            - length
            - tool_calls
            - content_filter

    # Token accounting block referenced by the chat, embedding and rerank
    # response schemas. All three counters are required and non-negative.
    Usage:
      type: object
      additionalProperties: false
      required:
        - prompt_tokens
        - completion_tokens
        - total_tokens
      properties:
        prompt_tokens:
          type: integer
          minimum: 0
        completion_tokens:
          type: integer
          minimum: 0
        total_tokens:
          type: integer
          minimum: 0

    # One entry of the `x-feder8d-citations` array on chat responses. Only
    # the chunk and collection ids are mandatory.
    Citation:
      type: object
      additionalProperties: false
      required:
        - chunk_id
        - collection_id
      properties:
        chunk_id:
          type: string
        document_id:
          type: string
        collection_id:
          type: string
        title:
          type: string
        source_uri:
          type: string
        excerpt:
          type: string

    # Retrieval diagnostics attached to chat responses as
    # `x-feder8d-retrieval-trace`. Every field is optional; all numeric
    # fields are non-negative.
    XFeder8dRetrievalTrace:
      type: object
      additionalProperties: false
      properties:
        dense_hits:
          type: integer
          minimum: 0
        sparse_hits:
          type: integer
          minimum: 0
        reranked_count:
          type: integer
          minimum: 0
        assembled_tokens:
          type: integer
          minimum: 0
        # Typed as `number`, so fractional values are allowed. The name
        # suggests milliseconds — TODO confirm the unit with the emitter.
        retrieval_ms:
          type: number
          minimum: 0
        model_assignment:
          type: string
          description: Final model used after override resolution.

    # Non-streaming (application/json) response of POST /v1/chat/completions.
    ChatCompletion:
      type: object
      additionalProperties: false
      required:
        - id
        - object
        - created
        - model
        - choices
        - usage
      properties:
        id:
          type: string
        # Single-value enum acting as a type discriminator.
        object:
          type: string
          enum:
            - chat.completion
        created:
          type: integer
        model:
          type: string
        choices:
          type: array
          minItems: 1
          items:
            $ref: '#/components/schemas/ChatCompletionChoice'
        usage:
          $ref: '#/components/schemas/Usage'
        # feder8d extensions; both optional.
        x-feder8d-citations:
          type: array
          items:
            $ref: '#/components/schemas/Citation'
        x-feder8d-retrieval-trace:
          $ref: '#/components/schemas/XFeder8dRetrievalTrace'

    # One choice inside a streamed chunk: an incremental `delta` instead of
    # a full message.
    ChatCompletionChunkChoice:
      type: object
      additionalProperties: false
      required:
        - index
        - delta
      properties:
        index:
          type: integer
        # Every delta field is optional.
        delta:
          type: object
          additionalProperties: false
          properties:
            role:
              $ref: '#/components/schemas/Role'
            content:
              type: string
            tool_calls:
              type: array
              items:
                type: object
                additionalProperties: true
        # Nullable: both the type list and the enum admit null.
        finish_reason:
          type:
            - string
            - 'null'
          enum:
            - stop
            - length
            - tool_calls
            - content_filter
            - null

    # Payload of one streamed `chat.completion.chunk` SSE event.
    ChatCompletionChunk:
      type: object
      additionalProperties: false
      required: [id, object, created, model, choices]
      properties:
        id:
          type: string
        # Single-value enum acting as a type discriminator.
        object:
          type: string
          enum: [chat.completion.chunk]
        created:
          type: integer
        model:
          type: string
        choices:
          type: array
          items:
            $ref: "#/components/schemas/ChatCompletionChunkChoice"
        x-feder8d-citations:
          type: array
          items:
            $ref: "#/components/schemas/Citation"
        # Declared because this schema is closed (additionalProperties:
        # false) and the ChatCompletionChunkStream contract says the final
        # chunk may carry the retrieval trace alongside the citations.
        x-feder8d-retrieval-trace:
          $ref: "#/components/schemas/XFeder8dRetrievalTrace"

    # Schema attached to the text/event-stream variant of the chat response.
    # The stream is described as an array of chunk payloads; the SSE framing
    # is described in prose only.
    ChatCompletionChunkStream:
      type: array
      description: |
        SSE stream of `data: <ChatCompletionChunk JSON>\n\n` events ending
        with `data: [DONE]\n\n`. Each non-DONE event payload conforms to
        `ChatCompletionChunk`. The final non-DONE event may carry
        `x-feder8d-citations` and `x-feder8d-retrieval-trace`.
      items:
        $ref: '#/components/schemas/ChatCompletionChunk'

    # Body of POST /v1/embeddings.
    EmbeddingRequest:
      type: object
      additionalProperties: false
      required:
        - model
        - input
      properties:
        model:
          type: string
        # A single string or a non-empty list of strings.
        input:
          oneOf:
            - type: string
            - type: array
              minItems: 1
              items:
                type: string
        encoding_format:
          type: string
          default: float
          enum:
            - float
            - base64
        dimensions:
          type: integer
          minimum: 1
        user:
          type: string

    # One embedding inside EmbeddingResponse.data.
    EmbeddingObject:
      type: object
      additionalProperties: false
      required: [index, embedding, object]
      properties:
        index:
          type: integer
        embedding:
          # EmbeddingRequest.encoding_format accepts `float` (the default)
          # and `base64`, so the vector may be a number array or a
          # base64-encoded string. Array-only would reject base64 responses.
          oneOf:
            - type: array
              items:
                type: number
            - type: string
              contentEncoding: base64
        # Single-value enum acting as a type discriminator.
        object:
          type: string
          enum: [embedding]

    # Response of POST /v1/embeddings: a list wrapper around the
    # per-input embedding objects.
    EmbeddingResponse:
      type: object
      additionalProperties: false
      required:
        - object
        - data
        - model
        - usage
      properties:
        object:
          type: string
          enum:
            - list
        data:
          type: array
          items:
            $ref: '#/components/schemas/EmbeddingObject'
        model:
          type: string
        usage:
          $ref: '#/components/schemas/Usage'

    # Body of POST /v1/rerank.
    RerankRequest:
      type: object
      additionalProperties: false
      required:
        - model
        - query
        - documents
      properties:
        model:
          type: string
        query:
          type: string
        # Candidate texts; at least one is required.
        documents:
          type: array
          minItems: 1
          items:
            type: string
        top_n:
          type: integer
          minimum: 1
        return_documents:
          type: boolean
          default: false

    # One scored entry of RerankResponse.results.
    RerankResultObject:
      type: object
      additionalProperties: false
      required:
        - index
        - relevance_score
      properties:
        index:
          type: integer
        relevance_score:
          type: number
        # Optional document text.
        document:
          type: string

    # Response of POST /v1/rerank. `usage` is optional here, unlike in the
    # chat and embedding responses.
    RerankResponse:
      type: object
      additionalProperties: false
      required:
        - results
        - model
      properties:
        results:
          type: array
          items:
            $ref: '#/components/schemas/RerankResultObject'
        model:
          type: string
        usage:
          $ref: '#/components/schemas/Usage'

    # One entry of ModelList.data. The `x-feder8d-*` fields are optional
    # feder8d additions to the basic model record.
    ModelObject:
      type: object
      additionalProperties: false
      required:
        - id
        - object
        - owned_by
      properties:
        id:
          type: string
        object:
          type: string
          enum:
            - model
        created:
          type: integer
        owned_by:
          type: string
        # Serving pool the model belongs to.
        x-feder8d-pool:
          type: string
          enum:
            - small_pool
            - medium_pool
            - large_pool
            - embedding
            - reranker
            - byo
        x-feder8d-context-window:
          type: integer
          minimum: 1
        x-feder8d-license:
          type: string
          description: SPDX or vendor license identifier of the model weights.
        x-feder8d-region:
          type: string
          description: Region the model pool is served from (eu-central-1 at launch).

    # Response of GET /v1/models: a list wrapper around ModelObject entries.
    ModelList:
      type: object
      additionalProperties: false
      required:
        - object
        - data
      properties:
        object:
          type: string
          enum:
            - list
        data:
          type: array
          items:
            $ref: '#/components/schemas/ModelObject'
