> ## Documentation Index
> Fetch the complete documentation index at: https://docs.cuadra.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Create Dataset

> Create a new dataset for organizing and managing training data or RAG documents.


## OpenAPI

````yaml post /v1/datasets
# OpenAPI 3.1 document describing the Cuadra AI REST API. This copy carries a
# single operation (POST /v1/datasets) plus the components it references.
openapi: 3.1.0
info:
  title: Cuadra AI API
  description: >-
    REST API for AI-powered chat, RAG, and multi-channel messaging. Full
    documentation at https://docs.cuadra.ai
  version: 1.0.0
  contact:
    name: Cuadra AI Support
    url: https://cuadra.ai/support
    email: support@cuadra.ai
  license:
    name: Proprietary
    url: https://cuadra.ai/terms
  termsOfService: https://cuadra.ai/terms
# Only one base URL is published: production.
servers:
  - url: https://api.cuadra.ai
    description: Production
# Global security requirement. The list entries are alternatives, so a request
# may authenticate with either scheme; the empty arrays mean no OAuth2 scopes
# are required at this level.
security:
  - bearerAuth: []
  - oauth2: []
# Tag catalogue used to group operations. Of these, only `Datasets` is
# referenced by the operation defined in this document.
tags:
  - name: Health
    description: >-
      System health monitoring and readiness checks. Essential for load
      balancers and deployment automation.
  - name: Models
    description: >-
      Manage custom AI model configurations derived from base models. Create,
      update, and delete with flexible pricing and token limits.
  - name: Chats
    description: >-
      Interact with AI models for intelligent chat completions. Supports
      streaming and non-streaming modes with message history.
  - name: Files
    description: >-
      Manage file uploads for chat attachments and dataset knowledge bases.
      Supports automatic processing, chunking, and embedding for RAG.
  - name: Datasets
    description: >-
      Manage RAG knowledge base datasets with versioning and semantic search.
      Organize uploaded files into searchable knowledge bases.
  - name: Particles
    description: >-
      Manage reusable system prompt particles for AI model behavior. Particles
      are modular components (role, tone, guardrails, constraints, format) that
      compose into system prompts.
  - name: System Prompts
    description: >-
      Compose particles into complete system prompts for AI models. System
      prompts define model behavior through ordered particle compositions with
      support for version pinning and token budget validation.
  - name: Usage
    description: >-
      Monitor and analyze API usage with detailed token counting. Track usage
      across models, tenants, and time periods.
  - name: Artifacts
    description: >-
      Manage rich content artifacts generated during chat conversations.
      Supports markdown, code, HTML, SVG, and Mermaid diagrams.
# POST /v1/datasets: accepts a `DatasetCreate` JSON body and answers 201 with
# a `DatasetOut`, or 422 with an `HTTPValidationError`.
paths:
  /v1/datasets:
    post:
      tags:
        - Datasets
      summary: Create Dataset
      description: >-
        Create a new dataset for organizing and managing training data or RAG
        documents.
      operationId: createDataset
      # NOTE(review): no operation-level `security` is set, so the global
      # requirement applies and no specific scope (e.g. datasets:write) is
      # declared as required for this call - confirm against the server.
      parameters:
        # Optional request header. NOTE(review): no description is published;
        # presumably it lets clients retry a create safely - confirm the
        # semantics and document them here.
        - name: Idempotency-Key
          in: header
          required: false
          schema:
            # string-or-null looks like a generator artifact for an optional
            # value; the header is not required, so it may simply be omitted.
            anyOf:
              - type: string
              - type: 'null'
            title: Idempotency-Key
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DatasetCreate'
      # NOTE(review): only 201 and 422 are documented even though the API
      # declares bearer/OAuth2 security; authentication or authorization
      # failures presumably return other statuses - confirm and add them.
      responses:
        '201':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DatasetOut'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
components:
  schemas:
    # Request body for POST /v1/datasets. Only `name` is required; the other
    # two properties are optional.
    DatasetCreate:
      properties:
        name:
          type: string
          maxLength: 200
          minLength: 1
          title: Name
          description: Dataset display name
          examples:
            - Customer Support QA
        description:
          # Nullable free text; the 2000-character cap applies to the string
          # branch only.
          anyOf:
            - type: string
              maxLength: 2000
            - type: 'null'
          title: Description
          description: Optional description
          examples:
            - High-quality customer support question-answer pairs
        consentForTraining:
          type: boolean
          # The generated title ran the camelCase words together
          # ("Consentfortraining"); spelled out for rendered docs. `title` is
          # an annotation only, so validation is unaffected.
          title: Consent For Training
          description: Indicates end-user/org consent for using data in model training.
          # Not listed under `required`, and the documented default is true:
          # omitting the field presumably records consent as granted.
          # NOTE(review): confirm this opt-out default is intended.
          default: true
          examples:
            - true
      type: object
      required:
        - name
      title: DatasetCreate
      description: Request schema for creating a dataset.
      examples:
        - consentForTraining: true
          description: High-quality customer support question-answer pairs
          name: Customer Support QA
        - consentForTraining: true
          description: Legal document summaries and analysis for legal AI
          name: Legal Document Analysis
        - consentForTraining: true
          description: Technical documentation Q&A for product support
          name: Product Documentation
    # Response body describing a dataset. `id`, `name`, `createdAt` and
    # `updatedAt` are always present; the remaining properties are optional.
    # Property titles that the generator had run together ("Createdat",
    # "Updatedat", "Consentfortraining") are spelled out for rendered docs;
    # `title` is an annotation only, so validation is unaffected.
    DatasetOut:
      properties:
        id:
          type: string
          title: Id
          description: Dataset ID
          examples:
            - ds_abc123
        name:
          type: string
          title: Name
          description: Dataset display name
          examples:
            - Customer Support QA
        description:
          anyOf:
            - type: string
            - type: 'null'
          title: Description
          description: Optional description
          examples:
            - Q&A pairs for support
        createdAt:
          type: string
          format: date-time
          title: Created At
          description: Creation timestamp (UTC ISO8601)
          examples:
            - '2024-01-15T10:30:00Z'
        updatedAt:
          type: string
          format: date-time
          title: Updated At
          description: Last update timestamp (UTC ISO8601)
          examples:
            - '2024-01-15T14:45:00Z'
        consentForTraining:
          type: boolean
          title: Consent For Training
          description: Indicates end-user/org consent for using data in model training.
          default: true
        # `items: {}` places no constraint on element shape, so clients get no
        # typing for the expanded entries of `items` or `snapshots`.
        # NOTE(review): the `expand[]` query parameter mentioned below is not
        # declared on the operation in this document - confirm where it applies.
        items:
          anyOf:
            - items: {}
              type: array
            - type: 'null'
          title: Items
          description: >-
            Dataset items (files). Only included when expand[]=items is
            specified.
        snapshots:
          anyOf:
            - items: {}
              type: array
            - type: 'null'
          title: Snapshots
          description: >-
            Dataset snapshots. Only included when expand[]=snapshots is
            specified.
      type: object
      required:
        - id
        - name
        - createdAt
        - updatedAt
      title: DatasetOut
      description: Response schema for a dataset.
    # Body of the 422 response: an object whose optional `detail` property is
    # an array of `ValidationError` entries.
    HTTPValidationError:
      properties:
        detail:
          items:
            $ref: '#/components/schemas/ValidationError'
          type: array
          title: Detail
      type: object
      title: HTTPValidationError
    # One validation failure. All three properties are required.
    ValidationError:
      properties:
        # Location of the offending value as a list of string or integer
        # segments - presumably field names and array indices; confirm.
        loc:
          items:
            anyOf:
              - type: string
              - type: integer
          type: array
          title: Location
        msg:
          type: string
          title: Message
        # Note: this is a property literally named `type`, not the schema
        # keyword; the keyword for this object is the `type: object` below.
        type:
          type: string
          title: Error Type
      type: object
      required:
        - loc
        - msg
        - type
      title: ValidationError
  # Authentication schemes referenced by the global `security` requirement.
  securitySchemes:
    # HTTP bearer authentication carrying a JWT.
    bearerAuth:
      type: http
      scheme: bearer
      bearerFormat: JWT
      description: JWT token from Stytch B2B authentication (magic link, SSO, or M2M)
    # OAuth2 with the client-credentials flow only. Scope names contain a
    # colon, so the keys are quoted to keep them unambiguous as mapping keys.
    oauth2:
      type: oauth2
      flows:
        clientCredentials:
          tokenUrl: https://auth.cuadra.ai/oauth/token
          scopes:
            # Chats
            'chats:invoke': Invoke chat completions (billable)
            'chats:read': Read and list chats
            'chats:write': Create and update chats
            'chats:delete': Delete chats
            'chats:admin': Full chat access (read/write/delete/invoke)
            # Models
            'models:read': Read model configurations
            'models:write': Create and update models
            'models:delete': Delete models
            'models:admin': Full access to models (grants read + write + delete)
            # Datasets
            'datasets:read': Read and list datasets and snapshots
            'datasets:write': Create and update datasets and snapshots
            'datasets:delete': Delete datasets and snapshots
            'datasets:admin': Full dataset access (read/write/delete)
            # Files
            'files:read': Read, list, and download files
            'files:write': Upload, associate, and reprocess files
            'files:delete': Delete files (single and bulk)
            'files:admin': Full access to files (grants read + write + delete)
            # Particles
            'particles:read': View particles and particle versions
            'particles:write': Create and update particles
            'particles:delete': Delete particles
            'particles:admin': Full particle access (read/write/delete)
            # System prompts
            'system-prompts:read': View system prompts and compositions
            'system-prompts:write': Create and update system prompts
            'system-prompts:delete': Delete system prompts
            'system-prompts:admin': Full system prompt access (read/write/delete)
            # Usage
            'usage:read': Read usage and billing information
            'usage:admin': Full access to usage data (grants read)
            # Connections
            'connections:read': View external connections and sync status
            'connections:write': Create and update external connections
            'connections:delete': Delete connections and sync configurations
            'connections:admin': Full connection access (read/write/delete)
            # Channels
            'channels:read': View channels and channel configuration
            'channels:write': Create and update channels
            'channels:delete': Delete channels and release phone numbers
            'channels:admin': Full channel access (read/write/delete)
            # Organization-wide
            'org:admin': Full resource access, manage members
            'org:owner': 'Owner: all access including billing'
            # NOTE(review): singular `chat:` prefix, unlike the `chats:*`
            # scopes above, and its description overlaps `chats:delete` -
            # possibly a legacy alias; confirm before relying on it.
            'chat:write': Delete and manage chats

````