> ## Documentation Index
> Fetch the complete documentation index at: https://docs.agentset.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Retrieve a document

> Retrieve the info for a document.



## OpenAPI

````yaml get /v1/namespace/{namespaceId}/documents/{documentId}
# OpenAPI document header: spec version, API metadata, and server list.
openapi: '3.1.1'
info:
  title: AgentsetAPI
  version: '0.0.1'
  description: Agentset is agentic rag-as-a-service
  license:
    url: https://github.com/agentset-ai/agentset/blob/main/LICENSE.md
    name: MIT License
  contact:
    url: https://api.agentset.ai/
    name: Agentset Support
    email: support@agentset.ai
servers:
  - description: Production API
    url: https://api.agentset.ai
# Empty list: no document-wide security requirement is declared here.
security: []
paths:
  /v1/namespace/{namespaceId}/documents/{documentId}:
    # GET operation: fetch a single document by its id within a namespace.
    get:
      tags:
        - Documents
      summary: Retrieve a document
      description: Retrieve the info for a document.
      operationId: getDocument
      # Two path parameters plus the tenant header parameter.
      parameters:
        - $ref: '#/components/parameters/NamespaceIdRef'
        - $ref: '#/components/parameters/DocumentIdRef'
        - $ref: '#/components/parameters/TenantIdRef'
      responses:
        # Success envelope: `success` is the constant true and `data` holds
        # the document object. No other top-level keys are allowed.
        '200':
          description: The retrieved document
          content:
            application/json:
              schema:
                type: object
                properties:
                  success:
                    type: boolean
                    const: true
                  data:
                    $ref: '#/components/schemas/document'
                required:
                  - success
                  - data
                additionalProperties: false
        # Error responses are defined once under components/responses.
        '400':
          $ref: '#/components/responses/400'
        '401':
          $ref: '#/components/responses/401'
        '403':
          $ref: '#/components/responses/403'
        '404':
          $ref: '#/components/responses/404'
        '409':
          $ref: '#/components/responses/409'
        '410':
          $ref: '#/components/responses/410'
        '422':
          $ref: '#/components/responses/422'
        '429':
          $ref: '#/components/responses/429'
        '500':
          $ref: '#/components/responses/500'
      # This operation requires the `token` security scheme.
      security:
        - token: []
      x-codeSamples:
        # `|-` keeps line breaks and strips the trailing newline, matching
        # the Python sample below.
        - lang: TypeScript
          source: |-
            import { Agentset } from "agentset";

            const agentset = new Agentset({ apiKey: 'agentset_xxx' });
            const ns = agentset.namespace('ns_xxx');

            const document = await ns.documents.get("doc_123");
            console.log(document);
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset


            with Agentset(
                namespace_id="ns_123",
                x_tenant_id="<id>",
                token="AGENTSET_API_KEY",
            ) as a_client:

                res = a_client.documents.get(document_id="doc_123")

                # Handle response
                print(res)
components:
  parameters:
    # Required path parameter: the `{namespaceId}` segment of the URL.
    NamespaceIdRef:
      name: namespaceId
      in: path
      required: true
      description: The id of the namespace (prefixed with ns_)
      x-speakeasy-globals-hidden: true
      schema:
        type: string
        description: The id of the namespace (prefixed with ns_)
        examples:
          - ns_123
    # Required path parameter: the `{documentId}` segment of the URL.
    DocumentIdRef:
      name: documentId
      in: path
      required: true
      description: The id of the document (prefixed with doc_)
      schema:
        type: string
        description: The id of the document (prefixed with doc_)
        examples:
          - doc_123
    # Header parameter `x-tenant-id`; no `required` key, so it is optional.
    TenantIdRef:
      name: x-tenant-id
      in: header
      description: >-
        Optional tenant id to use for the request. If not provided,
        the namespace will be used directly. Must be alphanumeric and
        up to 64 characters.
      schema:
        type: string
        # 1 to 64 ASCII letters or digits.
        pattern: '^[A-Za-z0-9]{1,64}$'
        description: >-
          Optional tenant id to use for the request. If not provided,
          the namespace will be used directly. Must be alphanumeric and
          up to 64 characters.
  schemas:
    # Schema of the document object returned in the `data` field of the
    # 200 response. Every property is required; several are nullable.
    document:
      type: object
      properties:
        id:
          type: string
          description: The unique ID of the document.
        ingestJobId:
          type: string
          description: The ingest job ID of the document.
        name:
          default: null
          description: The name of the document.
          anyOf:
            - type: string
            - type: 'null'
        tenantId:
          default: null
          description: The tenant ID of the document.
          anyOf:
            - type: string
            - type: 'null'
        status:
          $ref: '#/components/schemas/document-status'
        error:
          default: null
          description: >-
            The error message of the document. Only exists when the status is
            failed.
          anyOf:
            - type: string
            - type: 'null'
        source:
          # Tagged union: the constant `type` value selects the variant, and
          # each variant forbids extra keys.
          oneOf:
            - type: object
              properties:
                type:
                  type: string
                  const: TEXT
                text:
                  type: string
                  description: The text to ingest.
              required:
                - type
                - text
              additionalProperties: false
            - type: object
              properties:
                type:
                  type: string
                  const: FILE
                fileUrl:
                  type: string
                  format: uri
                  description: The URL of the file to ingest.
              required:
                - type
                - fileUrl
              additionalProperties: false
            - type: object
              properties:
                type:
                  type: string
                  const: MANAGED_FILE
                key:
                  type: string
                  description: The key of the managed file to ingest.
              required:
                - type
                - key
              additionalProperties: false
            # Only `type` is required for crawled pages.
            - type: object
              properties:
                type:
                  type: string
                  const: CRAWLED_PAGE
                title:
                  description: The title of the crawled page.
                  type: string
                description:
                  description: The description of the crawled page.
                  type: string
                language:
                  description: The language of the crawled page.
                  type: string
              required:
                - type
              additionalProperties: false
            - type: object
              properties:
                type:
                  type: string
                  const: YOUTUBE_VIDEO
                videoId:
                  type: string
                  description: The ID of the youtube video.
                duration:
                  description: The duration of the youtube video in seconds.
                  type: number
              required:
                - type
                - videoId
              additionalProperties: false
          description: The source of the document.
          type: object
        # A document field literally named `properties`: either an object
        # with file size and MIME type, or null.
        properties:
          default: null
          anyOf:
            - type: object
              properties:
                fileSize:
                  type: number
                  description: The size of the file in bytes.
                mimeType:
                  default: null
                  anyOf:
                    - type: string
                      description: The MIME type of the file.
                    - type: 'null'
              required:
                - fileSize
                - mimeType
              additionalProperties: false
              description: The properties of the document.
            - type: 'null'
        config:
          default: null
          anyOf:
            - $ref: '#/components/schemas/document-configOutput'
            - type: 'null'
        totalChunks:
          type: number
          description: The total number of chunks.
        totalTokens:
          type: number
          description: The total number of tokens.
        totalCharacters:
          type: number
          description: The total number of characters.
        totalPages:
          type: number
          description: >-
            The total number of pages. Will be 0 if the document is not in a
            paged format (PDF is an example of a paged format).
        # Timestamps: `createdAt` is always a string; the rest may be null.
        createdAt:
          description: The date and time the document was created.
          type: string
        queuedAt:
          default: null
          anyOf:
            - type: string
            - type: 'null'
          description: The date and time the document was queued.
        preProcessingAt:
          default: null
          anyOf:
            - type: string
            - type: 'null'
          description: The date and time the document was pre-processed.
        processingAt:
          default: null
          anyOf:
            - type: string
            - type: 'null'
          description: The date and time the document was processed.
        completedAt:
          default: null
          anyOf:
            - type: string
            - type: 'null'
          description: The date and time the document was completed.
        failedAt:
          default: null
          anyOf:
            - type: string
            - type: 'null'
          description: The date and time the document failed.
      required:
        - id
        - ingestJobId
        - name
        - tenantId
        - status
        - error
        - source
        - properties
        - config
        - totalChunks
        - totalTokens
        - totalCharacters
        - totalPages
        - createdAt
        - queuedAt
        - preProcessingAt
        - processingAt
        - completedAt
        - failedAt
      additionalProperties: false
      title: Document
    # Closed set of status values; referenced by `document.status`.
    document-status:
      description: The status of the document.
      type: string
      enum:
        - BACKLOG
        - QUEUED
        - QUEUED_FOR_RESYNC
        - QUEUED_FOR_DELETE
        - PRE_PROCESSING
        - PROCESSING
        - DELETING
        - CANCELLING
        - COMPLETED
        - FAILED
        - CANCELLED
    # Configuration object attached to a document (referenced by
    # `document.config`). No property is required and unknown keys are
    # rejected. Several options are flagged `deprecated: true`.
    document-configOutput:
      type: object
      properties:
        chunkSize:
          type: integer
          minimum: 32
          # 9007199254740991 is 2^53 - 1.
          maximum: 9007199254740991
          description: >-
            Chunk size (in characters). Controls approximately how much text is
            included in each chunk. Defaults to `2048`.
        delimiter:
          type: string
          description: Delimiter to use for separating text before chunking.
        # Flat key/value map: values are string, number, boolean, or an
        # array of strings.
        metadata:
          type: object
          propertyNames:
            type: string
          additionalProperties:
            anyOf:
              - type: string
              - type: number
              - type: boolean
              - type: array
                items:
                  type: string
          description: >-
            Custom metadata to be added to the ingested documents. It cannot
            contain nested objects; only string, number, boolean, and array of
            strings are allowed.
        languageCode:
          $ref: '#/components/schemas/language-code'
          description: >-
            Language code to use for text processing (for example, `en`, `ar`,
            or `fr`). When omitted, the partition API will attempt to detect the
            language automatically.
        mode:
          $ref: '#/components/schemas/mode'
        disableImageExtraction:
          type: boolean
          description: >-
            Disable image extraction from the document. When combined with
            `useLlm`, images may still be automatically captioned by the
            partition API. Defaults to `false`.
        disableImageCaptions:
          type: boolean
          description: >-
            Disable synthetic image captions/descriptions in output. Images will
            be rendered as plain img tags without alt text. Defaults to `false`.
        chartUnderstanding:
          type: boolean
          description: >-
            Enable chart understanding. This will extract the data from the
            charts in the document. Defaults to `false`.
        keepPageheaderInOutput:
          type: boolean
          description: Keep the page header in the output. Defaults to `false`.
        keepPagefooterInOutput:
          type: boolean
          description: Keep the page footer in the output. Defaults to `false`.
        # Every option from here to the end of `properties` is marked
        # `deprecated: true` and carries a deprecation message.
        forceOcr:
          type: boolean
          description: >-
            Force OCR on the document even if selectable text exists. Useful for
            scanned documents with unreliable embedded text. Defaults to
            `false`.
          deprecated: true
          x-speakeasy-deprecation-message: We no longer support this option.
        disableOcrMath:
          type: boolean
          description: >-
            Disable inline math recognition in OCR. This can be useful if the
            document contains content that is frequently misclassified as math.
            Defaults to `false`.
          deprecated: true
          x-speakeasy-deprecation-message: We no longer support this option.
        useLlm:
          type: boolean
          description: >-
            Enable LLM-assisted parsing to improve tables, forms, inline math,
            and layout detection. May increase latency and token usage. Defaults
            to `true`.
          deprecated: true
          x-speakeasy-deprecation-message: We no longer support this option. Use `mode` instead.
        chunkOverlap:
          type: number
          description: >-
            [Deprecated] Custom chunk overlap (in characters) between
            consecutive chunks. Helps preserve context across chunk boundaries.
          deprecated: true
          x-speakeasy-deprecation-message: We no longer support this option. Use `chunkSize` instead.
        maxChunkSize:
          type: number
          description: >-
            [Deprecated] Hard chunk size. This option is ignored by the current
            partition pipeline and kept only for backwards compatibility.
          deprecated: true
          x-speakeasy-deprecation-message: We no longer support this option. Use `chunkSize` instead.
        chunkingStrategy:
          type: string
          enum:
            - basic
            - by_title
          description: >-
            [Deprecated] The legacy chunking strategy. This option is ignored by
            the current partition pipeline and kept only for backwards
            compatibility.
          deprecated: true
          x-speakeasy-deprecation-message: We no longer support this option.
        strategy:
          type: string
          enum:
            - auto
            - fast
            - hi_res
            - ocr_only
          description: >-
            [Deprecated] Legacy processing strategy used by the previous
            partition API. This option is ignored by the current pipeline and
            kept only for backwards compatibility.
          deprecated: true
          x-speakeasy-deprecation-message: We no longer support this option. Use `mode` instead.
      additionalProperties: false
      description: The document config.
    # Closed set of two-letter language codes; referenced by
    # `document-configOutput.languageCode`.
    language-code:
      type: string
      enum:
        - af
        - am
        - ar
        - bg
        - bn
        - ca
        - cs
        - cy
        - da
        - de
        - en
        - es
        - et
        - fa
        - fi
        - fr
        - ga
        - gl
        - he
        - hi
        - hr
        - hu
        - id
        - is
        - it
        # NOTE(review): `jp` and `kr` are not ISO 639-1 codes (ISO uses
        # `ja` and `ko`) - confirm these are the values the API accepts.
        - jp
        - kr
        - lt
        - lv
        - mk
        - ms
        - mt
        - ne
        - nl
        # Quoted so YAML 1.1 parsers do not read it as boolean false.
        - 'no'
        - pl
        - pt
        - ro
        - ru
        - sk
        - sl
        - sr
        - sv
        - sw
        - ta
        - te
        - th
        - tl
        - tr
        - uk
        - ur
        - vi
        - zh
        - zu
    # Parser processing mode; referenced by `document-configOutput.mode`.
    mode:
      description: >-
        Processing mode for the parser. `fast` favors speed, `accurate`
        (pro subscription only) favors quality and layout fidelity, and
        `balanced` offers a compromise between the two. Defaults to
        `balanced`.
      type: string
      enum:
        - fast
        - balanced
        - accurate
  responses:
    # 400 Bad Request. Error envelope: `success` plus an `error` object
    # whose `code` is fixed to `bad_request`; `doc_url` is optional.
    '400':
      description: >-
        The server cannot or will not process the request due to something that
        is perceived to be a client error (e.g., malformed request syntax,
        invalid request message framing, or deceptive request routing).
      content:
        application/json:
          schema:
            x-speakeasy-name-override: BadRequest
            type: object
            properties:
              success:
                type: boolean
                example: false
              error:
                type: object
                properties:
                  code:
                    type: string
                    enum:
                      - bad_request
                    description: A short code indicating the error code returned.
                    example: bad_request
                  message:
                    x-speakeasy-error-message: true
                    type: string
                    description: A human readable explanation of what went wrong.
                    # Example wording matches this status code.
                    example: The request body is malformed.
                  doc_url:
                    type: string
                    description: >-
                      A link to our documentation with more details about this
                      error code
                    example: https://docs.agentset.ai/api-reference/errors#bad-request
                required:
                  - code
                  - message
            required:
              - success
              - error
    # 401 Unauthorized. Error envelope: `success` plus an `error` object
    # whose `code` is fixed to `unauthorized`; `doc_url` is optional.
    '401':
      description: >-
        Although the HTTP standard specifies "unauthorized", semantically this
        response means "unauthenticated". That is, the client must authenticate
        itself to get the requested response.
      content:
        application/json:
          schema:
            x-speakeasy-name-override: Unauthorized
            type: object
            properties:
              success:
                type: boolean
                example: false
              error:
                type: object
                properties:
                  code:
                    type: string
                    enum:
                      - unauthorized
                    description: A short code indicating the error code returned.
                    example: unauthorized
                  message:
                    x-speakeasy-error-message: true
                    type: string
                    description: A human readable explanation of what went wrong.
                    # Example wording matches this status code.
                    example: Missing or invalid API key.
                  doc_url:
                    type: string
                    description: >-
                      A link to our documentation with more details about this
                      error code
                    example: https://docs.agentset.ai/api-reference/errors#unauthorized
                required:
                  - code
                  - message
            required:
              - success
              - error
    # 403 Forbidden. Error envelope: `success` plus an `error` object
    # whose `code` is fixed to `forbidden`; `doc_url` is optional.
    '403':
      description: >-
        The client does not have access rights to the content; that is, it is
        unauthorized, so the server is refusing to give the requested resource.
        Unlike 401 Unauthorized, the client's identity is known to the server.
      content:
        application/json:
          schema:
            x-speakeasy-name-override: Forbidden
            type: object
            properties:
              success:
                type: boolean
                example: false
              error:
                type: object
                properties:
                  code:
                    type: string
                    enum:
                      - forbidden
                    description: A short code indicating the error code returned.
                    example: forbidden
                  message:
                    x-speakeasy-error-message: true
                    type: string
                    description: A human readable explanation of what went wrong.
                    # Example wording matches this status code.
                    example: You do not have access to this resource.
                  doc_url:
                    type: string
                    description: >-
                      A link to our documentation with more details about this
                      error code
                    example: https://docs.agentset.ai/api-reference/errors#forbidden
                required:
                  - code
                  - message
            required:
              - success
              - error
    # 404 Not Found. Error envelope: `success` plus an `error` object
    # whose `code` is fixed to `not_found`; `doc_url` is optional.
    '404':
      description: The server cannot find the requested resource.
      content:
        application/json:
          schema:
            type: object
            x-speakeasy-name-override: NotFound
            required:
              - success
              - error
            properties:
              success:
                example: false
                type: boolean
              error:
                type: object
                required:
                  - code
                  - message
                properties:
                  code:
                    description: A short code indicating the error code returned.
                    type: string
                    enum:
                      - not_found
                    example: not_found
                  message:
                    description: A human readable explanation of what went wrong.
                    type: string
                    x-speakeasy-error-message: true
                    example: The requested resource was not found.
                  doc_url:
                    description: >-
                      A link to our documentation with more details about
                      this error code
                    type: string
                    example: https://docs.agentset.ai/api-reference/errors#not-found
    # 409 Conflict. Error envelope: `success` plus an `error` object
    # whose `code` is fixed to `conflict`; `doc_url` is optional.
    '409':
      description: >-
        This response is sent when a request conflicts with the current state of
        the server.
      content:
        application/json:
          schema:
            x-speakeasy-name-override: Conflict
            type: object
            properties:
              success:
                type: boolean
                example: false
              error:
                type: object
                properties:
                  code:
                    type: string
                    enum:
                      - conflict
                    description: A short code indicating the error code returned.
                    example: conflict
                  message:
                    x-speakeasy-error-message: true
                    type: string
                    description: A human readable explanation of what went wrong.
                    # Example wording matches this status code.
                    example: The request conflicts with the current state.
                  doc_url:
                    type: string
                    description: >-
                      A link to our documentation with more details about this
                      error code
                    example: https://docs.agentset.ai/api-reference/errors#conflict
                required:
                  - code
                  - message
            required:
              - success
              - error
    # 410 Gone. Error envelope: `success` plus an `error` object whose
    # `code` is fixed to `invite_expired`; `doc_url` is optional.
    # NOTE(review): the schema name and code refer to an expired invite
    # while the description is the generic 410 text - confirm this naming
    # is intended for the API.
    '410':
      description: >-
        This response is sent when the requested content has been permanently
        deleted from server, with no forwarding address.
      content:
        application/json:
          schema:
            x-speakeasy-name-override: InviteExpired
            type: object
            properties:
              success:
                type: boolean
                example: false
              error:
                type: object
                properties:
                  code:
                    type: string
                    enum:
                      - invite_expired
                    description: A short code indicating the error code returned.
                    example: invite_expired
                  message:
                    x-speakeasy-error-message: true
                    type: string
                    description: A human readable explanation of what went wrong.
                    # Example wording matches the `invite_expired` code.
                    example: The invite has expired.
                  doc_url:
                    type: string
                    description: >-
                      A link to our documentation with more details about this
                      error code
                    example: >-
                      https://docs.agentset.ai/api-reference/errors#invite-expired
                required:
                  - code
                  - message
            required:
              - success
              - error
    # 422 Unprocessable Entity. Error envelope: `success` plus an `error`
    # object whose `code` is fixed to `unprocessable_entity`; `doc_url` is
    # optional.
    '422':
      description: >-
        The request was well-formed but was unable to be followed due to
        semantic errors.
      content:
        application/json:
          schema:
            x-speakeasy-name-override: UnprocessableEntity
            type: object
            properties:
              success:
                type: boolean
                example: false
              error:
                type: object
                properties:
                  code:
                    type: string
                    enum:
                      - unprocessable_entity
                    description: A short code indicating the error code returned.
                    example: unprocessable_entity
                  message:
                    x-speakeasy-error-message: true
                    type: string
                    description: A human readable explanation of what went wrong.
                    # Example wording matches this status code.
                    example: The request contains semantic errors.
                  doc_url:
                    type: string
                    description: >-
                      A link to our documentation with more details about this
                      error code
                    example: >-
                      https://docs.agentset.ai/api-reference/errors#unprocessable-entity
                required:
                  - code
                  - message
            required:
              - success
              - error
    # 429 Too Many Requests. Error envelope: `success` plus an `error`
    # object whose `code` is fixed to `rate_limit_exceeded`; `doc_url` is
    # optional.
    '429':
      description: >-
        The user has sent too many requests in a given amount of time ("rate
        limiting")
      content:
        application/json:
          schema:
            x-speakeasy-name-override: RateLimitExceeded
            type: object
            properties:
              success:
                type: boolean
                example: false
              error:
                type: object
                properties:
                  code:
                    type: string
                    enum:
                      - rate_limit_exceeded
                    description: A short code indicating the error code returned.
                    example: rate_limit_exceeded
                  message:
                    x-speakeasy-error-message: true
                    type: string
                    description: A human readable explanation of what went wrong.
                    # Example wording matches this status code.
                    example: Too many requests. Please try again later.
                  doc_url:
                    type: string
                    description: >-
                      A link to our documentation with more details about this
                      error code
                    # Anchor is fully kebab-cased, like the other error
                    # anchors (`#not-found`, `#unprocessable-entity`).
                    example: >-
                      https://docs.agentset.ai/api-reference/errors#rate-limit-exceeded
                required:
                  - code
                  - message
            required:
              - success
              - error
    # 500 Internal Server Error. Error envelope: `success` plus an `error`
    # object whose `code` is fixed to `internal_server_error`; `doc_url` is
    # optional.
    '500':
      description: The server has encountered a situation it does not know how to handle.
      content:
        application/json:
          schema:
            x-speakeasy-name-override: InternalServerError
            type: object
            properties:
              success:
                type: boolean
                example: false
              error:
                type: object
                properties:
                  code:
                    type: string
                    enum:
                      - internal_server_error
                    description: A short code indicating the error code returned.
                    example: internal_server_error
                  message:
                    x-speakeasy-error-message: true
                    type: string
                    description: A human readable explanation of what went wrong.
                    # Example wording matches this status code.
                    example: An unexpected error occurred on the server.
                  doc_url:
                    type: string
                    description: >-
                      A link to our documentation with more details about this
                      error code
                    # Anchor is fully kebab-cased, like the other error
                    # anchors (`#not-found`, `#unprocessable-entity`).
                    example: >-
                      https://docs.agentset.ai/api-reference/errors#internal-server-error
                required:
                  - code
                  - message
            required:
              - success
              - error
  securitySchemes:
    # HTTP bearer authentication; named in the operation's `security` list.
    token:
      type: http
      scheme: bearer
      description: Default authentication mechanism
      x-speakeasy-example: AGENTSET_API_KEY

````