Skip to content
Get started

Beta

Beta > Agent Data

Get Agent Data
beta.agent_data.get(item_id: str, **kwargs: AgentDataGetParams) -> AgentData
GET /api/v1/beta/agent-data/{item_id}
Update Agent Data
beta.agent_data.update(item_id: str, **kwargs: AgentDataUpdateParams) -> AgentData
PUT /api/v1/beta/agent-data/{item_id}
Delete Agent Data
beta.agent_data.delete(item_id: str, **kwargs: AgentDataDeleteParams) -> AgentDataDeleteResponse
DELETE /api/v1/beta/agent-data/{item_id}
Create Agent Data
beta.agent_data.agent_data(**kwargs: AgentDataAgentDataParams) -> AgentData
POST /api/v1/beta/agent-data
Search Agent Data
beta.agent_data.search(**kwargs: AgentDataSearchParams) -> SyncPaginatedCursorPost[AgentData]
POST /api/v1/beta/agent-data/:search
Aggregate Agent Data
beta.agent_data.aggregate(**kwargs: AgentDataAggregateParams) -> SyncPaginatedCursorPost[AgentDataAggregateResponse]
POST /api/v1/beta/agent-data/:aggregate
Delete Agent Data By Query
beta.agent_data.delete_by_query(**kwargs: AgentDataDeleteByQueryParams) -> AgentDataDeleteByQueryResponse
POST /api/v1/beta/agent-data/:delete
Models
class AgentData:

API Result for a single agent data item

data: Dict[str, object]
deployment_name: str
id: Optional[str]
collection: Optional[str]
created_at: Optional[datetime]
project_id: Optional[str]
updated_at: Optional[datetime]

Beta > Parse Configurations

Create Parse Configuration
beta.parse_configurations.create(**kwargs: ParseConfigurationCreateParams) -> ParseConfiguration
POST /api/v1/beta/parse-configurations
List Parse Configurations
beta.parse_configurations.list(**kwargs: ParseConfigurationListParams) -> SyncPaginatedCursor[ParseConfiguration]
GET /api/v1/beta/parse-configurations
Get Parse Configuration
beta.parse_configurations.get(config_id: str, **kwargs: ParseConfigurationGetParams) -> ParseConfiguration
GET /api/v1/beta/parse-configurations/{config_id}
Update Parse Configuration
beta.parse_configurations.update(config_id: str, **kwargs: ParseConfigurationUpdateParams) -> ParseConfiguration
PUT /api/v1/beta/parse-configurations/{config_id}
Delete Parse Configuration
beta.parse_configurations.delete(config_id: str, **kwargs: ParseConfigurationDeleteParams)
DELETE /api/v1/beta/parse-configurations/{config_id}
Models
class ParseConfiguration:

Parse configuration schema.

id: str

Unique identifier for the parse configuration

created_at: datetime

Creation timestamp

format: date-time
name: str

Name of the parse configuration

LlamaParseParameters configuration

adaptive_long_table: Optional[bool]
aggressive_table_extraction: Optional[bool]
auto_mode: Optional[bool]
auto_mode_configuration_json: Optional[str]
auto_mode_trigger_on_image_in_page: Optional[bool]
auto_mode_trigger_on_regexp_in_page: Optional[str]
auto_mode_trigger_on_table_in_page: Optional[bool]
auto_mode_trigger_on_text_in_page: Optional[str]
azure_openai_api_version: Optional[str]
azure_openai_deployment_name: Optional[str]
azure_openai_endpoint: Optional[str]
azure_openai_key: Optional[str]
bbox_bottom: Optional[float]
bbox_left: Optional[float]
bbox_right: Optional[float]
bbox_top: Optional[float]
bounding_box: Optional[str]
compact_markdown_table: Optional[bool]
complemental_formatting_instruction: Optional[str]
content_guideline_instruction: Optional[str]
continuous_mode: Optional[bool]
disable_image_extraction: Optional[bool]
disable_ocr: Optional[bool]
disable_reconstruction: Optional[bool]
do_not_cache: Optional[bool]
do_not_unroll_columns: Optional[bool]
enable_cost_optimizer: Optional[bool]
extract_charts: Optional[bool]
extract_layout: Optional[bool]
extract_printed_page_number: Optional[bool]
fast_mode: Optional[bool]
formatting_instruction: Optional[str]
gpt4o_api_key: Optional[str]
gpt4o_mode: Optional[bool]
guess_xlsx_sheet_name: Optional[bool]
hide_footers: Optional[bool]
hide_headers: Optional[bool]
high_res_ocr: Optional[bool]
html_make_all_elements_visible: Optional[bool]
html_remove_fixed_elements: Optional[bool]
html_remove_navigation_elements: Optional[bool]
http_proxy: Optional[str]
ignore_document_elements_for_layout_detection: Optional[bool]
images_to_save: Optional[List[Literal["screenshot", "embedded", "layout"]]]
Accepts one of the following:
"screenshot"
"embedded"
"layout"
inline_images_in_markdown: Optional[bool]
input_s3_path: Optional[str]
input_s3_region: Optional[str]
input_url: Optional[str]
internal_is_screenshot_job: Optional[bool]
invalidate_cache: Optional[bool]
is_formatting_instruction: Optional[bool]
job_timeout_extra_time_per_page_in_seconds: Optional[float]
job_timeout_in_seconds: Optional[float]
keep_page_separator_when_merging_tables: Optional[bool]
languages: Optional[List[ParsingLanguages]]
Accepts one of the following:
"af"
"az"
"bs"
"cs"
"cy"
"da"
"de"
"en"
"es"
"et"
"fr"
"ga"
"hr"
"hu"
"id"
"is"
"it"
"ku"
"la"
"lt"
"lv"
"mi"
"ms"
"mt"
"nl"
"no"
"oc"
"pi"
"pl"
"pt"
"ro"
"rs_latin"
"sk"
"sl"
"sq"
"sv"
"sw"
"tl"
"tr"
"uz"
"vi"
"ar"
"fa"
"ug"
"ur"
"bn"
"as"
"mni"
"ru"
"rs_cyrillic"
"be"
"bg"
"uk"
"mn"
"abq"
"ady"
"kbd"
"ava"
"dar"
"inh"
"che"
"lbe"
"lez"
"tab"
"tjk"
"hi"
"mr"
"ne"
"bh"
"mai"
"ang"
"bho"
"mah"
"sck"
"new"
"gom"
"sa"
"bgc"
"th"
"ch_sim"
"ch_tra"
"ja"
"ko"
"ta"
"te"
"kn"
layout_aware: Optional[bool]
line_level_bounding_box: Optional[bool]
markdown_table_multiline_header_separator: Optional[str]
max_pages: Optional[int]
max_pages_enforced: Optional[int]
merge_tables_across_pages_in_markdown: Optional[bool]
model: Optional[str]
outlined_table_extraction: Optional[bool]
output_pdf_of_document: Optional[bool]
output_s3_path_prefix: Optional[str]
output_s3_region: Optional[str]
output_tables_as_html: Optional[bool]
page_error_tolerance: Optional[float]
page_header_prefix: Optional[str]
page_header_suffix: Optional[str]
page_prefix: Optional[str]
page_separator: Optional[str]
page_suffix: Optional[str]
parse_mode: Optional[ParsingMode]

Enum for representing the mode of parsing to be used.

Accepts one of the following:
"parse_page_without_llm"
"parse_page_with_llm"
"parse_page_with_lvm"
"parse_page_with_agent"
"parse_page_with_layout_agent"
"parse_document_with_llm"
"parse_document_with_lvm"
"parse_document_with_agent"
parsing_instruction: Optional[str]
precise_bounding_box: Optional[bool]
premium_mode: Optional[bool]
presentation_out_of_bounds_content: Optional[bool]
presentation_skip_embedded_data: Optional[bool]
preserve_layout_alignment_across_pages: Optional[bool]
preserve_very_small_text: Optional[bool]
preset: Optional[str]
priority: Optional[Literal["low", "medium", "high", "critical"]]

The priority for the request. This field may be ignored or overwritten depending on the organization tier.

Accepts one of the following:
"low"
"medium"
"high"
"critical"
project_id: Optional[str]
remove_hidden_text: Optional[bool]
replace_failed_page_mode: Optional[FailPageMode]

Enum for representing the different available page error handling modes.

Accepts one of the following:
"raw_text"
"blank_page"
"error_message"
replace_failed_page_with_error_message_prefix: Optional[str]
replace_failed_page_with_error_message_suffix: Optional[str]
save_images: Optional[bool]
skip_diagonal_text: Optional[bool]
specialized_chart_parsing_agentic: Optional[bool]
specialized_chart_parsing_efficient: Optional[bool]
specialized_chart_parsing_plus: Optional[bool]
specialized_image_parsing: Optional[bool]
spreadsheet_extract_sub_tables: Optional[bool]
spreadsheet_force_formula_computation: Optional[bool]
strict_mode_buggy_font: Optional[bool]
strict_mode_image_extraction: Optional[bool]
strict_mode_image_ocr: Optional[bool]
strict_mode_reconstruction: Optional[bool]
structured_output: Optional[bool]
structured_output_json_schema: Optional[str]
structured_output_json_schema_name: Optional[str]
system_prompt: Optional[str]
system_prompt_append: Optional[str]
take_screenshot: Optional[bool]
target_pages: Optional[str]
tier: Optional[str]
use_vendor_multimodal_model: Optional[bool]
user_prompt: Optional[str]
vendor_multimodal_api_key: Optional[str]
vendor_multimodal_model_name: Optional[str]
version: Optional[str]
webhook_configurations: Optional[List[WebhookConfiguration]]

The outbound webhook configurations

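webhook_events: Optional[List[Literal["extract.pending", "extract.success", "extract.error", "extract.partial_success", "extract.cancelled", "parse.pending", "parse.success", "parse.error", "parse.partial_success", "parse.cancelled", "classify.pending", "classify.success", "classify.error", "classify.partial_success", "classify.cancelled", "unmapped_event"]]]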

List of event names to subscribe to

Accepts one of the following:
"extract.pending"
"extract.success"
"extract.error"
"extract.partial_success"
"extract.cancelled"
"parse.pending"
"parse.success"
"parse.error"
"parse.partial_success"
"parse.cancelled"
"classify.pending"
"classify.success"
"classify.error"
"classify.partial_success"
"classify.cancelled"
"unmapped_event"
webhook_headers: Optional[Dict[str, str]]

Custom HTTP headers to include with webhook requests.

webhook_output_format: Optional[str]

The output format to use for the webhook. Defaults to string if none supplied. Currently supported values: string, json

webhook_url: Optional[str]

The URL to send webhook notifications to.

webhook_url: Optional[str]
source_id: str

ID of the source

source_type: str

Type of the source (e.g., 'project')

updated_at: datetime

Last update timestamp

format: date-time
version: str

Version of the configuration

creator: Optional[str]

Creator of the configuration

class ParseConfigurationCreate:

Schema for creating a new parse configuration (API boundary).

name: str

Name of the parse configuration

LlamaParseParameters configuration

adaptive_long_table: Optional[bool]
aggressive_table_extraction: Optional[bool]
auto_mode: Optional[bool]
auto_mode_configuration_json: Optional[str]
auto_mode_trigger_on_image_in_page: Optional[bool]
auto_mode_trigger_on_regexp_in_page: Optional[str]
auto_mode_trigger_on_table_in_page: Optional[bool]
auto_mode_trigger_on_text_in_page: Optional[str]
azure_openai_api_version: Optional[str]
azure_openai_deployment_name: Optional[str]
azure_openai_endpoint: Optional[str]
azure_openai_key: Optional[str]
bbox_bottom: Optional[float]
bbox_left: Optional[float]
bbox_right: Optional[float]
bbox_top: Optional[float]
bounding_box: Optional[str]
compact_markdown_table: Optional[bool]
complemental_formatting_instruction: Optional[str]
content_guideline_instruction: Optional[str]
continuous_mode: Optional[bool]
disable_image_extraction: Optional[bool]
disable_ocr: Optional[bool]
disable_reconstruction: Optional[bool]
do_not_cache: Optional[bool]
do_not_unroll_columns: Optional[bool]
enable_cost_optimizer: Optional[bool]
extract_charts: Optional[bool]
extract_layout: Optional[bool]
extract_printed_page_number: Optional[bool]
fast_mode: Optional[bool]
formatting_instruction: Optional[str]
gpt4o_api_key: Optional[str]
gpt4o_mode: Optional[bool]
guess_xlsx_sheet_name: Optional[bool]
hide_footers: Optional[bool]
hide_headers: Optional[bool]
high_res_ocr: Optional[bool]
html_make_all_elements_visible: Optional[bool]
html_remove_fixed_elements: Optional[bool]
html_remove_navigation_elements: Optional[bool]
http_proxy: Optional[str]
ignore_document_elements_for_layout_detection: Optional[bool]
images_to_save: Optional[List[Literal["screenshot", "embedded", "layout"]]]
Accepts one of the following:
"screenshot"
"embedded"
"layout"
inline_images_in_markdown: Optional[bool]
input_s3_path: Optional[str]
input_s3_region: Optional[str]
input_url: Optional[str]
internal_is_screenshot_job: Optional[bool]
invalidate_cache: Optional[bool]
is_formatting_instruction: Optional[bool]
job_timeout_extra_time_per_page_in_seconds: Optional[float]
job_timeout_in_seconds: Optional[float]
keep_page_separator_when_merging_tables: Optional[bool]
languages: Optional[List[ParsingLanguages]]
Accepts one of the following:
"af"
"az"
"bs"
"cs"
"cy"
"da"
"de"
"en"
"es"
"et"
"fr"
"ga"
"hr"
"hu"
"id"
"is"
"it"
"ku"
"la"
"lt"
"lv"
"mi"
"ms"
"mt"
"nl"
"no"
"oc"
"pi"
"pl"
"pt"
"ro"
"rs_latin"
"sk"
"sl"
"sq"
"sv"
"sw"
"tl"
"tr"
"uz"
"vi"
"ar"
"fa"
"ug"
"ur"
"bn"
"as"
"mni"
"ru"
"rs_cyrillic"
"be"
"bg"
"uk"
"mn"
"abq"
"ady"
"kbd"
"ava"
"dar"
"inh"
"che"
"lbe"
"lez"
"tab"
"tjk"
"hi"
"mr"
"ne"
"bh"
"mai"
"ang"
"bho"
"mah"
"sck"
"new"
"gom"
"sa"
"bgc"
"th"
"ch_sim"
"ch_tra"
"ja"
"ko"
"ta"
"te"
"kn"
layout_aware: Optional[bool]
line_level_bounding_box: Optional[bool]
markdown_table_multiline_header_separator: Optional[str]
max_pages: Optional[int]
max_pages_enforced: Optional[int]
merge_tables_across_pages_in_markdown: Optional[bool]
model: Optional[str]
outlined_table_extraction: Optional[bool]
output_pdf_of_document: Optional[bool]
output_s3_path_prefix: Optional[str]
output_s3_region: Optional[str]
output_tables_as_html: Optional[bool]
page_error_tolerance: Optional[float]
page_header_prefix: Optional[str]
page_header_suffix: Optional[str]
page_prefix: Optional[str]
page_separator: Optional[str]
page_suffix: Optional[str]
parse_mode: Optional[ParsingMode]

Enum for representing the mode of parsing to be used.

Accepts one of the following:
"parse_page_without_llm"
"parse_page_with_llm"
"parse_page_with_lvm"
"parse_page_with_agent"
"parse_page_with_layout_agent"
"parse_document_with_llm"
"parse_document_with_lvm"
"parse_document_with_agent"
parsing_instruction: Optional[str]
precise_bounding_box: Optional[bool]
premium_mode: Optional[bool]
presentation_out_of_bounds_content: Optional[bool]
presentation_skip_embedded_data: Optional[bool]
preserve_layout_alignment_across_pages: Optional[bool]
preserve_very_small_text: Optional[bool]
preset: Optional[str]
priority: Optional[Literal["low", "medium", "high", "critical"]]

The priority for the request. This field may be ignored or overwritten depending on the organization tier.

Accepts one of the following:
"low"
"medium"
"high"
"critical"
project_id: Optional[str]
remove_hidden_text: Optional[bool]
replace_failed_page_mode: Optional[FailPageMode]

Enum for representing the different available page error handling modes.

Accepts one of the following:
"raw_text"
"blank_page"
"error_message"
replace_failed_page_with_error_message_prefix: Optional[str]
replace_failed_page_with_error_message_suffix: Optional[str]
save_images: Optional[bool]
skip_diagonal_text: Optional[bool]
specialized_chart_parsing_agentic: Optional[bool]
specialized_chart_parsing_efficient: Optional[bool]
specialized_chart_parsing_plus: Optional[bool]
specialized_image_parsing: Optional[bool]
spreadsheet_extract_sub_tables: Optional[bool]
spreadsheet_force_formula_computation: Optional[bool]
strict_mode_buggy_font: Optional[bool]
strict_mode_image_extraction: Optional[bool]
strict_mode_image_ocr: Optional[bool]
strict_mode_reconstruction: Optional[bool]
structured_output: Optional[bool]
structured_output_json_schema: Optional[str]
structured_output_json_schema_name: Optional[str]
system_prompt: Optional[str]
system_prompt_append: Optional[str]
take_screenshot: Optional[bool]
target_pages: Optional[str]
tier: Optional[str]
use_vendor_multimodal_model: Optional[bool]
user_prompt: Optional[str]
vendor_multimodal_api_key: Optional[str]
vendor_multimodal_model_name: Optional[str]
version: Optional[str]
webhook_configurations: Optional[List[WebhookConfiguration]]

The outbound webhook configurations

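webhook_events: Optional[List[Literal["extract.pending", "extract.success", "extract.error", "extract.partial_success", "extract.cancelled", "parse.pending", "parse.success", "parse.error", "parse.partial_success", "parse.cancelled", "classify.pending", "classify.success", "classify.error", "classify.partial_success", "classify.cancelled", "unmapped_event"]]]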

List of event names to subscribe to

Accepts one of the following:
"extract.pending"
"extract.success"
"extract.error"
"extract.partial_success"
"extract.cancelled"
"parse.pending"
"parse.success"
"parse.error"
"parse.partial_success"
"parse.cancelled"
"classify.pending"
"classify.success"
"classify.error"
"classify.partial_success"
"classify.cancelled"
"unmapped_event"
webhook_headers: Optional[Dict[str, str]]

Custom HTTP headers to include with webhook requests.

webhook_output_format: Optional[str]

The output format to use for the webhook. Defaults to string if none supplied. Currently supported values: string, json

webhook_url: Optional[str]

The URL to send webhook notifications to.

webhook_url: Optional[str]
version: str

Version of the configuration

creator: Optional[str]

Creator of the configuration

source_id: Optional[str]

ID of the source

source_type: Optional[str]

Type of the source (e.g., 'project')

class ParseConfigurationQueryResponse:

Response schema for paginated parse configuration queries.

items: List[ParseConfiguration]

The list of items.

id: str

Unique identifier for the parse configuration

created_at: datetime

Creation timestamp

format: date-time
name: str

Name of the parse configuration

LlamaParseParameters configuration

adaptive_long_table: Optional[bool]
aggressive_table_extraction: Optional[bool]
auto_mode: Optional[bool]
auto_mode_configuration_json: Optional[str]
auto_mode_trigger_on_image_in_page: Optional[bool]
auto_mode_trigger_on_regexp_in_page: Optional[str]
auto_mode_trigger_on_table_in_page: Optional[bool]
auto_mode_trigger_on_text_in_page: Optional[str]
azure_openai_api_version: Optional[str]
azure_openai_deployment_name: Optional[str]
azure_openai_endpoint: Optional[str]
azure_openai_key: Optional[str]
bbox_bottom: Optional[float]
bbox_left: Optional[float]
bbox_right: Optional[float]
bbox_top: Optional[float]
bounding_box: Optional[str]
compact_markdown_table: Optional[bool]
complemental_formatting_instruction: Optional[str]
content_guideline_instruction: Optional[str]
continuous_mode: Optional[bool]
disable_image_extraction: Optional[bool]
disable_ocr: Optional[bool]
disable_reconstruction: Optional[bool]
do_not_cache: Optional[bool]
do_not_unroll_columns: Optional[bool]
enable_cost_optimizer: Optional[bool]
extract_charts: Optional[bool]
extract_layout: Optional[bool]
extract_printed_page_number: Optional[bool]
fast_mode: Optional[bool]
formatting_instruction: Optional[str]
gpt4o_api_key: Optional[str]
gpt4o_mode: Optional[bool]
guess_xlsx_sheet_name: Optional[bool]
hide_footers: Optional[bool]
hide_headers: Optional[bool]
high_res_ocr: Optional[bool]
html_make_all_elements_visible: Optional[bool]
html_remove_fixed_elements: Optional[bool]
html_remove_navigation_elements: Optional[bool]
http_proxy: Optional[str]
ignore_document_elements_for_layout_detection: Optional[bool]
images_to_save: Optional[List[Literal["screenshot", "embedded", "layout"]]]
Accepts one of the following:
"screenshot"
"embedded"
"layout"
inline_images_in_markdown: Optional[bool]
input_s3_path: Optional[str]
input_s3_region: Optional[str]
input_url: Optional[str]
internal_is_screenshot_job: Optional[bool]
invalidate_cache: Optional[bool]
is_formatting_instruction: Optional[bool]
job_timeout_extra_time_per_page_in_seconds: Optional[float]
job_timeout_in_seconds: Optional[float]
keep_page_separator_when_merging_tables: Optional[bool]
languages: Optional[List[ParsingLanguages]]
Accepts one of the following:
"af"
"az"
"bs"
"cs"
"cy"
"da"
"de"
"en"
"es"
"et"
"fr"
"ga"
"hr"
"hu"
"id"
"is"
"it"
"ku"
"la"
"lt"
"lv"
"mi"
"ms"
"mt"
"nl"
"no"
"oc"
"pi"
"pl"
"pt"
"ro"
"rs_latin"
"sk"
"sl"
"sq"
"sv"
"sw"
"tl"
"tr"
"uz"
"vi"
"ar"
"fa"
"ug"
"ur"
"bn"
"as"
"mni"
"ru"
"rs_cyrillic"
"be"
"bg"
"uk"
"mn"
"abq"
"ady"
"kbd"
"ava"
"dar"
"inh"
"che"
"lbe"
"lez"
"tab"
"tjk"
"hi"
"mr"
"ne"
"bh"
"mai"
"ang"
"bho"
"mah"
"sck"
"new"
"gom"
"sa"
"bgc"
"th"
"ch_sim"
"ch_tra"
"ja"
"ko"
"ta"
"te"
"kn"
layout_aware: Optional[bool]
line_level_bounding_box: Optional[bool]
markdown_table_multiline_header_separator: Optional[str]
max_pages: Optional[int]
max_pages_enforced: Optional[int]
merge_tables_across_pages_in_markdown: Optional[bool]
model: Optional[str]
outlined_table_extraction: Optional[bool]
output_pdf_of_document: Optional[bool]
output_s3_path_prefix: Optional[str]
output_s3_region: Optional[str]
output_tables_as_html: Optional[bool]
page_error_tolerance: Optional[float]
page_header_prefix: Optional[str]
page_header_suffix: Optional[str]
page_prefix: Optional[str]
page_separator: Optional[str]
page_suffix: Optional[str]
parse_mode: Optional[ParsingMode]

Enum for representing the mode of parsing to be used.

Accepts one of the following:
"parse_page_without_llm"
"parse_page_with_llm"
"parse_page_with_lvm"
"parse_page_with_agent"
"parse_page_with_layout_agent"
"parse_document_with_llm"
"parse_document_with_lvm"
"parse_document_with_agent"
parsing_instruction: Optional[str]
precise_bounding_box: Optional[bool]
premium_mode: Optional[bool]
presentation_out_of_bounds_content: Optional[bool]
presentation_skip_embedded_data: Optional[bool]
preserve_layout_alignment_across_pages: Optional[bool]
preserve_very_small_text: Optional[bool]
preset: Optional[str]
priority: Optional[Literal["low", "medium", "high", "critical"]]

The priority for the request. This field may be ignored or overwritten depending on the organization tier.

Accepts one of the following:
"low"
"medium"
"high"
"critical"
project_id: Optional[str]
remove_hidden_text: Optional[bool]
replace_failed_page_mode: Optional[FailPageMode]

Enum for representing the different available page error handling modes.

Accepts one of the following:
"raw_text"
"blank_page"
"error_message"
replace_failed_page_with_error_message_prefix: Optional[str]
replace_failed_page_with_error_message_suffix: Optional[str]
save_images: Optional[bool]
skip_diagonal_text: Optional[bool]
specialized_chart_parsing_agentic: Optional[bool]
specialized_chart_parsing_efficient: Optional[bool]
specialized_chart_parsing_plus: Optional[bool]
specialized_image_parsing: Optional[bool]
spreadsheet_extract_sub_tables: Optional[bool]
spreadsheet_force_formula_computation: Optional[bool]
strict_mode_buggy_font: Optional[bool]
strict_mode_image_extraction: Optional[bool]
strict_mode_image_ocr: Optional[bool]
strict_mode_reconstruction: Optional[bool]
structured_output: Optional[bool]
structured_output_json_schema: Optional[str]
structured_output_json_schema_name: Optional[str]
system_prompt: Optional[str]
system_prompt_append: Optional[str]
take_screenshot: Optional[bool]
target_pages: Optional[str]
tier: Optional[str]
use_vendor_multimodal_model: Optional[bool]
user_prompt: Optional[str]
vendor_multimodal_api_key: Optional[str]
vendor_multimodal_model_name: Optional[str]
version: Optional[str]
webhook_configurations: Optional[List[WebhookConfiguration]]

The outbound webhook configurations

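webhook_events: Optional[List[Literal["extract.pending", "extract.success", "extract.error", "extract.partial_success", "extract.cancelled", "parse.pending", "parse.success", "parse.error", "parse.partial_success", "parse.cancelled", "classify.pending", "classify.success", "classify.error", "classify.partial_success", "classify.cancelled", "unmapped_event"]]]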

List of event names to subscribe to

Accepts one of the following:
"extract.pending"
"extract.success"
"extract.error"
"extract.partial_success"
"extract.cancelled"
"parse.pending"
"parse.success"
"parse.error"
"parse.partial_success"
"parse.cancelled"
"classify.pending"
"classify.success"
"classify.error"
"classify.partial_success"
"classify.cancelled"
"unmapped_event"
webhook_headers: Optional[Dict[str, str]]

Custom HTTP headers to include with webhook requests.

webhook_output_format: Optional[str]

The output format to use for the webhook. Defaults to string if none supplied. Currently supported values: string, json

webhook_url: Optional[str]

The URL to send webhook notifications to.

webhook_url: Optional[str]
source_id: str

ID of the source

source_type: str

Type of the source (e.g., 'project')

updated_at: datetime

Last update timestamp

format: date-time
version: str

Version of the configuration

creator: Optional[str]

Creator of the configuration

next_page_token: Optional[str]

A token, which can be sent as page_token to retrieve the next page. If this field is omitted, there are no subsequent pages.

total_size: Optional[int]

The total number of items available. This is only populated when specifically requested. The value may be an estimate and can be used for display purposes only.

Beta > Sheets

Create Spreadsheet Job
beta.sheets.create(**kwargs: SheetCreateParams) -> SheetsJob
POST /api/v1/beta/sheets/jobs
List Spreadsheet Jobs
beta.sheets.list(**kwargs: SheetListParams) -> SyncPaginatedCursor[SheetsJob]
GET /api/v1/beta/sheets/jobs
Get Spreadsheet Job
beta.sheets.get(spreadsheet_job_id: str, **kwargs: SheetGetParams) -> SheetsJob
GET /api/v1/beta/sheets/jobs/{spreadsheet_job_id}
Get Result Region
beta.sheets.get_result_table(region_type: Literal["table", "extra", "cell_metadata"], **kwargs: SheetGetResultTableParams) -> PresignedURL
GET /api/v1/beta/sheets/jobs/{spreadsheet_job_id}/regions/{region_id}/result/{region_type}
Delete Spreadsheet Job
beta.sheets.delete_job(spreadsheet_job_id: str, **kwargs: SheetDeleteJobParams) -> object
DELETE /api/v1/beta/sheets/jobs/{spreadsheet_job_id}
Models
class SheetsJob:

A spreadsheet parsing job

id: str

The ID of the job

Configuration for the parsing job

extraction_range: Optional[str]

A1 notation of the range to extract a single region from. If None, the entire sheet is used.

flatten_hierarchical_tables: Optional[bool]

Return a flattened dataframe when a detected table is recognized as hierarchical.

generate_additional_metadata: Optional[bool]

Whether to generate additional metadata (title, description) for each extracted region.

include_hidden_cells: Optional[bool]

Whether to include hidden cells when extracting regions from the spreadsheet.

sheet_names: Optional[List[str]]

The names of the sheets to extract regions from. If empty, all sheets will be processed.

table_merge_sensitivity: Optional[Literal["strong", "weak"]]

Influences how likely similar-looking regions are merged into a single table. Useful for spreadsheets that either have sparse tables (strong merging) or many distinct tables close together (weak merging).

Accepts one of the following:
"strong"
"weak"
use_experimental_processing: Optional[bool]

Enables experimental processing. Accuracy may be impacted.

created_at: str

When the job was created

file_id: Optional[str]

The ID of the input file

format: uuid
project_id: str

The ID of the project

format: uuid
status: StatusEnum

The status of the parsing job

Accepts one of the following:
"PENDING"
"SUCCESS"
"ERROR"
"PARTIAL_SUCCESS"
"CANCELLED"
updated_at: str

When the job was last updated

user_id: str

The ID of the user

errors: Optional[List[str]]

Any errors encountered

file: Optional[File] (Deprecated)

Schema for a file.

id: str

Unique identifier

format: uuid
name: str
project_id: str

The ID of the project that the file belongs to

format: uuid
created_at: Optional[datetime]

Creation datetime

format: date-time
data_source_id: Optional[str]

The ID of the data source that the file belongs to

format: uuid
expires_at: Optional[datetime]

The expiration date for the file. Files past this date can be deleted.

format: date-time
external_file_id: Optional[str]

The ID of the file in the external system

file_size: Optional[int]

Size of the file in bytes

minimum: 0
file_type: Optional[str]

File type (e.g. pdf, docx, etc.)

maxLength: 3000
minLength: 1
last_modified_at: Optional[datetime]

The last modified time of the file

format: date-time
permission_info: Optional[Dict[str, Union[Dict[str, object], List[object], str, 3 more]]]

Permission information for the file

Accepts one of the following:
Dict[str, object]
List[object]
str
float
bool
purpose: Optional[str]

The intended purpose of the file (e.g., 'user_data', 'parse', 'extract', 'split', 'classify')

resource_info: Optional[Dict[str, Union[Dict[str, object], List[object], str, 3 more]]]

Resource information for the file

Accepts one of the following:
Dict[str, object]
List[object]
str
float
bool
updated_at: Optional[datetime]

Update datetime

format: date-time
regions: Optional[List[Region]]

All extracted regions (populated when job is complete)

location: str

Location of the region in the spreadsheet

region_type: str

Type of the extracted region

sheet_name: str

Worksheet name where region was found

description: Optional[str]

Generated description for the region

region_id: Optional[str]

Unique identifier for this region within the file

title: Optional[str]

Generated title for the region

success: Optional[bool]

Whether the job completed successfully

worksheet_metadata: Optional[List[WorksheetMetadata]]

Metadata for each processed worksheet (populated when job is complete)

sheet_name: str

Name of the worksheet

description: Optional[str]

Generated description of the worksheet

title: Optional[str]

Generated title for the worksheet

class SheetsParsingConfig:

Configuration for spreadsheet parsing and region extraction

extraction_range: Optional[str]

A1 notation of the range to extract a single region from. If None, the entire sheet is used.

flatten_hierarchical_tables: Optional[bool]

Return a flattened dataframe when a detected table is recognized as hierarchical.

generate_additional_metadata: Optional[bool]

Whether to generate additional metadata (title, description) for each extracted region.

include_hidden_cells: Optional[bool]

Whether to include hidden cells when extracting regions from the spreadsheet.

sheet_names: Optional[List[str]]

The names of the sheets to extract regions from. If empty, all sheets will be processed.

table_merge_sensitivity: Optional[Literal["strong", "weak"]]

Influences how likely similar-looking regions are merged into a single table. Useful for spreadsheets that either have sparse tables (strong merging) or many distinct tables close together (weak merging).

Accepts one of the following:
"strong"
"weak"
use_experimental_processing: Optional[bool]

Enables experimental processing. Accuracy may be impacted.

Beta > Directories

Create Directory
beta.directories.create(**kwargs: DirectoryCreateParams) -> DirectoryCreateResponse
POST /api/v1/beta/directories
List Directories
beta.directories.list(**kwargs: DirectoryListParams) -> SyncPaginatedCursor[DirectoryListResponse]
GET /api/v1/beta/directories
Get Directory
beta.directories.get(directory_id: str, **kwargs: DirectoryGetParams) -> DirectoryGetResponse
GET /api/v1/beta/directories/{directory_id}
Update Directory
beta.directories.update(directory_id: str, **kwargs: DirectoryUpdateParams) -> DirectoryUpdateResponse
PATCH /api/v1/beta/directories/{directory_id}
Delete Directory
beta.directories.delete(directory_id: str, **kwargs: DirectoryDeleteParams)
DELETE /api/v1/beta/directories/{directory_id}

Beta > Directories > Files

Add Directory File
beta.directories.files.add(directory_id: str, **kwargs: FileAddParams) -> FileAddResponse
POST /api/v1/beta/directories/{directory_id}/files
List Directory Files
beta.directories.files.list(directory_id: str, **kwargs: FileListParams) -> SyncPaginatedCursor[FileListResponse]
GET /api/v1/beta/directories/{directory_id}/files
Get Directory File
beta.directories.files.get(directory_file_id: str, **kwargs: FileGetParams) -> FileGetResponse
GET /api/v1/beta/directories/{directory_id}/files/{directory_file_id}
Update Directory File
beta.directories.files.update(directory_file_id: str, **kwargs: FileUpdateParams) -> FileUpdateResponse
PATCH /api/v1/beta/directories/{directory_id}/files/{directory_file_id}
Delete Directory File
beta.directories.files.delete(directory_file_id: str, **kwargs: FileDeleteParams)
DELETE /api/v1/beta/directories/{directory_id}/files/{directory_file_id}
Upload File To Directory
beta.directories.files.upload(directory_id: str, **kwargs: FileUploadParams) -> FileUploadResponse
POST /api/v1/beta/directories/{directory_id}/files/upload

Beta > Batch

Create Batch Job
beta.batch.create(**kwargs: BatchCreateParams) -> BatchCreateResponse
POST /api/v1/beta/batch-processing
List Batch Jobs
beta.batch.list(**kwargs: BatchListParams) -> SyncPaginatedBatchItems[BatchListResponse]
GET /api/v1/beta/batch-processing
Get Batch Job Status
beta.batch.get_status(job_id: str, **kwargs: BatchGetStatusParams) -> BatchGetStatusResponse
GET /api/v1/beta/batch-processing/{job_id}
Cancel Batch Job
beta.batch.cancel(job_id: str, **kwargs: BatchCancelParams) -> BatchCancelResponse
POST /api/v1/beta/batch-processing/{job_id}/cancel

Beta > Batch > Job Items

List Batch Job Items
beta.batch.job_items.list(job_id: str, **kwargs: JobItemListParams) -> SyncPaginatedBatchItems[JobItemListResponse]
GET /api/v1/beta/batch-processing/{job_id}/items
Get Item Processing Results
beta.batch.job_items.get_processing_results(item_id: str, **kwargs: JobItemGetProcessingResultsParams) -> JobItemGetProcessingResultsResponse
GET /api/v1/beta/batch-processing/items/{item_id}/processing-results

Beta > Split

Create Split Job
beta.split.create(**kwargs: SplitCreateParams) -> SplitCreateResponse
POST /api/v1/beta/split/jobs
List Split Jobs
beta.split.list(**kwargs: SplitListParams) -> SyncPaginatedCursor[SplitListResponse]
GET /api/v1/beta/split/jobs
Get Split Job
beta.split.get(split_job_id: str, **kwargs: SplitGetParams) -> SplitGetResponse
GET /api/v1/beta/split/jobs/{split_job_id}
Models
class SplitCategory:

Category definition for document splitting.

name: str

Name of the category.

maxLength: 200
minLength: 1
description: Optional[str]

Optional description of what content belongs in this category.

maxLength: 2000
minLength: 1
class SplitDocumentInput:

Document input specification.

type: str

Type of document input. Valid values are: file_id

value: str

Document identifier.

class SplitResultResponse:

Result of a completed split job.

segments: List[SplitSegmentResponse]

List of document segments.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.

class SplitSegmentResponse:

A segment of the split document.

category: str

Category name this split belongs to.

confidence_category: str

Categorical confidence level. Valid values are: high, medium, low.

pages: List[int]

1-indexed page numbers in this split.