# Pipelines ## Search Pipelines `List<Pipeline> pipelines().list(PipelineListParams params = PipelineListParams.none(), RequestOptions requestOptions = RequestOptions.none())` **get** `/api/v1/pipelines` Search for pipelines by name, type, or project. ### Parameters - `PipelineListParams params` - `Optional organizationId` - `Optional pipelineName` - `Optional pipelineType` Enum for representing the type of a pipeline - `Optional projectId` - `Optional projectName` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.Pipeline; import com.llamacloud_prod.api.models.pipelines.PipelineListParams; import java.util.List; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); List<Pipeline> pipelines = client.pipelines().list(); } } ``` #### Response ```json [ { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "class_name": "class_name", "embed_batch_size": 1, "model_name": "openai-text-embedding-3-small", "num_workers": 0 }, "type": "MANAGED_OPENAI_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "config_hash": { "embedding_config_hash": "embedding_config_hash", "parsing_config_hash": "parsing_config_hash", "transform_config_hash": "transform_config_hash" }, "created_at": "2019-12-27T18:11:19.117Z", "data_sink": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "component": { "foo": "bar" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "sink_type": "PINECONE", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "additional_kwargs": { "foo": "bar" }, "api_base": "api_base", "api_key": "api_key", "api_version": 
"api_version", "azure_deployment": "azure_deployment", "azure_endpoint": "azure_endpoint", "class_name": "class_name", "default_headers": { "foo": "string" }, "dimensions": 0, "embed_batch_size": 1, "max_retries": 0, "model_name": "model_name", "num_workers": 0, "reuse_client": true, "timeout": 0 }, "type": "AZURE_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "llama_parse_parameters": { "adaptive_long_table": true, "aggressive_table_extraction": true, "annotate_links": true, "auto_mode": true, "auto_mode_configuration_json": "auto_mode_configuration_json", "auto_mode_trigger_on_image_in_page": true, "auto_mode_trigger_on_regexp_in_page": "auto_mode_trigger_on_regexp_in_page", "auto_mode_trigger_on_table_in_page": true, "auto_mode_trigger_on_text_in_page": "auto_mode_trigger_on_text_in_page", "azure_openai_api_version": "azure_openai_api_version", "azure_openai_deployment_name": "azure_openai_deployment_name", "azure_openai_endpoint": "azure_openai_endpoint", "azure_openai_key": "azure_openai_key", "bbox_bottom": 0, "bbox_left": 0, "bbox_right": 0, "bbox_top": 0, "bounding_box": "bounding_box", "compact_markdown_table": true, "complemental_formatting_instruction": "complemental_formatting_instruction", "content_guideline_instruction": "content_guideline_instruction", "continuous_mode": true, "disable_image_extraction": true, "disable_ocr": true, "disable_reconstruction": true, "do_not_cache": true, "do_not_unroll_columns": true, "enable_cost_optimizer": true, "extract_charts": true, "extract_layout": true, "extract_printed_page_number": true, "fast_mode": true, "formatting_instruction": "formatting_instruction", "gpt4o_api_key": "gpt4o_api_key", "gpt4o_mode": true, "guess_xlsx_sheet_name": true, "hide_footers": true, "hide_headers": true, "high_res_ocr": true, 
"html_make_all_elements_visible": true, "html_remove_fixed_elements": true, "html_remove_navigation_elements": true, "http_proxy": "http_proxy", "ignore_document_elements_for_layout_detection": true, "images_to_save": [ "screenshot" ], "inline_images_in_markdown": true, "input_s3_path": "input_s3_path", "input_s3_region": "input_s3_region", "input_url": "input_url", "internal_is_screenshot_job": true, "invalidate_cache": true, "is_formatting_instruction": true, "job_timeout_extra_time_per_page_in_seconds": 0, "job_timeout_in_seconds": 0, "keep_page_separator_when_merging_tables": true, "languages": [ "af" ], "layout_aware": true, "line_level_bounding_box": true, "markdown_table_multiline_header_separator": "markdown_table_multiline_header_separator", "max_pages": 0, "max_pages_enforced": 0, "merge_tables_across_pages_in_markdown": true, "model": "model", "outlined_table_extraction": true, "output_pdf_of_document": true, "output_s3_path_prefix": "output_s3_path_prefix", "output_s3_region": "output_s3_region", "output_tables_as_HTML": true, "page_error_tolerance": 0, "page_footer_prefix": "page_footer_prefix", "page_footer_suffix": "page_footer_suffix", "page_header_prefix": "page_header_prefix", "page_header_suffix": "page_header_suffix", "page_prefix": "page_prefix", "page_separator": "page_separator", "page_suffix": "page_suffix", "parse_mode": "parse_page_without_llm", "parsing_instruction": "parsing_instruction", "precise_bounding_box": true, "premium_mode": true, "presentation_out_of_bounds_content": true, "presentation_skip_embedded_data": true, "preserve_layout_alignment_across_pages": true, "preserve_very_small_text": true, "preset": "preset", "priority": "low", "project_id": "project_id", "remove_hidden_text": true, "replace_failed_page_mode": "raw_text", "replace_failed_page_with_error_message_prefix": "replace_failed_page_with_error_message_prefix", "replace_failed_page_with_error_message_suffix": "replace_failed_page_with_error_message_suffix", 
"save_images": true, "skip_diagonal_text": true, "specialized_chart_parsing_agentic": true, "specialized_chart_parsing_efficient": true, "specialized_chart_parsing_plus": true, "specialized_image_parsing": true, "spreadsheet_extract_sub_tables": true, "spreadsheet_force_formula_computation": true, "spreadsheet_include_hidden_sheets": true, "strict_mode_buggy_font": true, "strict_mode_image_extraction": true, "strict_mode_image_ocr": true, "strict_mode_reconstruction": true, "structured_output": true, "structured_output_json_schema": "structured_output_json_schema", "structured_output_json_schema_name": "structured_output_json_schema_name", "system_prompt": "system_prompt", "system_prompt_append": "system_prompt_append", "take_screenshot": true, "target_pages": "target_pages", "tier": "tier", "use_vendor_multimodal_model": true, "user_prompt": "user_prompt", "vendor_multimodal_api_key": "vendor_multimodal_api_key", "vendor_multimodal_model_name": "vendor_multimodal_model_name", "version": "version", "webhook_configurations": [ { "webhook_events": [ "parse.success", "parse.error" ], "webhook_headers": { "Authorization": "Bearer sk-..." 
}, "webhook_output_format": "json", "webhook_url": "https://example.com/webhooks/llamacloud" } ], "webhook_url": "webhook_url" }, "managed_pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "metadata_config": { "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ] }, "pipeline_type": "PLAYGROUND", "preset_retrieval_parameters": { "alpha": 0, "class_name": "class_name", "dense_similarity_cutoff": 0, "dense_similarity_top_k": 1, "enable_reranking": true, "files_top_k": 1, "rerank_top_n": 1, "retrieval_mode": "chunks", "retrieve_image_nodes": true, "retrieve_page_figure_nodes": true, "retrieve_page_screenshot_nodes": true, "search_filters": { "filters": [ { "key": "key", "value": 0, "operator": "==" } ], "condition": "and" }, "search_filters_inference_schema": { "foo": { "foo": "bar" } }, "sparse_similarity_top_k": 1 }, "sparse_model_config": { "class_name": "class_name", "model_type": "splade" }, "status": "CREATED", "transform_config": { "chunk_overlap": 0, "chunk_size": 1, "mode": "auto" }, "updated_at": "2019-12-27T18:11:19.117Z" } ] ``` ## Create Pipeline `Pipeline pipelines().create(PipelineCreateParams params, RequestOptions requestOptions = RequestOptions.none())` **post** `/api/v1/pipelines` Create a new managed ingestion pipeline. A pipeline connects data sources to a vector store for RAG. After creation, call `POST /pipelines/{id}/sync` to start ingesting documents. ### Parameters - `PipelineCreateParams params` - `Optional organizationId` - `Optional projectId` - `PipelineCreate pipelineCreate` Schema for creating a pipeline. ### Returns - `class Pipeline:` Schema for a pipeline. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` - `class ManagedOpenAIEmbedding:` - `Optional component` Configuration for the Managed OpenAI embedding model. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The name of the OpenAI embedding model. 
- `OPENAI_TEXT_EMBEDDING_3_SMALL("openai-text-embedding-3-small")` - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `MANAGED_OPENAI_EMBEDDING("MANAGED_OPENAI_EMBEDDING")` - `class AzureOpenAIEmbeddingConfig:` - `Optional component` Configuration for the Azure OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. - `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `AZURE_EMBEDDING("AZURE_EMBEDDING")` - `class CohereEmbeddingConfig:` - `Optional component` Configuration for the Cohere embedding model. - `Optional apiKey` The Cohere API key. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided float embedding_type is used when needed. - `Optional inputType` Model Input type. If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. 
- `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional truncate` Truncation type - START/ END/ NONE - `Optional type` Type of the embedding model. - `COHERE_EMBEDDING("COHERE_EMBEDDING")` - `class GeminiEmbeddingConfig:` - `Optional component` Configuration for the Gemini embedding model. - `Optional apiBase` API base to access the model. Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. - `Optional transport` Transport to access the model. Defaults to None. - `Optional type` Type of the embedding model. - `GEMINI_EMBEDDING("GEMINI_EMBEDDING")` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `Optional component` Configuration for the HuggingFace Inference API embedding model. - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. - `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. 
- `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional pooling` Enum of possible pooling choices with pooling behaviors. - `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. - `Optional textInstruction` Instruction to prepend during text embedding. - `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. - `Optional type` Type of the embedding model. - `HUGGINGFACE_API_EMBEDDING("HUGGINGFACE_API_EMBEDDING")` - `class OpenAIEmbeddingConfig:` - `Optional component` Configuration for the OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for OpenAI API. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. 
- `OPENAI_EMBEDDING("OPENAI_EMBEDDING")` - `class VertexAiEmbeddingConfig:` - `Optional component` Configuration for the VertexAI embedding model. - `Optional clientEmail` The client email for the VertexAI credentials. - `String location` The default location to use when making API calls. - `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. - `Optional tokenUri` The token URI for the VertexAI credentials. - `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `VERTEXAI_EMBEDDING("VERTEXAI_EMBEDDING")` - `class BedrockEmbeddingConfig:` - `Optional component` Configuration for the Bedrock embedding model. - `Optional additionalKwargs` Additional kwargs for the bedrock client. - `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. - `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of aws profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. 
Uses region configured in AWS CLI if not passed - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. - `Optional type` Type of the embedding model. - `BEDROCK_EMBEDDING("BEDROCK_EMBEDDING")` - `String name` - `String projectId` - `Optional configHash` Hashes for the configuration of a pipeline. - `Optional embeddingConfigHash` Hash of the embedding config. - `Optional parsingConfigHash` Hash of the llama parse parameters. - `Optional transformConfigHash` Hash of the transform config. - `Optional createdAt` Creation datetime - `Optional dataSink` Schema for a data sink. - `String id` Unique identifier - `Component component` Component that implements the data sink - `class UnionMember0:` - `class CloudPineconeVectorStore:` Cloud Pinecone Vector Store. This class is used to store the configuration for a Pinecone vector store, so that it can be created and used in LlamaCloud. Args: api_key (str): API key for authenticating with Pinecone index_name (str): name of the Pinecone index namespace (optional[str]): namespace to use in the Pinecone index insert_kwargs (optional[dict]): additional kwargs to pass during insertion - `String apiKey` The API key for authenticating with Pinecone - `String indexName` - `Optional className` - `Optional insertKwargs` - `Optional namespace` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudPostgresVectorStore:` - `String database` - `long embedDim` - `String host` - `String password` - `long port` - `String schemaName` - `String tableName` - `String user` - `Optional className` - `Optional hnswSettings` HNSW settings for PGVector. - `Optional distanceMethod` The distance method to use. - `L2("l2")` - `IP("ip")` - `COSINE("cosine")` - `L1("l1")` - `HAMMING("hamming")` - `JACCARD("jaccard")` - `Optional efConstruction` The number of edges to use during the construction phase. 
- `Optional efSearch` The number of edges to use during the search phase. - `Optional m` The number of bi-directional links created for each new element. - `Optional vectorType` The type of vector to use. - `VECTOR("vector")` - `HALF_VEC("half_vec")` - `BIT("bit")` - `SPARSE_VEC("sparse_vec")` - `Optional hybridSearch` - `Optional performSetup` - `Optional supportsNestedMetadataFilters` - `class CloudQdrantVectorStore:` Cloud Qdrant Vector Store. This class is used to store the configuration for a Qdrant vector store, so that it can be created and used in LlamaCloud. Args: collection_name (str): name of the Qdrant collection url (str): url of the Qdrant instance api_key (str): API key for authenticating with Qdrant max_retries (int): maximum number of retries in case of a failure. Defaults to 3 client_kwargs (dict): additional kwargs to pass to the Qdrant client - `String apiKey` - `String collectionName` - `String url` - `Optional className` - `Optional clientKwargs` - `Optional maxRetries` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudAzureAiSearchVectorStore:` Cloud Azure AI Search Vector Store. - `String searchServiceApiKey` - `String searchServiceEndpoint` - `Optional className` - `Optional clientId` - `Optional clientSecret` - `Optional embeddingDimension` - `Optional filterableMetadataFieldKeys` - `Optional indexName` - `Optional searchServiceApiVersion` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `Optional tenantId` - `class CloudMongoDBAtlasVectorSearch:` Cloud MongoDB Atlas Vector Store. This class is used to store the configuration for a MongoDB Atlas vector store, so that it can be created and used in LlamaCloud. 
Args: mongodb_uri (str): URI for connecting to MongoDB Atlas db_name (str): name of the MongoDB database collection_name (str): name of the MongoDB collection vector_index_name (str): name of the MongoDB Atlas vector index fulltext_index_name (str): name of the MongoDB Atlas full-text index - `String collectionName` - `String dbName` - `String mongoDBUri` - `Optional className` - `Optional embeddingDimension` - `Optional fulltextIndexName` - `Optional supportsNestedMetadataFilters` - `Optional vectorIndexName` - `class CloudMilvusVectorStore:` Cloud Milvus Vector Store. - `String uri` - `Optional token` - `Optional className` - `Optional collectionName` - `Optional embeddingDimension` - `Optional supportsNestedMetadataFilters` - `class CloudAstraDbVectorStore:` Cloud AstraDB Vector Store. This class is used to store the configuration for an AstraDB vector store, so that it can be created and used in LlamaCloud. Args: token (str): The Astra DB Application Token to use. api_endpoint (str): The Astra DB JSON API endpoint for your database. collection_name (str): Collection name to use. If not existing, it will be created. embedding_dimension (int): Length of the embedding vectors in use. keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace' is used. - `String token` The Astra DB Application Token to use - `String apiEndpoint` The Astra DB JSON API endpoint for your database - `String collectionName` Collection name to use. If not existing, it will be created - `long embeddingDimension` Length of the embedding vectors in use - `Optional className` - `Optional keyspace` The keyspace to use. If not provided, 'default_keyspace' is used. - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `String name` The name of the data sink. 
- `String projectId` - `SinkType sinkType` - `PINECONE("PINECONE")` - `POSTGRES("POSTGRES")` - `QDRANT("QDRANT")` - `AZUREAI_SEARCH("AZUREAI_SEARCH")` - `MONGODB_ATLAS("MONGODB_ATLAS")` - `MILVUS("MILVUS")` - `ASTRA_DB("ASTRA_DB")` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfig` Schema for an embedding model config. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` The embedding configuration for the embedding model config. - `class AzureOpenAIEmbeddingConfig:` - `class CohereEmbeddingConfig:` - `class GeminiEmbeddingConfig:` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `class OpenAIEmbeddingConfig:` - `class VertexAiEmbeddingConfig:` - `class BedrockEmbeddingConfig:` - `String name` The name of the embedding model config. - `String projectId` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfigId` The ID of the EmbeddingModelConfig this pipeline is using. - `Optional llamaParseParameters` Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline. 
- `Optional adaptiveLongTable` - `Optional aggressiveTableExtraction` - `Optional annotateLinks` - `Optional autoMode` - `Optional autoModeConfigurationJson` - `Optional autoModeTriggerOnImageInPage` - `Optional autoModeTriggerOnRegexpInPage` - `Optional autoModeTriggerOnTableInPage` - `Optional autoModeTriggerOnTextInPage` - `Optional azureOpenAIApiVersion` - `Optional azureOpenAIDeploymentName` - `Optional azureOpenAIEndpoint` - `Optional azureOpenAIKey` - `Optional bboxBottom` - `Optional bboxLeft` - `Optional bboxRight` - `Optional bboxTop` - `Optional boundingBox` - `Optional compactMarkdownTable` - `Optional complementalFormattingInstruction` - `Optional contentGuidelineInstruction` - `Optional continuousMode` - `Optional disableImageExtraction` - `Optional disableOcr` - `Optional disableReconstruction` - `Optional doNotCache` - `Optional doNotUnrollColumns` - `Optional enableCostOptimizer` - `Optional extractCharts` - `Optional extractLayout` - `Optional extractPrintedPageNumber` - `Optional fastMode` - `Optional formattingInstruction` - `Optional gpt4oApiKey` - `Optional gpt4oMode` - `Optional guessXlsxSheetName` - `Optional hideFooters` - `Optional hideHeaders` - `Optional highResOcr` - `Optional htmlMakeAllElementsVisible` - `Optional htmlRemoveFixedElements` - `Optional htmlRemoveNavigationElements` - `Optional httpProxy` - `Optional ignoreDocumentElementsForLayoutDetection` - `Optional> imagesToSave` - `SCREENSHOT("screenshot")` - `EMBEDDED("embedded")` - `LAYOUT("layout")` - `Optional inlineImagesInMarkdown` - `Optional inputS3Path` - `Optional inputS3Region` - `Optional inputUrl` - `Optional internalIsScreenshotJob` - `Optional invalidateCache` - `Optional isFormattingInstruction` - `Optional jobTimeoutExtraTimePerPageInSeconds` - `Optional jobTimeoutInSeconds` - `Optional keepPageSeparatorWhenMergingTables` - `Optional> languages` - `AF("af")` - `AZ("az")` - `BS("bs")` - `CS("cs")` - `CY("cy")` - `DA("da")` - `DE("de")` - `EN("en")` - `ES("es")` - 
`ET("et")` - `FR("fr")` - `GA("ga")` - `HR("hr")` - `HU("hu")` - `ID("id")` - `IS("is")` - `IT("it")` - `KU("ku")` - `LA("la")` - `LT("lt")` - `LV("lv")` - `MI("mi")` - `MS("ms")` - `MT("mt")` - `NL("nl")` - `NO("no")` - `OC("oc")` - `PI("pi")` - `PL("pl")` - `PT("pt")` - `RO("ro")` - `RS_LATIN("rs_latin")` - `SK("sk")` - `SL("sl")` - `SQ("sq")` - `SV("sv")` - `SW("sw")` - `TL("tl")` - `TR("tr")` - `UZ("uz")` - `VI("vi")` - `AR("ar")` - `FA("fa")` - `UG("ug")` - `UR("ur")` - `BN("bn")` - `AS("as")` - `MNI("mni")` - `RU("ru")` - `RS_CYRILLIC("rs_cyrillic")` - `BE("be")` - `BG("bg")` - `UK("uk")` - `MN("mn")` - `ABQ("abq")` - `ADY("ady")` - `KBD("kbd")` - `AVA("ava")` - `DAR("dar")` - `INH("inh")` - `CHE("che")` - `LBE("lbe")` - `LEZ("lez")` - `TAB("tab")` - `TJK("tjk")` - `HI("hi")` - `MR("mr")` - `NE("ne")` - `BH("bh")` - `MAI("mai")` - `ANG("ang")` - `BHO("bho")` - `MAH("mah")` - `SCK("sck")` - `NEW("new")` - `GOM("gom")` - `SA("sa")` - `BGC("bgc")` - `TH("th")` - `CH_SIM("ch_sim")` - `CH_TRA("ch_tra")` - `JA("ja")` - `KO("ko")` - `TA("ta")` - `TE("te")` - `KN("kn")` - `Optional layoutAware` - `Optional lineLevelBoundingBox` - `Optional markdownTableMultilineHeaderSeparator` - `Optional maxPages` - `Optional maxPagesEnforced` - `Optional mergeTablesAcrossPagesInMarkdown` - `Optional model` - `Optional outlinedTableExtraction` - `Optional outputPdfOfDocument` - `Optional outputS3PathPrefix` - `Optional outputS3Region` - `Optional outputTablesAsHtml` - `Optional pageErrorTolerance` - `Optional pageFooterPrefix` - `Optional pageFooterSuffix` - `Optional pageHeaderPrefix` - `Optional pageHeaderSuffix` - `Optional pagePrefix` - `Optional pageSeparator` - `Optional pageSuffix` - `Optional parseMode` Enum for representing the mode of parsing to be used. 
- `PARSE_PAGE_WITHOUT_LLM("parse_page_without_llm")` - `PARSE_PAGE_WITH_LLM("parse_page_with_llm")` - `PARSE_PAGE_WITH_LVM("parse_page_with_lvm")` - `PARSE_PAGE_WITH_AGENT("parse_page_with_agent")` - `PARSE_PAGE_WITH_LAYOUT_AGENT("parse_page_with_layout_agent")` - `PARSE_DOCUMENT_WITH_LLM("parse_document_with_llm")` - `PARSE_DOCUMENT_WITH_LVM("parse_document_with_lvm")` - `PARSE_DOCUMENT_WITH_AGENT("parse_document_with_agent")` - `Optional parsingInstruction` - `Optional preciseBoundingBox` - `Optional premiumMode` - `Optional presentationOutOfBoundsContent` - `Optional presentationSkipEmbeddedData` - `Optional preserveLayoutAlignmentAcrossPages` - `Optional preserveVerySmallText` - `Optional preset` - `Optional priority` The priority for the request. This field may be ignored or overwritten depending on the organization tier. - `LOW("low")` - `MEDIUM("medium")` - `HIGH("high")` - `CRITICAL("critical")` - `Optional projectId` - `Optional removeHiddenText` - `Optional replaceFailedPageMode` Enum for representing the different available page error handling modes. 
- `RAW_TEXT("raw_text")` - `BLANK_PAGE("blank_page")` - `ERROR_MESSAGE("error_message")` - `Optional replaceFailedPageWithErrorMessagePrefix` - `Optional replaceFailedPageWithErrorMessageSuffix` - `Optional saveImages` - `Optional skipDiagonalText` - `Optional specializedChartParsingAgentic` - `Optional specializedChartParsingEfficient` - `Optional specializedChartParsingPlus` - `Optional specializedImageParsing` - `Optional spreadsheetExtractSubTables` - `Optional spreadsheetForceFormulaComputation` - `Optional spreadsheetIncludeHiddenSheets` - `Optional strictModeBuggyFont` - `Optional strictModeImageExtraction` - `Optional strictModeImageOcr` - `Optional strictModeReconstruction` - `Optional structuredOutput` - `Optional structuredOutputJsonSchema` - `Optional structuredOutputJsonSchemaName` - `Optional systemPrompt` - `Optional systemPromptAppend` - `Optional takeScreenshot` - `Optional targetPages` - `Optional tier` - `Optional useVendorMultimodalModel` - `Optional userPrompt` - `Optional vendorMultimodalApiKey` - `Optional vendorMultimodalModelName` - `Optional version` - `Optional> webhookConfigurations` Outbound webhook endpoints to notify on job status changes - `Optional> webhookEvents` Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered. 
- `EXTRACT_PENDING("extract.pending")` - `EXTRACT_SUCCESS("extract.success")` - `EXTRACT_ERROR("extract.error")` - `EXTRACT_PARTIAL_SUCCESS("extract.partial_success")` - `EXTRACT_CANCELLED("extract.cancelled")` - `PARSE_PENDING("parse.pending")` - `PARSE_RUNNING("parse.running")` - `PARSE_SUCCESS("parse.success")` - `PARSE_ERROR("parse.error")` - `PARSE_PARTIAL_SUCCESS("parse.partial_success")` - `PARSE_CANCELLED("parse.cancelled")` - `CLASSIFY_PENDING("classify.pending")` - `CLASSIFY_RUNNING("classify.running")` - `CLASSIFY_SUCCESS("classify.success")` - `CLASSIFY_ERROR("classify.error")` - `CLASSIFY_PARTIAL_SUCCESS("classify.partial_success")` - `CLASSIFY_CANCELLED("classify.cancelled")` - `SHEETS_PENDING("sheets.pending")` - `SHEETS_SUCCESS("sheets.success")` - `SHEETS_ERROR("sheets.error")` - `SHEETS_PARTIAL_SUCCESS("sheets.partial_success")` - `SHEETS_CANCELLED("sheets.cancelled")` - `UNMAPPED_EVENT("unmapped_event")` - `Optional webhookHeaders` Custom HTTP headers sent with each webhook request (e.g. auth tokens) - `Optional webhookOutputFormat` Response format sent to the webhook: 'string' (default) or 'json' - `Optional webhookUrl` URL to receive webhook POST notifications - `Optional webhookUrl` - `Optional managedPipelineId` The ID of the ManagedPipeline this playground pipeline is linked to. - `Optional metadataConfig` Metadata configuration for the pipeline. - `Optional> excludedEmbedMetadataKeys` List of metadata keys to exclude from embeddings - `Optional> excludedLlmMetadataKeys` List of metadata keys to exclude from LLM during retrieval - `Optional pipelineType` Type of pipeline. Either PLAYGROUND or MANAGED. - `PLAYGROUND("PLAYGROUND")` - `MANAGED("MANAGED")` - `Optional presetRetrievalParameters` Preset retrieval parameters for the pipeline. - `Optional alpha` Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval. 
- `Optional className` - `Optional denseSimilarityCutoff` Minimum similarity score wrt query for retrieval - `Optional denseSimilarityTopK` Number of nodes for dense retrieval. - `Optional enableReranking` Enable reranking for retrieval - `Optional filesTopK` Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content). - `Optional rerankTopN` Number of reranked nodes for returning. - `Optional retrievalMode` The retrieval mode for the query. - `CHUNKS("chunks")` - `FILES_VIA_METADATA("files_via_metadata")` - `FILES_VIA_CONTENT("files_via_content")` - `AUTO_ROUTED("auto_routed")` - `Optional retrieveImageNodes` Whether to retrieve image nodes. - `Optional retrievePageFigureNodes` Whether to retrieve page figure nodes. - `Optional retrievePageScreenshotNodes` Whether to retrieve page screenshot nodes. - `Optional searchFilters` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators. Value uses Strict types, as int, float and str are compatible types and were all converted to string before. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. - `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` - `Optional searchFiltersInferenceSchema` JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference. 
- `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional sparseSimilarityTopK` Number of nodes for sparse retrieval. - `Optional sparseModelConfig` Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks. - `Optional className` - `Optional modelType` The sparse model type to use. 'bm25' uses Qdrant's FastEmbed BM25 model (default for new pipelines), 'splade' uses HuggingFace Splade model, 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade). - `SPLADE("splade")` - `BM25("bm25")` - `AUTO("auto")` - `Optional status` Status of the pipeline. - `CREATED("CREATED")` - `DELETING("DELETING")` - `Optional transformConfig` Configuration for the transformation. - `class AutoTransformConfig:` - `Optional chunkOverlap` Chunk overlap for the transformation. - `Optional chunkSize` Chunk size for the transformation. - `Optional mode` - `AUTO("auto")` - `class AdvancedModeTransformConfig:` - `Optional chunkingConfig` Configuration for the chunking. - `class NoneChunkingConfig:` - `Optional mode` - `NONE("none")` - `class CharacterChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `CHARACTER("character")` - `class TokenChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `TOKEN("token")` - `Optional separator` - `class SentenceChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `SENTENCE("sentence")` - `Optional paragraphSeparator` - `Optional separator` - `class SemanticChunkingConfig:` - `Optional breakpointPercentileThreshold` - `Optional bufferSize` - `Optional mode` - `SEMANTIC("semantic")` - `Optional mode` - `ADVANCED("advanced")` - `Optional segmentationConfig` Configuration for the segmentation. 
- `class NoneSegmentationConfig:` - `Optional mode` - `NONE("none")` - `class PageSegmentationConfig:` - `Optional mode` - `PAGE("page")` - `Optional pageSeparator` - `class ElementSegmentationConfig:` - `Optional mode` - `ELEMENT("element")` - `Optional updatedAt` Update datetime ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.Pipeline; import com.llamacloud_prod.api.models.pipelines.PipelineCreate; import com.llamacloud_prod.api.models.pipelines.PipelineCreateParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); PipelineCreate params = PipelineCreate.builder() .name("x") .build(); Pipeline pipeline = client.pipelines().create(params); } } ``` #### Response ```json { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "class_name": "class_name", "embed_batch_size": 1, "model_name": "openai-text-embedding-3-small", "num_workers": 0 }, "type": "MANAGED_OPENAI_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "config_hash": { "embedding_config_hash": "embedding_config_hash", "parsing_config_hash": "parsing_config_hash", "transform_config_hash": "transform_config_hash" }, "created_at": "2019-12-27T18:11:19.117Z", "data_sink": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "component": { "foo": "bar" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "sink_type": "PINECONE", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "additional_kwargs": { "foo": "bar" }, "api_base": "api_base", "api_key": "api_key", "api_version": "api_version", "azure_deployment": 
"azure_deployment", "azure_endpoint": "azure_endpoint", "class_name": "class_name", "default_headers": { "foo": "string" }, "dimensions": 0, "embed_batch_size": 1, "max_retries": 0, "model_name": "model_name", "num_workers": 0, "reuse_client": true, "timeout": 0 }, "type": "AZURE_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "llama_parse_parameters": { "adaptive_long_table": true, "aggressive_table_extraction": true, "annotate_links": true, "auto_mode": true, "auto_mode_configuration_json": "auto_mode_configuration_json", "auto_mode_trigger_on_image_in_page": true, "auto_mode_trigger_on_regexp_in_page": "auto_mode_trigger_on_regexp_in_page", "auto_mode_trigger_on_table_in_page": true, "auto_mode_trigger_on_text_in_page": "auto_mode_trigger_on_text_in_page", "azure_openai_api_version": "azure_openai_api_version", "azure_openai_deployment_name": "azure_openai_deployment_name", "azure_openai_endpoint": "azure_openai_endpoint", "azure_openai_key": "azure_openai_key", "bbox_bottom": 0, "bbox_left": 0, "bbox_right": 0, "bbox_top": 0, "bounding_box": "bounding_box", "compact_markdown_table": true, "complemental_formatting_instruction": "complemental_formatting_instruction", "content_guideline_instruction": "content_guideline_instruction", "continuous_mode": true, "disable_image_extraction": true, "disable_ocr": true, "disable_reconstruction": true, "do_not_cache": true, "do_not_unroll_columns": true, "enable_cost_optimizer": true, "extract_charts": true, "extract_layout": true, "extract_printed_page_number": true, "fast_mode": true, "formatting_instruction": "formatting_instruction", "gpt4o_api_key": "gpt4o_api_key", "gpt4o_mode": true, "guess_xlsx_sheet_name": true, "hide_footers": true, "hide_headers": true, "high_res_ocr": true, "html_make_all_elements_visible": true, 
"html_remove_fixed_elements": true, "html_remove_navigation_elements": true, "http_proxy": "http_proxy", "ignore_document_elements_for_layout_detection": true, "images_to_save": [ "screenshot" ], "inline_images_in_markdown": true, "input_s3_path": "input_s3_path", "input_s3_region": "input_s3_region", "input_url": "input_url", "internal_is_screenshot_job": true, "invalidate_cache": true, "is_formatting_instruction": true, "job_timeout_extra_time_per_page_in_seconds": 0, "job_timeout_in_seconds": 0, "keep_page_separator_when_merging_tables": true, "languages": [ "af" ], "layout_aware": true, "line_level_bounding_box": true, "markdown_table_multiline_header_separator": "markdown_table_multiline_header_separator", "max_pages": 0, "max_pages_enforced": 0, "merge_tables_across_pages_in_markdown": true, "model": "model", "outlined_table_extraction": true, "output_pdf_of_document": true, "output_s3_path_prefix": "output_s3_path_prefix", "output_s3_region": "output_s3_region", "output_tables_as_HTML": true, "page_error_tolerance": 0, "page_footer_prefix": "page_footer_prefix", "page_footer_suffix": "page_footer_suffix", "page_header_prefix": "page_header_prefix", "page_header_suffix": "page_header_suffix", "page_prefix": "page_prefix", "page_separator": "page_separator", "page_suffix": "page_suffix", "parse_mode": "parse_page_without_llm", "parsing_instruction": "parsing_instruction", "precise_bounding_box": true, "premium_mode": true, "presentation_out_of_bounds_content": true, "presentation_skip_embedded_data": true, "preserve_layout_alignment_across_pages": true, "preserve_very_small_text": true, "preset": "preset", "priority": "low", "project_id": "project_id", "remove_hidden_text": true, "replace_failed_page_mode": "raw_text", "replace_failed_page_with_error_message_prefix": "replace_failed_page_with_error_message_prefix", "replace_failed_page_with_error_message_suffix": "replace_failed_page_with_error_message_suffix", "save_images": true, "skip_diagonal_text": true, 
"specialized_chart_parsing_agentic": true, "specialized_chart_parsing_efficient": true, "specialized_chart_parsing_plus": true, "specialized_image_parsing": true, "spreadsheet_extract_sub_tables": true, "spreadsheet_force_formula_computation": true, "spreadsheet_include_hidden_sheets": true, "strict_mode_buggy_font": true, "strict_mode_image_extraction": true, "strict_mode_image_ocr": true, "strict_mode_reconstruction": true, "structured_output": true, "structured_output_json_schema": "structured_output_json_schema", "structured_output_json_schema_name": "structured_output_json_schema_name", "system_prompt": "system_prompt", "system_prompt_append": "system_prompt_append", "take_screenshot": true, "target_pages": "target_pages", "tier": "tier", "use_vendor_multimodal_model": true, "user_prompt": "user_prompt", "vendor_multimodal_api_key": "vendor_multimodal_api_key", "vendor_multimodal_model_name": "vendor_multimodal_model_name", "version": "version", "webhook_configurations": [ { "webhook_events": [ "parse.success", "parse.error" ], "webhook_headers": { "Authorization": "Bearer sk-..." 
}, "webhook_output_format": "json", "webhook_url": "https://example.com/webhooks/llamacloud" } ], "webhook_url": "webhook_url" }, "managed_pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "metadata_config": { "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ] }, "pipeline_type": "PLAYGROUND", "preset_retrieval_parameters": { "alpha": 0, "class_name": "class_name", "dense_similarity_cutoff": 0, "dense_similarity_top_k": 1, "enable_reranking": true, "files_top_k": 1, "rerank_top_n": 1, "retrieval_mode": "chunks", "retrieve_image_nodes": true, "retrieve_page_figure_nodes": true, "retrieve_page_screenshot_nodes": true, "search_filters": { "filters": [ { "key": "key", "value": 0, "operator": "==" } ], "condition": "and" }, "search_filters_inference_schema": { "foo": { "foo": "bar" } }, "sparse_similarity_top_k": 1 }, "sparse_model_config": { "class_name": "class_name", "model_type": "splade" }, "status": "CREATED", "transform_config": { "chunk_overlap": 0, "chunk_size": 1, "mode": "auto" }, "updated_at": "2019-12-27T18:11:19.117Z" } ``` ## Get Pipeline `Pipeline pipelines().get(PipelineGetParams params = PipelineGetParams.none(), RequestOptions requestOptions = RequestOptions.none())` **get** `/api/v1/pipelines/{pipeline_id}` Get a pipeline by ID. ### Parameters - `PipelineGetParams params` - `Optional pipelineId` ### Returns - `class Pipeline:` Schema for a pipeline. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` - `class ManagedOpenAIEmbedding:` - `Optional component` Configuration for the Managed OpenAI embedding model. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The name of the OpenAI embedding model. - `OPENAI_TEXT_EMBEDDING_3_SMALL("openai-text-embedding-3-small")` - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. 
- `MANAGED_OPENAI_EMBEDDING("MANAGED_OPENAI_EMBEDDING")` - `class AzureOpenAIEmbeddingConfig:` - `Optional component` Configuration for the Azure OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. - `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `AZURE_EMBEDDING("AZURE_EMBEDDING")` - `class CohereEmbeddingConfig:` - `Optional component` Configuration for the Cohere embedding model. - `Optional apiKey` The Cohere API key. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided float embedding_type is used when needed. - `Optional inputType` Model Input type. If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional truncate` Truncation type - START/ END/ NONE - `Optional type` Type of the embedding model. 
- `COHERE_EMBEDDING("COHERE_EMBEDDING")` - `class GeminiEmbeddingConfig:` - `Optional component` Configuration for the Gemini embedding model. - `Optional apiBase` API base to access the model. Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. - `Optional transport` Transport to access the model. Defaults to None. - `Optional type` Type of the embedding model. - `GEMINI_EMBEDDING("GEMINI_EMBEDDING")` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `Optional component` Configuration for the HuggingFace Inference API embedding model. - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. - `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional pooling` Enum of possible pooling choices with pooling behaviors. 
- `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. - `Optional textInstruction` Instruction to prepend during text embedding. - `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. - `Optional type` Type of the embedding model. - `HUGGINGFACE_API_EMBEDDING("HUGGINGFACE_API_EMBEDDING")` - `class OpenAIEmbeddingConfig:` - `Optional component` Configuration for the OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for OpenAI API. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `OPENAI_EMBEDDING("OPENAI_EMBEDDING")` - `class VertexAiEmbeddingConfig:` - `Optional component` Configuration for the VertexAI embedding model. - `Optional clientEmail` The client email for the VertexAI credentials. - `String location` The default location to use when making API calls. 
- `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. - `Optional tokenUri` The token URI for the VertexAI credentials. - `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `VERTEXAI_EMBEDDING("VERTEXAI_EMBEDDING")` - `class BedrockEmbeddingConfig:` - `Optional component` Configuration for the Bedrock embedding model. - `Optional additionalKwargs` Additional kwargs for the bedrock client. - `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. - `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of aws profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. Uses region configured in AWS CLI if not passed - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. - `Optional type` Type of the embedding model. 
- `BEDROCK_EMBEDDING("BEDROCK_EMBEDDING")` - `String name` - `String projectId` - `Optional configHash` Hashes for the configuration of a pipeline. - `Optional embeddingConfigHash` Hash of the embedding config. - `Optional parsingConfigHash` Hash of the llama parse parameters. - `Optional transformConfigHash` Hash of the transform config. - `Optional createdAt` Creation datetime - `Optional dataSink` Schema for a data sink. - `String id` Unique identifier - `Component component` Component that implements the data sink - `class UnionMember0:` - `class CloudPineconeVectorStore:` Cloud Pinecone Vector Store. This class is used to store the configuration for a Pinecone vector store, so that it can be created and used in LlamaCloud. Args: api_key (str): API key for authenticating with Pinecone index_name (str): name of the Pinecone index namespace (optional[str]): namespace to use in the Pinecone index insert_kwargs (optional[dict]): additional kwargs to pass during insertion - `String apiKey` The API key for authenticating with Pinecone - `String indexName` - `Optional className` - `Optional insertKwargs` - `Optional namespace` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudPostgresVectorStore:` - `String database` - `long embedDim` - `String host` - `String password` - `long port` - `String schemaName` - `String tableName` - `String user` - `Optional className` - `Optional hnswSettings` HNSW settings for PGVector. - `Optional distanceMethod` The distance method to use. - `L2("l2")` - `IP("ip")` - `COSINE("cosine")` - `L1("l1")` - `HAMMING("hamming")` - `JACCARD("jaccard")` - `Optional efConstruction` The number of edges to use during the construction phase. - `Optional efSearch` The number of edges to use during the search phase. - `Optional m` The number of bi-directional links created for each new element. - `Optional vectorType` The type of vector to use. 
- `VECTOR("vector")` - `HALF_VEC("half_vec")` - `BIT("bit")` - `SPARSE_VEC("sparse_vec")` - `Optional hybridSearch` - `Optional performSetup` - `Optional supportsNestedMetadataFilters` - `class CloudQdrantVectorStore:` Cloud Qdrant Vector Store. This class is used to store the configuration for a Qdrant vector store, so that it can be created and used in LlamaCloud. Args: collection_name (str): name of the Qdrant collection url (str): url of the Qdrant instance api_key (str): API key for authenticating with Qdrant max_retries (int): maximum number of retries in case of a failure. Defaults to 3 client_kwargs (dict): additional kwargs to pass to the Qdrant client - `String apiKey` - `String collectionName` - `String url` - `Optional className` - `Optional clientKwargs` - `Optional maxRetries` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudAzureAiSearchVectorStore:` Cloud Azure AI Search Vector Store. - `String searchServiceApiKey` - `String searchServiceEndpoint` - `Optional className` - `Optional clientId` - `Optional clientSecret` - `Optional embeddingDimension` - `Optional filterableMetadataFieldKeys` - `Optional indexName` - `Optional searchServiceApiVersion` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `Optional tenantId` - `class CloudMongoDBAtlasVectorSearch:` Cloud MongoDB Atlas Vector Store. This class is used to store the configuration for a MongoDB Atlas vector store, so that it can be created and used in LlamaCloud. 
Args: mongodb_uri (str): URI for connecting to MongoDB Atlas db_name (str): name of the MongoDB database collection_name (str): name of the MongoDB collection vector_index_name (str): name of the MongoDB Atlas vector index fulltext_index_name (str): name of the MongoDB Atlas full-text index - `String collectionName` - `String dbName` - `String mongoDBUri` - `Optional className` - `Optional embeddingDimension` - `Optional fulltextIndexName` - `Optional supportsNestedMetadataFilters` - `Optional vectorIndexName` - `class CloudMilvusVectorStore:` Cloud Milvus Vector Store. - `String uri` - `Optional token` - `Optional className` - `Optional collectionName` - `Optional embeddingDimension` - `Optional supportsNestedMetadataFilters` - `class CloudAstraDbVectorStore:` Cloud AstraDB Vector Store. This class is used to store the configuration for an AstraDB vector store, so that it can be created and used in LlamaCloud. Args: token (str): The Astra DB Application Token to use. api_endpoint (str): The Astra DB JSON API endpoint for your database. collection_name (str): Collection name to use. If it does not exist, it will be created. embedding_dimension (int): Length of the embedding vectors in use. keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace' is used. - `String token` The Astra DB Application Token to use - `String apiEndpoint` The Astra DB JSON API endpoint for your database - `String collectionName` Collection name to use. If it does not exist, it will be created - `long embeddingDimension` Length of the embedding vectors in use - `Optional className` - `Optional keyspace` The keyspace to use. If not provided, 'default_keyspace' is used. - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `String name` The name of the data sink. 
- `String projectId` - `SinkType sinkType` - `PINECONE("PINECONE")` - `POSTGRES("POSTGRES")` - `QDRANT("QDRANT")` - `AZUREAI_SEARCH("AZUREAI_SEARCH")` - `MONGODB_ATLAS("MONGODB_ATLAS")` - `MILVUS("MILVUS")` - `ASTRA_DB("ASTRA_DB")` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfig` Schema for an embedding model config. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` The embedding configuration for the embedding model config. - `class AzureOpenAIEmbeddingConfig:` - `class CohereEmbeddingConfig:` - `class GeminiEmbeddingConfig:` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `class OpenAIEmbeddingConfig:` - `class VertexAiEmbeddingConfig:` - `class BedrockEmbeddingConfig:` - `String name` The name of the embedding model config. - `String projectId` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfigId` The ID of the EmbeddingModelConfig this pipeline is using. - `Optional llamaParseParameters` Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline. 
- `Optional adaptiveLongTable` - `Optional aggressiveTableExtraction` - `Optional annotateLinks` - `Optional autoMode` - `Optional autoModeConfigurationJson` - `Optional autoModeTriggerOnImageInPage` - `Optional autoModeTriggerOnRegexpInPage` - `Optional autoModeTriggerOnTableInPage` - `Optional autoModeTriggerOnTextInPage` - `Optional azureOpenAIApiVersion` - `Optional azureOpenAIDeploymentName` - `Optional azureOpenAIEndpoint` - `Optional azureOpenAIKey` - `Optional bboxBottom` - `Optional bboxLeft` - `Optional bboxRight` - `Optional bboxTop` - `Optional boundingBox` - `Optional compactMarkdownTable` - `Optional complementalFormattingInstruction` - `Optional contentGuidelineInstruction` - `Optional continuousMode` - `Optional disableImageExtraction` - `Optional disableOcr` - `Optional disableReconstruction` - `Optional doNotCache` - `Optional doNotUnrollColumns` - `Optional enableCostOptimizer` - `Optional extractCharts` - `Optional extractLayout` - `Optional extractPrintedPageNumber` - `Optional fastMode` - `Optional formattingInstruction` - `Optional gpt4oApiKey` - `Optional gpt4oMode` - `Optional guessXlsxSheetName` - `Optional hideFooters` - `Optional hideHeaders` - `Optional highResOcr` - `Optional htmlMakeAllElementsVisible` - `Optional htmlRemoveFixedElements` - `Optional htmlRemoveNavigationElements` - `Optional httpProxy` - `Optional ignoreDocumentElementsForLayoutDetection` - `Optional> imagesToSave` - `SCREENSHOT("screenshot")` - `EMBEDDED("embedded")` - `LAYOUT("layout")` - `Optional inlineImagesInMarkdown` - `Optional inputS3Path` - `Optional inputS3Region` - `Optional inputUrl` - `Optional internalIsScreenshotJob` - `Optional invalidateCache` - `Optional isFormattingInstruction` - `Optional jobTimeoutExtraTimePerPageInSeconds` - `Optional jobTimeoutInSeconds` - `Optional keepPageSeparatorWhenMergingTables` - `Optional> languages` - `AF("af")` - `AZ("az")` - `BS("bs")` - `CS("cs")` - `CY("cy")` - `DA("da")` - `DE("de")` - `EN("en")` - `ES("es")` - 
`ET("et")` - `FR("fr")` - `GA("ga")` - `HR("hr")` - `HU("hu")` - `ID("id")` - `IS("is")` - `IT("it")` - `KU("ku")` - `LA("la")` - `LT("lt")` - `LV("lv")` - `MI("mi")` - `MS("ms")` - `MT("mt")` - `NL("nl")` - `NO("no")` - `OC("oc")` - `PI("pi")` - `PL("pl")` - `PT("pt")` - `RO("ro")` - `RS_LATIN("rs_latin")` - `SK("sk")` - `SL("sl")` - `SQ("sq")` - `SV("sv")` - `SW("sw")` - `TL("tl")` - `TR("tr")` - `UZ("uz")` - `VI("vi")` - `AR("ar")` - `FA("fa")` - `UG("ug")` - `UR("ur")` - `BN("bn")` - `AS("as")` - `MNI("mni")` - `RU("ru")` - `RS_CYRILLIC("rs_cyrillic")` - `BE("be")` - `BG("bg")` - `UK("uk")` - `MN("mn")` - `ABQ("abq")` - `ADY("ady")` - `KBD("kbd")` - `AVA("ava")` - `DAR("dar")` - `INH("inh")` - `CHE("che")` - `LBE("lbe")` - `LEZ("lez")` - `TAB("tab")` - `TJK("tjk")` - `HI("hi")` - `MR("mr")` - `NE("ne")` - `BH("bh")` - `MAI("mai")` - `ANG("ang")` - `BHO("bho")` - `MAH("mah")` - `SCK("sck")` - `NEW("new")` - `GOM("gom")` - `SA("sa")` - `BGC("bgc")` - `TH("th")` - `CH_SIM("ch_sim")` - `CH_TRA("ch_tra")` - `JA("ja")` - `KO("ko")` - `TA("ta")` - `TE("te")` - `KN("kn")` - `Optional layoutAware` - `Optional lineLevelBoundingBox` - `Optional markdownTableMultilineHeaderSeparator` - `Optional maxPages` - `Optional maxPagesEnforced` - `Optional mergeTablesAcrossPagesInMarkdown` - `Optional model` - `Optional outlinedTableExtraction` - `Optional outputPdfOfDocument` - `Optional outputS3PathPrefix` - `Optional outputS3Region` - `Optional outputTablesAsHtml` - `Optional pageErrorTolerance` - `Optional pageFooterPrefix` - `Optional pageFooterSuffix` - `Optional pageHeaderPrefix` - `Optional pageHeaderSuffix` - `Optional pagePrefix` - `Optional pageSeparator` - `Optional pageSuffix` - `Optional parseMode` Enum for representing the mode of parsing to be used. 
- `PARSE_PAGE_WITHOUT_LLM("parse_page_without_llm")` - `PARSE_PAGE_WITH_LLM("parse_page_with_llm")` - `PARSE_PAGE_WITH_LVM("parse_page_with_lvm")` - `PARSE_PAGE_WITH_AGENT("parse_page_with_agent")` - `PARSE_PAGE_WITH_LAYOUT_AGENT("parse_page_with_layout_agent")` - `PARSE_DOCUMENT_WITH_LLM("parse_document_with_llm")` - `PARSE_DOCUMENT_WITH_LVM("parse_document_with_lvm")` - `PARSE_DOCUMENT_WITH_AGENT("parse_document_with_agent")` - `Optional parsingInstruction` - `Optional preciseBoundingBox` - `Optional premiumMode` - `Optional presentationOutOfBoundsContent` - `Optional presentationSkipEmbeddedData` - `Optional preserveLayoutAlignmentAcrossPages` - `Optional preserveVerySmallText` - `Optional preset` - `Optional priority` The priority for the request. This field may be ignored or overwritten depending on the organization tier. - `LOW("low")` - `MEDIUM("medium")` - `HIGH("high")` - `CRITICAL("critical")` - `Optional projectId` - `Optional removeHiddenText` - `Optional replaceFailedPageMode` Enum for representing the different available page error handling modes. 
- `RAW_TEXT("raw_text")` - `BLANK_PAGE("blank_page")` - `ERROR_MESSAGE("error_message")` - `Optional replaceFailedPageWithErrorMessagePrefix` - `Optional replaceFailedPageWithErrorMessageSuffix` - `Optional saveImages` - `Optional skipDiagonalText` - `Optional specializedChartParsingAgentic` - `Optional specializedChartParsingEfficient` - `Optional specializedChartParsingPlus` - `Optional specializedImageParsing` - `Optional spreadsheetExtractSubTables` - `Optional spreadsheetForceFormulaComputation` - `Optional spreadsheetIncludeHiddenSheets` - `Optional strictModeBuggyFont` - `Optional strictModeImageExtraction` - `Optional strictModeImageOcr` - `Optional strictModeReconstruction` - `Optional structuredOutput` - `Optional structuredOutputJsonSchema` - `Optional structuredOutputJsonSchemaName` - `Optional systemPrompt` - `Optional systemPromptAppend` - `Optional takeScreenshot` - `Optional targetPages` - `Optional tier` - `Optional useVendorMultimodalModel` - `Optional userPrompt` - `Optional vendorMultimodalApiKey` - `Optional vendorMultimodalModelName` - `Optional version` - `Optional> webhookConfigurations` Outbound webhook endpoints to notify on job status changes - `Optional> webhookEvents` Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered. 
- `EXTRACT_PENDING("extract.pending")` - `EXTRACT_SUCCESS("extract.success")` - `EXTRACT_ERROR("extract.error")` - `EXTRACT_PARTIAL_SUCCESS("extract.partial_success")` - `EXTRACT_CANCELLED("extract.cancelled")` - `PARSE_PENDING("parse.pending")` - `PARSE_RUNNING("parse.running")` - `PARSE_SUCCESS("parse.success")` - `PARSE_ERROR("parse.error")` - `PARSE_PARTIAL_SUCCESS("parse.partial_success")` - `PARSE_CANCELLED("parse.cancelled")` - `CLASSIFY_PENDING("classify.pending")` - `CLASSIFY_RUNNING("classify.running")` - `CLASSIFY_SUCCESS("classify.success")` - `CLASSIFY_ERROR("classify.error")` - `CLASSIFY_PARTIAL_SUCCESS("classify.partial_success")` - `CLASSIFY_CANCELLED("classify.cancelled")` - `SHEETS_PENDING("sheets.pending")` - `SHEETS_SUCCESS("sheets.success")` - `SHEETS_ERROR("sheets.error")` - `SHEETS_PARTIAL_SUCCESS("sheets.partial_success")` - `SHEETS_CANCELLED("sheets.cancelled")` - `UNMAPPED_EVENT("unmapped_event")` - `Optional webhookHeaders` Custom HTTP headers sent with each webhook request (e.g. auth tokens) - `Optional webhookOutputFormat` Response format sent to the webhook: 'string' (default) or 'json' - `Optional webhookUrl` URL to receive webhook POST notifications - `Optional webhookUrl` - `Optional managedPipelineId` The ID of the ManagedPipeline this playground pipeline is linked to. - `Optional metadataConfig` Metadata configuration for the pipeline. - `Optional> excludedEmbedMetadataKeys` List of metadata keys to exclude from embeddings - `Optional> excludedLlmMetadataKeys` List of metadata keys to exclude from LLM during retrieval - `Optional pipelineType` Type of pipeline. Either PLAYGROUND or MANAGED. - `PLAYGROUND("PLAYGROUND")` - `MANAGED("MANAGED")` - `Optional presetRetrievalParameters` Preset retrieval parameters for the pipeline. - `Optional alpha` Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval. 
- `Optional className` - `Optional denseSimilarityCutoff` Minimum similarity score with respect to the query for retrieval - `Optional denseSimilarityTopK` Number of nodes for dense retrieval. - `Optional enableReranking` Enable reranking for retrieval - `Optional filesTopK` Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content). - `Optional rerankTopN` Number of reranked nodes to return. - `Optional retrievalMode` The retrieval mode for the query. - `CHUNKS("chunks")` - `FILES_VIA_METADATA("files_via_metadata")` - `FILES_VIA_CONTENT("files_via_content")` - `AUTO_ROUTED("auto_routed")` - `Optional retrieveImageNodes` Whether to retrieve image nodes. - `Optional retrievePageFigureNodes` Whether to retrieve page figure nodes. - `Optional retrievePageScreenshotNodes` Whether to retrieve page screenshot nodes. - `Optional searchFilters` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators. Value uses Strict types, as int, float and str are compatible types and were all converted to string before. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. - `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` - `Optional searchFiltersInferenceSchema` JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference. 
- `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional sparseSimilarityTopK` Number of nodes for sparse retrieval. - `Optional sparseModelConfig` Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks. - `Optional className` - `Optional modelType` The sparse model type to use. 'bm25' uses Qdrant's FastEmbed BM25 model (default for new pipelines), 'splade' uses HuggingFace Splade model, 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade). - `SPLADE("splade")` - `BM25("bm25")` - `AUTO("auto")` - `Optional status` Status of the pipeline. - `CREATED("CREATED")` - `DELETING("DELETING")` - `Optional transformConfig` Configuration for the transformation. - `class AutoTransformConfig:` - `Optional chunkOverlap` Chunk overlap for the transformation. - `Optional chunkSize` Chunk size for the transformation. - `Optional mode` - `AUTO("auto")` - `class AdvancedModeTransformConfig:` - `Optional chunkingConfig` Configuration for the chunking. - `class NoneChunkingConfig:` - `Optional mode` - `NONE("none")` - `class CharacterChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `CHARACTER("character")` - `class TokenChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `TOKEN("token")` - `Optional separator` - `class SentenceChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `SENTENCE("sentence")` - `Optional paragraphSeparator` - `Optional separator` - `class SemanticChunkingConfig:` - `Optional breakpointPercentileThreshold` - `Optional bufferSize` - `Optional mode` - `SEMANTIC("semantic")` - `Optional mode` - `ADVANCED("advanced")` - `Optional segmentationConfig` Configuration for the segmentation. 
- `class NoneSegmentationConfig:` - `Optional mode` - `NONE("none")` - `class PageSegmentationConfig:` - `Optional mode` - `PAGE("page")` - `Optional pageSeparator` - `class ElementSegmentationConfig:` - `Optional mode` - `ELEMENT("element")` - `Optional updatedAt` Update datetime ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.Pipeline; import com.llamacloud_prod.api.models.pipelines.PipelineGetParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); Pipeline pipeline = client.pipelines().get("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` #### Response ```json { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "class_name": "class_name", "embed_batch_size": 1, "model_name": "openai-text-embedding-3-small", "num_workers": 0 }, "type": "MANAGED_OPENAI_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "config_hash": { "embedding_config_hash": "embedding_config_hash", "parsing_config_hash": "parsing_config_hash", "transform_config_hash": "transform_config_hash" }, "created_at": "2019-12-27T18:11:19.117Z", "data_sink": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "component": { "foo": "bar" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "sink_type": "PINECONE", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "additional_kwargs": { "foo": "bar" }, "api_base": "api_base", "api_key": "api_key", "api_version": "api_version", "azure_deployment": "azure_deployment", "azure_endpoint": "azure_endpoint", "class_name": "class_name", "default_headers": { 
"foo": "string" }, "dimensions": 0, "embed_batch_size": 1, "max_retries": 0, "model_name": "model_name", "num_workers": 0, "reuse_client": true, "timeout": 0 }, "type": "AZURE_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "llama_parse_parameters": { "adaptive_long_table": true, "aggressive_table_extraction": true, "annotate_links": true, "auto_mode": true, "auto_mode_configuration_json": "auto_mode_configuration_json", "auto_mode_trigger_on_image_in_page": true, "auto_mode_trigger_on_regexp_in_page": "auto_mode_trigger_on_regexp_in_page", "auto_mode_trigger_on_table_in_page": true, "auto_mode_trigger_on_text_in_page": "auto_mode_trigger_on_text_in_page", "azure_openai_api_version": "azure_openai_api_version", "azure_openai_deployment_name": "azure_openai_deployment_name", "azure_openai_endpoint": "azure_openai_endpoint", "azure_openai_key": "azure_openai_key", "bbox_bottom": 0, "bbox_left": 0, "bbox_right": 0, "bbox_top": 0, "bounding_box": "bounding_box", "compact_markdown_table": true, "complemental_formatting_instruction": "complemental_formatting_instruction", "content_guideline_instruction": "content_guideline_instruction", "continuous_mode": true, "disable_image_extraction": true, "disable_ocr": true, "disable_reconstruction": true, "do_not_cache": true, "do_not_unroll_columns": true, "enable_cost_optimizer": true, "extract_charts": true, "extract_layout": true, "extract_printed_page_number": true, "fast_mode": true, "formatting_instruction": "formatting_instruction", "gpt4o_api_key": "gpt4o_api_key", "gpt4o_mode": true, "guess_xlsx_sheet_name": true, "hide_footers": true, "hide_headers": true, "high_res_ocr": true, "html_make_all_elements_visible": true, "html_remove_fixed_elements": true, "html_remove_navigation_elements": true, "http_proxy": "http_proxy", 
"ignore_document_elements_for_layout_detection": true, "images_to_save": [ "screenshot" ], "inline_images_in_markdown": true, "input_s3_path": "input_s3_path", "input_s3_region": "input_s3_region", "input_url": "input_url", "internal_is_screenshot_job": true, "invalidate_cache": true, "is_formatting_instruction": true, "job_timeout_extra_time_per_page_in_seconds": 0, "job_timeout_in_seconds": 0, "keep_page_separator_when_merging_tables": true, "languages": [ "af" ], "layout_aware": true, "line_level_bounding_box": true, "markdown_table_multiline_header_separator": "markdown_table_multiline_header_separator", "max_pages": 0, "max_pages_enforced": 0, "merge_tables_across_pages_in_markdown": true, "model": "model", "outlined_table_extraction": true, "output_pdf_of_document": true, "output_s3_path_prefix": "output_s3_path_prefix", "output_s3_region": "output_s3_region", "output_tables_as_HTML": true, "page_error_tolerance": 0, "page_footer_prefix": "page_footer_prefix", "page_footer_suffix": "page_footer_suffix", "page_header_prefix": "page_header_prefix", "page_header_suffix": "page_header_suffix", "page_prefix": "page_prefix", "page_separator": "page_separator", "page_suffix": "page_suffix", "parse_mode": "parse_page_without_llm", "parsing_instruction": "parsing_instruction", "precise_bounding_box": true, "premium_mode": true, "presentation_out_of_bounds_content": true, "presentation_skip_embedded_data": true, "preserve_layout_alignment_across_pages": true, "preserve_very_small_text": true, "preset": "preset", "priority": "low", "project_id": "project_id", "remove_hidden_text": true, "replace_failed_page_mode": "raw_text", "replace_failed_page_with_error_message_prefix": "replace_failed_page_with_error_message_prefix", "replace_failed_page_with_error_message_suffix": "replace_failed_page_with_error_message_suffix", "save_images": true, "skip_diagonal_text": true, "specialized_chart_parsing_agentic": true, "specialized_chart_parsing_efficient": true, 
"specialized_chart_parsing_plus": true, "specialized_image_parsing": true, "spreadsheet_extract_sub_tables": true, "spreadsheet_force_formula_computation": true, "spreadsheet_include_hidden_sheets": true, "strict_mode_buggy_font": true, "strict_mode_image_extraction": true, "strict_mode_image_ocr": true, "strict_mode_reconstruction": true, "structured_output": true, "structured_output_json_schema": "structured_output_json_schema", "structured_output_json_schema_name": "structured_output_json_schema_name", "system_prompt": "system_prompt", "system_prompt_append": "system_prompt_append", "take_screenshot": true, "target_pages": "target_pages", "tier": "tier", "use_vendor_multimodal_model": true, "user_prompt": "user_prompt", "vendor_multimodal_api_key": "vendor_multimodal_api_key", "vendor_multimodal_model_name": "vendor_multimodal_model_name", "version": "version", "webhook_configurations": [ { "webhook_events": [ "parse.success", "parse.error" ], "webhook_headers": { "Authorization": "Bearer sk-..." 
}, "webhook_output_format": "json", "webhook_url": "https://example.com/webhooks/llamacloud" } ], "webhook_url": "webhook_url" }, "managed_pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "metadata_config": { "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ] }, "pipeline_type": "PLAYGROUND", "preset_retrieval_parameters": { "alpha": 0, "class_name": "class_name", "dense_similarity_cutoff": 0, "dense_similarity_top_k": 1, "enable_reranking": true, "files_top_k": 1, "rerank_top_n": 1, "retrieval_mode": "chunks", "retrieve_image_nodes": true, "retrieve_page_figure_nodes": true, "retrieve_page_screenshot_nodes": true, "search_filters": { "filters": [ { "key": "key", "value": 0, "operator": "==" } ], "condition": "and" }, "search_filters_inference_schema": { "foo": { "foo": "bar" } }, "sparse_similarity_top_k": 1 }, "sparse_model_config": { "class_name": "class_name", "model_type": "splade" }, "status": "CREATED", "transform_config": { "chunk_overlap": 0, "chunk_size": 1, "mode": "auto" }, "updated_at": "2019-12-27T18:11:19.117Z" } ``` ## Update Existing Pipeline `Pipeline pipelines().update(PipelineUpdateParams params = PipelineUpdateParams.none(), RequestOptions requestOptions = RequestOptions.none())` **put** `/api/v1/pipelines/{pipeline_id}` Update an existing pipeline's configuration. ### Parameters - `PipelineUpdateParams params` - `Optional pipelineId` - `Optional dataSink` Schema for creating a data sink. - `Optional dataSinkId` Data sink ID. When provided instead of data_sink, the data sink will be looked up by ID. - `Optional embeddingConfig` - `class AzureOpenAIEmbeddingConfig:` - `Optional component` Configuration for the Azure OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. 
- `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `AZURE_EMBEDDING("AZURE_EMBEDDING")` - `class CohereEmbeddingConfig:` - `Optional component` Configuration for the Cohere embedding model. - `Optional apiKey` The Cohere API key. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided float embedding_type is used when needed. - `Optional inputType` Model Input type. If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional truncate` Truncation type - START/ END/ NONE - `Optional type` Type of the embedding model. - `COHERE_EMBEDDING("COHERE_EMBEDDING")` - `class GeminiEmbeddingConfig:` - `Optional component` Configuration for the Gemini embedding model. - `Optional apiBase` API base to access the model. Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. 
- `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. - `Optional transport` Transport to access the model. Defaults to None. - `Optional type` Type of the embedding model. - `GEMINI_EMBEDDING("GEMINI_EMBEDDING")` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `Optional component` Configuration for the HuggingFace Inference API embedding model. - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. - `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional pooling` Enum of possible pooling choices with pooling behaviors. - `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. - `Optional textInstruction` Instruction to prepend during text embedding. 
- `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. - `Optional type` Type of the embedding model. - `HUGGINGFACE_API_EMBEDDING("HUGGINGFACE_API_EMBEDDING")` - `class OpenAIEmbeddingConfig:` - `Optional component` Configuration for the OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for OpenAI API. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `OPENAI_EMBEDDING("OPENAI_EMBEDDING")` - `class VertexAiEmbeddingConfig:` - `Optional component` Configuration for the VertexAI embedding model. - `Optional clientEmail` The client email for the VertexAI credentials. - `String location` The default location to use when making API calls. - `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. - `Optional tokenUri` The token URI for the VertexAI credentials. 
- `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `VERTEXAI_EMBEDDING("VERTEXAI_EMBEDDING")` - `class BedrockEmbeddingConfig:` - `Optional component` Configuration for the Bedrock embedding model. - `Optional additionalKwargs` Additional kwargs for the bedrock client. - `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. - `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of aws profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. Uses region configured in AWS CLI if not passed - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. - `Optional type` Type of the embedding model. - `BEDROCK_EMBEDDING("BEDROCK_EMBEDDING")` - `Optional embeddingModelConfigId` Embedding model config ID. When provided instead of embedding_config, the embedding model config will be looked up by ID. - `Optional llamaParseParameters` Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline. 
- `Optional managedPipelineId` The ID of the ManagedPipeline this playground pipeline is linked to. - `Optional metadataConfig` Metadata configuration for the pipeline. - `Optional name` - `Optional presetRetrievalParameters` Schema for the search params for a retrieval execution that can be preset for a pipeline. - `Optional sparseModelConfig` Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks. - `Optional status` Status of the pipeline deployment. - `Optional transformConfig` Configuration for the transformation. - `class AutoTransformConfig:` - `Optional chunkOverlap` Chunk overlap for the transformation. - `Optional chunkSize` Chunk size for the transformation. - `Optional mode` - `AUTO("auto")` - `class AdvancedModeTransformConfig:` - `Optional chunkingConfig` Configuration for the chunking. - `class NoneChunkingConfig:` - `Optional mode` - `NONE("none")` - `class CharacterChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `CHARACTER("character")` - `class TokenChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `TOKEN("token")` - `Optional separator` - `class SentenceChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `SENTENCE("sentence")` - `Optional paragraphSeparator` - `Optional separator` - `class SemanticChunkingConfig:` - `Optional breakpointPercentileThreshold` - `Optional bufferSize` - `Optional mode` - `SEMANTIC("semantic")` - `Optional mode` - `ADVANCED("advanced")` - `Optional segmentationConfig` Configuration for the segmentation. - `class NoneSegmentationConfig:` - `Optional mode` - `NONE("none")` - `class PageSegmentationConfig:` - `Optional mode` - `PAGE("page")` - `Optional pageSeparator` - `class ElementSegmentationConfig:` - `Optional mode` - `ELEMENT("element")` ### Returns - `class Pipeline:` Schema for a pipeline. 
- `String id` Unique identifier - `EmbeddingConfig embeddingConfig` - `class ManagedOpenAIEmbedding:` - `Optional component` Configuration for the Managed OpenAI embedding model. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The name of the OpenAI embedding model. - `OPENAI_TEXT_EMBEDDING_3_SMALL("openai-text-embedding-3-small")` - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `MANAGED_OPENAI_EMBEDDING("MANAGED_OPENAI_EMBEDDING")` - `class AzureOpenAIEmbeddingConfig:` - `Optional component` Configuration for the Azure OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. - `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `AZURE_EMBEDDING("AZURE_EMBEDDING")` - `class CohereEmbeddingConfig:` - `Optional component` Configuration for the Cohere embedding model. - `Optional apiKey` The Cohere API key. 
- `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided float embedding_type is used when needed. - `Optional inputType` Model Input type. If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional truncate` Truncation type - START/ END/ NONE - `Optional type` Type of the embedding model. - `COHERE_EMBEDDING("COHERE_EMBEDDING")` - `class GeminiEmbeddingConfig:` - `Optional component` Configuration for the Gemini embedding model. - `Optional apiBase` API base to access the model. Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. - `Optional transport` Transport to access the model. Defaults to None. - `Optional type` Type of the embedding model. - `GEMINI_EMBEDDING("GEMINI_EMBEDDING")` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `Optional component` Configuration for the HuggingFace Inference API embedding model. - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. 
- `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional pooling` Enum of possible pooling choices with pooling behaviors. - `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. - `Optional textInstruction` Instruction to prepend during text embedding. - `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. - `Optional type` Type of the embedding model. - `HUGGINGFACE_API_EMBEDDING("HUGGINGFACE_API_EMBEDDING")` - `class OpenAIEmbeddingConfig:` - `Optional component` Configuration for the OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for OpenAI API. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. 
- `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `OPENAI_EMBEDDING("OPENAI_EMBEDDING")` - `class VertexAiEmbeddingConfig:` - `Optional component` Configuration for the VertexAI embedding model. - `Optional clientEmail` The client email for the VertexAI credentials. - `String location` The default location to use when making API calls. - `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. - `Optional tokenUri` The token URI for the VertexAI credentials. - `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `VERTEXAI_EMBEDDING("VERTEXAI_EMBEDDING")` - `class BedrockEmbeddingConfig:` - `Optional component` Configuration for the Bedrock embedding model. - `Optional additionalKwargs` Additional kwargs for the bedrock client. - `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. 
- `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of aws profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. Uses region configured in AWS CLI if not passed - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. - `Optional type` Type of the embedding model. - `BEDROCK_EMBEDDING("BEDROCK_EMBEDDING")` - `String name` - `String projectId` - `Optional configHash` Hashes for the configuration of a pipeline. - `Optional embeddingConfigHash` Hash of the embedding config. - `Optional parsingConfigHash` Hash of the llama parse parameters. - `Optional transformConfigHash` Hash of the transform config. - `Optional createdAt` Creation datetime - `Optional dataSink` Schema for a data sink. - `String id` Unique identifier - `Component component` Component that implements the data sink - `class UnionMember0:` - `class CloudPineconeVectorStore:` Cloud Pinecone Vector Store. This class is used to store the configuration for a Pinecone vector store, so that it can be created and used in LlamaCloud. Args: api_key (str): API key for authenticating with Pinecone index_name (str): name of the Pinecone index namespace (optional[str]): namespace to use in the Pinecone index insert_kwargs (optional[dict]): additional kwargs to pass during insertion - `String apiKey` The API key for authenticating with Pinecone - `String indexName` - `Optional className` - `Optional insertKwargs` - `Optional namespace` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudPostgresVectorStore:` - `String database` - `long embedDim` - `String host` - `String password` - `long port` - `String schemaName` - `String tableName` - `String user` - `Optional className` - `Optional hnswSettings` HNSW settings for PGVector. 
- `Optional distanceMethod` The distance method to use. - `L2("l2")` - `IP("ip")` - `COSINE("cosine")` - `L1("l1")` - `HAMMING("hamming")` - `JACCARD("jaccard")` - `Optional efConstruction` The number of edges to use during the construction phase. - `Optional efSearch` The number of edges to use during the search phase. - `Optional m` The number of bi-directional links created for each new element. - `Optional vectorType` The type of vector to use. - `VECTOR("vector")` - `HALF_VEC("half_vec")` - `BIT("bit")` - `SPARSE_VEC("sparse_vec")` - `Optional hybridSearch` - `Optional performSetup` - `Optional supportsNestedMetadataFilters` - `class CloudQdrantVectorStore:` Cloud Qdrant Vector Store. This class is used to store the configuration for a Qdrant vector store, so that it can be created and used in LlamaCloud. Args: collection_name (str): name of the Qdrant collection url (str): url of the Qdrant instance api_key (str): API key for authenticating with Qdrant max_retries (int): maximum number of retries in case of a failure. Defaults to 3 client_kwargs (dict): additional kwargs to pass to the Qdrant client - `String apiKey` - `String collectionName` - `String url` - `Optional className` - `Optional clientKwargs` - `Optional maxRetries` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudAzureAiSearchVectorStore:` Cloud Azure AI Search Vector Store. - `String searchServiceApiKey` - `String searchServiceEndpoint` - `Optional className` - `Optional clientId` - `Optional clientSecret` - `Optional embeddingDimension` - `Optional filterableMetadataFieldKeys` - `Optional indexName` - `Optional searchServiceApiVersion` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `Optional tenantId` - `class CloudMongoDBAtlasVectorSearch:` Cloud MongoDB Atlas Vector Store. This class is used to store the configuration for a MongoDB Atlas vector store, so that it can be created and used in LlamaCloud. 
Args: mongodb_uri (str): URI for connecting to MongoDB Atlas db_name (str): name of the MongoDB database collection_name (str): name of the MongoDB collection vector_index_name (str): name of the MongoDB Atlas vector index fulltext_index_name (str): name of the MongoDB Atlas full-text index - `String collectionName` - `String dbName` - `String mongoDBUri` - `Optional className` - `Optional embeddingDimension` - `Optional fulltextIndexName` - `Optional supportsNestedMetadataFilters` - `Optional vectorIndexName` - `class CloudMilvusVectorStore:` Cloud Milvus Vector Store. - `String uri` - `Optional token` - `Optional className` - `Optional collectionName` - `Optional embeddingDimension` - `Optional supportsNestedMetadataFilters` - `class CloudAstraDbVectorStore:` Cloud AstraDB Vector Store. This class is used to store the configuration for an AstraDB vector store, so that it can be created and used in LlamaCloud. Args: token (str): The Astra DB Application Token to use. api_endpoint (str): The Astra DB JSON API endpoint for your database. collection_name (str): Collection name to use. If not existing, it will be created. embedding_dimension (int): Length of the embedding vectors in use. keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace' is used. - `String token` The Astra DB Application Token to use - `String apiEndpoint` The Astra DB JSON API endpoint for your database - `String collectionName` Collection name to use. If not existing, it will be created - `long embeddingDimension` Length of the embedding vectors in use - `Optional className` - `Optional keyspace` The keyspace to use. If not provided, 'default_keyspace' is used. - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `String name` The name of the data sink.
- `String projectId` - `SinkType sinkType` - `PINECONE("PINECONE")` - `POSTGRES("POSTGRES")` - `QDRANT("QDRANT")` - `AZUREAI_SEARCH("AZUREAI_SEARCH")` - `MONGODB_ATLAS("MONGODB_ATLAS")` - `MILVUS("MILVUS")` - `ASTRA_DB("ASTRA_DB")` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfig` Schema for an embedding model config. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` The embedding configuration for the embedding model config. - `class AzureOpenAIEmbeddingConfig:` - `class CohereEmbeddingConfig:` - `class GeminiEmbeddingConfig:` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `class OpenAIEmbeddingConfig:` - `class VertexAiEmbeddingConfig:` - `class BedrockEmbeddingConfig:` - `String name` The name of the embedding model config. - `String projectId` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfigId` The ID of the EmbeddingModelConfig this pipeline is using. - `Optional llamaParseParameters` Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline. 
- `Optional adaptiveLongTable` - `Optional aggressiveTableExtraction` - `Optional annotateLinks` - `Optional autoMode` - `Optional autoModeConfigurationJson` - `Optional autoModeTriggerOnImageInPage` - `Optional autoModeTriggerOnRegexpInPage` - `Optional autoModeTriggerOnTableInPage` - `Optional autoModeTriggerOnTextInPage` - `Optional azureOpenAIApiVersion` - `Optional azureOpenAIDeploymentName` - `Optional azureOpenAIEndpoint` - `Optional azureOpenAIKey` - `Optional bboxBottom` - `Optional bboxLeft` - `Optional bboxRight` - `Optional bboxTop` - `Optional boundingBox` - `Optional compactMarkdownTable` - `Optional complementalFormattingInstruction` - `Optional contentGuidelineInstruction` - `Optional continuousMode` - `Optional disableImageExtraction` - `Optional disableOcr` - `Optional disableReconstruction` - `Optional doNotCache` - `Optional doNotUnrollColumns` - `Optional enableCostOptimizer` - `Optional extractCharts` - `Optional extractLayout` - `Optional extractPrintedPageNumber` - `Optional fastMode` - `Optional formattingInstruction` - `Optional gpt4oApiKey` - `Optional gpt4oMode` - `Optional guessXlsxSheetName` - `Optional hideFooters` - `Optional hideHeaders` - `Optional highResOcr` - `Optional htmlMakeAllElementsVisible` - `Optional htmlRemoveFixedElements` - `Optional htmlRemoveNavigationElements` - `Optional httpProxy` - `Optional ignoreDocumentElementsForLayoutDetection` - `Optional> imagesToSave` - `SCREENSHOT("screenshot")` - `EMBEDDED("embedded")` - `LAYOUT("layout")` - `Optional inlineImagesInMarkdown` - `Optional inputS3Path` - `Optional inputS3Region` - `Optional inputUrl` - `Optional internalIsScreenshotJob` - `Optional invalidateCache` - `Optional isFormattingInstruction` - `Optional jobTimeoutExtraTimePerPageInSeconds` - `Optional jobTimeoutInSeconds` - `Optional keepPageSeparatorWhenMergingTables` - `Optional> languages` - `AF("af")` - `AZ("az")` - `BS("bs")` - `CS("cs")` - `CY("cy")` - `DA("da")` - `DE("de")` - `EN("en")` - `ES("es")` - 
`ET("et")` - `FR("fr")` - `GA("ga")` - `HR("hr")` - `HU("hu")` - `ID("id")` - `IS("is")` - `IT("it")` - `KU("ku")` - `LA("la")` - `LT("lt")` - `LV("lv")` - `MI("mi")` - `MS("ms")` - `MT("mt")` - `NL("nl")` - `NO("no")` - `OC("oc")` - `PI("pi")` - `PL("pl")` - `PT("pt")` - `RO("ro")` - `RS_LATIN("rs_latin")` - `SK("sk")` - `SL("sl")` - `SQ("sq")` - `SV("sv")` - `SW("sw")` - `TL("tl")` - `TR("tr")` - `UZ("uz")` - `VI("vi")` - `AR("ar")` - `FA("fa")` - `UG("ug")` - `UR("ur")` - `BN("bn")` - `AS("as")` - `MNI("mni")` - `RU("ru")` - `RS_CYRILLIC("rs_cyrillic")` - `BE("be")` - `BG("bg")` - `UK("uk")` - `MN("mn")` - `ABQ("abq")` - `ADY("ady")` - `KBD("kbd")` - `AVA("ava")` - `DAR("dar")` - `INH("inh")` - `CHE("che")` - `LBE("lbe")` - `LEZ("lez")` - `TAB("tab")` - `TJK("tjk")` - `HI("hi")` - `MR("mr")` - `NE("ne")` - `BH("bh")` - `MAI("mai")` - `ANG("ang")` - `BHO("bho")` - `MAH("mah")` - `SCK("sck")` - `NEW("new")` - `GOM("gom")` - `SA("sa")` - `BGC("bgc")` - `TH("th")` - `CH_SIM("ch_sim")` - `CH_TRA("ch_tra")` - `JA("ja")` - `KO("ko")` - `TA("ta")` - `TE("te")` - `KN("kn")` - `Optional layoutAware` - `Optional lineLevelBoundingBox` - `Optional markdownTableMultilineHeaderSeparator` - `Optional maxPages` - `Optional maxPagesEnforced` - `Optional mergeTablesAcrossPagesInMarkdown` - `Optional model` - `Optional outlinedTableExtraction` - `Optional outputPdfOfDocument` - `Optional outputS3PathPrefix` - `Optional outputS3Region` - `Optional outputTablesAsHtml` - `Optional pageErrorTolerance` - `Optional pageFooterPrefix` - `Optional pageFooterSuffix` - `Optional pageHeaderPrefix` - `Optional pageHeaderSuffix` - `Optional pagePrefix` - `Optional pageSeparator` - `Optional pageSuffix` - `Optional parseMode` Enum for representing the mode of parsing to be used. 
- `PARSE_PAGE_WITHOUT_LLM("parse_page_without_llm")` - `PARSE_PAGE_WITH_LLM("parse_page_with_llm")` - `PARSE_PAGE_WITH_LVM("parse_page_with_lvm")` - `PARSE_PAGE_WITH_AGENT("parse_page_with_agent")` - `PARSE_PAGE_WITH_LAYOUT_AGENT("parse_page_with_layout_agent")` - `PARSE_DOCUMENT_WITH_LLM("parse_document_with_llm")` - `PARSE_DOCUMENT_WITH_LVM("parse_document_with_lvm")` - `PARSE_DOCUMENT_WITH_AGENT("parse_document_with_agent")` - `Optional parsingInstruction` - `Optional preciseBoundingBox` - `Optional premiumMode` - `Optional presentationOutOfBoundsContent` - `Optional presentationSkipEmbeddedData` - `Optional preserveLayoutAlignmentAcrossPages` - `Optional preserveVerySmallText` - `Optional preset` - `Optional priority` The priority for the request. This field may be ignored or overwritten depending on the organization tier. - `LOW("low")` - `MEDIUM("medium")` - `HIGH("high")` - `CRITICAL("critical")` - `Optional projectId` - `Optional removeHiddenText` - `Optional replaceFailedPageMode` Enum for representing the different available page error handling modes. 
- `RAW_TEXT("raw_text")` - `BLANK_PAGE("blank_page")` - `ERROR_MESSAGE("error_message")` - `Optional replaceFailedPageWithErrorMessagePrefix` - `Optional replaceFailedPageWithErrorMessageSuffix` - `Optional saveImages` - `Optional skipDiagonalText` - `Optional specializedChartParsingAgentic` - `Optional specializedChartParsingEfficient` - `Optional specializedChartParsingPlus` - `Optional specializedImageParsing` - `Optional spreadsheetExtractSubTables` - `Optional spreadsheetForceFormulaComputation` - `Optional spreadsheetIncludeHiddenSheets` - `Optional strictModeBuggyFont` - `Optional strictModeImageExtraction` - `Optional strictModeImageOcr` - `Optional strictModeReconstruction` - `Optional structuredOutput` - `Optional structuredOutputJsonSchema` - `Optional structuredOutputJsonSchemaName` - `Optional systemPrompt` - `Optional systemPromptAppend` - `Optional takeScreenshot` - `Optional targetPages` - `Optional tier` - `Optional useVendorMultimodalModel` - `Optional userPrompt` - `Optional vendorMultimodalApiKey` - `Optional vendorMultimodalModelName` - `Optional version` - `Optional> webhookConfigurations` Outbound webhook endpoints to notify on job status changes - `Optional> webhookEvents` Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered. 
- `EXTRACT_PENDING("extract.pending")` - `EXTRACT_SUCCESS("extract.success")` - `EXTRACT_ERROR("extract.error")` - `EXTRACT_PARTIAL_SUCCESS("extract.partial_success")` - `EXTRACT_CANCELLED("extract.cancelled")` - `PARSE_PENDING("parse.pending")` - `PARSE_RUNNING("parse.running")` - `PARSE_SUCCESS("parse.success")` - `PARSE_ERROR("parse.error")` - `PARSE_PARTIAL_SUCCESS("parse.partial_success")` - `PARSE_CANCELLED("parse.cancelled")` - `CLASSIFY_PENDING("classify.pending")` - `CLASSIFY_RUNNING("classify.running")` - `CLASSIFY_SUCCESS("classify.success")` - `CLASSIFY_ERROR("classify.error")` - `CLASSIFY_PARTIAL_SUCCESS("classify.partial_success")` - `CLASSIFY_CANCELLED("classify.cancelled")` - `SHEETS_PENDING("sheets.pending")` - `SHEETS_SUCCESS("sheets.success")` - `SHEETS_ERROR("sheets.error")` - `SHEETS_PARTIAL_SUCCESS("sheets.partial_success")` - `SHEETS_CANCELLED("sheets.cancelled")` - `UNMAPPED_EVENT("unmapped_event")` - `Optional webhookHeaders` Custom HTTP headers sent with each webhook request (e.g. auth tokens) - `Optional webhookOutputFormat` Response format sent to the webhook: 'string' (default) or 'json' - `Optional webhookUrl` URL to receive webhook POST notifications - `Optional webhookUrl` - `Optional managedPipelineId` The ID of the ManagedPipeline this playground pipeline is linked to. - `Optional metadataConfig` Metadata configuration for the pipeline. - `Optional> excludedEmbedMetadataKeys` List of metadata keys to exclude from embeddings - `Optional> excludedLlmMetadataKeys` List of metadata keys to exclude from LLM during retrieval - `Optional pipelineType` Type of pipeline. Either PLAYGROUND or MANAGED. - `PLAYGROUND("PLAYGROUND")` - `MANAGED("MANAGED")` - `Optional presetRetrievalParameters` Preset retrieval parameters for the pipeline. - `Optional alpha` Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval. 
- `Optional className` - `Optional denseSimilarityCutoff` Minimum similarity score wrt query for retrieval - `Optional denseSimilarityTopK` Number of nodes for dense retrieval. - `Optional enableReranking` Enable reranking for retrieval - `Optional filesTopK` Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content). - `Optional rerankTopN` Number of reranked nodes for returning. - `Optional retrievalMode` The retrieval mode for the query. - `CHUNKS("chunks")` - `FILES_VIA_METADATA("files_via_metadata")` - `FILES_VIA_CONTENT("files_via_content")` - `AUTO_ROUTED("auto_routed")` - `Optional retrieveImageNodes` Whether to retrieve image nodes. - `Optional retrievePageFigureNodes` Whether to retrieve page figure nodes. - `Optional retrievePageScreenshotNodes` Whether to retrieve page screenshot nodes. - `Optional searchFilters` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators. Value uses Strict types, as int, float and str are compatible types and were all converted to string before. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. - `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` - `Optional searchFiltersInferenceSchema` JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference. 
- `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional sparseSimilarityTopK` Number of nodes for sparse retrieval. - `Optional sparseModelConfig` Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks. - `Optional className` - `Optional modelType` The sparse model type to use. 'bm25' uses Qdrant's FastEmbed BM25 model (default for new pipelines), 'splade' uses HuggingFace Splade model, 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade). - `SPLADE("splade")` - `BM25("bm25")` - `AUTO("auto")` - `Optional status` Status of the pipeline. - `CREATED("CREATED")` - `DELETING("DELETING")` - `Optional transformConfig` Configuration for the transformation. - `class AutoTransformConfig:` - `Optional chunkOverlap` Chunk overlap for the transformation. - `Optional chunkSize` Chunk size for the transformation. - `Optional mode` - `AUTO("auto")` - `class AdvancedModeTransformConfig:` - `Optional chunkingConfig` Configuration for the chunking. - `class NoneChunkingConfig:` - `Optional mode` - `NONE("none")` - `class CharacterChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `CHARACTER("character")` - `class TokenChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `TOKEN("token")` - `Optional separator` - `class SentenceChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `SENTENCE("sentence")` - `Optional paragraphSeparator` - `Optional separator` - `class SemanticChunkingConfig:` - `Optional breakpointPercentileThreshold` - `Optional bufferSize` - `Optional mode` - `SEMANTIC("semantic")` - `Optional mode` - `ADVANCED("advanced")` - `Optional segmentationConfig` Configuration for the segmentation. 
- `class NoneSegmentationConfig:` - `Optional mode` - `NONE("none")` - `class PageSegmentationConfig:` - `Optional mode` - `PAGE("page")` - `Optional pageSeparator` - `class ElementSegmentationConfig:` - `Optional mode` - `ELEMENT("element")` - `Optional updatedAt` Update datetime ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.Pipeline; import com.llamacloud_prod.api.models.pipelines.PipelineUpdateParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); Pipeline pipeline = client.pipelines().update("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` #### Response ```json { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "class_name": "class_name", "embed_batch_size": 1, "model_name": "openai-text-embedding-3-small", "num_workers": 0 }, "type": "MANAGED_OPENAI_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "config_hash": { "embedding_config_hash": "embedding_config_hash", "parsing_config_hash": "parsing_config_hash", "transform_config_hash": "transform_config_hash" }, "created_at": "2019-12-27T18:11:19.117Z", "data_sink": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "component": { "foo": "bar" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "sink_type": "PINECONE", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "additional_kwargs": { "foo": "bar" }, "api_base": "api_base", "api_key": "api_key", "api_version": "api_version", "azure_deployment": "azure_deployment", "azure_endpoint": "azure_endpoint", "class_name": "class_name", "default_headers": { 
"foo": "string" }, "dimensions": 0, "embed_batch_size": 1, "max_retries": 0, "model_name": "model_name", "num_workers": 0, "reuse_client": true, "timeout": 0 }, "type": "AZURE_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "llama_parse_parameters": { "adaptive_long_table": true, "aggressive_table_extraction": true, "annotate_links": true, "auto_mode": true, "auto_mode_configuration_json": "auto_mode_configuration_json", "auto_mode_trigger_on_image_in_page": true, "auto_mode_trigger_on_regexp_in_page": "auto_mode_trigger_on_regexp_in_page", "auto_mode_trigger_on_table_in_page": true, "auto_mode_trigger_on_text_in_page": "auto_mode_trigger_on_text_in_page", "azure_openai_api_version": "azure_openai_api_version", "azure_openai_deployment_name": "azure_openai_deployment_name", "azure_openai_endpoint": "azure_openai_endpoint", "azure_openai_key": "azure_openai_key", "bbox_bottom": 0, "bbox_left": 0, "bbox_right": 0, "bbox_top": 0, "bounding_box": "bounding_box", "compact_markdown_table": true, "complemental_formatting_instruction": "complemental_formatting_instruction", "content_guideline_instruction": "content_guideline_instruction", "continuous_mode": true, "disable_image_extraction": true, "disable_ocr": true, "disable_reconstruction": true, "do_not_cache": true, "do_not_unroll_columns": true, "enable_cost_optimizer": true, "extract_charts": true, "extract_layout": true, "extract_printed_page_number": true, "fast_mode": true, "formatting_instruction": "formatting_instruction", "gpt4o_api_key": "gpt4o_api_key", "gpt4o_mode": true, "guess_xlsx_sheet_name": true, "hide_footers": true, "hide_headers": true, "high_res_ocr": true, "html_make_all_elements_visible": true, "html_remove_fixed_elements": true, "html_remove_navigation_elements": true, "http_proxy": "http_proxy", 
"ignore_document_elements_for_layout_detection": true, "images_to_save": [ "screenshot" ], "inline_images_in_markdown": true, "input_s3_path": "input_s3_path", "input_s3_region": "input_s3_region", "input_url": "input_url", "internal_is_screenshot_job": true, "invalidate_cache": true, "is_formatting_instruction": true, "job_timeout_extra_time_per_page_in_seconds": 0, "job_timeout_in_seconds": 0, "keep_page_separator_when_merging_tables": true, "languages": [ "af" ], "layout_aware": true, "line_level_bounding_box": true, "markdown_table_multiline_header_separator": "markdown_table_multiline_header_separator", "max_pages": 0, "max_pages_enforced": 0, "merge_tables_across_pages_in_markdown": true, "model": "model", "outlined_table_extraction": true, "output_pdf_of_document": true, "output_s3_path_prefix": "output_s3_path_prefix", "output_s3_region": "output_s3_region", "output_tables_as_HTML": true, "page_error_tolerance": 0, "page_footer_prefix": "page_footer_prefix", "page_footer_suffix": "page_footer_suffix", "page_header_prefix": "page_header_prefix", "page_header_suffix": "page_header_suffix", "page_prefix": "page_prefix", "page_separator": "page_separator", "page_suffix": "page_suffix", "parse_mode": "parse_page_without_llm", "parsing_instruction": "parsing_instruction", "precise_bounding_box": true, "premium_mode": true, "presentation_out_of_bounds_content": true, "presentation_skip_embedded_data": true, "preserve_layout_alignment_across_pages": true, "preserve_very_small_text": true, "preset": "preset", "priority": "low", "project_id": "project_id", "remove_hidden_text": true, "replace_failed_page_mode": "raw_text", "replace_failed_page_with_error_message_prefix": "replace_failed_page_with_error_message_prefix", "replace_failed_page_with_error_message_suffix": "replace_failed_page_with_error_message_suffix", "save_images": true, "skip_diagonal_text": true, "specialized_chart_parsing_agentic": true, "specialized_chart_parsing_efficient": true, 
"specialized_chart_parsing_plus": true, "specialized_image_parsing": true, "spreadsheet_extract_sub_tables": true, "spreadsheet_force_formula_computation": true, "spreadsheet_include_hidden_sheets": true, "strict_mode_buggy_font": true, "strict_mode_image_extraction": true, "strict_mode_image_ocr": true, "strict_mode_reconstruction": true, "structured_output": true, "structured_output_json_schema": "structured_output_json_schema", "structured_output_json_schema_name": "structured_output_json_schema_name", "system_prompt": "system_prompt", "system_prompt_append": "system_prompt_append", "take_screenshot": true, "target_pages": "target_pages", "tier": "tier", "use_vendor_multimodal_model": true, "user_prompt": "user_prompt", "vendor_multimodal_api_key": "vendor_multimodal_api_key", "vendor_multimodal_model_name": "vendor_multimodal_model_name", "version": "version", "webhook_configurations": [ { "webhook_events": [ "parse.success", "parse.error" ], "webhook_headers": { "Authorization": "Bearer sk-..." 
}, "webhook_output_format": "json", "webhook_url": "https://example.com/webhooks/llamacloud" } ], "webhook_url": "webhook_url" }, "managed_pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "metadata_config": { "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ] }, "pipeline_type": "PLAYGROUND", "preset_retrieval_parameters": { "alpha": 0, "class_name": "class_name", "dense_similarity_cutoff": 0, "dense_similarity_top_k": 1, "enable_reranking": true, "files_top_k": 1, "rerank_top_n": 1, "retrieval_mode": "chunks", "retrieve_image_nodes": true, "retrieve_page_figure_nodes": true, "retrieve_page_screenshot_nodes": true, "search_filters": { "filters": [ { "key": "key", "value": 0, "operator": "==" } ], "condition": "and" }, "search_filters_inference_schema": { "foo": { "foo": "bar" } }, "sparse_similarity_top_k": 1 }, "sparse_model_config": { "class_name": "class_name", "model_type": "splade" }, "status": "CREATED", "transform_config": { "chunk_overlap": 0, "chunk_size": 1, "mode": "auto" }, "updated_at": "2019-12-27T18:11:19.117Z" } ``` ## Delete Pipeline `pipelines().delete(PipelineDeleteParams params = PipelineDeleteParams.none(), RequestOptions requestOptions = RequestOptions.none())` **delete** `/api/v1/pipelines/{pipeline_id}` Delete a pipeline and all associated resources. Removes pipeline files, data sources, and vector store data. This operation is irreversible.
### Parameters - `PipelineDeleteParams params` - `Optional pipelineId` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.PipelineDeleteParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); client.pipelines().delete("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` ## Get Pipeline Status `ManagedIngestionStatusResponse pipelines().getStatus(PipelineGetStatusParams params = PipelineGetStatusParams.none(), RequestOptions requestOptions = RequestOptions.none())` **get** `/api/v1/pipelines/{pipeline_id}/status` Get the ingestion status of a managed pipeline. Returns document counts, sync progress, and the last effective timestamp. Only available for managed pipelines. ### Parameters - `PipelineGetStatusParams params` - `Optional pipelineId` - `Optional fullDetails` ### Returns - `class ManagedIngestionStatusResponse:` - `Status status` Status of the ingestion. - `NOT_STARTED("NOT_STARTED")` - `IN_PROGRESS("IN_PROGRESS")` - `SUCCESS("SUCCESS")` - `ERROR("ERROR")` - `PARTIAL_SUCCESS("PARTIAL_SUCCESS")` - `CANCELLED("CANCELLED")` - `Optional deploymentDate` Date of the deployment. - `Optional effectiveAt` When the status is effective - `Optional> error` List of errors that occurred during ingestion. - `String jobId` ID of the job that failed. - `String message` List of errors that occurred during ingestion. - `Step step` Name of the job that failed. - `MANAGED_INGESTION("MANAGED_INGESTION")` - `DATA_SOURCE("DATA_SOURCE")` - `FILE_UPDATER("FILE_UPDATER")` - `PARSE("PARSE")` - `TRANSFORM("TRANSFORM")` - `INGESTION("INGESTION")` - `METADATA_UPDATE("METADATA_UPDATE")` - `Optional jobId` ID of the latest job.
### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.ManagedIngestionStatusResponse; import com.llamacloud_prod.api.models.pipelines.PipelineGetStatusParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); ManagedIngestionStatusResponse managedIngestionStatusResponse = client.pipelines().getStatus("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` #### Response ```json { "status": "NOT_STARTED", "deployment_date": "2019-12-27T18:11:19.117Z", "effective_at": "2019-12-27T18:11:19.117Z", "error": [ { "job_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "message": "message", "step": "MANAGED_INGESTION" } ], "job_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e" } ``` ## Upsert Pipeline `Pipeline pipelines().upsert(PipelineUpsertParams params, RequestOptions requestOptions = RequestOptions.none())` **put** `/api/v1/pipelines` Upsert a pipeline. Updates the pipeline if one with the same name and project already exists, otherwise creates a new one. ### Parameters - `PipelineUpsertParams params` - `Optional organizationId` - `Optional projectId` - `PipelineCreate pipelineCreate` Schema for creating a pipeline. ### Returns - `class Pipeline:` Schema for a pipeline. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` - `class ManagedOpenAIEmbedding:` - `Optional component` Configuration for the Managed OpenAI embedding model. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The name of the OpenAI embedding model. - `OPENAI_TEXT_EMBEDDING_3_SMALL("openai-text-embedding-3-small")` - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model.
- `MANAGED_OPENAI_EMBEDDING("MANAGED_OPENAI_EMBEDDING")` - `class AzureOpenAIEmbeddingConfig:` - `Optional component` Configuration for the Azure OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. - `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `AZURE_EMBEDDING("AZURE_EMBEDDING")` - `class CohereEmbeddingConfig:` - `Optional component` Configuration for the Cohere embedding model. - `Optional apiKey` The Cohere API key. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided float embedding_type is used when needed. - `Optional inputType` Model Input type. If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional truncate` Truncation type - START/ END/ NONE - `Optional type` Type of the embedding model. 
- `COHERE_EMBEDDING("COHERE_EMBEDDING")` - `class GeminiEmbeddingConfig:` - `Optional component` Configuration for the Gemini embedding model. - `Optional apiBase` API base to access the model. Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. - `Optional transport` Transport to access the model. Defaults to None. - `Optional type` Type of the embedding model. - `GEMINI_EMBEDDING("GEMINI_EMBEDDING")` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `Optional component` Configuration for the HuggingFace Inference API embedding model. - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. - `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional pooling` Enum of possible pooling choices with pooling behaviors. 
- `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. - `Optional textInstruction` Instruction to prepend during text embedding. - `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. - `Optional type` Type of the embedding model. - `HUGGINGFACE_API_EMBEDDING("HUGGINGFACE_API_EMBEDDING")` - `class OpenAIEmbeddingConfig:` - `Optional component` Configuration for the OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for OpenAI API. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `OPENAI_EMBEDDING("OPENAI_EMBEDDING")` - `class VertexAiEmbeddingConfig:` - `Optional component` Configuration for the VertexAI embedding model. - `Optional clientEmail` The client email for the VertexAI credentials. - `String location` The default location to use when making API calls. 
- `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. - `Optional tokenUri` The token URI for the VertexAI credentials. - `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `VERTEXAI_EMBEDDING("VERTEXAI_EMBEDDING")` - `class BedrockEmbeddingConfig:` - `Optional component` Configuration for the Bedrock embedding model. - `Optional additionalKwargs` Additional kwargs for the Bedrock client. - `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. - `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of the AWS profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. Uses the region configured in the AWS CLI if not passed. - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. - `Optional type` Type of the embedding model.
- `BEDROCK_EMBEDDING("BEDROCK_EMBEDDING")` - `String name` - `String projectId` - `Optional configHash` Hashes for the configuration of a pipeline. - `Optional embeddingConfigHash` Hash of the embedding config. - `Optional parsingConfigHash` Hash of the llama parse parameters. - `Optional transformConfigHash` Hash of the transform config. - `Optional createdAt` Creation datetime - `Optional dataSink` Schema for a data sink. - `String id` Unique identifier - `Component component` Component that implements the data sink - `class UnionMember0:` - `class CloudPineconeVectorStore:` Cloud Pinecone Vector Store. This class is used to store the configuration for a Pinecone vector store, so that it can be created and used in LlamaCloud. Args: api_key (str): API key for authenticating with Pinecone index_name (str): name of the Pinecone index namespace (optional[str]): namespace to use in the Pinecone index insert_kwargs (optional[dict]): additional kwargs to pass during insertion - `String apiKey` The API key for authenticating with Pinecone - `String indexName` - `Optional className` - `Optional insertKwargs` - `Optional namespace` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudPostgresVectorStore:` - `String database` - `long embedDim` - `String host` - `String password` - `long port` - `String schemaName` - `String tableName` - `String user` - `Optional className` - `Optional hnswSettings` HNSW settings for PGVector. - `Optional distanceMethod` The distance method to use. - `L2("l2")` - `IP("ip")` - `COSINE("cosine")` - `L1("l1")` - `HAMMING("hamming")` - `JACCARD("jaccard")` - `Optional efConstruction` The number of edges to use during the construction phase. - `Optional efSearch` The number of edges to use during the search phase. - `Optional m` The number of bi-directional links created for each new element. - `Optional vectorType` The type of vector to use. 
- `VECTOR("vector")` - `HALF_VEC("half_vec")` - `BIT("bit")` - `SPARSE_VEC("sparse_vec")` - `Optional hybridSearch` - `Optional performSetup` - `Optional supportsNestedMetadataFilters` - `class CloudQdrantVectorStore:` Cloud Qdrant Vector Store. This class is used to store the configuration for a Qdrant vector store, so that it can be created and used in LlamaCloud. Args: collection_name (str): name of the Qdrant collection url (str): url of the Qdrant instance api_key (str): API key for authenticating with Qdrant max_retries (int): maximum number of retries in case of a failure. Defaults to 3 client_kwargs (dict): additional kwargs to pass to the Qdrant client - `String apiKey` - `String collectionName` - `String url` - `Optional className` - `Optional clientKwargs` - `Optional maxRetries` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudAzureAiSearchVectorStore:` Cloud Azure AI Search Vector Store. - `String searchServiceApiKey` - `String searchServiceEndpoint` - `Optional className` - `Optional clientId` - `Optional clientSecret` - `Optional embeddingDimension` - `Optional filterableMetadataFieldKeys` - `Optional indexName` - `Optional searchServiceApiVersion` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `Optional tenantId` - `class CloudMongoDBAtlasVectorSearch:` Cloud MongoDB Atlas Vector Store. This class is used to store the configuration for a MongoDB Atlas vector store, so that it can be created and used in LlamaCloud. 
Args: mongodb_uri (str): URI for connecting to MongoDB Atlas db_name (str): name of the MongoDB database collection_name (str): name of the MongoDB collection vector_index_name (str): name of the MongoDB Atlas vector index fulltext_index_name (str): name of the MongoDB Atlas full-text index - `String collectionName` - `String dbName` - `String mongoDBUri` - `Optional className` - `Optional embeddingDimension` - `Optional fulltextIndexName` - `Optional supportsNestedMetadataFilters` - `Optional vectorIndexName` - `class CloudMilvusVectorStore:` Cloud Milvus Vector Store. - `String uri` - `Optional token` - `Optional className` - `Optional collectionName` - `Optional embeddingDimension` - `Optional supportsNestedMetadataFilters` - `class CloudAstraDbVectorStore:` Cloud AstraDB Vector Store. This class is used to store the configuration for an AstraDB vector store, so that it can be created and used in LlamaCloud. Args: token (str): The Astra DB Application Token to use. api_endpoint (str): The Astra DB JSON API endpoint for your database. collection_name (str): Collection name to use. If not existing, it will be created. embedding_dimension (int): Length of the embedding vectors in use. keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace' - `String token` The Astra DB Application Token to use - `String apiEndpoint` The Astra DB JSON API endpoint for your database - `String collectionName` Collection name to use. If not existing, it will be created - `long embeddingDimension` Length of the embedding vectors in use - `Optional className` - `Optional keyspace` The keyspace to use. If not provided, 'default_keyspace' - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `String name` The name of the data sink. 
- `String projectId` - `SinkType sinkType` - `PINECONE("PINECONE")` - `POSTGRES("POSTGRES")` - `QDRANT("QDRANT")` - `AZUREAI_SEARCH("AZUREAI_SEARCH")` - `MONGODB_ATLAS("MONGODB_ATLAS")` - `MILVUS("MILVUS")` - `ASTRA_DB("ASTRA_DB")` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfig` Schema for an embedding model config. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` The embedding configuration for the embedding model config. - `class AzureOpenAIEmbeddingConfig:` - `class CohereEmbeddingConfig:` - `class GeminiEmbeddingConfig:` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `class OpenAIEmbeddingConfig:` - `class VertexAiEmbeddingConfig:` - `class BedrockEmbeddingConfig:` - `String name` The name of the embedding model config. - `String projectId` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfigId` The ID of the EmbeddingModelConfig this pipeline is using. - `Optional llamaParseParameters` Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline. 
- `Optional adaptiveLongTable` - `Optional aggressiveTableExtraction` - `Optional annotateLinks` - `Optional autoMode` - `Optional autoModeConfigurationJson` - `Optional autoModeTriggerOnImageInPage` - `Optional autoModeTriggerOnRegexpInPage` - `Optional autoModeTriggerOnTableInPage` - `Optional autoModeTriggerOnTextInPage` - `Optional azureOpenAIApiVersion` - `Optional azureOpenAIDeploymentName` - `Optional azureOpenAIEndpoint` - `Optional azureOpenAIKey` - `Optional bboxBottom` - `Optional bboxLeft` - `Optional bboxRight` - `Optional bboxTop` - `Optional boundingBox` - `Optional compactMarkdownTable` - `Optional complementalFormattingInstruction` - `Optional contentGuidelineInstruction` - `Optional continuousMode` - `Optional disableImageExtraction` - `Optional disableOcr` - `Optional disableReconstruction` - `Optional doNotCache` - `Optional doNotUnrollColumns` - `Optional enableCostOptimizer` - `Optional extractCharts` - `Optional extractLayout` - `Optional extractPrintedPageNumber` - `Optional fastMode` - `Optional formattingInstruction` - `Optional gpt4oApiKey` - `Optional gpt4oMode` - `Optional guessXlsxSheetName` - `Optional hideFooters` - `Optional hideHeaders` - `Optional highResOcr` - `Optional htmlMakeAllElementsVisible` - `Optional htmlRemoveFixedElements` - `Optional htmlRemoveNavigationElements` - `Optional httpProxy` - `Optional ignoreDocumentElementsForLayoutDetection` - `Optional> imagesToSave` - `SCREENSHOT("screenshot")` - `EMBEDDED("embedded")` - `LAYOUT("layout")` - `Optional inlineImagesInMarkdown` - `Optional inputS3Path` - `Optional inputS3Region` - `Optional inputUrl` - `Optional internalIsScreenshotJob` - `Optional invalidateCache` - `Optional isFormattingInstruction` - `Optional jobTimeoutExtraTimePerPageInSeconds` - `Optional jobTimeoutInSeconds` - `Optional keepPageSeparatorWhenMergingTables` - `Optional> languages` - `AF("af")` - `AZ("az")` - `BS("bs")` - `CS("cs")` - `CY("cy")` - `DA("da")` - `DE("de")` - `EN("en")` - `ES("es")` - 
`ET("et")` - `FR("fr")` - `GA("ga")` - `HR("hr")` - `HU("hu")` - `ID("id")` - `IS("is")` - `IT("it")` - `KU("ku")` - `LA("la")` - `LT("lt")` - `LV("lv")` - `MI("mi")` - `MS("ms")` - `MT("mt")` - `NL("nl")` - `NO("no")` - `OC("oc")` - `PI("pi")` - `PL("pl")` - `PT("pt")` - `RO("ro")` - `RS_LATIN("rs_latin")` - `SK("sk")` - `SL("sl")` - `SQ("sq")` - `SV("sv")` - `SW("sw")` - `TL("tl")` - `TR("tr")` - `UZ("uz")` - `VI("vi")` - `AR("ar")` - `FA("fa")` - `UG("ug")` - `UR("ur")` - `BN("bn")` - `AS("as")` - `MNI("mni")` - `RU("ru")` - `RS_CYRILLIC("rs_cyrillic")` - `BE("be")` - `BG("bg")` - `UK("uk")` - `MN("mn")` - `ABQ("abq")` - `ADY("ady")` - `KBD("kbd")` - `AVA("ava")` - `DAR("dar")` - `INH("inh")` - `CHE("che")` - `LBE("lbe")` - `LEZ("lez")` - `TAB("tab")` - `TJK("tjk")` - `HI("hi")` - `MR("mr")` - `NE("ne")` - `BH("bh")` - `MAI("mai")` - `ANG("ang")` - `BHO("bho")` - `MAH("mah")` - `SCK("sck")` - `NEW("new")` - `GOM("gom")` - `SA("sa")` - `BGC("bgc")` - `TH("th")` - `CH_SIM("ch_sim")` - `CH_TRA("ch_tra")` - `JA("ja")` - `KO("ko")` - `TA("ta")` - `TE("te")` - `KN("kn")` - `Optional layoutAware` - `Optional lineLevelBoundingBox` - `Optional markdownTableMultilineHeaderSeparator` - `Optional maxPages` - `Optional maxPagesEnforced` - `Optional mergeTablesAcrossPagesInMarkdown` - `Optional model` - `Optional outlinedTableExtraction` - `Optional outputPdfOfDocument` - `Optional outputS3PathPrefix` - `Optional outputS3Region` - `Optional outputTablesAsHtml` - `Optional pageErrorTolerance` - `Optional pageFooterPrefix` - `Optional pageFooterSuffix` - `Optional pageHeaderPrefix` - `Optional pageHeaderSuffix` - `Optional pagePrefix` - `Optional pageSeparator` - `Optional pageSuffix` - `Optional parseMode` Enum for representing the mode of parsing to be used. 
- `PARSE_PAGE_WITHOUT_LLM("parse_page_without_llm")` - `PARSE_PAGE_WITH_LLM("parse_page_with_llm")` - `PARSE_PAGE_WITH_LVM("parse_page_with_lvm")` - `PARSE_PAGE_WITH_AGENT("parse_page_with_agent")` - `PARSE_PAGE_WITH_LAYOUT_AGENT("parse_page_with_layout_agent")` - `PARSE_DOCUMENT_WITH_LLM("parse_document_with_llm")` - `PARSE_DOCUMENT_WITH_LVM("parse_document_with_lvm")` - `PARSE_DOCUMENT_WITH_AGENT("parse_document_with_agent")` - `Optional parsingInstruction` - `Optional preciseBoundingBox` - `Optional premiumMode` - `Optional presentationOutOfBoundsContent` - `Optional presentationSkipEmbeddedData` - `Optional preserveLayoutAlignmentAcrossPages` - `Optional preserveVerySmallText` - `Optional preset` - `Optional priority` The priority for the request. This field may be ignored or overwritten depending on the organization tier. - `LOW("low")` - `MEDIUM("medium")` - `HIGH("high")` - `CRITICAL("critical")` - `Optional projectId` - `Optional removeHiddenText` - `Optional replaceFailedPageMode` Enum for representing the different available page error handling modes. 
- `RAW_TEXT("raw_text")` - `BLANK_PAGE("blank_page")` - `ERROR_MESSAGE("error_message")` - `Optional replaceFailedPageWithErrorMessagePrefix` - `Optional replaceFailedPageWithErrorMessageSuffix` - `Optional saveImages` - `Optional skipDiagonalText` - `Optional specializedChartParsingAgentic` - `Optional specializedChartParsingEfficient` - `Optional specializedChartParsingPlus` - `Optional specializedImageParsing` - `Optional spreadsheetExtractSubTables` - `Optional spreadsheetForceFormulaComputation` - `Optional spreadsheetIncludeHiddenSheets` - `Optional strictModeBuggyFont` - `Optional strictModeImageExtraction` - `Optional strictModeImageOcr` - `Optional strictModeReconstruction` - `Optional structuredOutput` - `Optional structuredOutputJsonSchema` - `Optional structuredOutputJsonSchemaName` - `Optional systemPrompt` - `Optional systemPromptAppend` - `Optional takeScreenshot` - `Optional targetPages` - `Optional tier` - `Optional useVendorMultimodalModel` - `Optional userPrompt` - `Optional vendorMultimodalApiKey` - `Optional vendorMultimodalModelName` - `Optional version` - `Optional> webhookConfigurations` Outbound webhook endpoints to notify on job status changes - `Optional> webhookEvents` Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered. 
- `EXTRACT_PENDING("extract.pending")` - `EXTRACT_SUCCESS("extract.success")` - `EXTRACT_ERROR("extract.error")` - `EXTRACT_PARTIAL_SUCCESS("extract.partial_success")` - `EXTRACT_CANCELLED("extract.cancelled")` - `PARSE_PENDING("parse.pending")` - `PARSE_RUNNING("parse.running")` - `PARSE_SUCCESS("parse.success")` - `PARSE_ERROR("parse.error")` - `PARSE_PARTIAL_SUCCESS("parse.partial_success")` - `PARSE_CANCELLED("parse.cancelled")` - `CLASSIFY_PENDING("classify.pending")` - `CLASSIFY_RUNNING("classify.running")` - `CLASSIFY_SUCCESS("classify.success")` - `CLASSIFY_ERROR("classify.error")` - `CLASSIFY_PARTIAL_SUCCESS("classify.partial_success")` - `CLASSIFY_CANCELLED("classify.cancelled")` - `SHEETS_PENDING("sheets.pending")` - `SHEETS_SUCCESS("sheets.success")` - `SHEETS_ERROR("sheets.error")` - `SHEETS_PARTIAL_SUCCESS("sheets.partial_success")` - `SHEETS_CANCELLED("sheets.cancelled")` - `UNMAPPED_EVENT("unmapped_event")` - `Optional webhookHeaders` Custom HTTP headers sent with each webhook request (e.g. auth tokens) - `Optional webhookOutputFormat` Response format sent to the webhook: 'string' (default) or 'json' - `Optional webhookUrl` URL to receive webhook POST notifications - `Optional webhookUrl` - `Optional managedPipelineId` The ID of the ManagedPipeline this playground pipeline is linked to. - `Optional metadataConfig` Metadata configuration for the pipeline. - `Optional> excludedEmbedMetadataKeys` List of metadata keys to exclude from embeddings - `Optional> excludedLlmMetadataKeys` List of metadata keys to exclude from LLM during retrieval - `Optional pipelineType` Type of pipeline. Either PLAYGROUND or MANAGED. - `PLAYGROUND("PLAYGROUND")` - `MANAGED("MANAGED")` - `Optional presetRetrievalParameters` Preset retrieval parameters for the pipeline. - `Optional alpha` Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval. 
- `Optional className` - `Optional denseSimilarityCutoff` Minimum similarity score with respect to the query for retrieval - `Optional denseSimilarityTopK` Number of nodes for dense retrieval. - `Optional enableReranking` Enable reranking for retrieval - `Optional filesTopK` Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content). - `Optional rerankTopN` Number of reranked nodes for returning. - `Optional retrievalMode` The retrieval mode for the query. - `CHUNKS("chunks")` - `FILES_VIA_METADATA("files_via_metadata")` - `FILES_VIA_CONTENT("files_via_content")` - `AUTO_ROUTED("auto_routed")` - `Optional retrieveImageNodes` Whether to retrieve image nodes. - `Optional retrievePageFigureNodes` Whether to retrieve page figure nodes. - `Optional retrievePageScreenshotNodes` Whether to retrieve page screenshot nodes. - `Optional searchFilters` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators. Value uses Strict types, as int, float and str are compatible types and were all converted to string before. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. - `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` - `Optional searchFiltersInferenceSchema` JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference.
- `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional sparseSimilarityTopK` Number of nodes for sparse retrieval. - `Optional sparseModelConfig` Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks. - `Optional className` - `Optional modelType` The sparse model type to use. 'bm25' uses Qdrant's FastEmbed BM25 model (default for new pipelines), 'splade' uses HuggingFace Splade model, 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade). - `SPLADE("splade")` - `BM25("bm25")` - `AUTO("auto")` - `Optional status` Status of the pipeline. - `CREATED("CREATED")` - `DELETING("DELETING")` - `Optional transformConfig` Configuration for the transformation. - `class AutoTransformConfig:` - `Optional chunkOverlap` Chunk overlap for the transformation. - `Optional chunkSize` Chunk size for the transformation. - `Optional mode` - `AUTO("auto")` - `class AdvancedModeTransformConfig:` - `Optional chunkingConfig` Configuration for the chunking. - `class NoneChunkingConfig:` - `Optional mode` - `NONE("none")` - `class CharacterChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `CHARACTER("character")` - `class TokenChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `TOKEN("token")` - `Optional separator` - `class SentenceChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `SENTENCE("sentence")` - `Optional paragraphSeparator` - `Optional separator` - `class SemanticChunkingConfig:` - `Optional breakpointPercentileThreshold` - `Optional bufferSize` - `Optional mode` - `SEMANTIC("semantic")` - `Optional mode` - `ADVANCED("advanced")` - `Optional segmentationConfig` Configuration for the segmentation. 
- `class NoneSegmentationConfig:` - `Optional mode` - `NONE("none")` - `class PageSegmentationConfig:` - `Optional mode` - `PAGE("page")` - `Optional pageSeparator` - `class ElementSegmentationConfig:` - `Optional mode` - `ELEMENT("element")` - `Optional updatedAt` Update datetime ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.Pipeline; import com.llamacloud_prod.api.models.pipelines.PipelineCreate; import com.llamacloud_prod.api.models.pipelines.PipelineUpsertParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); PipelineCreate params = PipelineCreate.builder() .name("x") .build(); Pipeline pipeline = client.pipelines().upsert(params); } } ``` #### Response ```json { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "class_name": "class_name", "embed_batch_size": 1, "model_name": "openai-text-embedding-3-small", "num_workers": 0 }, "type": "MANAGED_OPENAI_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "config_hash": { "embedding_config_hash": "embedding_config_hash", "parsing_config_hash": "parsing_config_hash", "transform_config_hash": "transform_config_hash" }, "created_at": "2019-12-27T18:11:19.117Z", "data_sink": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "component": { "foo": "bar" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "sink_type": "PINECONE", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "additional_kwargs": { "foo": "bar" }, "api_base": "api_base", "api_key": "api_key", "api_version": "api_version", "azure_deployment": 
"azure_deployment", "azure_endpoint": "azure_endpoint", "class_name": "class_name", "default_headers": { "foo": "string" }, "dimensions": 0, "embed_batch_size": 1, "max_retries": 0, "model_name": "model_name", "num_workers": 0, "reuse_client": true, "timeout": 0 }, "type": "AZURE_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "llama_parse_parameters": { "adaptive_long_table": true, "aggressive_table_extraction": true, "annotate_links": true, "auto_mode": true, "auto_mode_configuration_json": "auto_mode_configuration_json", "auto_mode_trigger_on_image_in_page": true, "auto_mode_trigger_on_regexp_in_page": "auto_mode_trigger_on_regexp_in_page", "auto_mode_trigger_on_table_in_page": true, "auto_mode_trigger_on_text_in_page": "auto_mode_trigger_on_text_in_page", "azure_openai_api_version": "azure_openai_api_version", "azure_openai_deployment_name": "azure_openai_deployment_name", "azure_openai_endpoint": "azure_openai_endpoint", "azure_openai_key": "azure_openai_key", "bbox_bottom": 0, "bbox_left": 0, "bbox_right": 0, "bbox_top": 0, "bounding_box": "bounding_box", "compact_markdown_table": true, "complemental_formatting_instruction": "complemental_formatting_instruction", "content_guideline_instruction": "content_guideline_instruction", "continuous_mode": true, "disable_image_extraction": true, "disable_ocr": true, "disable_reconstruction": true, "do_not_cache": true, "do_not_unroll_columns": true, "enable_cost_optimizer": true, "extract_charts": true, "extract_layout": true, "extract_printed_page_number": true, "fast_mode": true, "formatting_instruction": "formatting_instruction", "gpt4o_api_key": "gpt4o_api_key", "gpt4o_mode": true, "guess_xlsx_sheet_name": true, "hide_footers": true, "hide_headers": true, "high_res_ocr": true, "html_make_all_elements_visible": true, 
"html_remove_fixed_elements": true, "html_remove_navigation_elements": true, "http_proxy": "http_proxy", "ignore_document_elements_for_layout_detection": true, "images_to_save": [ "screenshot" ], "inline_images_in_markdown": true, "input_s3_path": "input_s3_path", "input_s3_region": "input_s3_region", "input_url": "input_url", "internal_is_screenshot_job": true, "invalidate_cache": true, "is_formatting_instruction": true, "job_timeout_extra_time_per_page_in_seconds": 0, "job_timeout_in_seconds": 0, "keep_page_separator_when_merging_tables": true, "languages": [ "af" ], "layout_aware": true, "line_level_bounding_box": true, "markdown_table_multiline_header_separator": "markdown_table_multiline_header_separator", "max_pages": 0, "max_pages_enforced": 0, "merge_tables_across_pages_in_markdown": true, "model": "model", "outlined_table_extraction": true, "output_pdf_of_document": true, "output_s3_path_prefix": "output_s3_path_prefix", "output_s3_region": "output_s3_region", "output_tables_as_HTML": true, "page_error_tolerance": 0, "page_footer_prefix": "page_footer_prefix", "page_footer_suffix": "page_footer_suffix", "page_header_prefix": "page_header_prefix", "page_header_suffix": "page_header_suffix", "page_prefix": "page_prefix", "page_separator": "page_separator", "page_suffix": "page_suffix", "parse_mode": "parse_page_without_llm", "parsing_instruction": "parsing_instruction", "precise_bounding_box": true, "premium_mode": true, "presentation_out_of_bounds_content": true, "presentation_skip_embedded_data": true, "preserve_layout_alignment_across_pages": true, "preserve_very_small_text": true, "preset": "preset", "priority": "low", "project_id": "project_id", "remove_hidden_text": true, "replace_failed_page_mode": "raw_text", "replace_failed_page_with_error_message_prefix": "replace_failed_page_with_error_message_prefix", "replace_failed_page_with_error_message_suffix": "replace_failed_page_with_error_message_suffix", "save_images": true, "skip_diagonal_text": true, 
"specialized_chart_parsing_agentic": true, "specialized_chart_parsing_efficient": true, "specialized_chart_parsing_plus": true, "specialized_image_parsing": true, "spreadsheet_extract_sub_tables": true, "spreadsheet_force_formula_computation": true, "spreadsheet_include_hidden_sheets": true, "strict_mode_buggy_font": true, "strict_mode_image_extraction": true, "strict_mode_image_ocr": true, "strict_mode_reconstruction": true, "structured_output": true, "structured_output_json_schema": "structured_output_json_schema", "structured_output_json_schema_name": "structured_output_json_schema_name", "system_prompt": "system_prompt", "system_prompt_append": "system_prompt_append", "take_screenshot": true, "target_pages": "target_pages", "tier": "tier", "use_vendor_multimodal_model": true, "user_prompt": "user_prompt", "vendor_multimodal_api_key": "vendor_multimodal_api_key", "vendor_multimodal_model_name": "vendor_multimodal_model_name", "version": "version", "webhook_configurations": [ { "webhook_events": [ "parse.success", "parse.error" ], "webhook_headers": { "Authorization": "Bearer sk-..." 
}, "webhook_output_format": "json", "webhook_url": "https://example.com/webhooks/llamacloud" } ], "webhook_url": "webhook_url" }, "managed_pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "metadata_config": { "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ] }, "pipeline_type": "PLAYGROUND", "preset_retrieval_parameters": { "alpha": 0, "class_name": "class_name", "dense_similarity_cutoff": 0, "dense_similarity_top_k": 1, "enable_reranking": true, "files_top_k": 1, "rerank_top_n": 1, "retrieval_mode": "chunks", "retrieve_image_nodes": true, "retrieve_page_figure_nodes": true, "retrieve_page_screenshot_nodes": true, "search_filters": { "filters": [ { "key": "key", "value": 0, "operator": "==" } ], "condition": "and" }, "search_filters_inference_schema": { "foo": { "foo": "bar" } }, "sparse_similarity_top_k": 1 }, "sparse_model_config": { "class_name": "class_name", "model_type": "splade" }, "status": "CREATED", "transform_config": { "chunk_overlap": 0, "chunk_size": 1, "mode": "auto" }, "updated_at": "2019-12-27T18:11:19.117Z" } ``` ## Run Search `PipelineRetrieveResponse pipelines().retrieve(PipelineRetrieveParams params, RequestOptions requestOptions = RequestOptions.none())` **post** `/api/v1/pipelines/{pipeline_id}/retrieve` Run a retrieval query against a managed pipeline. Searches the pipeline's vector store using the provided query and retrieval parameters. Supports dense, sparse, and hybrid search modes with configurable top-k and reranking. ### Parameters - `PipelineRetrieveParams params` - `Optional pipelineId` - `Optional organizationId` - `Optional projectId` - `String query` The query to retrieve against. - `Optional alpha` Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval.
- `Optional className` - `Optional denseSimilarityCutoff` Minimum similarity score with respect to the query for retrieval - `Optional denseSimilarityTopK` Number of nodes for dense retrieval. - `Optional enableReranking` Enable reranking for retrieval - `Optional filesTopK` Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content). - `Optional rerankTopN` Number of reranked nodes for returning. - `Optional retrievalMode` The retrieval mode for the query. - `Optional retrieveImageNodes` Whether to retrieve image nodes. - `Optional retrievePageFigureNodes` Whether to retrieve page figure nodes. - `Optional retrievePageScreenshotNodes` Whether to retrieve page screenshot nodes. - `Optional searchFilters` Metadata filters for vector stores. - `Optional searchFiltersInferenceSchema` JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional sparseSimilarityTopK` Number of nodes for sparse retrieval. ### Returns - `class PipelineRetrieveResponse:` Schema for the result of a retrieval execution. - `String pipelineId` The ID of the pipeline that the query was retrieved against. - `List retrievalNodes` The nodes retrieved by the pipeline for the given query. - `TextNode node` Provided for backward compatibility. - `Optional className` - `Optional> embedding` Embedding of the node. - `Optional endCharIdx` End char index of the node. - `Optional> excludedEmbedMetadataKeys` Metadata keys that are excluded from text for the embed model. - `Optional> excludedLlmMetadataKeys` Metadata keys that are excluded from text for the LLM. - `Optional extraInfo` A flat dictionary of metadata fields - `Optional id` Unique ID of the node. - `Optional metadataSeperator` Separator between metadata fields when converting to string. - `Optional metadataTemplate` Template for how metadata is formatted, with {key} and {value} placeholders.
- `Optional mimetype` MIME type of the node content. - `Optional relationships` A mapping of relationships to other node information. - `class RelatedNodeInfo:` - `String nodeId` - `Optional className` - `Optional hash` - `Optional metadata` - `Optional nodeType` - `_1("1")` - `_2("2")` - `_3("3")` - `_4("4")` - `_5("5")` - `List` - `String nodeId` - `Optional className` - `Optional hash` - `Optional metadata` - `Optional nodeType` - `_1("1")` - `_2("2")` - `_3("3")` - `_4("4")` - `_5("5")` - `Optional startCharIdx` Start char index of the node. - `Optional text` Text content of the node. - `Optional textTemplate` Template for how text is formatted, with {content} and {metadata_str} placeholders. - `Optional className` - `Optional score` - `Optional className` - `Optional> imageNodes` The image nodes retrieved by the pipeline for the given query. Deprecated - will soon be replaced with 'page_screenshot_nodes'. - `Node node` - `String fileId` The ID of the file that the page screenshot was taken from - `long imageSize` The size of the image in bytes - `long pageIndex` The index of the page for which the screenshot is taken (0-indexed) - `Optional metadata` Metadata for the screenshot - `double score` The score of the screenshot node - `Optional className` - `Optional inferredSearchFilters` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators. Value uses Strict types, as int, float and str are compatible types and were all converted to string before. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. 
- `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` - `Optional metadata` Metadata associated with the retrieval execution - `Optional> pageFigureNodes` The page figure nodes retrieved by the pipeline for the given query. - `Node node` - `double confidence` The confidence of the figure - `String figureName` The name of the figure - `long figureSize` The size of the figure in bytes - `String fileId` The ID of the file that the figure was taken from - `long pageIndex` The index of the page for which the figure is taken (0-indexed) - `Optional isLikelyNoise` Whether the figure is likely to be noise - `Optional metadata` Metadata for the figure - `double score` The score of the figure node - `Optional className` - `Optional retrievalLatency` The end-to-end latency for retrieval and reranking. 
### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.PipelineRetrieveParams; import com.llamacloud_prod.api.models.pipelines.PipelineRetrieveResponse; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); PipelineRetrieveParams params = PipelineRetrieveParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .query("x") .build(); PipelineRetrieveResponse pipeline = client.pipelines().retrieve(params); } } ``` #### Response ```json { "pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "retrieval_nodes": [ { "node": { "class_name": "class_name", "embedding": [ 0 ], "end_char_idx": 0, "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ], "extra_info": { "foo": "bar" }, "id_": "id_", "metadata_seperator": "metadata_seperator", "metadata_template": "metadata_template", "mimetype": "mimetype", "relationships": { "foo": { "node_id": "node_id", "class_name": "class_name", "hash": "hash", "metadata": { "foo": "bar" }, "node_type": "1" } }, "start_char_idx": 0, "text": "text", "text_template": "text_template" }, "class_name": "class_name", "score": 0 } ], "class_name": "class_name", "image_nodes": [ { "node": { "file_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "image_size": 0, "page_index": 0, "metadata": { "foo": "bar" } }, "score": 0, "class_name": "class_name" } ], "inferred_search_filters": { "filters": [ { "key": "key", "value": 0, "operator": "==" } ], "condition": "and" }, "metadata": { "foo": "string" }, "page_figure_nodes": [ { "node": { "confidence": 0, "figure_name": "figure_name", "figure_size": 0, "file_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "page_index": 0, "is_likely_noise": true, "metadata": { "foo": "bar" } }, "score": 0, 
"class_name": "class_name" } ], "retrieval_latency": { "foo": 0 } } ``` ## Domain Types ### Advanced Mode Transform Config - `class AdvancedModeTransformConfig:` - `Optional chunkingConfig` Configuration for the chunking. - `class NoneChunkingConfig:` - `Optional mode` - `NONE("none")` - `class CharacterChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `CHARACTER("character")` - `class TokenChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `TOKEN("token")` - `Optional separator` - `class SentenceChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `SENTENCE("sentence")` - `Optional paragraphSeparator` - `Optional separator` - `class SemanticChunkingConfig:` - `Optional breakpointPercentileThreshold` - `Optional bufferSize` - `Optional mode` - `SEMANTIC("semantic")` - `Optional mode` - `ADVANCED("advanced")` - `Optional segmentationConfig` Configuration for the segmentation. - `class NoneSegmentationConfig:` - `Optional mode` - `NONE("none")` - `class PageSegmentationConfig:` - `Optional mode` - `PAGE("page")` - `Optional pageSeparator` - `class ElementSegmentationConfig:` - `Optional mode` - `ELEMENT("element")` ### Auto Transform Config - `class AutoTransformConfig:` - `Optional chunkOverlap` Chunk overlap for the transformation. - `Optional chunkSize` Chunk size for the transformation. - `Optional mode` - `AUTO("auto")` ### Azure OpenAI Embedding - `class AzureOpenAIEmbedding:` - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. - `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. 
- `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. ### Azure OpenAI Embedding Config - `class AzureOpenAIEmbeddingConfig:` - `Optional component` Configuration for the Azure OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. - `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `AZURE_EMBEDDING("AZURE_EMBEDDING")` ### Bedrock Embedding - `class BedrockEmbedding:` - `Optional additionalKwargs` Additional kwargs for the bedrock client. 
- `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. - `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of the AWS profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. Uses the region configured in the AWS CLI if not passed. - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. ### Bedrock Embedding Config - `class BedrockEmbeddingConfig:` - `Optional component` Configuration for the Bedrock embedding model. - `Optional additionalKwargs` Additional kwargs for the bedrock client. - `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. - `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of the AWS profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. Uses the region configured in the AWS CLI if not passed. - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. - `Optional type` Type of the embedding model. - `BEDROCK_EMBEDDING("BEDROCK_EMBEDDING")` ### Cohere Embedding - `class CohereEmbedding:` - `Optional apiKey` The Cohere API key. 
- `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided float embedding_type is used when needed. - `Optional inputType` Model Input type. If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional truncate` Truncation type - START/ END/ NONE ### Cohere Embedding Config - `class CohereEmbeddingConfig:` - `Optional component` Configuration for the Cohere embedding model. - `Optional apiKey` The Cohere API key. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided float embedding_type is used when needed. - `Optional inputType` Model Input type. If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional truncate` Truncation type - START/ END/ NONE - `Optional type` Type of the embedding model. - `COHERE_EMBEDDING("COHERE_EMBEDDING")` ### Data Sink Create - `class DataSinkCreate:` Schema for creating a data sink. - `Component component` Component that implements the data sink - `class UnionMember0:` - `class CloudPineconeVectorStore:` Cloud Pinecone Vector Store. This class is used to store the configuration for a Pinecone vector store, so that it can be created and used in LlamaCloud. 
Args: api_key (str): API key for authenticating with Pinecone index_name (str): name of the Pinecone index namespace (optional[str]): namespace to use in the Pinecone index insert_kwargs (optional[dict]): additional kwargs to pass during insertion - `String apiKey` The API key for authenticating with Pinecone - `String indexName` - `Optional className` - `Optional insertKwargs` - `Optional namespace` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudPostgresVectorStore:` - `String database` - `long embedDim` - `String host` - `String password` - `long port` - `String schemaName` - `String tableName` - `String user` - `Optional className` - `Optional hnswSettings` HNSW settings for PGVector. - `Optional distanceMethod` The distance method to use. - `L2("l2")` - `IP("ip")` - `COSINE("cosine")` - `L1("l1")` - `HAMMING("hamming")` - `JACCARD("jaccard")` - `Optional efConstruction` The number of edges to use during the construction phase. - `Optional efSearch` The number of edges to use during the search phase. - `Optional m` The number of bi-directional links created for each new element. - `Optional vectorType` The type of vector to use. - `VECTOR("vector")` - `HALF_VEC("half_vec")` - `BIT("bit")` - `SPARSE_VEC("sparse_vec")` - `Optional hybridSearch` - `Optional performSetup` - `Optional supportsNestedMetadataFilters` - `class CloudQdrantVectorStore:` Cloud Qdrant Vector Store. This class is used to store the configuration for a Qdrant vector store, so that it can be created and used in LlamaCloud. Args: collection_name (str): name of the Qdrant collection url (str): url of the Qdrant instance api_key (str): API key for authenticating with Qdrant max_retries (int): maximum number of retries in case of a failure. 
Defaults to 3 client_kwargs (dict): additional kwargs to pass to the Qdrant client - `String apiKey` - `String collectionName` - `String url` - `Optional className` - `Optional clientKwargs` - `Optional maxRetries` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudAzureAiSearchVectorStore:` Cloud Azure AI Search Vector Store. - `String searchServiceApiKey` - `String searchServiceEndpoint` - `Optional className` - `Optional clientId` - `Optional clientSecret` - `Optional embeddingDimension` - `Optional filterableMetadataFieldKeys` - `Optional indexName` - `Optional searchServiceApiVersion` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `Optional tenantId` - `class CloudMongoDBAtlasVectorSearch:` Cloud MongoDB Atlas Vector Store. This class is used to store the configuration for a MongoDB Atlas vector store, so that it can be created and used in LlamaCloud. Args: mongodb_uri (str): URI for connecting to MongoDB Atlas db_name (str): name of the MongoDB database collection_name (str): name of the MongoDB collection vector_index_name (str): name of the MongoDB Atlas vector index fulltext_index_name (str): name of the MongoDB Atlas full-text index - `String collectionName` - `String dbName` - `String mongoDBUri` - `Optional className` - `Optional embeddingDimension` - `Optional fulltextIndexName` - `Optional supportsNestedMetadataFilters` - `Optional vectorIndexName` - `class CloudMilvusVectorStore:` Cloud Milvus Vector Store. - `String uri` - `Optional token` - `Optional className` - `Optional collectionName` - `Optional embeddingDimension` - `Optional supportsNestedMetadataFilters` - `class CloudAstraDbVectorStore:` Cloud AstraDB Vector Store. This class is used to store the configuration for an AstraDB vector store, so that it can be created and used in LlamaCloud. Args: token (str): The Astra DB Application Token to use. api_endpoint (str): The Astra DB JSON API endpoint for your database. 
collection_name (str): Collection name to use. If it does not exist, it will be created. embedding_dimension (int): Length of the embedding vectors in use. keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace' is used. - `String token` The Astra DB Application Token to use - `String apiEndpoint` The Astra DB JSON API endpoint for your database - `String collectionName` Collection name to use. If it does not exist, it will be created - `long embeddingDimension` Length of the embedding vectors in use - `Optional className` - `Optional keyspace` The keyspace to use. If not provided, 'default_keyspace' is used. - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `String name` The name of the data sink. - `SinkType sinkType` - `PINECONE("PINECONE")` - `POSTGRES("POSTGRES")` - `QDRANT("QDRANT")` - `AZUREAI_SEARCH("AZUREAI_SEARCH")` - `MONGODB_ATLAS("MONGODB_ATLAS")` - `MILVUS("MILVUS")` - `ASTRA_DB("ASTRA_DB")` ### Gemini Embedding - `class GeminiEmbedding:` - `Optional apiBase` API base to access the model. Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. - `Optional transport` Transport to access the model. Defaults to None. ### Gemini Embedding Config - `class GeminiEmbeddingConfig:` - `Optional component` Configuration for the Gemini embedding model. - `Optional apiBase` API base to access the model. 
Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. - `Optional transport` Transport to access the model. Defaults to None. - `Optional type` Type of the embedding model. - `GEMINI_EMBEDDING("GEMINI_EMBEDDING")` ### Hugging Face Inference API Embedding - `class HuggingFaceInferenceApiEmbedding:` - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. - `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional pooling` Enum of possible pooling choices with pooling behaviors. - `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. 
- `Optional textInstruction` Instruction to prepend during text embedding. - `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. ### Hugging Face Inference API Embedding Config - `class HuggingFaceInferenceApiEmbeddingConfig:` - `Optional component` Configuration for the HuggingFace Inference API embedding model. - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. - `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional pooling` Enum of possible pooling choices with pooling behaviors. - `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. - `Optional textInstruction` Instruction to prepend during text embedding. - `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. - `Optional type` Type of the embedding model. 
- `HUGGINGFACE_API_EMBEDDING("HUGGINGFACE_API_EMBEDDING")` ### Llama Parse Parameters - `class LlamaParseParameters:` - `Optional adaptiveLongTable` - `Optional aggressiveTableExtraction` - `Optional annotateLinks` - `Optional autoMode` - `Optional autoModeConfigurationJson` - `Optional autoModeTriggerOnImageInPage` - `Optional autoModeTriggerOnRegexpInPage` - `Optional autoModeTriggerOnTableInPage` - `Optional autoModeTriggerOnTextInPage` - `Optional azureOpenAIApiVersion` - `Optional azureOpenAIDeploymentName` - `Optional azureOpenAIEndpoint` - `Optional azureOpenAIKey` - `Optional bboxBottom` - `Optional bboxLeft` - `Optional bboxRight` - `Optional bboxTop` - `Optional boundingBox` - `Optional compactMarkdownTable` - `Optional complementalFormattingInstruction` - `Optional contentGuidelineInstruction` - `Optional continuousMode` - `Optional disableImageExtraction` - `Optional disableOcr` - `Optional disableReconstruction` - `Optional doNotCache` - `Optional doNotUnrollColumns` - `Optional enableCostOptimizer` - `Optional extractCharts` - `Optional extractLayout` - `Optional extractPrintedPageNumber` - `Optional fastMode` - `Optional formattingInstruction` - `Optional gpt4oApiKey` - `Optional gpt4oMode` - `Optional guessXlsxSheetName` - `Optional hideFooters` - `Optional hideHeaders` - `Optional highResOcr` - `Optional htmlMakeAllElementsVisible` - `Optional htmlRemoveFixedElements` - `Optional htmlRemoveNavigationElements` - `Optional httpProxy` - `Optional ignoreDocumentElementsForLayoutDetection` - `Optional> imagesToSave` - `SCREENSHOT("screenshot")` - `EMBEDDED("embedded")` - `LAYOUT("layout")` - `Optional inlineImagesInMarkdown` - `Optional inputS3Path` - `Optional inputS3Region` - `Optional inputUrl` - `Optional internalIsScreenshotJob` - `Optional invalidateCache` - `Optional isFormattingInstruction` - `Optional jobTimeoutExtraTimePerPageInSeconds` - `Optional jobTimeoutInSeconds` - `Optional keepPageSeparatorWhenMergingTables` - `Optional> languages` - 
`AF("af")` - `AZ("az")` - `BS("bs")` - `CS("cs")` - `CY("cy")` - `DA("da")` - `DE("de")` - `EN("en")` - `ES("es")` - `ET("et")` - `FR("fr")` - `GA("ga")` - `HR("hr")` - `HU("hu")` - `ID("id")` - `IS("is")` - `IT("it")` - `KU("ku")` - `LA("la")` - `LT("lt")` - `LV("lv")` - `MI("mi")` - `MS("ms")` - `MT("mt")` - `NL("nl")` - `NO("no")` - `OC("oc")` - `PI("pi")` - `PL("pl")` - `PT("pt")` - `RO("ro")` - `RS_LATIN("rs_latin")` - `SK("sk")` - `SL("sl")` - `SQ("sq")` - `SV("sv")` - `SW("sw")` - `TL("tl")` - `TR("tr")` - `UZ("uz")` - `VI("vi")` - `AR("ar")` - `FA("fa")` - `UG("ug")` - `UR("ur")` - `BN("bn")` - `AS("as")` - `MNI("mni")` - `RU("ru")` - `RS_CYRILLIC("rs_cyrillic")` - `BE("be")` - `BG("bg")` - `UK("uk")` - `MN("mn")` - `ABQ("abq")` - `ADY("ady")` - `KBD("kbd")` - `AVA("ava")` - `DAR("dar")` - `INH("inh")` - `CHE("che")` - `LBE("lbe")` - `LEZ("lez")` - `TAB("tab")` - `TJK("tjk")` - `HI("hi")` - `MR("mr")` - `NE("ne")` - `BH("bh")` - `MAI("mai")` - `ANG("ang")` - `BHO("bho")` - `MAH("mah")` - `SCK("sck")` - `NEW("new")` - `GOM("gom")` - `SA("sa")` - `BGC("bgc")` - `TH("th")` - `CH_SIM("ch_sim")` - `CH_TRA("ch_tra")` - `JA("ja")` - `KO("ko")` - `TA("ta")` - `TE("te")` - `KN("kn")` - `Optional layoutAware` - `Optional lineLevelBoundingBox` - `Optional markdownTableMultilineHeaderSeparator` - `Optional maxPages` - `Optional maxPagesEnforced` - `Optional mergeTablesAcrossPagesInMarkdown` - `Optional model` - `Optional outlinedTableExtraction` - `Optional outputPdfOfDocument` - `Optional outputS3PathPrefix` - `Optional outputS3Region` - `Optional outputTablesAsHtml` - `Optional pageErrorTolerance` - `Optional pageFooterPrefix` - `Optional pageFooterSuffix` - `Optional pageHeaderPrefix` - `Optional pageHeaderSuffix` - `Optional pagePrefix` - `Optional pageSeparator` - `Optional pageSuffix` - `Optional parseMode` Enum for representing the mode of parsing to be used. 
- `PARSE_PAGE_WITHOUT_LLM("parse_page_without_llm")` - `PARSE_PAGE_WITH_LLM("parse_page_with_llm")` - `PARSE_PAGE_WITH_LVM("parse_page_with_lvm")` - `PARSE_PAGE_WITH_AGENT("parse_page_with_agent")` - `PARSE_PAGE_WITH_LAYOUT_AGENT("parse_page_with_layout_agent")` - `PARSE_DOCUMENT_WITH_LLM("parse_document_with_llm")` - `PARSE_DOCUMENT_WITH_LVM("parse_document_with_lvm")` - `PARSE_DOCUMENT_WITH_AGENT("parse_document_with_agent")` - `Optional parsingInstruction` - `Optional preciseBoundingBox` - `Optional premiumMode` - `Optional presentationOutOfBoundsContent` - `Optional presentationSkipEmbeddedData` - `Optional preserveLayoutAlignmentAcrossPages` - `Optional preserveVerySmallText` - `Optional preset` - `Optional priority` The priority for the request. This field may be ignored or overwritten depending on the organization tier. - `LOW("low")` - `MEDIUM("medium")` - `HIGH("high")` - `CRITICAL("critical")` - `Optional projectId` - `Optional removeHiddenText` - `Optional replaceFailedPageMode` Enum for representing the different available page error handling modes. 
- `RAW_TEXT("raw_text")` - `BLANK_PAGE("blank_page")` - `ERROR_MESSAGE("error_message")` - `Optional replaceFailedPageWithErrorMessagePrefix` - `Optional replaceFailedPageWithErrorMessageSuffix` - `Optional saveImages` - `Optional skipDiagonalText` - `Optional specializedChartParsingAgentic` - `Optional specializedChartParsingEfficient` - `Optional specializedChartParsingPlus` - `Optional specializedImageParsing` - `Optional spreadsheetExtractSubTables` - `Optional spreadsheetForceFormulaComputation` - `Optional spreadsheetIncludeHiddenSheets` - `Optional strictModeBuggyFont` - `Optional strictModeImageExtraction` - `Optional strictModeImageOcr` - `Optional strictModeReconstruction` - `Optional structuredOutput` - `Optional structuredOutputJsonSchema` - `Optional structuredOutputJsonSchemaName` - `Optional systemPrompt` - `Optional systemPromptAppend` - `Optional takeScreenshot` - `Optional targetPages` - `Optional tier` - `Optional useVendorMultimodalModel` - `Optional userPrompt` - `Optional vendorMultimodalApiKey` - `Optional vendorMultimodalModelName` - `Optional version` - `Optional> webhookConfigurations` Outbound webhook endpoints to notify on job status changes - `Optional> webhookEvents` Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered. 
- `EXTRACT_PENDING("extract.pending")` - `EXTRACT_SUCCESS("extract.success")` - `EXTRACT_ERROR("extract.error")` - `EXTRACT_PARTIAL_SUCCESS("extract.partial_success")` - `EXTRACT_CANCELLED("extract.cancelled")` - `PARSE_PENDING("parse.pending")` - `PARSE_RUNNING("parse.running")` - `PARSE_SUCCESS("parse.success")` - `PARSE_ERROR("parse.error")` - `PARSE_PARTIAL_SUCCESS("parse.partial_success")` - `PARSE_CANCELLED("parse.cancelled")` - `CLASSIFY_PENDING("classify.pending")` - `CLASSIFY_RUNNING("classify.running")` - `CLASSIFY_SUCCESS("classify.success")` - `CLASSIFY_ERROR("classify.error")` - `CLASSIFY_PARTIAL_SUCCESS("classify.partial_success")` - `CLASSIFY_CANCELLED("classify.cancelled")` - `SHEETS_PENDING("sheets.pending")` - `SHEETS_SUCCESS("sheets.success")` - `SHEETS_ERROR("sheets.error")` - `SHEETS_PARTIAL_SUCCESS("sheets.partial_success")` - `SHEETS_CANCELLED("sheets.cancelled")` - `UNMAPPED_EVENT("unmapped_event")` - `Optional webhookHeaders` Custom HTTP headers sent with each webhook request (e.g. auth tokens) - `Optional webhookOutputFormat` Response format sent to the webhook: 'string' (default) or 'json' - `Optional webhookUrl` URL to receive webhook POST notifications - `Optional webhookUrl` ### Llm Parameters - `class LlmParameters:` - `Optional className` - `Optional modelName` The name of the model to use for LLM completions. 
- `GPT_4_O("GPT_4O")` - `GPT_4_O_MINI("GPT_4O_MINI")` - `GPT_4_1("GPT_4_1")` - `GPT_4_1_NANO("GPT_4_1_NANO")` - `GPT_4_1_MINI("GPT_4_1_MINI")` - `AZURE_OPENAI_GPT_4_O("AZURE_OPENAI_GPT_4O")` - `AZURE_OPENAI_GPT_4_O_MINI("AZURE_OPENAI_GPT_4O_MINI")` - `AZURE_OPENAI_GPT_4_1("AZURE_OPENAI_GPT_4_1")` - `AZURE_OPENAI_GPT_4_1_MINI("AZURE_OPENAI_GPT_4_1_MINI")` - `AZURE_OPENAI_GPT_4_1_NANO("AZURE_OPENAI_GPT_4_1_NANO")` - `CLAUDE_4_5_SONNET("CLAUDE_4_5_SONNET")` - `BEDROCK_CLAUDE_3_5_SONNET_V1("BEDROCK_CLAUDE_3_5_SONNET_V1")` - `BEDROCK_CLAUDE_3_5_SONNET_V2("BEDROCK_CLAUDE_3_5_SONNET_V2")` - `Optional systemPrompt` The system prompt to use for the completion. - `Optional temperature` The temperature value for the model. - `Optional useChainOfThoughtReasoning` Whether to use chain of thought reasoning. - `Optional useCitation` Whether to show citations in the response. ### Managed Ingestion Status Response - `class ManagedIngestionStatusResponse:` - `Status status` Status of the ingestion. - `NOT_STARTED("NOT_STARTED")` - `IN_PROGRESS("IN_PROGRESS")` - `SUCCESS("SUCCESS")` - `ERROR("ERROR")` - `PARTIAL_SUCCESS("PARTIAL_SUCCESS")` - `CANCELLED("CANCELLED")` - `Optional deploymentDate` Date of the deployment. - `Optional effectiveAt` When the status is effective - `Optional> error` List of errors that occurred during ingestion. - `String jobId` ID of the job that failed. - `String message` List of errors that occurred during ingestion. - `Step step` Name of the job that failed. - `MANAGED_INGESTION("MANAGED_INGESTION")` - `DATA_SOURCE("DATA_SOURCE")` - `FILE_UPDATER("FILE_UPDATER")` - `PARSE("PARSE")` - `TRANSFORM("TRANSFORM")` - `INGESTION("INGESTION")` - `METADATA_UPDATE("METADATA_UPDATE")` - `Optional jobId` ID of the latest job. ### Message Role - `enum MessageRole:` Message role. 
- `SYSTEM("system")` - `DEVELOPER("developer")` - `USER("user")` - `ASSISTANT("assistant")` - `FUNCTION("function")` - `TOOL("tool")` - `CHATBOT("chatbot")` - `MODEL("model")` ### Metadata Filters - `class MetadataFilters:` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators. Value uses Strict types, as int, float and str are compatible types and were all converted to string before. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. - `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` ### OpenAI Embedding - `class OpenAIEmbedding:` - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for OpenAI API. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. 
When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. ### OpenAI Embedding Config - `class OpenAIEmbeddingConfig:` - `Optional component` Configuration for the OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for OpenAI API. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. 
- `OPENAI_EMBEDDING("OPENAI_EMBEDDING")` ### Page Figure Node With Score - `class PageFigureNodeWithScore:` Page figure metadata with score - `Node node` - `double confidence` The confidence of the figure - `String figureName` The name of the figure - `long figureSize` The size of the figure in bytes - `String fileId` The ID of the file that the figure was taken from - `long pageIndex` The index of the page for which the figure is taken (0-indexed) - `Optional isLikelyNoise` Whether the figure is likely to be noise - `Optional metadata` Metadata for the figure - `double score` The score of the figure node - `Optional className` ### Page Screenshot Node With Score - `class PageScreenshotNodeWithScore:` Page screenshot metadata with score - `Node node` - `String fileId` The ID of the file that the page screenshot was taken from - `long imageSize` The size of the image in bytes - `long pageIndex` The index of the page for which the screenshot is taken (0-indexed) - `Optional metadata` Metadata for the screenshot - `double score` The score of the screenshot node - `Optional className` ### Pipeline - `class Pipeline:` Schema for a pipeline. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` - `class ManagedOpenAIEmbedding:` - `Optional component` Configuration for the Managed OpenAI embedding model. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The name of the OpenAI embedding model. - `OPENAI_TEXT_EMBEDDING_3_SMALL("openai-text-embedding-3-small")` - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `MANAGED_OPENAI_EMBEDDING("MANAGED_OPENAI_EMBEDDING")` - `class AzureOpenAIEmbeddingConfig:` - `Optional component` Configuration for the Azure OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. 
- `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. - `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `AZURE_EMBEDDING("AZURE_EMBEDDING")` - `class CohereEmbeddingConfig:` - `Optional component` Configuration for the Cohere embedding model. - `Optional apiKey` The Cohere API key. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided, the float embedding_type is used when needed. - `Optional inputType` Model Input type. If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional truncate` Truncation type: START, END, or NONE. - `Optional type` Type of the embedding model. - `COHERE_EMBEDDING("COHERE_EMBEDDING")` - `class GeminiEmbeddingConfig:` - `Optional component` Configuration for the Gemini embedding model. - `Optional apiBase` API base to access the model. Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. 
- `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. - `Optional transport` Transport to access the model. Defaults to None. - `Optional type` Type of the embedding model. - `GEMINI_EMBEDDING("GEMINI_EMBEDDING")` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `Optional component` Configuration for the HuggingFace Inference API embedding model. - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. - `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional pooling` Enum of possible pooling choices with pooling behaviors. - `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. 
- `Optional textInstruction` Instruction to prepend during text embedding. - `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. - `Optional type` Type of the embedding model. - `HUGGINGFACE_API_EMBEDDING("HUGGINGFACE_API_EMBEDDING")` - `class OpenAIEmbeddingConfig:` - `Optional component` Configuration for the OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for OpenAI API. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `OPENAI_EMBEDDING("OPENAI_EMBEDDING")` - `class VertexAiEmbeddingConfig:` - `Optional component` Configuration for the VertexAI embedding model. - `Optional clientEmail` The client email for the VertexAI credentials. - `String location` The default location to use when making API calls. - `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. 
- `Optional tokenUri` The token URI for the VertexAI credentials. - `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `VERTEXAI_EMBEDDING("VERTEXAI_EMBEDDING")` - `class BedrockEmbeddingConfig:` - `Optional component` Configuration for the Bedrock embedding model. - `Optional additionalKwargs` Additional kwargs for the Bedrock client. - `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. - `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of the AWS profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. Uses the region configured in the AWS CLI if not passed. - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. - `Optional type` Type of the embedding model. - `BEDROCK_EMBEDDING("BEDROCK_EMBEDDING")` - `String name` - `String projectId` - `Optional configHash` Hashes for the configuration of a pipeline. - `Optional embeddingConfigHash` Hash of the embedding config. - `Optional parsingConfigHash` Hash of the llama parse parameters. - `Optional transformConfigHash` Hash of the transform config. 
- `Optional createdAt` Creation datetime - `Optional dataSink` Schema for a data sink. - `String id` Unique identifier - `Component component` Component that implements the data sink - `class UnionMember0:` - `class CloudPineconeVectorStore:` Cloud Pinecone Vector Store. This class is used to store the configuration for a Pinecone vector store, so that it can be created and used in LlamaCloud. Args: api_key (str): API key for authenticating with Pinecone index_name (str): name of the Pinecone index namespace (optional[str]): namespace to use in the Pinecone index insert_kwargs (optional[dict]): additional kwargs to pass during insertion - `String apiKey` The API key for authenticating with Pinecone - `String indexName` - `Optional className` - `Optional insertKwargs` - `Optional namespace` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudPostgresVectorStore:` - `String database` - `long embedDim` - `String host` - `String password` - `long port` - `String schemaName` - `String tableName` - `String user` - `Optional className` - `Optional hnswSettings` HNSW settings for PGVector. - `Optional distanceMethod` The distance method to use. - `L2("l2")` - `IP("ip")` - `COSINE("cosine")` - `L1("l1")` - `HAMMING("hamming")` - `JACCARD("jaccard")` - `Optional efConstruction` The number of edges to use during the construction phase. - `Optional efSearch` The number of edges to use during the search phase. - `Optional m` The number of bi-directional links created for each new element. - `Optional vectorType` The type of vector to use. - `VECTOR("vector")` - `HALF_VEC("half_vec")` - `BIT("bit")` - `SPARSE_VEC("sparse_vec")` - `Optional hybridSearch` - `Optional performSetup` - `Optional supportsNestedMetadataFilters` - `class CloudQdrantVectorStore:` Cloud Qdrant Vector Store. This class is used to store the configuration for a Qdrant vector store, so that it can be created and used in LlamaCloud. 
Args: collection_name (str): name of the Qdrant collection url (str): url of the Qdrant instance api_key (str): API key for authenticating with Qdrant max_retries (int): maximum number of retries in case of a failure. Defaults to 3 client_kwargs (dict): additional kwargs to pass to the Qdrant client - `String apiKey` - `String collectionName` - `String url` - `Optional className` - `Optional clientKwargs` - `Optional maxRetries` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudAzureAiSearchVectorStore:` Cloud Azure AI Search Vector Store. - `String searchServiceApiKey` - `String searchServiceEndpoint` - `Optional className` - `Optional clientId` - `Optional clientSecret` - `Optional embeddingDimension` - `Optional filterableMetadataFieldKeys` - `Optional indexName` - `Optional searchServiceApiVersion` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `Optional tenantId` - `class CloudMongoDBAtlasVectorSearch:` Cloud MongoDB Atlas Vector Store. This class is used to store the configuration for a MongoDB Atlas vector store, so that it can be created and used in LlamaCloud. Args: mongodb_uri (str): URI for connecting to MongoDB Atlas db_name (str): name of the MongoDB database collection_name (str): name of the MongoDB collection vector_index_name (str): name of the MongoDB Atlas vector index fulltext_index_name (str): name of the MongoDB Atlas full-text index - `String collectionName` - `String dbName` - `String mongoDBUri` - `Optional className` - `Optional embeddingDimension` - `Optional fulltextIndexName` - `Optional supportsNestedMetadataFilters` - `Optional vectorIndexName` - `class CloudMilvusVectorStore:` Cloud Milvus Vector Store. - `String uri` - `Optional token` - `Optional className` - `Optional collectionName` - `Optional embeddingDimension` - `Optional supportsNestedMetadataFilters` - `class CloudAstraDbVectorStore:` Cloud AstraDB Vector Store. 
This class is used to store the configuration for an AstraDB vector store, so that it can be created and used in LlamaCloud. Args: token (str): The Astra DB Application Token to use. api_endpoint (str): The Astra DB JSON API endpoint for your database. collection_name (str): Collection name to use. If not existing, it will be created. embedding_dimension (int): Length of the embedding vectors in use. keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace' - `String token` The Astra DB Application Token to use - `String apiEndpoint` The Astra DB JSON API endpoint for your database - `String collectionName` Collection name to use. If not existing, it will be created - `long embeddingDimension` Length of the embedding vectors in use - `Optional className` - `Optional keyspace` The keyspace to use. If not provided, 'default_keyspace' - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `String name` The name of the data sink. - `String projectId` - `SinkType sinkType` - `PINECONE("PINECONE")` - `POSTGRES("POSTGRES")` - `QDRANT("QDRANT")` - `AZUREAI_SEARCH("AZUREAI_SEARCH")` - `MONGODB_ATLAS("MONGODB_ATLAS")` - `MILVUS("MILVUS")` - `ASTRA_DB("ASTRA_DB")` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfig` Schema for an embedding model config. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` The embedding configuration for the embedding model config. - `class AzureOpenAIEmbeddingConfig:` - `class CohereEmbeddingConfig:` - `class GeminiEmbeddingConfig:` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `class OpenAIEmbeddingConfig:` - `class VertexAiEmbeddingConfig:` - `class BedrockEmbeddingConfig:` - `String name` The name of the embedding model config. - `String projectId` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfigId` The ID of the EmbeddingModelConfig this pipeline is using. 
- `Optional llamaParseParameters` Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline. - `Optional adaptiveLongTable` - `Optional aggressiveTableExtraction` - `Optional annotateLinks` - `Optional autoMode` - `Optional autoModeConfigurationJson` - `Optional autoModeTriggerOnImageInPage` - `Optional autoModeTriggerOnRegexpInPage` - `Optional autoModeTriggerOnTableInPage` - `Optional autoModeTriggerOnTextInPage` - `Optional azureOpenAIApiVersion` - `Optional azureOpenAIDeploymentName` - `Optional azureOpenAIEndpoint` - `Optional azureOpenAIKey` - `Optional bboxBottom` - `Optional bboxLeft` - `Optional bboxRight` - `Optional bboxTop` - `Optional boundingBox` - `Optional compactMarkdownTable` - `Optional complementalFormattingInstruction` - `Optional contentGuidelineInstruction` - `Optional continuousMode` - `Optional disableImageExtraction` - `Optional disableOcr` - `Optional disableReconstruction` - `Optional doNotCache` - `Optional doNotUnrollColumns` - `Optional enableCostOptimizer` - `Optional extractCharts` - `Optional extractLayout` - `Optional extractPrintedPageNumber` - `Optional fastMode` - `Optional formattingInstruction` - `Optional gpt4oApiKey` - `Optional gpt4oMode` - `Optional guessXlsxSheetName` - `Optional hideFooters` - `Optional hideHeaders` - `Optional highResOcr` - `Optional htmlMakeAllElementsVisible` - `Optional htmlRemoveFixedElements` - `Optional htmlRemoveNavigationElements` - `Optional httpProxy` - `Optional ignoreDocumentElementsForLayoutDetection` - `Optional> imagesToSave` - `SCREENSHOT("screenshot")` - `EMBEDDED("embedded")` - `LAYOUT("layout")` - `Optional inlineImagesInMarkdown` - `Optional inputS3Path` - `Optional inputS3Region` - `Optional inputUrl` - `Optional internalIsScreenshotJob` - `Optional invalidateCache` - `Optional isFormattingInstruction` - `Optional jobTimeoutExtraTimePerPageInSeconds` - `Optional jobTimeoutInSeconds` - `Optional keepPageSeparatorWhenMergingTables` - 
`Optional> languages` - `AF("af")` - `AZ("az")` - `BS("bs")` - `CS("cs")` - `CY("cy")` - `DA("da")` - `DE("de")` - `EN("en")` - `ES("es")` - `ET("et")` - `FR("fr")` - `GA("ga")` - `HR("hr")` - `HU("hu")` - `ID("id")` - `IS("is")` - `IT("it")` - `KU("ku")` - `LA("la")` - `LT("lt")` - `LV("lv")` - `MI("mi")` - `MS("ms")` - `MT("mt")` - `NL("nl")` - `NO("no")` - `OC("oc")` - `PI("pi")` - `PL("pl")` - `PT("pt")` - `RO("ro")` - `RS_LATIN("rs_latin")` - `SK("sk")` - `SL("sl")` - `SQ("sq")` - `SV("sv")` - `SW("sw")` - `TL("tl")` - `TR("tr")` - `UZ("uz")` - `VI("vi")` - `AR("ar")` - `FA("fa")` - `UG("ug")` - `UR("ur")` - `BN("bn")` - `AS("as")` - `MNI("mni")` - `RU("ru")` - `RS_CYRILLIC("rs_cyrillic")` - `BE("be")` - `BG("bg")` - `UK("uk")` - `MN("mn")` - `ABQ("abq")` - `ADY("ady")` - `KBD("kbd")` - `AVA("ava")` - `DAR("dar")` - `INH("inh")` - `CHE("che")` - `LBE("lbe")` - `LEZ("lez")` - `TAB("tab")` - `TJK("tjk")` - `HI("hi")` - `MR("mr")` - `NE("ne")` - `BH("bh")` - `MAI("mai")` - `ANG("ang")` - `BHO("bho")` - `MAH("mah")` - `SCK("sck")` - `NEW("new")` - `GOM("gom")` - `SA("sa")` - `BGC("bgc")` - `TH("th")` - `CH_SIM("ch_sim")` - `CH_TRA("ch_tra")` - `JA("ja")` - `KO("ko")` - `TA("ta")` - `TE("te")` - `KN("kn")` - `Optional layoutAware` - `Optional lineLevelBoundingBox` - `Optional markdownTableMultilineHeaderSeparator` - `Optional maxPages` - `Optional maxPagesEnforced` - `Optional mergeTablesAcrossPagesInMarkdown` - `Optional model` - `Optional outlinedTableExtraction` - `Optional outputPdfOfDocument` - `Optional outputS3PathPrefix` - `Optional outputS3Region` - `Optional outputTablesAsHtml` - `Optional pageErrorTolerance` - `Optional pageFooterPrefix` - `Optional pageFooterSuffix` - `Optional pageHeaderPrefix` - `Optional pageHeaderSuffix` - `Optional pagePrefix` - `Optional pageSeparator` - `Optional pageSuffix` - `Optional parseMode` Enum for representing the mode of parsing to be used. 
- `PARSE_PAGE_WITHOUT_LLM("parse_page_without_llm")` - `PARSE_PAGE_WITH_LLM("parse_page_with_llm")` - `PARSE_PAGE_WITH_LVM("parse_page_with_lvm")` - `PARSE_PAGE_WITH_AGENT("parse_page_with_agent")` - `PARSE_PAGE_WITH_LAYOUT_AGENT("parse_page_with_layout_agent")` - `PARSE_DOCUMENT_WITH_LLM("parse_document_with_llm")` - `PARSE_DOCUMENT_WITH_LVM("parse_document_with_lvm")` - `PARSE_DOCUMENT_WITH_AGENT("parse_document_with_agent")` - `Optional parsingInstruction` - `Optional preciseBoundingBox` - `Optional premiumMode` - `Optional presentationOutOfBoundsContent` - `Optional presentationSkipEmbeddedData` - `Optional preserveLayoutAlignmentAcrossPages` - `Optional preserveVerySmallText` - `Optional preset` - `Optional priority` The priority for the request. This field may be ignored or overwritten depending on the organization tier. - `LOW("low")` - `MEDIUM("medium")` - `HIGH("high")` - `CRITICAL("critical")` - `Optional projectId` - `Optional removeHiddenText` - `Optional replaceFailedPageMode` Enum for representing the different available page error handling modes. 
- `RAW_TEXT("raw_text")` - `BLANK_PAGE("blank_page")` - `ERROR_MESSAGE("error_message")` - `Optional replaceFailedPageWithErrorMessagePrefix` - `Optional replaceFailedPageWithErrorMessageSuffix` - `Optional saveImages` - `Optional skipDiagonalText` - `Optional specializedChartParsingAgentic` - `Optional specializedChartParsingEfficient` - `Optional specializedChartParsingPlus` - `Optional specializedImageParsing` - `Optional spreadsheetExtractSubTables` - `Optional spreadsheetForceFormulaComputation` - `Optional spreadsheetIncludeHiddenSheets` - `Optional strictModeBuggyFont` - `Optional strictModeImageExtraction` - `Optional strictModeImageOcr` - `Optional strictModeReconstruction` - `Optional structuredOutput` - `Optional structuredOutputJsonSchema` - `Optional structuredOutputJsonSchemaName` - `Optional systemPrompt` - `Optional systemPromptAppend` - `Optional takeScreenshot` - `Optional targetPages` - `Optional tier` - `Optional useVendorMultimodalModel` - `Optional userPrompt` - `Optional vendorMultimodalApiKey` - `Optional vendorMultimodalModelName` - `Optional version` - `Optional> webhookConfigurations` Outbound webhook endpoints to notify on job status changes - `Optional> webhookEvents` Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered. 
- `EXTRACT_PENDING("extract.pending")` - `EXTRACT_SUCCESS("extract.success")` - `EXTRACT_ERROR("extract.error")` - `EXTRACT_PARTIAL_SUCCESS("extract.partial_success")` - `EXTRACT_CANCELLED("extract.cancelled")` - `PARSE_PENDING("parse.pending")` - `PARSE_RUNNING("parse.running")` - `PARSE_SUCCESS("parse.success")` - `PARSE_ERROR("parse.error")` - `PARSE_PARTIAL_SUCCESS("parse.partial_success")` - `PARSE_CANCELLED("parse.cancelled")` - `CLASSIFY_PENDING("classify.pending")` - `CLASSIFY_RUNNING("classify.running")` - `CLASSIFY_SUCCESS("classify.success")` - `CLASSIFY_ERROR("classify.error")` - `CLASSIFY_PARTIAL_SUCCESS("classify.partial_success")` - `CLASSIFY_CANCELLED("classify.cancelled")` - `SHEETS_PENDING("sheets.pending")` - `SHEETS_SUCCESS("sheets.success")` - `SHEETS_ERROR("sheets.error")` - `SHEETS_PARTIAL_SUCCESS("sheets.partial_success")` - `SHEETS_CANCELLED("sheets.cancelled")` - `UNMAPPED_EVENT("unmapped_event")` - `Optional webhookHeaders` Custom HTTP headers sent with each webhook request (e.g. auth tokens) - `Optional webhookOutputFormat` Response format sent to the webhook: 'string' (default) or 'json' - `Optional webhookUrl` URL to receive webhook POST notifications - `Optional webhookUrl` - `Optional managedPipelineId` The ID of the ManagedPipeline this playground pipeline is linked to. - `Optional metadataConfig` Metadata configuration for the pipeline. - `Optional> excludedEmbedMetadataKeys` List of metadata keys to exclude from embeddings - `Optional> excludedLlmMetadataKeys` List of metadata keys to exclude from LLM during retrieval - `Optional pipelineType` Type of pipeline. Either PLAYGROUND or MANAGED. - `PLAYGROUND("PLAYGROUND")` - `MANAGED("MANAGED")` - `Optional presetRetrievalParameters` Preset retrieval parameters for the pipeline. - `Optional alpha` Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval. 
- `Optional className` - `Optional denseSimilarityCutoff` Minimum similarity score with respect to the query for retrieval - `Optional denseSimilarityTopK` Number of nodes for dense retrieval. - `Optional enableReranking` Enable reranking for retrieval - `Optional filesTopK` Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content). - `Optional rerankTopN` Number of reranked nodes to return. - `Optional retrievalMode` The retrieval mode for the query. - `CHUNKS("chunks")` - `FILES_VIA_METADATA("files_via_metadata")` - `FILES_VIA_CONTENT("files_via_content")` - `AUTO_ROUTED("auto_routed")` - `Optional retrieveImageNodes` Whether to retrieve image nodes. - `Optional retrievePageFigureNodes` Whether to retrieve page figure nodes. - `Optional retrievePageScreenshotNodes` Whether to retrieve page screenshot nodes. - `Optional searchFilters` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators. Value uses Strict types, as int, float and str are compatible types and were all converted to string before. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. - `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` - `Optional searchFiltersInferenceSchema` JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference. 
- `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional sparseSimilarityTopK` Number of nodes for sparse retrieval. - `Optional sparseModelConfig` Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks. - `Optional className` - `Optional modelType` The sparse model type to use. 'bm25' uses Qdrant's FastEmbed BM25 model (default for new pipelines), 'splade' uses HuggingFace Splade model, 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade). - `SPLADE("splade")` - `BM25("bm25")` - `AUTO("auto")` - `Optional status` Status of the pipeline. - `CREATED("CREATED")` - `DELETING("DELETING")` - `Optional transformConfig` Configuration for the transformation. - `class AutoTransformConfig:` - `Optional chunkOverlap` Chunk overlap for the transformation. - `Optional chunkSize` Chunk size for the transformation. - `Optional mode` - `AUTO("auto")` - `class AdvancedModeTransformConfig:` - `Optional chunkingConfig` Configuration for the chunking. - `class NoneChunkingConfig:` - `Optional mode` - `NONE("none")` - `class CharacterChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `CHARACTER("character")` - `class TokenChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `TOKEN("token")` - `Optional separator` - `class SentenceChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `SENTENCE("sentence")` - `Optional paragraphSeparator` - `Optional separator` - `class SemanticChunkingConfig:` - `Optional breakpointPercentileThreshold` - `Optional bufferSize` - `Optional mode` - `SEMANTIC("semantic")` - `Optional mode` - `ADVANCED("advanced")` - `Optional segmentationConfig` Configuration for the segmentation. 
- `class NoneSegmentationConfig:` - `Optional mode` - `NONE("none")` - `class PageSegmentationConfig:` - `Optional mode` - `PAGE("page")` - `Optional pageSeparator` - `class ElementSegmentationConfig:` - `Optional mode` - `ELEMENT("element")` - `Optional updatedAt` Update datetime ### Pipeline Create - `class PipelineCreate:` Schema for creating a pipeline. - `String name` - `Optional dataSink` Schema for creating a data sink. - `Component component` Component that implements the data sink - `class UnionMember0:` - `class CloudPineconeVectorStore:` Cloud Pinecone Vector Store. This class is used to store the configuration for a Pinecone vector store, so that it can be created and used in LlamaCloud. Args: api_key (str): API key for authenticating with Pinecone index_name (str): name of the Pinecone index namespace (optional[str]): namespace to use in the Pinecone index insert_kwargs (optional[dict]): additional kwargs to pass during insertion - `String apiKey` The API key for authenticating with Pinecone - `String indexName` - `Optional className` - `Optional insertKwargs` - `Optional namespace` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudPostgresVectorStore:` - `String database` - `long embedDim` - `String host` - `String password` - `long port` - `String schemaName` - `String tableName` - `String user` - `Optional className` - `Optional hnswSettings` HNSW settings for PGVector. - `Optional distanceMethod` The distance method to use. - `L2("l2")` - `IP("ip")` - `COSINE("cosine")` - `L1("l1")` - `HAMMING("hamming")` - `JACCARD("jaccard")` - `Optional efConstruction` The number of edges to use during the construction phase. - `Optional efSearch` The number of edges to use during the search phase. - `Optional m` The number of bi-directional links created for each new element. - `Optional vectorType` The type of vector to use. 
- `VECTOR("vector")` - `HALF_VEC("half_vec")` - `BIT("bit")` - `SPARSE_VEC("sparse_vec")` - `Optional hybridSearch` - `Optional performSetup` - `Optional supportsNestedMetadataFilters` - `class CloudQdrantVectorStore:` Cloud Qdrant Vector Store. This class is used to store the configuration for a Qdrant vector store, so that it can be created and used in LlamaCloud. Args: collection_name (str): name of the Qdrant collection url (str): url of the Qdrant instance api_key (str): API key for authenticating with Qdrant max_retries (int): maximum number of retries in case of a failure. Defaults to 3 client_kwargs (dict): additional kwargs to pass to the Qdrant client - `String apiKey` - `String collectionName` - `String url` - `Optional className` - `Optional clientKwargs` - `Optional maxRetries` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudAzureAiSearchVectorStore:` Cloud Azure AI Search Vector Store. - `String searchServiceApiKey` - `String searchServiceEndpoint` - `Optional className` - `Optional clientId` - `Optional clientSecret` - `Optional embeddingDimension` - `Optional filterableMetadataFieldKeys` - `Optional indexName` - `Optional searchServiceApiVersion` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `Optional tenantId` - `class CloudMongoDBAtlasVectorSearch:` Cloud MongoDB Atlas Vector Store. This class is used to store the configuration for a MongoDB Atlas vector store, so that it can be created and used in LlamaCloud. 
Args: mongodb_uri (str): URI for connecting to MongoDB Atlas db_name (str): name of the MongoDB database collection_name (str): name of the MongoDB collection vector_index_name (str): name of the MongoDB Atlas vector index fulltext_index_name (str): name of the MongoDB Atlas full-text index - `String collectionName` - `String dbName` - `String mongoDBUri` - `Optional className` - `Optional embeddingDimension` - `Optional fulltextIndexName` - `Optional supportsNestedMetadataFilters` - `Optional vectorIndexName` - `class CloudMilvusVectorStore:` Cloud Milvus Vector Store. - `String uri` - `Optional token` - `Optional className` - `Optional collectionName` - `Optional embeddingDimension` - `Optional supportsNestedMetadataFilters` - `class CloudAstraDbVectorStore:` Cloud AstraDB Vector Store. This class is used to store the configuration for an AstraDB vector store, so that it can be created and used in LlamaCloud. Args: token (str): The Astra DB Application Token to use. api_endpoint (str): The Astra DB JSON API endpoint for your database. collection_name (str): Collection name to use. If it does not exist, it will be created. embedding_dimension (int): Length of the embedding vectors in use. keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace' is used. - `String token` The Astra DB Application Token to use - `String apiEndpoint` The Astra DB JSON API endpoint for your database - `String collectionName` Collection name to use. If it does not exist, it will be created - `long embeddingDimension` Length of the embedding vectors in use - `Optional className` - `Optional keyspace` The keyspace to use. If not provided, 'default_keyspace' is used. - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `String name` The name of the data sink.
- `SinkType sinkType` - `PINECONE("PINECONE")` - `POSTGRES("POSTGRES")` - `QDRANT("QDRANT")` - `AZUREAI_SEARCH("AZUREAI_SEARCH")` - `MONGODB_ATLAS("MONGODB_ATLAS")` - `MILVUS("MILVUS")` - `ASTRA_DB("ASTRA_DB")` - `Optional dataSinkId` Data sink ID. When provided instead of data_sink, the data sink will be looked up by ID. - `Optional embeddingConfig` - `class AzureOpenAIEmbeddingConfig:` - `Optional component` Configuration for the Azure OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. - `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `AZURE_EMBEDDING("AZURE_EMBEDDING")` - `class CohereEmbeddingConfig:` - `Optional component` Configuration for the Cohere embedding model. - `Optional apiKey` The Cohere API key. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided float embedding_type is used when needed. - `Optional inputType` Model Input type. 
If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional truncate` Truncation type - START/ END/ NONE - `Optional type` Type of the embedding model. - `COHERE_EMBEDDING("COHERE_EMBEDDING")` - `class GeminiEmbeddingConfig:` - `Optional component` Configuration for the Gemini embedding model. - `Optional apiBase` API base to access the model. Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. - `Optional transport` Transport to access the model. Defaults to None. - `Optional type` Type of the embedding model. - `GEMINI_EMBEDDING("GEMINI_EMBEDDING")` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `Optional component` Configuration for the HuggingFace Inference API embedding model. - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. - `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. 
Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional pooling` Enum of possible pooling choices with pooling behaviors. - `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. - `Optional textInstruction` Instruction to prepend during text embedding. - `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. - `Optional type` Type of the embedding model. - `HUGGINGFACE_API_EMBEDDING("HUGGINGFACE_API_EMBEDDING")` - `class OpenAIEmbeddingConfig:` - `Optional component` Configuration for the OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for OpenAI API. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. 
- `Optional type` Type of the embedding model. - `OPENAI_EMBEDDING("OPENAI_EMBEDDING")` - `class VertexAiEmbeddingConfig:` - `Optional component` Configuration for the VertexAI embedding model. - `Optional clientEmail` The client email for the VertexAI credentials. - `String location` The default location to use when making API calls. - `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. - `Optional tokenUri` The token URI for the VertexAI credentials. - `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `VERTEXAI_EMBEDDING("VERTEXAI_EMBEDDING")` - `class BedrockEmbeddingConfig:` - `Optional component` Configuration for the Bedrock embedding model. - `Optional additionalKwargs` Additional kwargs for the bedrock client. - `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. - `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of aws profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. 
Uses region configured in AWS CLI if not passed - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. - `Optional type` Type of the embedding model. - `BEDROCK_EMBEDDING("BEDROCK_EMBEDDING")` - `Optional embeddingModelConfigId` Embedding model config ID. When provided instead of embedding_config, the embedding model config will be looked up by ID. - `Optional llamaParseParameters` Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline. - `Optional adaptiveLongTable` - `Optional aggressiveTableExtraction` - `Optional annotateLinks` - `Optional autoMode` - `Optional autoModeConfigurationJson` - `Optional autoModeTriggerOnImageInPage` - `Optional autoModeTriggerOnRegexpInPage` - `Optional autoModeTriggerOnTableInPage` - `Optional autoModeTriggerOnTextInPage` - `Optional azureOpenAIApiVersion` - `Optional azureOpenAIDeploymentName` - `Optional azureOpenAIEndpoint` - `Optional azureOpenAIKey` - `Optional bboxBottom` - `Optional bboxLeft` - `Optional bboxRight` - `Optional bboxTop` - `Optional boundingBox` - `Optional compactMarkdownTable` - `Optional complementalFormattingInstruction` - `Optional contentGuidelineInstruction` - `Optional continuousMode` - `Optional disableImageExtraction` - `Optional disableOcr` - `Optional disableReconstruction` - `Optional doNotCache` - `Optional doNotUnrollColumns` - `Optional enableCostOptimizer` - `Optional extractCharts` - `Optional extractLayout` - `Optional extractPrintedPageNumber` - `Optional fastMode` - `Optional formattingInstruction` - `Optional gpt4oApiKey` - `Optional gpt4oMode` - `Optional guessXlsxSheetName` - `Optional hideFooters` - `Optional hideHeaders` - `Optional highResOcr` - `Optional htmlMakeAllElementsVisible` - `Optional htmlRemoveFixedElements` - `Optional htmlRemoveNavigationElements` - `Optional httpProxy` - `Optional ignoreDocumentElementsForLayoutDetection` - `Optional> imagesToSave` - 
`SCREENSHOT("screenshot")` - `EMBEDDED("embedded")` - `LAYOUT("layout")` - `Optional inlineImagesInMarkdown` - `Optional inputS3Path` - `Optional inputS3Region` - `Optional inputUrl` - `Optional internalIsScreenshotJob` - `Optional invalidateCache` - `Optional isFormattingInstruction` - `Optional jobTimeoutExtraTimePerPageInSeconds` - `Optional jobTimeoutInSeconds` - `Optional keepPageSeparatorWhenMergingTables` - `Optional> languages` - `AF("af")` - `AZ("az")` - `BS("bs")` - `CS("cs")` - `CY("cy")` - `DA("da")` - `DE("de")` - `EN("en")` - `ES("es")` - `ET("et")` - `FR("fr")` - `GA("ga")` - `HR("hr")` - `HU("hu")` - `ID("id")` - `IS("is")` - `IT("it")` - `KU("ku")` - `LA("la")` - `LT("lt")` - `LV("lv")` - `MI("mi")` - `MS("ms")` - `MT("mt")` - `NL("nl")` - `NO("no")` - `OC("oc")` - `PI("pi")` - `PL("pl")` - `PT("pt")` - `RO("ro")` - `RS_LATIN("rs_latin")` - `SK("sk")` - `SL("sl")` - `SQ("sq")` - `SV("sv")` - `SW("sw")` - `TL("tl")` - `TR("tr")` - `UZ("uz")` - `VI("vi")` - `AR("ar")` - `FA("fa")` - `UG("ug")` - `UR("ur")` - `BN("bn")` - `AS("as")` - `MNI("mni")` - `RU("ru")` - `RS_CYRILLIC("rs_cyrillic")` - `BE("be")` - `BG("bg")` - `UK("uk")` - `MN("mn")` - `ABQ("abq")` - `ADY("ady")` - `KBD("kbd")` - `AVA("ava")` - `DAR("dar")` - `INH("inh")` - `CHE("che")` - `LBE("lbe")` - `LEZ("lez")` - `TAB("tab")` - `TJK("tjk")` - `HI("hi")` - `MR("mr")` - `NE("ne")` - `BH("bh")` - `MAI("mai")` - `ANG("ang")` - `BHO("bho")` - `MAH("mah")` - `SCK("sck")` - `NEW("new")` - `GOM("gom")` - `SA("sa")` - `BGC("bgc")` - `TH("th")` - `CH_SIM("ch_sim")` - `CH_TRA("ch_tra")` - `JA("ja")` - `KO("ko")` - `TA("ta")` - `TE("te")` - `KN("kn")` - `Optional layoutAware` - `Optional lineLevelBoundingBox` - `Optional markdownTableMultilineHeaderSeparator` - `Optional maxPages` - `Optional maxPagesEnforced` - `Optional mergeTablesAcrossPagesInMarkdown` - `Optional model` - `Optional outlinedTableExtraction` - `Optional outputPdfOfDocument` - `Optional outputS3PathPrefix` - `Optional 
outputS3Region` - `Optional outputTablesAsHtml` - `Optional pageErrorTolerance` - `Optional pageFooterPrefix` - `Optional pageFooterSuffix` - `Optional pageHeaderPrefix` - `Optional pageHeaderSuffix` - `Optional pagePrefix` - `Optional pageSeparator` - `Optional pageSuffix` - `Optional parseMode` Enum for representing the mode of parsing to be used. - `PARSE_PAGE_WITHOUT_LLM("parse_page_without_llm")` - `PARSE_PAGE_WITH_LLM("parse_page_with_llm")` - `PARSE_PAGE_WITH_LVM("parse_page_with_lvm")` - `PARSE_PAGE_WITH_AGENT("parse_page_with_agent")` - `PARSE_PAGE_WITH_LAYOUT_AGENT("parse_page_with_layout_agent")` - `PARSE_DOCUMENT_WITH_LLM("parse_document_with_llm")` - `PARSE_DOCUMENT_WITH_LVM("parse_document_with_lvm")` - `PARSE_DOCUMENT_WITH_AGENT("parse_document_with_agent")` - `Optional parsingInstruction` - `Optional preciseBoundingBox` - `Optional premiumMode` - `Optional presentationOutOfBoundsContent` - `Optional presentationSkipEmbeddedData` - `Optional preserveLayoutAlignmentAcrossPages` - `Optional preserveVerySmallText` - `Optional preset` - `Optional priority` The priority for the request. This field may be ignored or overwritten depending on the organization tier. - `LOW("low")` - `MEDIUM("medium")` - `HIGH("high")` - `CRITICAL("critical")` - `Optional projectId` - `Optional removeHiddenText` - `Optional replaceFailedPageMode` Enum for representing the different available page error handling modes. 
- `RAW_TEXT("raw_text")` - `BLANK_PAGE("blank_page")` - `ERROR_MESSAGE("error_message")` - `Optional replaceFailedPageWithErrorMessagePrefix` - `Optional replaceFailedPageWithErrorMessageSuffix` - `Optional saveImages` - `Optional skipDiagonalText` - `Optional specializedChartParsingAgentic` - `Optional specializedChartParsingEfficient` - `Optional specializedChartParsingPlus` - `Optional specializedImageParsing` - `Optional spreadsheetExtractSubTables` - `Optional spreadsheetForceFormulaComputation` - `Optional spreadsheetIncludeHiddenSheets` - `Optional strictModeBuggyFont` - `Optional strictModeImageExtraction` - `Optional strictModeImageOcr` - `Optional strictModeReconstruction` - `Optional structuredOutput` - `Optional structuredOutputJsonSchema` - `Optional structuredOutputJsonSchemaName` - `Optional systemPrompt` - `Optional systemPromptAppend` - `Optional takeScreenshot` - `Optional targetPages` - `Optional tier` - `Optional useVendorMultimodalModel` - `Optional userPrompt` - `Optional vendorMultimodalApiKey` - `Optional vendorMultimodalModelName` - `Optional version` - `Optional> webhookConfigurations` Outbound webhook endpoints to notify on job status changes - `Optional> webhookEvents` Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered. 
- `EXTRACT_PENDING("extract.pending")` - `EXTRACT_SUCCESS("extract.success")` - `EXTRACT_ERROR("extract.error")` - `EXTRACT_PARTIAL_SUCCESS("extract.partial_success")` - `EXTRACT_CANCELLED("extract.cancelled")` - `PARSE_PENDING("parse.pending")` - `PARSE_RUNNING("parse.running")` - `PARSE_SUCCESS("parse.success")` - `PARSE_ERROR("parse.error")` - `PARSE_PARTIAL_SUCCESS("parse.partial_success")` - `PARSE_CANCELLED("parse.cancelled")` - `CLASSIFY_PENDING("classify.pending")` - `CLASSIFY_RUNNING("classify.running")` - `CLASSIFY_SUCCESS("classify.success")` - `CLASSIFY_ERROR("classify.error")` - `CLASSIFY_PARTIAL_SUCCESS("classify.partial_success")` - `CLASSIFY_CANCELLED("classify.cancelled")` - `SHEETS_PENDING("sheets.pending")` - `SHEETS_SUCCESS("sheets.success")` - `SHEETS_ERROR("sheets.error")` - `SHEETS_PARTIAL_SUCCESS("sheets.partial_success")` - `SHEETS_CANCELLED("sheets.cancelled")` - `UNMAPPED_EVENT("unmapped_event")` - `Optional webhookHeaders` Custom HTTP headers sent with each webhook request (e.g. auth tokens) - `Optional webhookOutputFormat` Response format sent to the webhook: 'string' (default) or 'json' - `Optional webhookUrl` URL to receive webhook POST notifications - `Optional webhookUrl` - `Optional managedPipelineId` The ID of the ManagedPipeline this playground pipeline is linked to. - `Optional metadataConfig` Metadata configuration for the pipeline. - `Optional> excludedEmbedMetadataKeys` List of metadata keys to exclude from embeddings - `Optional> excludedLlmMetadataKeys` List of metadata keys to exclude from LLM during retrieval - `Optional pipelineType` Type of pipeline. Either PLAYGROUND or MANAGED. - `PLAYGROUND("PLAYGROUND")` - `MANAGED("MANAGED")` - `Optional presetRetrievalParameters` Preset retrieval parameters for the pipeline. - `Optional alpha` Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval. 
- `Optional className` - `Optional denseSimilarityCutoff` Minimum similarity score with respect to the query for retrieval - `Optional denseSimilarityTopK` Number of nodes for dense retrieval. - `Optional enableReranking` Enable reranking for retrieval - `Optional filesTopK` Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content). - `Optional rerankTopN` Number of reranked nodes to return. - `Optional retrievalMode` The retrieval mode for the query. - `CHUNKS("chunks")` - `FILES_VIA_METADATA("files_via_metadata")` - `FILES_VIA_CONTENT("files_via_content")` - `AUTO_ROUTED("auto_routed")` - `Optional retrieveImageNodes` Whether to retrieve image nodes. - `Optional retrievePageFigureNodes` Whether to retrieve page figure nodes. - `Optional retrievePageScreenshotNodes` Whether to retrieve page screenshot nodes. - `Optional searchFilters` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators. Value uses Strict types, as int, float and str are compatible types and were all converted to string before. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. - `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` - `Optional searchFiltersInferenceSchema` JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference.
- `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional sparseSimilarityTopK` Number of nodes for sparse retrieval. - `Optional sparseModelConfig` Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks. - `Optional className` - `Optional modelType` The sparse model type to use. 'bm25' uses Qdrant's FastEmbed BM25 model (default for new pipelines), 'splade' uses HuggingFace Splade model, 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade). - `SPLADE("splade")` - `BM25("bm25")` - `AUTO("auto")` - `Optional status` Status of the pipeline deployment. - `Optional transformConfig` Configuration for the transformation. - `class AutoTransformConfig:` - `Optional chunkOverlap` Chunk overlap for the transformation. - `Optional chunkSize` Chunk size for the transformation. - `Optional mode` - `AUTO("auto")` - `class AdvancedModeTransformConfig:` - `Optional chunkingConfig` Configuration for the chunking. - `class NoneChunkingConfig:` - `Optional mode` - `NONE("none")` - `class CharacterChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `CHARACTER("character")` - `class TokenChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `TOKEN("token")` - `Optional separator` - `class SentenceChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `SENTENCE("sentence")` - `Optional paragraphSeparator` - `Optional separator` - `class SemanticChunkingConfig:` - `Optional breakpointPercentileThreshold` - `Optional bufferSize` - `Optional mode` - `SEMANTIC("semantic")` - `Optional mode` - `ADVANCED("advanced")` - `Optional segmentationConfig` Configuration for the segmentation. 
- `class NoneSegmentationConfig:` - `Optional mode` - `NONE("none")` - `class PageSegmentationConfig:` - `Optional mode` - `PAGE("page")` - `Optional pageSeparator` - `class ElementSegmentationConfig:` - `Optional mode` - `ELEMENT("element")` ### Pipeline Metadata Config - `class PipelineMetadataConfig:` - `Optional> excludedEmbedMetadataKeys` List of metadata keys to exclude from embeddings - `Optional> excludedLlmMetadataKeys` List of metadata keys to exclude from LLM during retrieval ### Pipeline Type - `enum PipelineType:` Enum for representing the type of a pipeline - `PLAYGROUND("PLAYGROUND")` - `MANAGED("MANAGED")` ### Preset Retrieval Params - `class PresetRetrievalParams:` Schema for the search params for a retrieval execution that can be preset for a pipeline. - `Optional alpha` Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval. - `Optional className` - `Optional denseSimilarityCutoff` Minimum similarity score with respect to the query for retrieval - `Optional denseSimilarityTopK` Number of nodes for dense retrieval. - `Optional enableReranking` Enable reranking for retrieval - `Optional filesTopK` Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content). - `Optional rerankTopN` Number of reranked nodes to return. - `Optional retrievalMode` The retrieval mode for the query. - `CHUNKS("chunks")` - `FILES_VIA_METADATA("files_via_metadata")` - `FILES_VIA_CONTENT("files_via_content")` - `AUTO_ROUTED("auto_routed")` - `Optional retrieveImageNodes` Whether to retrieve image nodes. - `Optional retrievePageFigureNodes` Whether to retrieve page figure nodes. - `Optional retrievePageScreenshotNodes` Whether to retrieve page screenshot nodes. - `Optional searchFilters` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators.
Value uses Strict types, as int, float and str are compatible types and were all converted to string before. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. - `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` - `Optional searchFiltersInferenceSchema` JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional sparseSimilarityTopK` Number of nodes for sparse retrieval. ### Retrieval Mode - `enum RetrievalMode:` - `CHUNKS("chunks")` - `FILES_VIA_METADATA("files_via_metadata")` - `FILES_VIA_CONTENT("files_via_content")` - `AUTO_ROUTED("auto_routed")` ### Sparse Model Config - `class SparseModelConfig:` Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks. - `Optional className` - `Optional modelType` The sparse model type to use. 'bm25' uses Qdrant's FastEmbed BM25 model (default for new pipelines), 'splade' uses HuggingFace Splade model, 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade). - `SPLADE("splade")` - `BM25("bm25")` - `AUTO("auto")` ### Vertex AI Embedding Config - `class VertexAiEmbeddingConfig:` - `Optional component` Configuration for the VertexAI embedding model. 
- `Optional clientEmail` The client email for the VertexAI credentials. - `String location` The default location to use when making API calls. - `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. - `Optional tokenUri` The token URI for the VertexAI credentials. - `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `VERTEXAI_EMBEDDING("VERTEXAI_EMBEDDING")` ### Vertex Text Embedding - `class VertexTextEmbedding:` - `Optional clientEmail` The client email for the VertexAI credentials. - `String location` The default location to use when making API calls. - `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. - `Optional tokenUri` The token URI for the VertexAI credentials. - `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. 
# Sync ## Sync Pipeline `Pipeline pipelines().sync().create(SyncCreateParams params = SyncCreateParams.none(), RequestOptions requestOptions = RequestOptions.none())` **post** `/api/v1/pipelines/{pipeline_id}/sync` Trigger an incremental sync for a managed pipeline. Processes new and updated documents from data sources and files, then updates the index for retrieval. ### Parameters - `SyncCreateParams params` - `Optional pipelineId` ### Returns - `class Pipeline:` Schema for a pipeline. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` - `class ManagedOpenAIEmbedding:` - `Optional component` Configuration for the Managed OpenAI embedding model. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The name of the OpenAI embedding model. - `OPENAI_TEXT_EMBEDDING_3_SMALL("openai-text-embedding-3-small")` - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `MANAGED_OPENAI_EMBEDDING("MANAGED_OPENAI_EMBEDDING")` - `class AzureOpenAIEmbeddingConfig:` - `Optional component` Configuration for the Azure OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. - `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls.
- `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `AZURE_EMBEDDING("AZURE_EMBEDDING")` - `class CohereEmbeddingConfig:` - `Optional component` Configuration for the Cohere embedding model. - `Optional apiKey` The Cohere API key. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided float embedding_type is used when needed. - `Optional inputType` Model Input type. If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional truncate` Truncation type - START/ END/ NONE - `Optional type` Type of the embedding model. - `COHERE_EMBEDDING("COHERE_EMBEDDING")` - `class GeminiEmbeddingConfig:` - `Optional component` Configuration for the Gemini embedding model. - `Optional apiBase` API base to access the model. Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. - `Optional transport` Transport to access the model. Defaults to None. 
- `Optional type` Type of the embedding model. - `GEMINI_EMBEDDING("GEMINI_EMBEDDING")` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `Optional component` Configuration for the HuggingFace Inference API embedding model. - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. - `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional pooling` Enum of possible pooling choices with pooling behaviors. - `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. - `Optional textInstruction` Instruction to prepend during text embedding. - `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. - `Optional type` Type of the embedding model. - `HUGGINGFACE_API_EMBEDDING("HUGGINGFACE_API_EMBEDDING")` - `class OpenAIEmbeddingConfig:` - `Optional component` Configuration for the OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for OpenAI API. 
- `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `OPENAI_EMBEDDING("OPENAI_EMBEDDING")` - `class VertexAiEmbeddingConfig:` - `Optional component` Configuration for the VertexAI embedding model. - `Optional clientEmail` The client email for the VertexAI credentials. - `String location` The default location to use when making API calls. - `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. - `Optional tokenUri` The token URI for the VertexAI credentials. - `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `VERTEXAI_EMBEDDING("VERTEXAI_EMBEDDING")` - `class BedrockEmbeddingConfig:` - `Optional component` Configuration for the Bedrock embedding model. 
- `Optional additionalKwargs` Additional kwargs for the bedrock client. - `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. - `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of aws profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. Uses region configured in AWS CLI if not passed - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. - `Optional type` Type of the embedding model. - `BEDROCK_EMBEDDING("BEDROCK_EMBEDDING")` - `String name` - `String projectId` - `Optional configHash` Hashes for the configuration of a pipeline. - `Optional embeddingConfigHash` Hash of the embedding config. - `Optional parsingConfigHash` Hash of the llama parse parameters. - `Optional transformConfigHash` Hash of the transform config. - `Optional createdAt` Creation datetime - `Optional dataSink` Schema for a data sink. - `String id` Unique identifier - `Component component` Component that implements the data sink - `class UnionMember0:` - `class CloudPineconeVectorStore:` Cloud Pinecone Vector Store. This class is used to store the configuration for a Pinecone vector store, so that it can be created and used in LlamaCloud. 
Args: api_key (str): API key for authenticating with Pinecone index_name (str): name of the Pinecone index namespace (optional[str]): namespace to use in the Pinecone index insert_kwargs (optional[dict]): additional kwargs to pass during insertion - `String apiKey` The API key for authenticating with Pinecone - `String indexName` - `Optional className` - `Optional insertKwargs` - `Optional namespace` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudPostgresVectorStore:` - `String database` - `long embedDim` - `String host` - `String password` - `long port` - `String schemaName` - `String tableName` - `String user` - `Optional className` - `Optional hnswSettings` HNSW settings for PGVector. - `Optional distanceMethod` The distance method to use. - `L2("l2")` - `IP("ip")` - `COSINE("cosine")` - `L1("l1")` - `HAMMING("hamming")` - `JACCARD("jaccard")` - `Optional efConstruction` The number of edges to use during the construction phase. - `Optional efSearch` The number of edges to use during the search phase. - `Optional m` The number of bi-directional links created for each new element. - `Optional vectorType` The type of vector to use. - `VECTOR("vector")` - `HALF_VEC("half_vec")` - `BIT("bit")` - `SPARSE_VEC("sparse_vec")` - `Optional hybridSearch` - `Optional performSetup` - `Optional supportsNestedMetadataFilters` - `class CloudQdrantVectorStore:` Cloud Qdrant Vector Store. This class is used to store the configuration for a Qdrant vector store, so that it can be created and used in LlamaCloud. Args: collection_name (str): name of the Qdrant collection url (str): url of the Qdrant instance api_key (str): API key for authenticating with Qdrant max_retries (int): maximum number of retries in case of a failure. 
Defaults to 3 client_kwargs (dict): additional kwargs to pass to the Qdrant client - `String apiKey` - `String collectionName` - `String url` - `Optional className` - `Optional clientKwargs` - `Optional maxRetries` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudAzureAiSearchVectorStore:` Cloud Azure AI Search Vector Store. - `String searchServiceApiKey` - `String searchServiceEndpoint` - `Optional className` - `Optional clientId` - `Optional clientSecret` - `Optional embeddingDimension` - `Optional filterableMetadataFieldKeys` - `Optional indexName` - `Optional searchServiceApiVersion` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `Optional tenantId` - `class CloudMongoDBAtlasVectorSearch:` Cloud MongoDB Atlas Vector Store. This class is used to store the configuration for a MongoDB Atlas vector store, so that it can be created and used in LlamaCloud. Args: mongodb_uri (str): URI for connecting to MongoDB Atlas db_name (str): name of the MongoDB database collection_name (str): name of the MongoDB collection vector_index_name (str): name of the MongoDB Atlas vector index fulltext_index_name (str): name of the MongoDB Atlas full-text index - `String collectionName` - `String dbName` - `String mongoDBUri` - `Optional className` - `Optional embeddingDimension` - `Optional fulltextIndexName` - `Optional supportsNestedMetadataFilters` - `Optional vectorIndexName` - `class CloudMilvusVectorStore:` Cloud Milvus Vector Store. - `String uri` - `Optional token` - `Optional className` - `Optional collectionName` - `Optional embeddingDimension` - `Optional supportsNestedMetadataFilters` - `class CloudAstraDbVectorStore:` Cloud AstraDB Vector Store. This class is used to store the configuration for an AstraDB vector store, so that it can be created and used in LlamaCloud. Args: token (str): The Astra DB Application Token to use. api_endpoint (str): The Astra DB JSON API endpoint for your database. 
collection_name (str): Collection name to use. If not existing, it will be created. embedding_dimension (int): Length of the embedding vectors in use. keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace' - `String token` The Astra DB Application Token to use - `String apiEndpoint` The Astra DB JSON API endpoint for your database - `String collectionName` Collection name to use. If not existing, it will be created - `long embeddingDimension` Length of the embedding vectors in use - `Optional className` - `Optional keyspace` The keyspace to use. If not provided, 'default_keyspace' - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `String name` The name of the data sink. - `String projectId` - `SinkType sinkType` - `PINECONE("PINECONE")` - `POSTGRES("POSTGRES")` - `QDRANT("QDRANT")` - `AZUREAI_SEARCH("AZUREAI_SEARCH")` - `MONGODB_ATLAS("MONGODB_ATLAS")` - `MILVUS("MILVUS")` - `ASTRA_DB("ASTRA_DB")` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfig` Schema for an embedding model config. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` The embedding configuration for the embedding model config. - `class AzureOpenAIEmbeddingConfig:` - `class CohereEmbeddingConfig:` - `class GeminiEmbeddingConfig:` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `class OpenAIEmbeddingConfig:` - `class VertexAiEmbeddingConfig:` - `class BedrockEmbeddingConfig:` - `String name` The name of the embedding model config. - `String projectId` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfigId` The ID of the EmbeddingModelConfig this pipeline is using. - `Optional llamaParseParameters` Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline. 
- `Optional adaptiveLongTable` - `Optional aggressiveTableExtraction` - `Optional annotateLinks` - `Optional autoMode` - `Optional autoModeConfigurationJson` - `Optional autoModeTriggerOnImageInPage` - `Optional autoModeTriggerOnRegexpInPage` - `Optional autoModeTriggerOnTableInPage` - `Optional autoModeTriggerOnTextInPage` - `Optional azureOpenAIApiVersion` - `Optional azureOpenAIDeploymentName` - `Optional azureOpenAIEndpoint` - `Optional azureOpenAIKey` - `Optional bboxBottom` - `Optional bboxLeft` - `Optional bboxRight` - `Optional bboxTop` - `Optional boundingBox` - `Optional compactMarkdownTable` - `Optional complementalFormattingInstruction` - `Optional contentGuidelineInstruction` - `Optional continuousMode` - `Optional disableImageExtraction` - `Optional disableOcr` - `Optional disableReconstruction` - `Optional doNotCache` - `Optional doNotUnrollColumns` - `Optional enableCostOptimizer` - `Optional extractCharts` - `Optional extractLayout` - `Optional extractPrintedPageNumber` - `Optional fastMode` - `Optional formattingInstruction` - `Optional gpt4oApiKey` - `Optional gpt4oMode` - `Optional guessXlsxSheetName` - `Optional hideFooters` - `Optional hideHeaders` - `Optional highResOcr` - `Optional htmlMakeAllElementsVisible` - `Optional htmlRemoveFixedElements` - `Optional htmlRemoveNavigationElements` - `Optional httpProxy` - `Optional ignoreDocumentElementsForLayoutDetection` - `Optional> imagesToSave` - `SCREENSHOT("screenshot")` - `EMBEDDED("embedded")` - `LAYOUT("layout")` - `Optional inlineImagesInMarkdown` - `Optional inputS3Path` - `Optional inputS3Region` - `Optional inputUrl` - `Optional internalIsScreenshotJob` - `Optional invalidateCache` - `Optional isFormattingInstruction` - `Optional jobTimeoutExtraTimePerPageInSeconds` - `Optional jobTimeoutInSeconds` - `Optional keepPageSeparatorWhenMergingTables` - `Optional> languages` - `AF("af")` - `AZ("az")` - `BS("bs")` - `CS("cs")` - `CY("cy")` - `DA("da")` - `DE("de")` - `EN("en")` - `ES("es")` - 
`ET("et")` - `FR("fr")` - `GA("ga")` - `HR("hr")` - `HU("hu")` - `ID("id")` - `IS("is")` - `IT("it")` - `KU("ku")` - `LA("la")` - `LT("lt")` - `LV("lv")` - `MI("mi")` - `MS("ms")` - `MT("mt")` - `NL("nl")` - `NO("no")` - `OC("oc")` - `PI("pi")` - `PL("pl")` - `PT("pt")` - `RO("ro")` - `RS_LATIN("rs_latin")` - `SK("sk")` - `SL("sl")` - `SQ("sq")` - `SV("sv")` - `SW("sw")` - `TL("tl")` - `TR("tr")` - `UZ("uz")` - `VI("vi")` - `AR("ar")` - `FA("fa")` - `UG("ug")` - `UR("ur")` - `BN("bn")` - `AS("as")` - `MNI("mni")` - `RU("ru")` - `RS_CYRILLIC("rs_cyrillic")` - `BE("be")` - `BG("bg")` - `UK("uk")` - `MN("mn")` - `ABQ("abq")` - `ADY("ady")` - `KBD("kbd")` - `AVA("ava")` - `DAR("dar")` - `INH("inh")` - `CHE("che")` - `LBE("lbe")` - `LEZ("lez")` - `TAB("tab")` - `TJK("tjk")` - `HI("hi")` - `MR("mr")` - `NE("ne")` - `BH("bh")` - `MAI("mai")` - `ANG("ang")` - `BHO("bho")` - `MAH("mah")` - `SCK("sck")` - `NEW("new")` - `GOM("gom")` - `SA("sa")` - `BGC("bgc")` - `TH("th")` - `CH_SIM("ch_sim")` - `CH_TRA("ch_tra")` - `JA("ja")` - `KO("ko")` - `TA("ta")` - `TE("te")` - `KN("kn")` - `Optional layoutAware` - `Optional lineLevelBoundingBox` - `Optional markdownTableMultilineHeaderSeparator` - `Optional maxPages` - `Optional maxPagesEnforced` - `Optional mergeTablesAcrossPagesInMarkdown` - `Optional model` - `Optional outlinedTableExtraction` - `Optional outputPdfOfDocument` - `Optional outputS3PathPrefix` - `Optional outputS3Region` - `Optional outputTablesAsHtml` - `Optional pageErrorTolerance` - `Optional pageFooterPrefix` - `Optional pageFooterSuffix` - `Optional pageHeaderPrefix` - `Optional pageHeaderSuffix` - `Optional pagePrefix` - `Optional pageSeparator` - `Optional pageSuffix` - `Optional parseMode` Enum for representing the mode of parsing to be used. 
- `PARSE_PAGE_WITHOUT_LLM("parse_page_without_llm")` - `PARSE_PAGE_WITH_LLM("parse_page_with_llm")` - `PARSE_PAGE_WITH_LVM("parse_page_with_lvm")` - `PARSE_PAGE_WITH_AGENT("parse_page_with_agent")` - `PARSE_PAGE_WITH_LAYOUT_AGENT("parse_page_with_layout_agent")` - `PARSE_DOCUMENT_WITH_LLM("parse_document_with_llm")` - `PARSE_DOCUMENT_WITH_LVM("parse_document_with_lvm")` - `PARSE_DOCUMENT_WITH_AGENT("parse_document_with_agent")` - `Optional parsingInstruction` - `Optional preciseBoundingBox` - `Optional premiumMode` - `Optional presentationOutOfBoundsContent` - `Optional presentationSkipEmbeddedData` - `Optional preserveLayoutAlignmentAcrossPages` - `Optional preserveVerySmallText` - `Optional preset` - `Optional priority` The priority for the request. This field may be ignored or overwritten depending on the organization tier. - `LOW("low")` - `MEDIUM("medium")` - `HIGH("high")` - `CRITICAL("critical")` - `Optional projectId` - `Optional removeHiddenText` - `Optional replaceFailedPageMode` Enum for representing the different available page error handling modes. 
- `RAW_TEXT("raw_text")` - `BLANK_PAGE("blank_page")` - `ERROR_MESSAGE("error_message")` - `Optional replaceFailedPageWithErrorMessagePrefix` - `Optional replaceFailedPageWithErrorMessageSuffix` - `Optional saveImages` - `Optional skipDiagonalText` - `Optional specializedChartParsingAgentic` - `Optional specializedChartParsingEfficient` - `Optional specializedChartParsingPlus` - `Optional specializedImageParsing` - `Optional spreadsheetExtractSubTables` - `Optional spreadsheetForceFormulaComputation` - `Optional spreadsheetIncludeHiddenSheets` - `Optional strictModeBuggyFont` - `Optional strictModeImageExtraction` - `Optional strictModeImageOcr` - `Optional strictModeReconstruction` - `Optional structuredOutput` - `Optional structuredOutputJsonSchema` - `Optional structuredOutputJsonSchemaName` - `Optional systemPrompt` - `Optional systemPromptAppend` - `Optional takeScreenshot` - `Optional targetPages` - `Optional tier` - `Optional useVendorMultimodalModel` - `Optional userPrompt` - `Optional vendorMultimodalApiKey` - `Optional vendorMultimodalModelName` - `Optional version` - `Optional> webhookConfigurations` Outbound webhook endpoints to notify on job status changes - `Optional> webhookEvents` Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered. 
- `EXTRACT_PENDING("extract.pending")` - `EXTRACT_SUCCESS("extract.success")` - `EXTRACT_ERROR("extract.error")` - `EXTRACT_PARTIAL_SUCCESS("extract.partial_success")` - `EXTRACT_CANCELLED("extract.cancelled")` - `PARSE_PENDING("parse.pending")` - `PARSE_RUNNING("parse.running")` - `PARSE_SUCCESS("parse.success")` - `PARSE_ERROR("parse.error")` - `PARSE_PARTIAL_SUCCESS("parse.partial_success")` - `PARSE_CANCELLED("parse.cancelled")` - `CLASSIFY_PENDING("classify.pending")` - `CLASSIFY_RUNNING("classify.running")` - `CLASSIFY_SUCCESS("classify.success")` - `CLASSIFY_ERROR("classify.error")` - `CLASSIFY_PARTIAL_SUCCESS("classify.partial_success")` - `CLASSIFY_CANCELLED("classify.cancelled")` - `SHEETS_PENDING("sheets.pending")` - `SHEETS_SUCCESS("sheets.success")` - `SHEETS_ERROR("sheets.error")` - `SHEETS_PARTIAL_SUCCESS("sheets.partial_success")` - `SHEETS_CANCELLED("sheets.cancelled")` - `UNMAPPED_EVENT("unmapped_event")` - `Optional webhookHeaders` Custom HTTP headers sent with each webhook request (e.g. auth tokens) - `Optional webhookOutputFormat` Response format sent to the webhook: 'string' (default) or 'json' - `Optional webhookUrl` URL to receive webhook POST notifications - `Optional webhookUrl` - `Optional managedPipelineId` The ID of the ManagedPipeline this playground pipeline is linked to. - `Optional metadataConfig` Metadata configuration for the pipeline. - `Optional> excludedEmbedMetadataKeys` List of metadata keys to exclude from embeddings - `Optional> excludedLlmMetadataKeys` List of metadata keys to exclude from LLM during retrieval - `Optional pipelineType` Type of pipeline. Either PLAYGROUND or MANAGED. - `PLAYGROUND("PLAYGROUND")` - `MANAGED("MANAGED")` - `Optional presetRetrievalParameters` Preset retrieval parameters for the pipeline. - `Optional alpha` Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval. 
- `Optional className` - `Optional denseSimilarityCutoff` Minimum similarity score wrt query for retrieval - `Optional denseSimilarityTopK` Number of nodes for dense retrieval. - `Optional enableReranking` Enable reranking for retrieval - `Optional filesTopK` Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content). - `Optional rerankTopN` Number of reranked nodes for returning. - `Optional retrievalMode` The retrieval mode for the query. - `CHUNKS("chunks")` - `FILES_VIA_METADATA("files_via_metadata")` - `FILES_VIA_CONTENT("files_via_content")` - `AUTO_ROUTED("auto_routed")` - `Optional retrieveImageNodes` Whether to retrieve image nodes. - `Optional retrievePageFigureNodes` Whether to retrieve page figure nodes. - `Optional retrievePageScreenshotNodes` Whether to retrieve page screenshot nodes. - `Optional searchFilters` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators. Value uses Strict types, as int, float and str are compatible types and were all converted to string before. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. - `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` - `Optional searchFiltersInferenceSchema` JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference. 
- `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional sparseSimilarityTopK` Number of nodes for sparse retrieval. - `Optional sparseModelConfig` Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks. - `Optional className` - `Optional modelType` The sparse model type to use. 'bm25' uses Qdrant's FastEmbed BM25 model (default for new pipelines), 'splade' uses HuggingFace Splade model, 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade). - `SPLADE("splade")` - `BM25("bm25")` - `AUTO("auto")` - `Optional status` Status of the pipeline. - `CREATED("CREATED")` - `DELETING("DELETING")` - `Optional transformConfig` Configuration for the transformation. - `class AutoTransformConfig:` - `Optional chunkOverlap` Chunk overlap for the transformation. - `Optional chunkSize` Chunk size for the transformation. - `Optional mode` - `AUTO("auto")` - `class AdvancedModeTransformConfig:` - `Optional chunkingConfig` Configuration for the chunking. - `class NoneChunkingConfig:` - `Optional mode` - `NONE("none")` - `class CharacterChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `CHARACTER("character")` - `class TokenChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `TOKEN("token")` - `Optional separator` - `class SentenceChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `SENTENCE("sentence")` - `Optional paragraphSeparator` - `Optional separator` - `class SemanticChunkingConfig:` - `Optional breakpointPercentileThreshold` - `Optional bufferSize` - `Optional mode` - `SEMANTIC("semantic")` - `Optional mode` - `ADVANCED("advanced")` - `Optional segmentationConfig` Configuration for the segmentation. 
- `class NoneSegmentationConfig:` - `Optional mode` - `NONE("none")` - `class PageSegmentationConfig:` - `Optional mode` - `PAGE("page")` - `Optional pageSeparator` - `class ElementSegmentationConfig:` - `Optional mode` - `ELEMENT("element")` - `Optional updatedAt` Update datetime ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.Pipeline; import com.llamacloud_prod.api.models.pipelines.sync.SyncCreateParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); Pipeline pipeline = client.pipelines().sync().create("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` #### Response ```json { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "class_name": "class_name", "embed_batch_size": 1, "model_name": "openai-text-embedding-3-small", "num_workers": 0 }, "type": "MANAGED_OPENAI_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "config_hash": { "embedding_config_hash": "embedding_config_hash", "parsing_config_hash": "parsing_config_hash", "transform_config_hash": "transform_config_hash" }, "created_at": "2019-12-27T18:11:19.117Z", "data_sink": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "component": { "foo": "bar" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "sink_type": "PINECONE", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "additional_kwargs": { "foo": "bar" }, "api_base": "api_base", "api_key": "api_key", "api_version": "api_version", "azure_deployment": "azure_deployment", "azure_endpoint": "azure_endpoint", "class_name": "class_name", 
"default_headers": { "foo": "string" }, "dimensions": 0, "embed_batch_size": 1, "max_retries": 0, "model_name": "model_name", "num_workers": 0, "reuse_client": true, "timeout": 0 }, "type": "AZURE_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "llama_parse_parameters": { "adaptive_long_table": true, "aggressive_table_extraction": true, "annotate_links": true, "auto_mode": true, "auto_mode_configuration_json": "auto_mode_configuration_json", "auto_mode_trigger_on_image_in_page": true, "auto_mode_trigger_on_regexp_in_page": "auto_mode_trigger_on_regexp_in_page", "auto_mode_trigger_on_table_in_page": true, "auto_mode_trigger_on_text_in_page": "auto_mode_trigger_on_text_in_page", "azure_openai_api_version": "azure_openai_api_version", "azure_openai_deployment_name": "azure_openai_deployment_name", "azure_openai_endpoint": "azure_openai_endpoint", "azure_openai_key": "azure_openai_key", "bbox_bottom": 0, "bbox_left": 0, "bbox_right": 0, "bbox_top": 0, "bounding_box": "bounding_box", "compact_markdown_table": true, "complemental_formatting_instruction": "complemental_formatting_instruction", "content_guideline_instruction": "content_guideline_instruction", "continuous_mode": true, "disable_image_extraction": true, "disable_ocr": true, "disable_reconstruction": true, "do_not_cache": true, "do_not_unroll_columns": true, "enable_cost_optimizer": true, "extract_charts": true, "extract_layout": true, "extract_printed_page_number": true, "fast_mode": true, "formatting_instruction": "formatting_instruction", "gpt4o_api_key": "gpt4o_api_key", "gpt4o_mode": true, "guess_xlsx_sheet_name": true, "hide_footers": true, "hide_headers": true, "high_res_ocr": true, "html_make_all_elements_visible": true, "html_remove_fixed_elements": true, "html_remove_navigation_elements": true, "http_proxy": 
"http_proxy", "ignore_document_elements_for_layout_detection": true, "images_to_save": [ "screenshot" ], "inline_images_in_markdown": true, "input_s3_path": "input_s3_path", "input_s3_region": "input_s3_region", "input_url": "input_url", "internal_is_screenshot_job": true, "invalidate_cache": true, "is_formatting_instruction": true, "job_timeout_extra_time_per_page_in_seconds": 0, "job_timeout_in_seconds": 0, "keep_page_separator_when_merging_tables": true, "languages": [ "af" ], "layout_aware": true, "line_level_bounding_box": true, "markdown_table_multiline_header_separator": "markdown_table_multiline_header_separator", "max_pages": 0, "max_pages_enforced": 0, "merge_tables_across_pages_in_markdown": true, "model": "model", "outlined_table_extraction": true, "output_pdf_of_document": true, "output_s3_path_prefix": "output_s3_path_prefix", "output_s3_region": "output_s3_region", "output_tables_as_HTML": true, "page_error_tolerance": 0, "page_footer_prefix": "page_footer_prefix", "page_footer_suffix": "page_footer_suffix", "page_header_prefix": "page_header_prefix", "page_header_suffix": "page_header_suffix", "page_prefix": "page_prefix", "page_separator": "page_separator", "page_suffix": "page_suffix", "parse_mode": "parse_page_without_llm", "parsing_instruction": "parsing_instruction", "precise_bounding_box": true, "premium_mode": true, "presentation_out_of_bounds_content": true, "presentation_skip_embedded_data": true, "preserve_layout_alignment_across_pages": true, "preserve_very_small_text": true, "preset": "preset", "priority": "low", "project_id": "project_id", "remove_hidden_text": true, "replace_failed_page_mode": "raw_text", "replace_failed_page_with_error_message_prefix": "replace_failed_page_with_error_message_prefix", "replace_failed_page_with_error_message_suffix": "replace_failed_page_with_error_message_suffix", "save_images": true, "skip_diagonal_text": true, "specialized_chart_parsing_agentic": true, "specialized_chart_parsing_efficient": true, 
"specialized_chart_parsing_plus": true, "specialized_image_parsing": true, "spreadsheet_extract_sub_tables": true, "spreadsheet_force_formula_computation": true, "spreadsheet_include_hidden_sheets": true, "strict_mode_buggy_font": true, "strict_mode_image_extraction": true, "strict_mode_image_ocr": true, "strict_mode_reconstruction": true, "structured_output": true, "structured_output_json_schema": "structured_output_json_schema", "structured_output_json_schema_name": "structured_output_json_schema_name", "system_prompt": "system_prompt", "system_prompt_append": "system_prompt_append", "take_screenshot": true, "target_pages": "target_pages", "tier": "tier", "use_vendor_multimodal_model": true, "user_prompt": "user_prompt", "vendor_multimodal_api_key": "vendor_multimodal_api_key", "vendor_multimodal_model_name": "vendor_multimodal_model_name", "version": "version", "webhook_configurations": [ { "webhook_events": [ "parse.success", "parse.error" ], "webhook_headers": { "Authorization": "Bearer sk-..." 
}, "webhook_output_format": "json", "webhook_url": "https://example.com/webhooks/llamacloud" } ], "webhook_url": "webhook_url" }, "managed_pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "metadata_config": { "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ] }, "pipeline_type": "PLAYGROUND", "preset_retrieval_parameters": { "alpha": 0, "class_name": "class_name", "dense_similarity_cutoff": 0, "dense_similarity_top_k": 1, "enable_reranking": true, "files_top_k": 1, "rerank_top_n": 1, "retrieval_mode": "chunks", "retrieve_image_nodes": true, "retrieve_page_figure_nodes": true, "retrieve_page_screenshot_nodes": true, "search_filters": { "filters": [ { "key": "key", "value": 0, "operator": "==" } ], "condition": "and" }, "search_filters_inference_schema": { "foo": { "foo": "bar" } }, "sparse_similarity_top_k": 1 }, "sparse_model_config": { "class_name": "class_name", "model_type": "splade" }, "status": "CREATED", "transform_config": { "chunk_overlap": 0, "chunk_size": 1, "mode": "auto" }, "updated_at": "2019-12-27T18:11:19.117Z" } ``` ## Cancel Pipeline Sync `Pipeline pipelines().sync().cancel(SyncCancelParams params = SyncCancelParams.none(), RequestOptions requestOptions = RequestOptions.none())` **post** `/api/v1/pipelines/{pipeline_id}/sync/cancel` Cancel all running sync jobs for a pipeline. ### Parameters - `SyncCancelParams params` - `Optional pipelineId` ### Returns - `class Pipeline:` Schema for a pipeline. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` - `class ManagedOpenAIEmbedding:` - `Optional component` Configuration for the Managed OpenAI embedding model. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The name of the OpenAI embedding model. - `OPENAI_TEXT_EMBEDDING_3_SMALL("openai-text-embedding-3-small")` - `Optional numWorkers` The number of workers to use for async embedding calls.
- `Optional type` Type of the embedding model. - `MANAGED_OPENAI_EMBEDDING("MANAGED_OPENAI_EMBEDDING")` - `class AzureOpenAIEmbeddingConfig:` - `Optional component` Configuration for the Azure OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. - `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `AZURE_EMBEDDING("AZURE_EMBEDDING")` - `class CohereEmbeddingConfig:` - `Optional component` Configuration for the Cohere embedding model. - `Optional apiKey` The Cohere API key. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided float embedding_type is used when needed. - `Optional inputType` Model Input type. If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. 
- `Optional truncate` Truncation type: START, END, or NONE - `Optional type` Type of the embedding model. - `COHERE_EMBEDDING("COHERE_EMBEDDING")` - `class GeminiEmbeddingConfig:` - `Optional component` Configuration for the Gemini embedding model. - `Optional apiBase` API base to access the model. Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for the embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. - `Optional transport` Transport to access the model. Defaults to None. - `Optional type` Type of the embedding model. - `GEMINI_EMBEDDING("GEMINI_EMBEDDING")` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `Optional component` Configuration for the HuggingFace Inference API embedding model. - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. - `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. - `Optional numWorkers` The number of workers to use for async embedding calls. 
- `Optional pooling` Enum of possible pooling choices with pooling behaviors. - `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. - `Optional textInstruction` Instruction to prepend during text embedding. - `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. - `Optional type` Type of the embedding model. - `HUGGINGFACE_API_EMBEDDING("HUGGINGFACE_API_EMBEDDING")` - `class OpenAIEmbeddingConfig:` - `Optional component` Configuration for the OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for OpenAI API. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `OPENAI_EMBEDDING("OPENAI_EMBEDDING")` - `class VertexAiEmbeddingConfig:` - `Optional component` Configuration for the VertexAI embedding model. - `Optional clientEmail` The client email for the VertexAI credentials. 
- `String location` The default location to use when making API calls. - `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. - `Optional tokenUri` The token URI for the VertexAI credentials. - `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `VERTEXAI_EMBEDDING("VERTEXAI_EMBEDDING")` - `class BedrockEmbeddingConfig:` - `Optional component` Configuration for the Bedrock embedding model. - `Optional additionalKwargs` Additional kwargs for the bedrock client. - `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. - `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of aws profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. Uses region configured in AWS CLI if not passed - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. - `Optional type` Type of the embedding model. 
- `BEDROCK_EMBEDDING("BEDROCK_EMBEDDING")` - `String name` - `String projectId` - `Optional configHash` Hashes for the configuration of a pipeline. - `Optional embeddingConfigHash` Hash of the embedding config. - `Optional parsingConfigHash` Hash of the llama parse parameters. - `Optional transformConfigHash` Hash of the transform config. - `Optional createdAt` Creation datetime - `Optional dataSink` Schema for a data sink. - `String id` Unique identifier - `Component component` Component that implements the data sink - `class UnionMember0:` - `class CloudPineconeVectorStore:` Cloud Pinecone Vector Store. This class is used to store the configuration for a Pinecone vector store, so that it can be created and used in LlamaCloud. Args: api_key (str): API key for authenticating with Pinecone index_name (str): name of the Pinecone index namespace (optional[str]): namespace to use in the Pinecone index insert_kwargs (optional[dict]): additional kwargs to pass during insertion - `String apiKey` The API key for authenticating with Pinecone - `String indexName` - `Optional className` - `Optional insertKwargs` - `Optional namespace` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudPostgresVectorStore:` - `String database` - `long embedDim` - `String host` - `String password` - `long port` - `String schemaName` - `String tableName` - `String user` - `Optional className` - `Optional hnswSettings` HNSW settings for PGVector. - `Optional distanceMethod` The distance method to use. - `L2("l2")` - `IP("ip")` - `COSINE("cosine")` - `L1("l1")` - `HAMMING("hamming")` - `JACCARD("jaccard")` - `Optional efConstruction` The number of edges to use during the construction phase. - `Optional efSearch` The number of edges to use during the search phase. - `Optional m` The number of bi-directional links created for each new element. - `Optional vectorType` The type of vector to use. 
- `VECTOR("vector")` - `HALF_VEC("half_vec")` - `BIT("bit")` - `SPARSE_VEC("sparse_vec")` - `Optional hybridSearch` - `Optional performSetup` - `Optional supportsNestedMetadataFilters` - `class CloudQdrantVectorStore:` Cloud Qdrant Vector Store. This class is used to store the configuration for a Qdrant vector store, so that it can be created and used in LlamaCloud. Args: collection_name (str): name of the Qdrant collection url (str): url of the Qdrant instance api_key (str): API key for authenticating with Qdrant max_retries (int): maximum number of retries in case of a failure. Defaults to 3 client_kwargs (dict): additional kwargs to pass to the Qdrant client - `String apiKey` - `String collectionName` - `String url` - `Optional className` - `Optional clientKwargs` - `Optional maxRetries` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudAzureAiSearchVectorStore:` Cloud Azure AI Search Vector Store. - `String searchServiceApiKey` - `String searchServiceEndpoint` - `Optional className` - `Optional clientId` - `Optional clientSecret` - `Optional embeddingDimension` - `Optional filterableMetadataFieldKeys` - `Optional indexName` - `Optional searchServiceApiVersion` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `Optional tenantId` - `class CloudMongoDBAtlasVectorSearch:` Cloud MongoDB Atlas Vector Store. This class is used to store the configuration for a MongoDB Atlas vector store, so that it can be created and used in LlamaCloud. 
Args: mongodb_uri (str): URI for connecting to MongoDB Atlas db_name (str): name of the MongoDB database collection_name (str): name of the MongoDB collection vector_index_name (str): name of the MongoDB Atlas vector index fulltext_index_name (str): name of the MongoDB Atlas full-text index - `String collectionName` - `String dbName` - `String mongoDBUri` - `Optional className` - `Optional embeddingDimension` - `Optional fulltextIndexName` - `Optional supportsNestedMetadataFilters` - `Optional vectorIndexName` - `class CloudMilvusVectorStore:` Cloud Milvus Vector Store. - `String uri` - `Optional token` - `Optional className` - `Optional collectionName` - `Optional embeddingDimension` - `Optional supportsNestedMetadataFilters` - `class CloudAstraDbVectorStore:` Cloud AstraDB Vector Store. This class is used to store the configuration for an AstraDB vector store, so that it can be created and used in LlamaCloud. Args: token (str): The Astra DB Application Token to use. api_endpoint (str): The Astra DB JSON API endpoint for your database. collection_name (str): Collection name to use. If it does not exist, it will be created. embedding_dimension (int): Length of the embedding vectors in use. keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace' is used. - `String token` The Astra DB Application Token to use - `String apiEndpoint` The Astra DB JSON API endpoint for your database - `String collectionName` Collection name to use. If it does not exist, it will be created - `long embeddingDimension` Length of the embedding vectors in use - `Optional className` - `Optional keyspace` The keyspace to use. If not provided, 'default_keyspace' is used - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `String name` The name of the data sink. 
- `String projectId` - `SinkType sinkType` - `PINECONE("PINECONE")` - `POSTGRES("POSTGRES")` - `QDRANT("QDRANT")` - `AZUREAI_SEARCH("AZUREAI_SEARCH")` - `MONGODB_ATLAS("MONGODB_ATLAS")` - `MILVUS("MILVUS")` - `ASTRA_DB("ASTRA_DB")` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfig` Schema for an embedding model config. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` The embedding configuration for the embedding model config. - `class AzureOpenAIEmbeddingConfig:` - `class CohereEmbeddingConfig:` - `class GeminiEmbeddingConfig:` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `class OpenAIEmbeddingConfig:` - `class VertexAiEmbeddingConfig:` - `class BedrockEmbeddingConfig:` - `String name` The name of the embedding model config. - `String projectId` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfigId` The ID of the EmbeddingModelConfig this pipeline is using. - `Optional llamaParseParameters` Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline. 
- `Optional adaptiveLongTable` - `Optional aggressiveTableExtraction` - `Optional annotateLinks` - `Optional autoMode` - `Optional autoModeConfigurationJson` - `Optional autoModeTriggerOnImageInPage` - `Optional autoModeTriggerOnRegexpInPage` - `Optional autoModeTriggerOnTableInPage` - `Optional autoModeTriggerOnTextInPage` - `Optional azureOpenAIApiVersion` - `Optional azureOpenAIDeploymentName` - `Optional azureOpenAIEndpoint` - `Optional azureOpenAIKey` - `Optional bboxBottom` - `Optional bboxLeft` - `Optional bboxRight` - `Optional bboxTop` - `Optional boundingBox` - `Optional compactMarkdownTable` - `Optional complementalFormattingInstruction` - `Optional contentGuidelineInstruction` - `Optional continuousMode` - `Optional disableImageExtraction` - `Optional disableOcr` - `Optional disableReconstruction` - `Optional doNotCache` - `Optional doNotUnrollColumns` - `Optional enableCostOptimizer` - `Optional extractCharts` - `Optional extractLayout` - `Optional extractPrintedPageNumber` - `Optional fastMode` - `Optional formattingInstruction` - `Optional gpt4oApiKey` - `Optional gpt4oMode` - `Optional guessXlsxSheetName` - `Optional hideFooters` - `Optional hideHeaders` - `Optional highResOcr` - `Optional htmlMakeAllElementsVisible` - `Optional htmlRemoveFixedElements` - `Optional htmlRemoveNavigationElements` - `Optional httpProxy` - `Optional ignoreDocumentElementsForLayoutDetection` - `Optional> imagesToSave` - `SCREENSHOT("screenshot")` - `EMBEDDED("embedded")` - `LAYOUT("layout")` - `Optional inlineImagesInMarkdown` - `Optional inputS3Path` - `Optional inputS3Region` - `Optional inputUrl` - `Optional internalIsScreenshotJob` - `Optional invalidateCache` - `Optional isFormattingInstruction` - `Optional jobTimeoutExtraTimePerPageInSeconds` - `Optional jobTimeoutInSeconds` - `Optional keepPageSeparatorWhenMergingTables` - `Optional> languages` - `AF("af")` - `AZ("az")` - `BS("bs")` - `CS("cs")` - `CY("cy")` - `DA("da")` - `DE("de")` - `EN("en")` - `ES("es")` - 
`ET("et")` - `FR("fr")` - `GA("ga")` - `HR("hr")` - `HU("hu")` - `ID("id")` - `IS("is")` - `IT("it")` - `KU("ku")` - `LA("la")` - `LT("lt")` - `LV("lv")` - `MI("mi")` - `MS("ms")` - `MT("mt")` - `NL("nl")` - `NO("no")` - `OC("oc")` - `PI("pi")` - `PL("pl")` - `PT("pt")` - `RO("ro")` - `RS_LATIN("rs_latin")` - `SK("sk")` - `SL("sl")` - `SQ("sq")` - `SV("sv")` - `SW("sw")` - `TL("tl")` - `TR("tr")` - `UZ("uz")` - `VI("vi")` - `AR("ar")` - `FA("fa")` - `UG("ug")` - `UR("ur")` - `BN("bn")` - `AS("as")` - `MNI("mni")` - `RU("ru")` - `RS_CYRILLIC("rs_cyrillic")` - `BE("be")` - `BG("bg")` - `UK("uk")` - `MN("mn")` - `ABQ("abq")` - `ADY("ady")` - `KBD("kbd")` - `AVA("ava")` - `DAR("dar")` - `INH("inh")` - `CHE("che")` - `LBE("lbe")` - `LEZ("lez")` - `TAB("tab")` - `TJK("tjk")` - `HI("hi")` - `MR("mr")` - `NE("ne")` - `BH("bh")` - `MAI("mai")` - `ANG("ang")` - `BHO("bho")` - `MAH("mah")` - `SCK("sck")` - `NEW("new")` - `GOM("gom")` - `SA("sa")` - `BGC("bgc")` - `TH("th")` - `CH_SIM("ch_sim")` - `CH_TRA("ch_tra")` - `JA("ja")` - `KO("ko")` - `TA("ta")` - `TE("te")` - `KN("kn")` - `Optional layoutAware` - `Optional lineLevelBoundingBox` - `Optional markdownTableMultilineHeaderSeparator` - `Optional maxPages` - `Optional maxPagesEnforced` - `Optional mergeTablesAcrossPagesInMarkdown` - `Optional model` - `Optional outlinedTableExtraction` - `Optional outputPdfOfDocument` - `Optional outputS3PathPrefix` - `Optional outputS3Region` - `Optional outputTablesAsHtml` - `Optional pageErrorTolerance` - `Optional pageFooterPrefix` - `Optional pageFooterSuffix` - `Optional pageHeaderPrefix` - `Optional pageHeaderSuffix` - `Optional pagePrefix` - `Optional pageSeparator` - `Optional pageSuffix` - `Optional parseMode` Enum for representing the mode of parsing to be used. 
- `PARSE_PAGE_WITHOUT_LLM("parse_page_without_llm")` - `PARSE_PAGE_WITH_LLM("parse_page_with_llm")` - `PARSE_PAGE_WITH_LVM("parse_page_with_lvm")` - `PARSE_PAGE_WITH_AGENT("parse_page_with_agent")` - `PARSE_PAGE_WITH_LAYOUT_AGENT("parse_page_with_layout_agent")` - `PARSE_DOCUMENT_WITH_LLM("parse_document_with_llm")` - `PARSE_DOCUMENT_WITH_LVM("parse_document_with_lvm")` - `PARSE_DOCUMENT_WITH_AGENT("parse_document_with_agent")` - `Optional parsingInstruction` - `Optional preciseBoundingBox` - `Optional premiumMode` - `Optional presentationOutOfBoundsContent` - `Optional presentationSkipEmbeddedData` - `Optional preserveLayoutAlignmentAcrossPages` - `Optional preserveVerySmallText` - `Optional preset` - `Optional priority` The priority for the request. This field may be ignored or overwritten depending on the organization tier. - `LOW("low")` - `MEDIUM("medium")` - `HIGH("high")` - `CRITICAL("critical")` - `Optional projectId` - `Optional removeHiddenText` - `Optional replaceFailedPageMode` Enum for representing the different available page error handling modes. 
- `RAW_TEXT("raw_text")` - `BLANK_PAGE("blank_page")` - `ERROR_MESSAGE("error_message")` - `Optional replaceFailedPageWithErrorMessagePrefix` - `Optional replaceFailedPageWithErrorMessageSuffix` - `Optional saveImages` - `Optional skipDiagonalText` - `Optional specializedChartParsingAgentic` - `Optional specializedChartParsingEfficient` - `Optional specializedChartParsingPlus` - `Optional specializedImageParsing` - `Optional spreadsheetExtractSubTables` - `Optional spreadsheetForceFormulaComputation` - `Optional spreadsheetIncludeHiddenSheets` - `Optional strictModeBuggyFont` - `Optional strictModeImageExtraction` - `Optional strictModeImageOcr` - `Optional strictModeReconstruction` - `Optional structuredOutput` - `Optional structuredOutputJsonSchema` - `Optional structuredOutputJsonSchemaName` - `Optional systemPrompt` - `Optional systemPromptAppend` - `Optional takeScreenshot` - `Optional targetPages` - `Optional tier` - `Optional useVendorMultimodalModel` - `Optional userPrompt` - `Optional vendorMultimodalApiKey` - `Optional vendorMultimodalModelName` - `Optional version` - `Optional> webhookConfigurations` Outbound webhook endpoints to notify on job status changes - `Optional> webhookEvents` Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered. 
- `EXTRACT_PENDING("extract.pending")` - `EXTRACT_SUCCESS("extract.success")` - `EXTRACT_ERROR("extract.error")` - `EXTRACT_PARTIAL_SUCCESS("extract.partial_success")` - `EXTRACT_CANCELLED("extract.cancelled")` - `PARSE_PENDING("parse.pending")` - `PARSE_RUNNING("parse.running")` - `PARSE_SUCCESS("parse.success")` - `PARSE_ERROR("parse.error")` - `PARSE_PARTIAL_SUCCESS("parse.partial_success")` - `PARSE_CANCELLED("parse.cancelled")` - `CLASSIFY_PENDING("classify.pending")` - `CLASSIFY_RUNNING("classify.running")` - `CLASSIFY_SUCCESS("classify.success")` - `CLASSIFY_ERROR("classify.error")` - `CLASSIFY_PARTIAL_SUCCESS("classify.partial_success")` - `CLASSIFY_CANCELLED("classify.cancelled")` - `SHEETS_PENDING("sheets.pending")` - `SHEETS_SUCCESS("sheets.success")` - `SHEETS_ERROR("sheets.error")` - `SHEETS_PARTIAL_SUCCESS("sheets.partial_success")` - `SHEETS_CANCELLED("sheets.cancelled")` - `UNMAPPED_EVENT("unmapped_event")` - `Optional webhookHeaders` Custom HTTP headers sent with each webhook request (e.g. auth tokens) - `Optional webhookOutputFormat` Response format sent to the webhook: 'string' (default) or 'json' - `Optional webhookUrl` URL to receive webhook POST notifications - `Optional webhookUrl` - `Optional managedPipelineId` The ID of the ManagedPipeline this playground pipeline is linked to. - `Optional metadataConfig` Metadata configuration for the pipeline. - `Optional<List<String>> excludedEmbedMetadataKeys` List of metadata keys to exclude from embeddings - `Optional<List<String>> excludedLlmMetadataKeys` List of metadata keys to exclude from LLM during retrieval - `Optional pipelineType` Type of pipeline. Either PLAYGROUND or MANAGED. - `PLAYGROUND("PLAYGROUND")` - `MANAGED("MANAGED")` - `Optional presetRetrievalParameters` Preset retrieval parameters for the pipeline. - `Optional alpha` Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval. 
- `Optional className` - `Optional denseSimilarityCutoff` Minimum similarity score with respect to the query for retrieval - `Optional denseSimilarityTopK` Number of nodes for dense retrieval. - `Optional enableReranking` Enable reranking for retrieval - `Optional filesTopK` Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content). - `Optional rerankTopN` Number of reranked nodes to return. - `Optional retrievalMode` The retrieval mode for the query. - `CHUNKS("chunks")` - `FILES_VIA_METADATA("files_via_metadata")` - `FILES_VIA_CONTENT("files_via_content")` - `AUTO_ROUTED("auto_routed")` - `Optional retrieveImageNodes` Whether to retrieve image nodes. - `Optional retrievePageFigureNodes` Whether to retrieve page figure nodes. - `Optional retrievePageScreenshotNodes` Whether to retrieve page screenshot nodes. - `Optional searchFilters` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators. Value uses strict types, because int, float and str are compatible types and were previously all converted to string. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. - `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` - `Optional searchFiltersInferenceSchema` JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference. 
- `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional sparseSimilarityTopK` Number of nodes for sparse retrieval. - `Optional sparseModelConfig` Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks. - `Optional className` - `Optional modelType` The sparse model type to use. 'bm25' uses Qdrant's FastEmbed BM25 model (default for new pipelines), 'splade' uses HuggingFace Splade model, 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade). - `SPLADE("splade")` - `BM25("bm25")` - `AUTO("auto")` - `Optional status` Status of the pipeline. - `CREATED("CREATED")` - `DELETING("DELETING")` - `Optional transformConfig` Configuration for the transformation. - `class AutoTransformConfig:` - `Optional chunkOverlap` Chunk overlap for the transformation. - `Optional chunkSize` Chunk size for the transformation. - `Optional mode` - `AUTO("auto")` - `class AdvancedModeTransformConfig:` - `Optional chunkingConfig` Configuration for the chunking. - `class NoneChunkingConfig:` - `Optional mode` - `NONE("none")` - `class CharacterChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `CHARACTER("character")` - `class TokenChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `TOKEN("token")` - `Optional separator` - `class SentenceChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `SENTENCE("sentence")` - `Optional paragraphSeparator` - `Optional separator` - `class SemanticChunkingConfig:` - `Optional breakpointPercentileThreshold` - `Optional bufferSize` - `Optional mode` - `SEMANTIC("semantic")` - `Optional mode` - `ADVANCED("advanced")` - `Optional segmentationConfig` Configuration for the segmentation. 
- `class NoneSegmentationConfig:` - `Optional mode` - `NONE("none")` - `class PageSegmentationConfig:` - `Optional mode` - `PAGE("page")` - `Optional pageSeparator` - `class ElementSegmentationConfig:` - `Optional mode` - `ELEMENT("element")` - `Optional updatedAt` Update datetime ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.Pipeline; import com.llamacloud_prod.api.models.pipelines.sync.SyncCancelParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); Pipeline pipeline = client.pipelines().sync().cancel("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` #### Response ```json { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "class_name": "class_name", "embed_batch_size": 1, "model_name": "openai-text-embedding-3-small", "num_workers": 0 }, "type": "MANAGED_OPENAI_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "config_hash": { "embedding_config_hash": "embedding_config_hash", "parsing_config_hash": "parsing_config_hash", "transform_config_hash": "transform_config_hash" }, "created_at": "2019-12-27T18:11:19.117Z", "data_sink": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "component": { "foo": "bar" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "sink_type": "PINECONE", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "additional_kwargs": { "foo": "bar" }, "api_base": "api_base", "api_key": "api_key", "api_version": "api_version", "azure_deployment": "azure_deployment", "azure_endpoint": "azure_endpoint", "class_name": "class_name", 
"default_headers": { "foo": "string" }, "dimensions": 0, "embed_batch_size": 1, "max_retries": 0, "model_name": "model_name", "num_workers": 0, "reuse_client": true, "timeout": 0 }, "type": "AZURE_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "llama_parse_parameters": { "adaptive_long_table": true, "aggressive_table_extraction": true, "annotate_links": true, "auto_mode": true, "auto_mode_configuration_json": "auto_mode_configuration_json", "auto_mode_trigger_on_image_in_page": true, "auto_mode_trigger_on_regexp_in_page": "auto_mode_trigger_on_regexp_in_page", "auto_mode_trigger_on_table_in_page": true, "auto_mode_trigger_on_text_in_page": "auto_mode_trigger_on_text_in_page", "azure_openai_api_version": "azure_openai_api_version", "azure_openai_deployment_name": "azure_openai_deployment_name", "azure_openai_endpoint": "azure_openai_endpoint", "azure_openai_key": "azure_openai_key", "bbox_bottom": 0, "bbox_left": 0, "bbox_right": 0, "bbox_top": 0, "bounding_box": "bounding_box", "compact_markdown_table": true, "complemental_formatting_instruction": "complemental_formatting_instruction", "content_guideline_instruction": "content_guideline_instruction", "continuous_mode": true, "disable_image_extraction": true, "disable_ocr": true, "disable_reconstruction": true, "do_not_cache": true, "do_not_unroll_columns": true, "enable_cost_optimizer": true, "extract_charts": true, "extract_layout": true, "extract_printed_page_number": true, "fast_mode": true, "formatting_instruction": "formatting_instruction", "gpt4o_api_key": "gpt4o_api_key", "gpt4o_mode": true, "guess_xlsx_sheet_name": true, "hide_footers": true, "hide_headers": true, "high_res_ocr": true, "html_make_all_elements_visible": true, "html_remove_fixed_elements": true, "html_remove_navigation_elements": true, "http_proxy": 
"http_proxy", "ignore_document_elements_for_layout_detection": true, "images_to_save": [ "screenshot" ], "inline_images_in_markdown": true, "input_s3_path": "input_s3_path", "input_s3_region": "input_s3_region", "input_url": "input_url", "internal_is_screenshot_job": true, "invalidate_cache": true, "is_formatting_instruction": true, "job_timeout_extra_time_per_page_in_seconds": 0, "job_timeout_in_seconds": 0, "keep_page_separator_when_merging_tables": true, "languages": [ "af" ], "layout_aware": true, "line_level_bounding_box": true, "markdown_table_multiline_header_separator": "markdown_table_multiline_header_separator", "max_pages": 0, "max_pages_enforced": 0, "merge_tables_across_pages_in_markdown": true, "model": "model", "outlined_table_extraction": true, "output_pdf_of_document": true, "output_s3_path_prefix": "output_s3_path_prefix", "output_s3_region": "output_s3_region", "output_tables_as_HTML": true, "page_error_tolerance": 0, "page_footer_prefix": "page_footer_prefix", "page_footer_suffix": "page_footer_suffix", "page_header_prefix": "page_header_prefix", "page_header_suffix": "page_header_suffix", "page_prefix": "page_prefix", "page_separator": "page_separator", "page_suffix": "page_suffix", "parse_mode": "parse_page_without_llm", "parsing_instruction": "parsing_instruction", "precise_bounding_box": true, "premium_mode": true, "presentation_out_of_bounds_content": true, "presentation_skip_embedded_data": true, "preserve_layout_alignment_across_pages": true, "preserve_very_small_text": true, "preset": "preset", "priority": "low", "project_id": "project_id", "remove_hidden_text": true, "replace_failed_page_mode": "raw_text", "replace_failed_page_with_error_message_prefix": "replace_failed_page_with_error_message_prefix", "replace_failed_page_with_error_message_suffix": "replace_failed_page_with_error_message_suffix", "save_images": true, "skip_diagonal_text": true, "specialized_chart_parsing_agentic": true, "specialized_chart_parsing_efficient": true, 
"specialized_chart_parsing_plus": true, "specialized_image_parsing": true, "spreadsheet_extract_sub_tables": true, "spreadsheet_force_formula_computation": true, "spreadsheet_include_hidden_sheets": true, "strict_mode_buggy_font": true, "strict_mode_image_extraction": true, "strict_mode_image_ocr": true, "strict_mode_reconstruction": true, "structured_output": true, "structured_output_json_schema": "structured_output_json_schema", "structured_output_json_schema_name": "structured_output_json_schema_name", "system_prompt": "system_prompt", "system_prompt_append": "system_prompt_append", "take_screenshot": true, "target_pages": "target_pages", "tier": "tier", "use_vendor_multimodal_model": true, "user_prompt": "user_prompt", "vendor_multimodal_api_key": "vendor_multimodal_api_key", "vendor_multimodal_model_name": "vendor_multimodal_model_name", "version": "version", "webhook_configurations": [ { "webhook_events": [ "parse.success", "parse.error" ], "webhook_headers": { "Authorization": "Bearer sk-..." 
}, "webhook_output_format": "json", "webhook_url": "https://example.com/webhooks/llamacloud" } ], "webhook_url": "webhook_url" }, "managed_pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "metadata_config": { "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ] }, "pipeline_type": "PLAYGROUND", "preset_retrieval_parameters": { "alpha": 0, "class_name": "class_name", "dense_similarity_cutoff": 0, "dense_similarity_top_k": 1, "enable_reranking": true, "files_top_k": 1, "rerank_top_n": 1, "retrieval_mode": "chunks", "retrieve_image_nodes": true, "retrieve_page_figure_nodes": true, "retrieve_page_screenshot_nodes": true, "search_filters": { "filters": [ { "key": "key", "value": 0, "operator": "==" } ], "condition": "and" }, "search_filters_inference_schema": { "foo": { "foo": "bar" } }, "sparse_similarity_top_k": 1 }, "sparse_model_config": { "class_name": "class_name", "model_type": "splade" }, "status": "CREATED", "transform_config": { "chunk_overlap": 0, "chunk_size": 1, "mode": "auto" }, "updated_at": "2019-12-27T18:11:19.117Z" } ``` # Data Sources ## List Pipeline Data Sources `List<PipelineDataSource> pipelines().dataSources().getDataSources(DataSourceGetDataSourcesParams params = DataSourceGetDataSourcesParams.none(), RequestOptions requestOptions = RequestOptions.none())` **get** `/api/v1/pipelines/{pipeline_id}/data-sources` Get data sources for a pipeline.
### Parameters - `DataSourceGetDataSourcesParams params` - `Optional<String> pipelineId` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.datasources.DataSourceGetDataSourcesParams; import com.llamacloud_prod.api.models.pipelines.datasources.PipelineDataSource; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); List<PipelineDataSource> pipelineDataSources = client.pipelines().dataSources().getDataSources("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` #### Response ```json [ { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "component": { "foo": "bar" }, "data_source_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "last_synced_at": "2019-12-27T18:11:19.117Z", "name": "name", "pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "source_type": "S3", "created_at": "2019-12-27T18:11:19.117Z", "custom_metadata": { "foo": { "foo": "bar" } }, "status": "NOT_STARTED", "status_updated_at": "2019-12-27T18:11:19.117Z", "sync_interval": 0, "sync_schedule_set_by": "sync_schedule_set_by", "updated_at": "2019-12-27T18:11:19.117Z", "version_metadata": { "reader_version": "1.0" } } ] ``` ## Add Data Sources To Pipeline `List<PipelineDataSource> pipelines().dataSources().updateDataSources(DataSourceUpdateDataSourcesParams params, RequestOptions requestOptions = RequestOptions.none())` **put** `/api/v1/pipelines/{pipeline_id}/data-sources` Add data sources to a pipeline. ### Parameters - `DataSourceUpdateDataSourcesParams params` - `Optional<String> pipelineId` - `List<Body> body` - `String dataSourceId` The ID of the data source. - `Optional syncInterval` The interval at which the data source should be synced.
Valid values are: 21600, 43200, 86400 ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.datasources.DataSourceUpdateDataSourcesParams; import com.llamacloud_prod.api.models.pipelines.datasources.PipelineDataSource; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DataSourceUpdateDataSourcesParams params = DataSourceUpdateDataSourcesParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .addBody(DataSourceUpdateDataSourcesParams.Body.builder() .dataSourceId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .build()) .build(); List<PipelineDataSource> pipelineDataSources = client.pipelines().dataSources().updateDataSources(params); } } ``` #### Response ```json [ { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "component": { "foo": "bar" }, "data_source_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "last_synced_at": "2019-12-27T18:11:19.117Z", "name": "name", "pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "source_type": "S3", "created_at": "2019-12-27T18:11:19.117Z", "custom_metadata": { "foo": { "foo": "bar" } }, "status": "NOT_STARTED", "status_updated_at": "2019-12-27T18:11:19.117Z", "sync_interval": 0, "sync_schedule_set_by": "sync_schedule_set_by", "updated_at": "2019-12-27T18:11:19.117Z", "version_metadata": { "reader_version": "1.0" } } ] ``` ## Update Pipeline Data Source `PipelineDataSource pipelines().dataSources().update(DataSourceUpdateParams params, RequestOptions requestOptions = RequestOptions.none())` **put** `/api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}` Update the configuration of a data source in a pipeline.
### Parameters - `DataSourceUpdateParams params` - `String pipelineId` - `Optional dataSourceId` - `Optional syncInterval` The interval at which the data source should be synced. ### Returns - `class PipelineDataSource:` Schema for a data source in a pipeline. - `String id` Unique identifier - `Component component` Component that implements the data source - `class UnionMember0:` - `class CloudS3DataSource:` - `String bucket` The name of the S3 bucket to read from. - `Optional awsAccessId` The AWS access ID to use for authentication. - `Optional awsAccessSecret` The AWS access secret to use for authentication. - `Optional className` - `Optional prefix` The prefix of the S3 objects to read from. - `Optional regexPattern` The regex pattern to filter S3 objects. Must be a valid regex pattern. - `Optional s3EndpointUrl` The S3 endpoint URL to use for authentication. - `Optional supportsAccessControl` - `class CloudAzStorageBlobDataSource:` - `String accountUrl` The Azure Storage Blob account URL to use for authentication. - `String containerName` The name of the Azure Storage Blob container to read from. - `Optional accountKey` The Azure Storage Blob account key to use for authentication. - `Optional accountName` The Azure Storage Blob account name to use for authentication. - `Optional blob` The blob name to read from. - `Optional className` - `Optional clientId` The Azure AD client ID to use for authentication. - `Optional clientSecret` The Azure AD client secret to use for authentication. - `Optional prefix` The prefix of the Azure Storage Blob objects to read from. - `Optional supportsAccessControl` - `Optional tenantId` The Azure AD tenant ID to use for authentication. - `class CloudGoogleDriveDataSource:` - `String folderId` The ID of the Google Drive folder to read from. 
- `Optional className` - `Optional serviceAccountKey` A dictionary containing secret values - `Optional supportsAccessControl` - `class CloudOneDriveDataSource:` - `String clientId` The client ID to use for authentication. - `String clientSecret` The client secret to use for authentication. - `String tenantId` The tenant ID to use for authentication. - `String userPrincipalName` The user principal name to use for authentication. - `Optional className` - `Optional folderId` The ID of the OneDrive folder to read from. - `Optional folderPath` The path of the OneDrive folder to read from. - `Optional> requiredExts` The list of required file extensions. - `Optional supportsAccessControl` - `TRUE(true)` - `class CloudSharepointDataSource:` - `String clientId` The client ID to use for authentication. - `String clientSecret` The client secret to use for authentication. - `String tenantId` The tenant ID to use for authentication. - `Optional className` - `Optional driveName` The name of the Sharepoint drive to read from. - `Optional> excludePathPatterns` List of regex patterns for file paths to exclude. Files whose paths (including filename) match any pattern will be excluded. Example: ['/temp/', '/backup/', '.git/', '.tmp$', '^~'] - `Optional folderId` The ID of the Sharepoint folder to read from. - `Optional folderPath` The path of the Sharepoint folder to read from. - `Optional getPermissions` Whether to get permissions for the sharepoint site. - `Optional> includePathPatterns` List of regex patterns for file paths to include. Full paths (including filename) must match at least one pattern to be included. Example: ['/reports/', '/docs/.*.pdf$', '^Report.*.pdf$'] - `Optional> requiredExts` The list of required file extensions. - `Optional siteId` The ID of the SharePoint site to download from. - `Optional siteName` The name of the SharePoint site to download from. 
- `Optional supportsAccessControl` - `TRUE(true)` - `class CloudSlackDataSource:` - `String slackToken` Slack Bot Token. - `Optional channelIds` Slack Channel. - `Optional channelPatterns` Slack Channel name pattern. - `Optional className` - `Optional earliestDate` Earliest date. - `Optional earliestDateTimestamp` Earliest date timestamp. - `Optional latestDate` Latest date. - `Optional latestDateTimestamp` Latest date timestamp. - `Optional supportsAccessControl` - `class CloudNotionPageDataSource:` - `String integrationToken` The integration token to use for authentication. - `Optional className` - `Optional databaseIds` The Notion Database Id to read content from. - `Optional pageIds` The Page ID's of the Notion to read from. - `Optional supportsAccessControl` - `class CloudConfluenceDataSource:` - `String authenticationMechanism` Type of Authentication for connecting to Confluence APIs. - `String serverUrl` The server URL of the Confluence instance. - `Optional apiToken` The API token to use for authentication. - `Optional className` - `Optional cql` The CQL query to use for fetching pages. - `Optional failureHandling` Configuration for handling failures during processing. Key-value object controlling failure handling behaviors. Example: { "skip_list_failures": true } Currently supports: - skip_list_failures: Skip failed batches/lists and continue processing - `Optional skipListFailures` Whether to skip failed batches/lists and continue processing - `Optional indexRestrictedPages` Whether to index restricted pages. - `Optional keepMarkdownFormat` Whether to keep the markdown format. - `Optional label` The label to use for fetching pages. - `Optional pageIds` The page IDs of the Confluence to read from. - `Optional spaceKey` The space key to read from. - `Optional supportsAccessControl` - `Optional userName` The username to use for authentication. - `class CloudJiraDataSource:` Cloud Jira Data Source integrating JiraReader. 
- `String authenticationMechanism` Type of Authentication for connecting to Jira APIs. - `String query` JQL (Jira Query Language) query to search. - `Optional apiToken` The API/ Access Token used for Basic, PAT and OAuth2 authentication. - `Optional className` - `Optional cloudId` The cloud ID, used in case of OAuth2. - `Optional email` The email address to use for authentication. - `Optional serverUrl` The server url for Jira Cloud. - `Optional supportsAccessControl` - `class CloudJiraDataSourceV2:` Cloud Jira Data Source integrating JiraReaderV2. - `String authenticationMechanism` Type of Authentication for connecting to Jira APIs. - `String query` JQL (Jira Query Language) query to search. - `String serverUrl` The server url for Jira Cloud. - `Optional apiToken` The API Access Token used for Basic, PAT and OAuth2 authentication. - `Optional apiVersion` Jira REST API version to use (2 or 3). 3 supports Atlassian Document Format (ADF). - `_2("2")` - `_3("3")` - `Optional className` - `Optional cloudId` The cloud ID, used in case of OAuth2. - `Optional email` The email address to use for authentication. - `Optional expand` Fields to expand in the response. - `Optional> fields` List of fields to retrieve from Jira. If None, retrieves all fields. - `Optional getPermissions` Whether to fetch project role permissions and issue-level security - `Optional requestsPerMinute` Rate limit for Jira API requests per minute. - `Optional supportsAccessControl` - `class CloudBoxDataSource:` - `AuthenticationMechanism authenticationMechanism` The type of authentication to use (Developer Token or CCG) - `DEVELOPER_TOKEN("developer_token")` - `CCG("ccg")` - `Optional className` - `Optional clientId` Box API key used for identifying the application the user is authenticating with - `Optional clientSecret` Box API secret used for making auth requests. - `Optional developerToken` Developer token for authentication if authentication_mechanism is 'developer_token'. 
- `Optional enterpriseId` Box Enterprise ID, if provided authenticates as service. - `Optional folderId` The ID of the Box folder to read from. - `Optional supportsAccessControl` - `Optional userId` Box User ID, if provided authenticates as user. - `String dataSourceId` The ID of the data source. - `LocalDateTime lastSyncedAt` The last time the data source was automatically synced. - `String name` The name of the data source. - `String pipelineId` The ID of the pipeline. - `String projectId` - `SourceType sourceType` - `S3("S3")` - `AZURE_STORAGE_BLOB("AZURE_STORAGE_BLOB")` - `GOOGLE_DRIVE("GOOGLE_DRIVE")` - `MICROSOFT_ONEDRIVE("MICROSOFT_ONEDRIVE")` - `MICROSOFT_SHAREPOINT("MICROSOFT_SHAREPOINT")` - `SLACK("SLACK")` - `NOTION_PAGE("NOTION_PAGE")` - `CONFLUENCE("CONFLUENCE")` - `JIRA("JIRA")` - `JIRA_V2("JIRA_V2")` - `BOX("BOX")` - `Optional createdAt` Creation datetime - `Optional customMetadata` Custom metadata that will be present on all data loaded from the data source - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional status` The status of the data source in the pipeline. - `NOT_STARTED("NOT_STARTED")` - `IN_PROGRESS("IN_PROGRESS")` - `SUCCESS("SUCCESS")` - `ERROR("ERROR")` - `CANCELLED("CANCELLED")` - `Optional statusUpdatedAt` The last time the status was updated. - `Optional syncInterval` The interval at which the data source should be synced. - `Optional syncScheduleSetBy` The id of the user who set the sync schedule. - `Optional updatedAt` Update datetime - `Optional versionMetadata` Version metadata for the data source - `Optional readerVersion` The version of the reader to use for this data source. 
- `_1_0("1.0")` - `_2_0("2.0")` - `_2_1("2.1")` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.datasources.DataSourceUpdateParams; import com.llamacloud_prod.api.models.pipelines.datasources.PipelineDataSource; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DataSourceUpdateParams params = DataSourceUpdateParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .dataSourceId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .build(); PipelineDataSource pipelineDataSource = client.pipelines().dataSources().update(params); } } ``` #### Response ```json { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "component": { "foo": "bar" }, "data_source_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "last_synced_at": "2019-12-27T18:11:19.117Z", "name": "name", "pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "source_type": "S3", "created_at": "2019-12-27T18:11:19.117Z", "custom_metadata": { "foo": { "foo": "bar" } }, "status": "NOT_STARTED", "status_updated_at": "2019-12-27T18:11:19.117Z", "sync_interval": 0, "sync_schedule_set_by": "sync_schedule_set_by", "updated_at": "2019-12-27T18:11:19.117Z", "version_metadata": { "reader_version": "1.0" } } ``` ## Get Pipeline Data Source Status `ManagedIngestionStatusResponse pipelines().dataSources().getStatus(DataSourceGetStatusParams params, RequestOptions requestOptions = RequestOptions.none())` **get** `/api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}/status` Get the status of a data source for a pipeline.
### Parameters - `DataSourceGetStatusParams params` - `String pipelineId` - `Optional dataSourceId` ### Returns - `class ManagedIngestionStatusResponse:` - `Status status` Status of the ingestion. - `NOT_STARTED("NOT_STARTED")` - `IN_PROGRESS("IN_PROGRESS")` - `SUCCESS("SUCCESS")` - `ERROR("ERROR")` - `PARTIAL_SUCCESS("PARTIAL_SUCCESS")` - `CANCELLED("CANCELLED")` - `Optional deploymentDate` Date of the deployment. - `Optional effectiveAt` When the status is effective - `Optional> error` List of errors that occurred during ingestion. - `String jobId` ID of the job that failed. - `String message` List of errors that occurred during ingestion. - `Step step` Name of the job that failed. - `MANAGED_INGESTION("MANAGED_INGESTION")` - `DATA_SOURCE("DATA_SOURCE")` - `FILE_UPDATER("FILE_UPDATER")` - `PARSE("PARSE")` - `TRANSFORM("TRANSFORM")` - `INGESTION("INGESTION")` - `METADATA_UPDATE("METADATA_UPDATE")` - `Optional jobId` ID of the latest job. ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.ManagedIngestionStatusResponse; import com.llamacloud_prod.api.models.pipelines.datasources.DataSourceGetStatusParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DataSourceGetStatusParams params = DataSourceGetStatusParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .dataSourceId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .build(); ManagedIngestionStatusResponse managedIngestionStatusResponse = client.pipelines().dataSources().getStatus(params); } } ``` #### Response ```json { "status": "NOT_STARTED", "deployment_date": "2019-12-27T18:11:19.117Z", "effective_at": "2019-12-27T18:11:19.117Z", "error": [ { "job_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "message": "message", "step": 
"MANAGED_INGESTION" } ], "job_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e" } ``` ## Sync Pipeline Data Source `Pipeline pipelines().dataSources().sync(DataSourceSyncParamsparams, RequestOptionsrequestOptions = RequestOptions.none())` **post** `/api/v1/pipelines/{pipeline_id}/data-sources/{data_source_id}/sync` Run incremental ingestion: pull upstream changes from the data source into the data sink. ### Parameters - `DataSourceSyncParams params` - `String pipelineId` - `Optional dataSourceId` - `Optional> pipelineFileIds` ### Returns - `class Pipeline:` Schema for a pipeline. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` - `class ManagedOpenAIEmbedding:` - `Optional component` Configuration for the Managed OpenAI embedding model. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The name of the OpenAI embedding model. - `OPENAI_TEXT_EMBEDDING_3_SMALL("openai-text-embedding-3-small")` - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. - `MANAGED_OPENAI_EMBEDDING("MANAGED_OPENAI_EMBEDDING")` - `class AzureOpenAIEmbeddingConfig:` - `Optional component` Configuration for the Azure OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for Azure deployment. - `Optional apiKey` The OpenAI API key. - `Optional apiVersion` The version for Azure OpenAI API. - `Optional azureDeployment` The Azure deployment to use. - `Optional azureEndpoint` The Azure endpoint to use. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. 
- `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `AZURE_EMBEDDING("AZURE_EMBEDDING")` - `class CohereEmbeddingConfig:` - `Optional component` Configuration for the Cohere embedding model. - `Optional apiKey` The Cohere API key. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embeddingType` Embedding type. If not provided float embedding_type is used when needed. - `Optional inputType` Model Input type. If not provided, search_document and search_query are used when needed. - `Optional modelName` The modelId of the Cohere model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional truncate` Truncation type - START/ END/ NONE - `Optional type` Type of the embedding model. - `COHERE_EMBEDDING("COHERE_EMBEDDING")` - `class GeminiEmbeddingConfig:` - `Optional component` Configuration for the Gemini embedding model. - `Optional apiBase` API base to access the model. Defaults to None. - `Optional apiKey` API key to access the model. Defaults to None. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional modelName` The modelId of the Gemini model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional outputDimensionality` Optional reduced dimension for output embeddings. Supported by models/text-embedding-004 and newer (e.g. gemini-embedding-001). Not supported by models/embedding-001. - `Optional taskType` The task for embedding model. - `Optional title` Title is only applicable for retrieval_document tasks, and is used to represent a document title. For other tasks, title is invalid. 
- `Optional transport` Transport to access the model. Defaults to None. - `Optional type` Type of the embedding model. - `GEMINI_EMBEDDING("GEMINI_EMBEDDING")` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `Optional component` Configuration for the HuggingFace Inference API embedding model. - `Optional token` Hugging Face token. Will default to the locally saved token. Pass token=False if you don’t want to send your token to the server. - `String` - `boolean` - `Optional className` - `Optional cookies` Additional cookies to send to the server. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional headers` Additional headers to send to the server. By default only the authorization and user-agent headers are sent. Values in this dictionary will override the default values. - `Optional modelName` Hugging Face model name. If None, the task will be used. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional pooling` Enum of possible pooling choices with pooling behaviors. - `CLS("cls")` - `MEAN("mean")` - `LAST("last")` - `Optional queryInstruction` Instruction to prepend during query embedding. - `Optional task` Optional task to pick Hugging Face's recommended model, used when model_name is left as default of None. - `Optional textInstruction` Instruction to prepend during text embedding. - `Optional timeout` The maximum number of seconds to wait for a response from the server. Loading a new model in Inference API can take up to several minutes. Defaults to None, meaning it will loop until the server is available. - `Optional type` Type of the embedding model. - `HUGGINGFACE_API_EMBEDDING("HUGGINGFACE_API_EMBEDDING")` - `class OpenAIEmbeddingConfig:` - `Optional component` Configuration for the OpenAI embedding model. - `Optional additionalKwargs` Additional kwargs for the OpenAI API. - `Optional apiBase` The base URL for OpenAI API. - `Optional apiKey` The OpenAI API key. 
- `Optional apiVersion` The version for OpenAI API. - `Optional className` - `Optional defaultHeaders` The default headers for API requests. - `Optional dimensions` The number of dimensions on the output embedding vectors. Works only with v3 embedding models. - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` Maximum number of retries. - `Optional modelName` The name of the OpenAI embedding model. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional reuseClient` Reuse the OpenAI client between requests. When doing anything with large volumes of async API calls, setting this to false can improve stability. - `Optional timeout` Timeout for each request. - `Optional type` Type of the embedding model. - `OPENAI_EMBEDDING("OPENAI_EMBEDDING")` - `class VertexAiEmbeddingConfig:` - `Optional component` Configuration for the VertexAI embedding model. - `Optional clientEmail` The client email for the VertexAI credentials. - `String location` The default location to use when making API calls. - `Optional privateKey` The private key for the VertexAI credentials. - `Optional privateKeyId` The private key ID for the VertexAI credentials. - `String project` The default GCP project to use when making Vertex API calls. - `Optional tokenUri` The token URI for the VertexAI credentials. - `Optional additionalKwargs` Additional kwargs for the Vertex. - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional embedMode` The embedding mode to use. - `DEFAULT("default")` - `CLASSIFICATION("classification")` - `CLUSTERING("clustering")` - `SIMILARITY("similarity")` - `RETRIEVAL("retrieval")` - `Optional modelName` The modelId of the VertexAI model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional type` Type of the embedding model. 
- `VERTEXAI_EMBEDDING("VERTEXAI_EMBEDDING")` - `class BedrockEmbeddingConfig:` - `Optional component` Configuration for the Bedrock embedding model. - `Optional additionalKwargs` Additional kwargs for the bedrock client. - `Optional awsAccessKeyId` AWS Access Key ID to use - `Optional awsSecretAccessKey` AWS Secret Access Key to use - `Optional awsSessionToken` AWS Session Token to use - `Optional className` - `Optional embedBatchSize` The batch size for embedding calls. - `Optional maxRetries` The maximum number of API retries. - `Optional modelName` The modelId of the Bedrock model to use. - `Optional numWorkers` The number of workers to use for async embedding calls. - `Optional profileName` The name of aws profile to use. If not given, then the default profile is used. - `Optional regionName` AWS region name to use. Uses region configured in AWS CLI if not passed - `Optional timeout` The timeout for the Bedrock API request in seconds. It will be used for both connect and read timeouts. - `Optional type` Type of the embedding model. - `BEDROCK_EMBEDDING("BEDROCK_EMBEDDING")` - `String name` - `String projectId` - `Optional configHash` Hashes for the configuration of a pipeline. - `Optional embeddingConfigHash` Hash of the embedding config. - `Optional parsingConfigHash` Hash of the llama parse parameters. - `Optional transformConfigHash` Hash of the transform config. - `Optional createdAt` Creation datetime - `Optional dataSink` Schema for a data sink. - `String id` Unique identifier - `Component component` Component that implements the data sink - `class UnionMember0:` - `class CloudPineconeVectorStore:` Cloud Pinecone Vector Store. This class is used to store the configuration for a Pinecone vector store, so that it can be created and used in LlamaCloud. 
Args: api_key (str): API key for authenticating with Pinecone index_name (str): name of the Pinecone index namespace (optional[str]): namespace to use in the Pinecone index insert_kwargs (optional[dict]): additional kwargs to pass during insertion - `String apiKey` The API key for authenticating with Pinecone - `String indexName` - `Optional className` - `Optional insertKwargs` - `Optional namespace` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudPostgresVectorStore:` - `String database` - `long embedDim` - `String host` - `String password` - `long port` - `String schemaName` - `String tableName` - `String user` - `Optional className` - `Optional hnswSettings` HNSW settings for PGVector. - `Optional distanceMethod` The distance method to use. - `L2("l2")` - `IP("ip")` - `COSINE("cosine")` - `L1("l1")` - `HAMMING("hamming")` - `JACCARD("jaccard")` - `Optional efConstruction` The number of edges to use during the construction phase. - `Optional efSearch` The number of edges to use during the search phase. - `Optional m` The number of bi-directional links created for each new element. - `Optional vectorType` The type of vector to use. - `VECTOR("vector")` - `HALF_VEC("half_vec")` - `BIT("bit")` - `SPARSE_VEC("sparse_vec")` - `Optional hybridSearch` - `Optional performSetup` - `Optional supportsNestedMetadataFilters` - `class CloudQdrantVectorStore:` Cloud Qdrant Vector Store. This class is used to store the configuration for a Qdrant vector store, so that it can be created and used in LlamaCloud. Args: collection_name (str): name of the Qdrant collection url (str): url of the Qdrant instance api_key (str): API key for authenticating with Qdrant max_retries (int): maximum number of retries in case of a failure. 
Defaults to 3 client_kwargs (dict): additional kwargs to pass to the Qdrant client - `String apiKey` - `String collectionName` - `String url` - `Optional className` - `Optional clientKwargs` - `Optional maxRetries` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `class CloudAzureAiSearchVectorStore:` Cloud Azure AI Search Vector Store. - `String searchServiceApiKey` - `String searchServiceEndpoint` - `Optional className` - `Optional clientId` - `Optional clientSecret` - `Optional embeddingDimension` - `Optional filterableMetadataFieldKeys` - `Optional indexName` - `Optional searchServiceApiVersion` - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `Optional tenantId` - `class CloudMongoDBAtlasVectorSearch:` Cloud MongoDB Atlas Vector Store. This class is used to store the configuration for a MongoDB Atlas vector store, so that it can be created and used in LlamaCloud. Args: mongodb_uri (str): URI for connecting to MongoDB Atlas db_name (str): name of the MongoDB database collection_name (str): name of the MongoDB collection vector_index_name (str): name of the MongoDB Atlas vector index fulltext_index_name (str): name of the MongoDB Atlas full-text index - `String collectionName` - `String dbName` - `String mongoDBUri` - `Optional className` - `Optional embeddingDimension` - `Optional fulltextIndexName` - `Optional supportsNestedMetadataFilters` - `Optional vectorIndexName` - `class CloudMilvusVectorStore:` Cloud Milvus Vector Store. - `String uri` - `Optional token` - `Optional className` - `Optional collectionName` - `Optional embeddingDimension` - `Optional supportsNestedMetadataFilters` - `class CloudAstraDbVectorStore:` Cloud AstraDB Vector Store. This class is used to store the configuration for an AstraDB vector store, so that it can be created and used in LlamaCloud. Args: token (str): The Astra DB Application Token to use. api_endpoint (str): The Astra DB JSON API endpoint for your database. 
collection_name (str): Collection name to use. If not existing, it will be created. embedding_dimension (int): Length of the embedding vectors in use. keyspace (optional[str]): The keyspace to use. If not provided, 'default_keyspace' - `String token` The Astra DB Application Token to use - `String apiEndpoint` The Astra DB JSON API endpoint for your database - `String collectionName` Collection name to use. If not existing, it will be created - `long embeddingDimension` Length of the embedding vectors in use - `Optional className` - `Optional keyspace` The keyspace to use. If not provided, 'default_keyspace' - `Optional supportsNestedMetadataFilters` - `TRUE(true)` - `String name` The name of the data sink. - `String projectId` - `SinkType sinkType` - `PINECONE("PINECONE")` - `POSTGRES("POSTGRES")` - `QDRANT("QDRANT")` - `AZUREAI_SEARCH("AZUREAI_SEARCH")` - `MONGODB_ATLAS("MONGODB_ATLAS")` - `MILVUS("MILVUS")` - `ASTRA_DB("ASTRA_DB")` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfig` Schema for an embedding model config. - `String id` Unique identifier - `EmbeddingConfig embeddingConfig` The embedding configuration for the embedding model config. - `class AzureOpenAIEmbeddingConfig:` - `class CohereEmbeddingConfig:` - `class GeminiEmbeddingConfig:` - `class HuggingFaceInferenceApiEmbeddingConfig:` - `class OpenAIEmbeddingConfig:` - `class VertexAiEmbeddingConfig:` - `class BedrockEmbeddingConfig:` - `String name` The name of the embedding model config. - `String projectId` - `Optional createdAt` Creation datetime - `Optional updatedAt` Update datetime - `Optional embeddingModelConfigId` The ID of the EmbeddingModelConfig this pipeline is using. - `Optional llamaParseParameters` Settings that can be configured for how to use LlamaParse to parse files within a LlamaCloud pipeline. 
- `Optional adaptiveLongTable` - `Optional aggressiveTableExtraction` - `Optional annotateLinks` - `Optional autoMode` - `Optional autoModeConfigurationJson` - `Optional autoModeTriggerOnImageInPage` - `Optional autoModeTriggerOnRegexpInPage` - `Optional autoModeTriggerOnTableInPage` - `Optional autoModeTriggerOnTextInPage` - `Optional azureOpenAIApiVersion` - `Optional azureOpenAIDeploymentName` - `Optional azureOpenAIEndpoint` - `Optional azureOpenAIKey` - `Optional bboxBottom` - `Optional bboxLeft` - `Optional bboxRight` - `Optional bboxTop` - `Optional boundingBox` - `Optional compactMarkdownTable` - `Optional complementalFormattingInstruction` - `Optional contentGuidelineInstruction` - `Optional continuousMode` - `Optional disableImageExtraction` - `Optional disableOcr` - `Optional disableReconstruction` - `Optional doNotCache` - `Optional doNotUnrollColumns` - `Optional enableCostOptimizer` - `Optional extractCharts` - `Optional extractLayout` - `Optional extractPrintedPageNumber` - `Optional fastMode` - `Optional formattingInstruction` - `Optional gpt4oApiKey` - `Optional gpt4oMode` - `Optional guessXlsxSheetName` - `Optional hideFooters` - `Optional hideHeaders` - `Optional highResOcr` - `Optional htmlMakeAllElementsVisible` - `Optional htmlRemoveFixedElements` - `Optional htmlRemoveNavigationElements` - `Optional httpProxy` - `Optional ignoreDocumentElementsForLayoutDetection` - `Optional> imagesToSave` - `SCREENSHOT("screenshot")` - `EMBEDDED("embedded")` - `LAYOUT("layout")` - `Optional inlineImagesInMarkdown` - `Optional inputS3Path` - `Optional inputS3Region` - `Optional inputUrl` - `Optional internalIsScreenshotJob` - `Optional invalidateCache` - `Optional isFormattingInstruction` - `Optional jobTimeoutExtraTimePerPageInSeconds` - `Optional jobTimeoutInSeconds` - `Optional keepPageSeparatorWhenMergingTables` - `Optional> languages` - `AF("af")` - `AZ("az")` - `BS("bs")` - `CS("cs")` - `CY("cy")` - `DA("da")` - `DE("de")` - `EN("en")` - `ES("es")` - 
`ET("et")` - `FR("fr")` - `GA("ga")` - `HR("hr")` - `HU("hu")` - `ID("id")` - `IS("is")` - `IT("it")` - `KU("ku")` - `LA("la")` - `LT("lt")` - `LV("lv")` - `MI("mi")` - `MS("ms")` - `MT("mt")` - `NL("nl")` - `NO("no")` - `OC("oc")` - `PI("pi")` - `PL("pl")` - `PT("pt")` - `RO("ro")` - `RS_LATIN("rs_latin")` - `SK("sk")` - `SL("sl")` - `SQ("sq")` - `SV("sv")` - `SW("sw")` - `TL("tl")` - `TR("tr")` - `UZ("uz")` - `VI("vi")` - `AR("ar")` - `FA("fa")` - `UG("ug")` - `UR("ur")` - `BN("bn")` - `AS("as")` - `MNI("mni")` - `RU("ru")` - `RS_CYRILLIC("rs_cyrillic")` - `BE("be")` - `BG("bg")` - `UK("uk")` - `MN("mn")` - `ABQ("abq")` - `ADY("ady")` - `KBD("kbd")` - `AVA("ava")` - `DAR("dar")` - `INH("inh")` - `CHE("che")` - `LBE("lbe")` - `LEZ("lez")` - `TAB("tab")` - `TJK("tjk")` - `HI("hi")` - `MR("mr")` - `NE("ne")` - `BH("bh")` - `MAI("mai")` - `ANG("ang")` - `BHO("bho")` - `MAH("mah")` - `SCK("sck")` - `NEW("new")` - `GOM("gom")` - `SA("sa")` - `BGC("bgc")` - `TH("th")` - `CH_SIM("ch_sim")` - `CH_TRA("ch_tra")` - `JA("ja")` - `KO("ko")` - `TA("ta")` - `TE("te")` - `KN("kn")` - `Optional layoutAware` - `Optional lineLevelBoundingBox` - `Optional markdownTableMultilineHeaderSeparator` - `Optional maxPages` - `Optional maxPagesEnforced` - `Optional mergeTablesAcrossPagesInMarkdown` - `Optional model` - `Optional outlinedTableExtraction` - `Optional outputPdfOfDocument` - `Optional outputS3PathPrefix` - `Optional outputS3Region` - `Optional outputTablesAsHtml` - `Optional pageErrorTolerance` - `Optional pageFooterPrefix` - `Optional pageFooterSuffix` - `Optional pageHeaderPrefix` - `Optional pageHeaderSuffix` - `Optional pagePrefix` - `Optional pageSeparator` - `Optional pageSuffix` - `Optional parseMode` Enum for representing the mode of parsing to be used. 
- `PARSE_PAGE_WITHOUT_LLM("parse_page_without_llm")` - `PARSE_PAGE_WITH_LLM("parse_page_with_llm")` - `PARSE_PAGE_WITH_LVM("parse_page_with_lvm")` - `PARSE_PAGE_WITH_AGENT("parse_page_with_agent")` - `PARSE_PAGE_WITH_LAYOUT_AGENT("parse_page_with_layout_agent")` - `PARSE_DOCUMENT_WITH_LLM("parse_document_with_llm")` - `PARSE_DOCUMENT_WITH_LVM("parse_document_with_lvm")` - `PARSE_DOCUMENT_WITH_AGENT("parse_document_with_agent")` - `Optional parsingInstruction` - `Optional preciseBoundingBox` - `Optional premiumMode` - `Optional presentationOutOfBoundsContent` - `Optional presentationSkipEmbeddedData` - `Optional preserveLayoutAlignmentAcrossPages` - `Optional preserveVerySmallText` - `Optional preset` - `Optional priority` The priority for the request. This field may be ignored or overwritten depending on the organization tier. - `LOW("low")` - `MEDIUM("medium")` - `HIGH("high")` - `CRITICAL("critical")` - `Optional projectId` - `Optional removeHiddenText` - `Optional replaceFailedPageMode` Enum for representing the different available page error handling modes. 
- `RAW_TEXT("raw_text")` - `BLANK_PAGE("blank_page")` - `ERROR_MESSAGE("error_message")` - `Optional replaceFailedPageWithErrorMessagePrefix` - `Optional replaceFailedPageWithErrorMessageSuffix` - `Optional saveImages` - `Optional skipDiagonalText` - `Optional specializedChartParsingAgentic` - `Optional specializedChartParsingEfficient` - `Optional specializedChartParsingPlus` - `Optional specializedImageParsing` - `Optional spreadsheetExtractSubTables` - `Optional spreadsheetForceFormulaComputation` - `Optional spreadsheetIncludeHiddenSheets` - `Optional strictModeBuggyFont` - `Optional strictModeImageExtraction` - `Optional strictModeImageOcr` - `Optional strictModeReconstruction` - `Optional structuredOutput` - `Optional structuredOutputJsonSchema` - `Optional structuredOutputJsonSchemaName` - `Optional systemPrompt` - `Optional systemPromptAppend` - `Optional takeScreenshot` - `Optional targetPages` - `Optional tier` - `Optional useVendorMultimodalModel` - `Optional userPrompt` - `Optional vendorMultimodalApiKey` - `Optional vendorMultimodalModelName` - `Optional version` - `Optional> webhookConfigurations` Outbound webhook endpoints to notify on job status changes - `Optional> webhookEvents` Events to subscribe to (e.g. 'parse.success', 'extract.error'). If null, all events are delivered. 
- `EXTRACT_PENDING("extract.pending")` - `EXTRACT_SUCCESS("extract.success")` - `EXTRACT_ERROR("extract.error")` - `EXTRACT_PARTIAL_SUCCESS("extract.partial_success")` - `EXTRACT_CANCELLED("extract.cancelled")` - `PARSE_PENDING("parse.pending")` - `PARSE_RUNNING("parse.running")` - `PARSE_SUCCESS("parse.success")` - `PARSE_ERROR("parse.error")` - `PARSE_PARTIAL_SUCCESS("parse.partial_success")` - `PARSE_CANCELLED("parse.cancelled")` - `CLASSIFY_PENDING("classify.pending")` - `CLASSIFY_RUNNING("classify.running")` - `CLASSIFY_SUCCESS("classify.success")` - `CLASSIFY_ERROR("classify.error")` - `CLASSIFY_PARTIAL_SUCCESS("classify.partial_success")` - `CLASSIFY_CANCELLED("classify.cancelled")` - `SHEETS_PENDING("sheets.pending")` - `SHEETS_SUCCESS("sheets.success")` - `SHEETS_ERROR("sheets.error")` - `SHEETS_PARTIAL_SUCCESS("sheets.partial_success")` - `SHEETS_CANCELLED("sheets.cancelled")` - `UNMAPPED_EVENT("unmapped_event")` - `Optional webhookHeaders` Custom HTTP headers sent with each webhook request (e.g. auth tokens) - `Optional webhookOutputFormat` Response format sent to the webhook: 'string' (default) or 'json' - `Optional webhookUrl` URL to receive webhook POST notifications - `Optional webhookUrl` - `Optional managedPipelineId` The ID of the ManagedPipeline this playground pipeline is linked to. - `Optional metadataConfig` Metadata configuration for the pipeline. - `Optional> excludedEmbedMetadataKeys` List of metadata keys to exclude from embeddings - `Optional> excludedLlmMetadataKeys` List of metadata keys to exclude from LLM during retrieval - `Optional pipelineType` Type of pipeline. Either PLAYGROUND or MANAGED. - `PLAYGROUND("PLAYGROUND")` - `MANAGED("MANAGED")` - `Optional presetRetrievalParameters` Preset retrieval parameters for the pipeline. - `Optional alpha` Alpha value for hybrid retrieval to determine the weights between dense and sparse retrieval. 0 is sparse retrieval and 1 is dense retrieval. 
- `Optional className` - `Optional denseSimilarityCutoff` Minimum similarity score wrt query for retrieval - `Optional denseSimilarityTopK` Number of nodes for dense retrieval. - `Optional enableReranking` Enable reranking for retrieval - `Optional filesTopK` Number of files to retrieve (only for retrieval mode files_via_metadata and files_via_content). - `Optional rerankTopN` Number of reranked nodes for returning. - `Optional retrievalMode` The retrieval mode for the query. - `CHUNKS("chunks")` - `FILES_VIA_METADATA("files_via_metadata")` - `FILES_VIA_CONTENT("files_via_content")` - `AUTO_ROUTED("auto_routed")` - `Optional retrieveImageNodes` Whether to retrieve image nodes. - `Optional retrievePageFigureNodes` Whether to retrieve page figure nodes. - `Optional retrievePageScreenshotNodes` Whether to retrieve page screenshot nodes. - `Optional searchFilters` Metadata filters for vector stores. - `List filters` - `class MetadataFilter:` Comprehensive metadata filter for vector stores to support more operators. Value uses Strict types, as int, float and str are compatible types and were all converted to string before. See: https://docs.pydantic.dev/latest/usage/types/#strict-types - `String key` - `Optional value` - `double` - `String` - `List` - `List` - `List` - `Optional operator` Vector store filter operator. - `EQUALS("==")` - `GREATER(">")` - `LESS("<")` - `NOT_EQUALS("!=")` - `GREATER_OR_EQUALS(">=")` - `LESS_OR_EQUALS("<=")` - `IN("in")` - `NIN("nin")` - `ANY("any")` - `ALL("all")` - `TEXT_MATCH("text_match")` - `TEXT_MATCH_INSENSITIVE("text_match_insensitive")` - `CONTAINS("contains")` - `IS_EMPTY("is_empty")` - `class MetadataFilters:` Metadata filters for vector stores. - `Optional condition` Vector store filter conditions to combine different filters. - `AND("and")` - `OR("or")` - `NOT("not")` - `Optional searchFiltersInferenceSchema` JSON Schema that will be used to infer search_filters. Omit or leave as null to skip inference. 
- `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional sparseSimilarityTopK` Number of nodes for sparse retrieval. - `Optional sparseModelConfig` Configuration for sparse embedding models used in hybrid search. This allows users to choose between Splade and BM25 models for sparse retrieval in managed data sinks. - `Optional className` - `Optional modelType` The sparse model type to use. 'bm25' uses Qdrant's FastEmbed BM25 model (default for new pipelines), 'splade' uses HuggingFace Splade model, 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade). - `SPLADE("splade")` - `BM25("bm25")` - `AUTO("auto")` - `Optional status` Status of the pipeline. - `CREATED("CREATED")` - `DELETING("DELETING")` - `Optional transformConfig` Configuration for the transformation. - `class AutoTransformConfig:` - `Optional chunkOverlap` Chunk overlap for the transformation. - `Optional chunkSize` Chunk size for the transformation. - `Optional mode` - `AUTO("auto")` - `class AdvancedModeTransformConfig:` - `Optional chunkingConfig` Configuration for the chunking. - `class NoneChunkingConfig:` - `Optional mode` - `NONE("none")` - `class CharacterChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `CHARACTER("character")` - `class TokenChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `TOKEN("token")` - `Optional separator` - `class SentenceChunkingConfig:` - `Optional chunkOverlap` - `Optional chunkSize` - `Optional mode` - `SENTENCE("sentence")` - `Optional paragraphSeparator` - `Optional separator` - `class SemanticChunkingConfig:` - `Optional breakpointPercentileThreshold` - `Optional bufferSize` - `Optional mode` - `SEMANTIC("semantic")` - `Optional mode` - `ADVANCED("advanced")` - `Optional segmentationConfig` Configuration for the segmentation. 
- `class NoneSegmentationConfig:` - `Optional mode` - `NONE("none")` - `class PageSegmentationConfig:` - `Optional mode` - `PAGE("page")` - `Optional pageSeparator` - `class ElementSegmentationConfig:` - `Optional mode` - `ELEMENT("element")` - `Optional updatedAt` Update datetime ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.Pipeline; import com.llamacloud_prod.api.models.pipelines.datasources.DataSourceSyncParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DataSourceSyncParams params = DataSourceSyncParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .dataSourceId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .build(); Pipeline pipeline = client.pipelines().dataSources().sync(params); } } ``` #### Response ```json { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "class_name": "class_name", "embed_batch_size": 1, "model_name": "openai-text-embedding-3-small", "num_workers": 0 }, "type": "MANAGED_OPENAI_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "config_hash": { "embedding_config_hash": "embedding_config_hash", "parsing_config_hash": "parsing_config_hash", "transform_config_hash": "transform_config_hash" }, "created_at": "2019-12-27T18:11:19.117Z", "data_sink": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "component": { "foo": "bar" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "sink_type": "PINECONE", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config": { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "embedding_config": { "component": { "additional_kwargs": { "foo": "bar" }, "api_base": "api_base", 
"api_key": "api_key", "api_version": "api_version", "azure_deployment": "azure_deployment", "azure_endpoint": "azure_endpoint", "class_name": "class_name", "default_headers": { "foo": "string" }, "dimensions": 0, "embed_batch_size": 1, "max_retries": 0, "model_name": "model_name", "num_workers": 0, "reuse_client": true, "timeout": 0 }, "type": "AZURE_EMBEDDING" }, "name": "name", "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "created_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" }, "embedding_model_config_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "llama_parse_parameters": { "adaptive_long_table": true, "aggressive_table_extraction": true, "annotate_links": true, "auto_mode": true, "auto_mode_configuration_json": "auto_mode_configuration_json", "auto_mode_trigger_on_image_in_page": true, "auto_mode_trigger_on_regexp_in_page": "auto_mode_trigger_on_regexp_in_page", "auto_mode_trigger_on_table_in_page": true, "auto_mode_trigger_on_text_in_page": "auto_mode_trigger_on_text_in_page", "azure_openai_api_version": "azure_openai_api_version", "azure_openai_deployment_name": "azure_openai_deployment_name", "azure_openai_endpoint": "azure_openai_endpoint", "azure_openai_key": "azure_openai_key", "bbox_bottom": 0, "bbox_left": 0, "bbox_right": 0, "bbox_top": 0, "bounding_box": "bounding_box", "compact_markdown_table": true, "complemental_formatting_instruction": "complemental_formatting_instruction", "content_guideline_instruction": "content_guideline_instruction", "continuous_mode": true, "disable_image_extraction": true, "disable_ocr": true, "disable_reconstruction": true, "do_not_cache": true, "do_not_unroll_columns": true, "enable_cost_optimizer": true, "extract_charts": true, "extract_layout": true, "extract_printed_page_number": true, "fast_mode": true, "formatting_instruction": "formatting_instruction", "gpt4o_api_key": "gpt4o_api_key", "gpt4o_mode": true, "guess_xlsx_sheet_name": true, "hide_footers": true, "hide_headers": true, 
"high_res_ocr": true, "html_make_all_elements_visible": true, "html_remove_fixed_elements": true, "html_remove_navigation_elements": true, "http_proxy": "http_proxy", "ignore_document_elements_for_layout_detection": true, "images_to_save": [ "screenshot" ], "inline_images_in_markdown": true, "input_s3_path": "input_s3_path", "input_s3_region": "input_s3_region", "input_url": "input_url", "internal_is_screenshot_job": true, "invalidate_cache": true, "is_formatting_instruction": true, "job_timeout_extra_time_per_page_in_seconds": 0, "job_timeout_in_seconds": 0, "keep_page_separator_when_merging_tables": true, "languages": [ "af" ], "layout_aware": true, "line_level_bounding_box": true, "markdown_table_multiline_header_separator": "markdown_table_multiline_header_separator", "max_pages": 0, "max_pages_enforced": 0, "merge_tables_across_pages_in_markdown": true, "model": "model", "outlined_table_extraction": true, "output_pdf_of_document": true, "output_s3_path_prefix": "output_s3_path_prefix", "output_s3_region": "output_s3_region", "output_tables_as_HTML": true, "page_error_tolerance": 0, "page_footer_prefix": "page_footer_prefix", "page_footer_suffix": "page_footer_suffix", "page_header_prefix": "page_header_prefix", "page_header_suffix": "page_header_suffix", "page_prefix": "page_prefix", "page_separator": "page_separator", "page_suffix": "page_suffix", "parse_mode": "parse_page_without_llm", "parsing_instruction": "parsing_instruction", "precise_bounding_box": true, "premium_mode": true, "presentation_out_of_bounds_content": true, "presentation_skip_embedded_data": true, "preserve_layout_alignment_across_pages": true, "preserve_very_small_text": true, "preset": "preset", "priority": "low", "project_id": "project_id", "remove_hidden_text": true, "replace_failed_page_mode": "raw_text", "replace_failed_page_with_error_message_prefix": "replace_failed_page_with_error_message_prefix", "replace_failed_page_with_error_message_suffix": 
"replace_failed_page_with_error_message_suffix", "save_images": true, "skip_diagonal_text": true, "specialized_chart_parsing_agentic": true, "specialized_chart_parsing_efficient": true, "specialized_chart_parsing_plus": true, "specialized_image_parsing": true, "spreadsheet_extract_sub_tables": true, "spreadsheet_force_formula_computation": true, "spreadsheet_include_hidden_sheets": true, "strict_mode_buggy_font": true, "strict_mode_image_extraction": true, "strict_mode_image_ocr": true, "strict_mode_reconstruction": true, "structured_output": true, "structured_output_json_schema": "structured_output_json_schema", "structured_output_json_schema_name": "structured_output_json_schema_name", "system_prompt": "system_prompt", "system_prompt_append": "system_prompt_append", "take_screenshot": true, "target_pages": "target_pages", "tier": "tier", "use_vendor_multimodal_model": true, "user_prompt": "user_prompt", "vendor_multimodal_api_key": "vendor_multimodal_api_key", "vendor_multimodal_model_name": "vendor_multimodal_model_name", "version": "version", "webhook_configurations": [ { "webhook_events": [ "parse.success", "parse.error" ], "webhook_headers": { "Authorization": "Bearer sk-..." 
}, "webhook_output_format": "json", "webhook_url": "https://example.com/webhooks/llamacloud" } ], "webhook_url": "webhook_url" }, "managed_pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "metadata_config": { "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ] }, "pipeline_type": "PLAYGROUND", "preset_retrieval_parameters": { "alpha": 0, "class_name": "class_name", "dense_similarity_cutoff": 0, "dense_similarity_top_k": 1, "enable_reranking": true, "files_top_k": 1, "rerank_top_n": 1, "retrieval_mode": "chunks", "retrieve_image_nodes": true, "retrieve_page_figure_nodes": true, "retrieve_page_screenshot_nodes": true, "search_filters": { "filters": [ { "key": "key", "value": 0, "operator": "==" } ], "condition": "and" }, "search_filters_inference_schema": { "foo": { "foo": "bar" } }, "sparse_similarity_top_k": 1 }, "sparse_model_config": { "class_name": "class_name", "model_type": "splade" }, "status": "CREATED", "transform_config": { "chunk_overlap": 0, "chunk_size": 1, "mode": "auto" }, "updated_at": "2019-12-27T18:11:19.117Z" } ``` ## Domain Types ### Pipeline Data Source - `class PipelineDataSource:` Schema for a data source in a pipeline. - `String id` Unique identifier - `Component component` Component that implements the data source - `class UnionMember0:` - `class CloudS3DataSource:` - `String bucket` The name of the S3 bucket to read from. - `Optional awsAccessId` The AWS access ID to use for authentication. - `Optional awsAccessSecret` The AWS access secret to use for authentication. - `Optional className` - `Optional prefix` The prefix of the S3 objects to read from. - `Optional regexPattern` The regex pattern to filter S3 objects. Must be a valid regex pattern. - `Optional s3EndpointUrl` The S3 endpoint URL to use for authentication. - `Optional supportsAccessControl` - `class CloudAzStorageBlobDataSource:` - `String accountUrl` The Azure Storage Blob account URL to use for authentication. 
- `String containerName` The name of the Azure Storage Blob container to read from. - `Optional accountKey` The Azure Storage Blob account key to use for authentication. - `Optional accountName` The Azure Storage Blob account name to use for authentication. - `Optional blob` The blob name to read from. - `Optional className` - `Optional clientId` The Azure AD client ID to use for authentication. - `Optional clientSecret` The Azure AD client secret to use for authentication. - `Optional prefix` The prefix of the Azure Storage Blob objects to read from. - `Optional supportsAccessControl` - `Optional tenantId` The Azure AD tenant ID to use for authentication. - `class CloudGoogleDriveDataSource:` - `String folderId` The ID of the Google Drive folder to read from. - `Optional className` - `Optional serviceAccountKey` A dictionary containing secret values - `Optional supportsAccessControl` - `class CloudOneDriveDataSource:` - `String clientId` The client ID to use for authentication. - `String clientSecret` The client secret to use for authentication. - `String tenantId` The tenant ID to use for authentication. - `String userPrincipalName` The user principal name to use for authentication. - `Optional className` - `Optional folderId` The ID of the OneDrive folder to read from. - `Optional folderPath` The path of the OneDrive folder to read from. - `Optional> requiredExts` The list of required file extensions. - `Optional supportsAccessControl` - `TRUE(true)` - `class CloudSharepointDataSource:` - `String clientId` The client ID to use for authentication. - `String clientSecret` The client secret to use for authentication. - `String tenantId` The tenant ID to use for authentication. - `Optional className` - `Optional driveName` The name of the Sharepoint drive to read from. - `Optional> excludePathPatterns` List of regex patterns for file paths to exclude. Files whose paths (including filename) match any pattern will be excluded. 
Example: ['/temp/', '/backup/', '.git/', '.tmp$', '^~'] - `Optional folderId` The ID of the Sharepoint folder to read from. - `Optional folderPath` The path of the Sharepoint folder to read from. - `Optional getPermissions` Whether to get permissions for the sharepoint site. - `Optional> includePathPatterns` List of regex patterns for file paths to include. Full paths (including filename) must match at least one pattern to be included. Example: ['/reports/', '/docs/.*.pdf$', '^Report.*.pdf$'] - `Optional> requiredExts` The list of required file extensions. - `Optional siteId` The ID of the SharePoint site to download from. - `Optional siteName` The name of the SharePoint site to download from. - `Optional supportsAccessControl` - `TRUE(true)` - `class CloudSlackDataSource:` - `String slackToken` Slack Bot Token. - `Optional channelIds` Slack Channel. - `Optional channelPatterns` Slack Channel name pattern. - `Optional className` - `Optional earliestDate` Earliest date. - `Optional earliestDateTimestamp` Earliest date timestamp. - `Optional latestDate` Latest date. - `Optional latestDateTimestamp` Latest date timestamp. - `Optional supportsAccessControl` - `class CloudNotionPageDataSource:` - `String integrationToken` The integration token to use for authentication. - `Optional className` - `Optional databaseIds` The Notion Database Id to read content from. - `Optional pageIds` The Page ID's of the Notion to read from. - `Optional supportsAccessControl` - `class CloudConfluenceDataSource:` - `String authenticationMechanism` Type of Authentication for connecting to Confluence APIs. - `String serverUrl` The server URL of the Confluence instance. - `Optional apiToken` The API token to use for authentication. - `Optional className` - `Optional cql` The CQL query to use for fetching pages. - `Optional failureHandling` Configuration for handling failures during processing. Key-value object controlling failure handling behaviors. 
Example: { "skip_list_failures": true } Currently supports: - skip_list_failures: Skip failed batches/lists and continue processing - `Optional skipListFailures` Whether to skip failed batches/lists and continue processing - `Optional indexRestrictedPages` Whether to index restricted pages. - `Optional keepMarkdownFormat` Whether to keep the markdown format. - `Optional label` The label to use for fetching pages. - `Optional pageIds` The page IDs of the Confluence to read from. - `Optional spaceKey` The space key to read from. - `Optional supportsAccessControl` - `Optional userName` The username to use for authentication. - `class CloudJiraDataSource:` Cloud Jira Data Source integrating JiraReader. - `String authenticationMechanism` Type of Authentication for connecting to Jira APIs. - `String query` JQL (Jira Query Language) query to search. - `Optional apiToken` The API/ Access Token used for Basic, PAT and OAuth2 authentication. - `Optional className` - `Optional cloudId` The cloud ID, used in case of OAuth2. - `Optional email` The email address to use for authentication. - `Optional serverUrl` The server url for Jira Cloud. - `Optional supportsAccessControl` - `class CloudJiraDataSourceV2:` Cloud Jira Data Source integrating JiraReaderV2. - `String authenticationMechanism` Type of Authentication for connecting to Jira APIs. - `String query` JQL (Jira Query Language) query to search. - `String serverUrl` The server url for Jira Cloud. - `Optional apiToken` The API Access Token used for Basic, PAT and OAuth2 authentication. - `Optional apiVersion` Jira REST API version to use (2 or 3). 3 supports Atlassian Document Format (ADF). - `_2("2")` - `_3("3")` - `Optional className` - `Optional cloudId` The cloud ID, used in case of OAuth2. - `Optional email` The email address to use for authentication. - `Optional expand` Fields to expand in the response. - `Optional> fields` List of fields to retrieve from Jira. If None, retrieves all fields. 
- `Optional getPermissions` Whether to fetch project role permissions and issue-level security - `Optional requestsPerMinute` Rate limit for Jira API requests per minute. - `Optional supportsAccessControl` - `class CloudBoxDataSource:` - `AuthenticationMechanism authenticationMechanism` The type of authentication to use (Developer Token or CCG) - `DEVELOPER_TOKEN("developer_token")` - `CCG("ccg")` - `Optional className` - `Optional clientId` Box API key used for identifying the application the user is authenticating with - `Optional clientSecret` Box API secret used for making auth requests. - `Optional developerToken` Developer token for authentication if authentication_mechanism is 'developer_token'. - `Optional enterpriseId` Box Enterprise ID, if provided authenticates as service. - `Optional folderId` The ID of the Box folder to read from. - `Optional supportsAccessControl` - `Optional userId` Box User ID, if provided authenticates as user. - `String dataSourceId` The ID of the data source. - `LocalDateTime lastSyncedAt` The last time the data source was automatically synced. - `String name` The name of the data source. - `String pipelineId` The ID of the pipeline. - `String projectId` - `SourceType sourceType` - `S3("S3")` - `AZURE_STORAGE_BLOB("AZURE_STORAGE_BLOB")` - `GOOGLE_DRIVE("GOOGLE_DRIVE")` - `MICROSOFT_ONEDRIVE("MICROSOFT_ONEDRIVE")` - `MICROSOFT_SHAREPOINT("MICROSOFT_SHAREPOINT")` - `SLACK("SLACK")` - `NOTION_PAGE("NOTION_PAGE")` - `CONFLUENCE("CONFLUENCE")` - `JIRA("JIRA")` - `JIRA_V2("JIRA_V2")` - `BOX("BOX")` - `Optional createdAt` Creation datetime - `Optional customMetadata` Custom metadata that will be present on all data loaded from the data source - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional status` The status of the data source in the pipeline. 
- `NOT_STARTED("NOT_STARTED")` - `IN_PROGRESS("IN_PROGRESS")` - `SUCCESS("SUCCESS")` - `ERROR("ERROR")` - `CANCELLED("CANCELLED")` - `Optional statusUpdatedAt` The last time the status was updated. - `Optional syncInterval` The interval at which the data source should be synced. - `Optional syncScheduleSetBy` The id of the user who set the sync schedule. - `Optional updatedAt` Update datetime - `Optional versionMetadata` Version metadata for the data source - `Optional readerVersion` The version of the reader to use for this data source. - `_1_0("1.0")` - `_2_0("2.0")` - `_2_1("2.1")` # Images ## List File Page Screenshots `List<ImageListPageScreenshotsResponse> pipelines().images().listPageScreenshots(ImageListPageScreenshotsParams params = ImageListPageScreenshotsParams.none(), RequestOptions requestOptions = RequestOptions.none())` **get** `/api/v1/files/{id}/page_screenshots` List metadata for all screenshots of pages from a file. ### Parameters - `ImageListPageScreenshotsParams params` - `Optional id` - `Optional organizationId` - `Optional projectId` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.images.ImageListPageScreenshotsParams; import com.llamacloud_prod.api.models.pipelines.images.ImageListPageScreenshotsResponse; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); List response = client.pipelines().images().listPageScreenshots("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` #### Response ```json [ { "file_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "image_size": 0, "page_index": 0, "metadata": { "foo": "bar" } } ] ``` ## Get File Page Screenshot `JsonValue pipelines().images().getPageScreenshot(ImageGetPageScreenshotParams params, RequestOptions requestOptions = RequestOptions.none())` **get** 
`/api/v1/files/{id}/page_screenshots/{page_index}` Get screenshot of a page from a file. ### Parameters - `ImageGetPageScreenshotParams params` - `String id` - `Optional pageIndex` - `Optional organizationId` - `Optional projectId` ### Returns - `class ImageGetPageScreenshotResponse:` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.images.ImageGetPageScreenshotParams; import com.llamacloud_prod.api.models.pipelines.images.ImageGetPageScreenshotResponse; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); ImageGetPageScreenshotParams params = ImageGetPageScreenshotParams.builder() .id("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .pageIndex(0L) .build(); ImageGetPageScreenshotResponse response = client.pipelines().images().getPageScreenshot(params); } } ``` #### Response ```json {} ``` ## Get File Page Figure `JsonValue pipelines().images().getPageFigure(ImageGetPageFigureParamsparams, RequestOptionsrequestOptions = RequestOptions.none())` **get** `/api/v1/files/{id}/page-figures/{page_index}/{figure_name}` Get a specific figure from a page of a file. 
### Parameters - `ImageGetPageFigureParams params` - `String id` - `long pageIndex` - `Optional figureName` - `Optional organizationId` - `Optional projectId` ### Returns - `class ImageGetPageFigureResponse:` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.images.ImageGetPageFigureParams; import com.llamacloud_prod.api.models.pipelines.images.ImageGetPageFigureResponse; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); ImageGetPageFigureParams params = ImageGetPageFigureParams.builder() .id("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .pageIndex(0L) .figureName("figure_name") .build(); ImageGetPageFigureResponse response = client.pipelines().images().getPageFigure(params); } } ``` #### Response ```json {} ``` ## List File Pages Figures `List pipelines().images().listPageFigures(ImageListPageFiguresParamsparams = ImageListPageFiguresParams.none(), RequestOptionsrequestOptions = RequestOptions.none())` **get** `/api/v1/files/{id}/page-figures` List metadata for all figures from all pages of a file. 
### Parameters - `ImageListPageFiguresParams params` - `Optional id` - `Optional organizationId` - `Optional projectId` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.images.ImageListPageFiguresParams; import com.llamacloud_prod.api.models.pipelines.images.ImageListPageFiguresResponse; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); List response = client.pipelines().images().listPageFigures("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` #### Response ```json [ { "confidence": 0, "figure_name": "figure_name", "figure_size": 0, "file_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "page_index": 0, "is_likely_noise": true, "metadata": { "foo": "bar" } } ] ``` # Files ## Get Pipeline File Status Counts `FileGetStatusCountsResponse pipelines().files().getStatusCounts(FileGetStatusCountsParams params = FileGetStatusCountsParams.none(), RequestOptions requestOptions = RequestOptions.none())` **get** `/api/v1/pipelines/{pipeline_id}/files/status-counts` Get file status counts for a pipeline.
### Parameters - `FileGetStatusCountsParams params` - `Optional pipelineId` - `Optional dataSourceId` - `Optional onlyManuallyUploaded` ### Returns - `class FileGetStatusCountsResponse:` - `Counts counts` The counts of files by status - `long totalCount` The total number of files - `Optional dataSourceId` The ID of the data source that the files belong to - `Optional onlyManuallyUploaded` Whether to only count manually uploaded files - `Optional pipelineId` The ID of the pipeline that the files belong to ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.files.FileGetStatusCountsParams; import com.llamacloud_prod.api.models.pipelines.files.FileGetStatusCountsResponse; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); FileGetStatusCountsResponse response = client.pipelines().files().getStatusCounts("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` #### Response ```json { "counts": { "foo": 0 }, "total_count": 0, "data_source_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "only_manually_uploaded": true, "pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e" } ``` ## Get Pipeline File Status `ManagedIngestionStatusResponse pipelines().files().getStatus(FileGetStatusParamsparams, RequestOptionsrequestOptions = RequestOptions.none())` **get** `/api/v1/pipelines/{pipeline_id}/files/{file_id}/status` Get status of a file for a pipeline. ### Parameters - `FileGetStatusParams params` - `String pipelineId` - `Optional fileId` ### Returns - `class ManagedIngestionStatusResponse:` - `Status status` Status of the ingestion. 
- `NOT_STARTED("NOT_STARTED")` - `IN_PROGRESS("IN_PROGRESS")` - `SUCCESS("SUCCESS")` - `ERROR("ERROR")` - `PARTIAL_SUCCESS("PARTIAL_SUCCESS")` - `CANCELLED("CANCELLED")` - `Optional deploymentDate` Date of the deployment. - `Optional effectiveAt` When the status is effective. - `Optional> error` List of errors that occurred during ingestion. - `String jobId` ID of the job that failed. - `String message` Message describing the error that occurred during ingestion. - `Step step` Name of the job that failed. - `MANAGED_INGESTION("MANAGED_INGESTION")` - `DATA_SOURCE("DATA_SOURCE")` - `FILE_UPDATER("FILE_UPDATER")` - `PARSE("PARSE")` - `TRANSFORM("TRANSFORM")` - `INGESTION("INGESTION")` - `METADATA_UPDATE("METADATA_UPDATE")` - `Optional jobId` ID of the latest job. ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.ManagedIngestionStatusResponse; import com.llamacloud_prod.api.models.pipelines.files.FileGetStatusParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); FileGetStatusParams params = FileGetStatusParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .fileId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .build(); ManagedIngestionStatusResponse managedIngestionStatusResponse = client.pipelines().files().getStatus(params); } } ``` #### Response ```json { "status": "NOT_STARTED", "deployment_date": "2019-12-27T18:11:19.117Z", "effective_at": "2019-12-27T18:11:19.117Z", "error": [ { "job_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "message": "message", "step": "MANAGED_INGESTION" } ], "job_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e" } ``` ## Add Files To Pipeline Api `List pipelines().files().create(FileCreateParams params, RequestOptions requestOptions = RequestOptions.none())` **put**
`/api/v1/pipelines/{pipeline_id}/files` Add files to a pipeline. ### Parameters - `FileCreateParams params` - `Optional pipelineId` - `List body` - `String fileId` The ID of the file - `Optional customMetadata` Custom metadata for the file - `class UnionMember0:` - `List` - `String` - `double` - `boolean` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.files.FileCreateParams; import com.llamacloud_prod.api.models.pipelines.files.PipelineFile; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); FileCreateParams params = FileCreateParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .addBody(FileCreateParams.Body.builder() .fileId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .build()) .build(); List pipelineFiles = client.pipelines().files().create(params); } } ``` #### Response ```json [ { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "config_hash": { "foo": { "foo": "bar" } }, "created_at": "2019-12-27T18:11:19.117Z", "custom_metadata": { "foo": { "foo": "bar" } }, "data_source_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "external_file_id": "external_file_id", "file_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "file_size": 0, "file_type": "file_type", "indexed_page_count": 0, "last_modified_at": "2019-12-27T18:11:19.117Z", "name": "name", "permission_info": { "foo": { "foo": "bar" } }, "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "resource_info": { "foo": { "foo": "bar" } }, "status": "NOT_STARTED", "status_updated_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" } ] ``` ## Update Pipeline File `PipelineFile pipelines().files().update(FileUpdateParamsparams, RequestOptionsrequestOptions = 
RequestOptions.none())` **put** `/api/v1/pipelines/{pipeline_id}/files/{file_id}` Update a file for a pipeline. ### Parameters - `FileUpdateParams params` - `String pipelineId` - `Optional fileId` - `Optional customMetadata` Custom metadata for the file - `class UnionMember0:` - `List` - `String` - `double` - `boolean` ### Returns - `class PipelineFile:` A file associated with a pipeline. - `String id` Unique identifier for the pipeline file. - `String pipelineId` The ID of the pipeline that the file is associated with. - `Optional configHash` Hashes for the configuration of the pipeline. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional createdAt` When the pipeline file was created. - `Optional customMetadata` Custom metadata for the file. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional dataSourceId` The ID of the data source that the file belongs to. - `Optional externalFileId` The ID of the file in the external system. - `Optional fileId` The ID of the file. - `Optional fileSize` Size of the file in bytes. - `Optional fileType` File type (e.g. pdf, docx, etc.). - `Optional indexedPageCount` The number of pages that have been indexed for this file. - `Optional lastModifiedAt` The last modified time of the file. - `Optional name` Name of the file. - `Optional permissionInfo` Permission information for the file. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional projectId` The ID of the project that the file belongs to. - `Optional resourceInfo` Resource information for the file. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional status` Status of the pipeline file. - `NOT_STARTED("NOT_STARTED")` - `IN_PROGRESS("IN_PROGRESS")` - `SUCCESS("SUCCESS")` - `ERROR("ERROR")` - `CANCELLED("CANCELLED")` - `Optional statusUpdatedAt` The last time the status was updated. - `Optional updatedAt` When the pipeline file was last updated. 
### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.files.FileUpdateParams; import com.llamacloud_prod.api.models.pipelines.files.PipelineFile; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); FileUpdateParams params = FileUpdateParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .fileId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .build(); PipelineFile pipelineFile = client.pipelines().files().update(params); } } ``` #### Response ```json { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "config_hash": { "foo": { "foo": "bar" } }, "created_at": "2019-12-27T18:11:19.117Z", "custom_metadata": { "foo": { "foo": "bar" } }, "data_source_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "external_file_id": "external_file_id", "file_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "file_size": 0, "file_type": "file_type", "indexed_page_count": 0, "last_modified_at": "2019-12-27T18:11:19.117Z", "name": "name", "permission_info": { "foo": { "foo": "bar" } }, "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "resource_info": { "foo": { "foo": "bar" } }, "status": "NOT_STARTED", "status_updated_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" } ``` ## Delete Pipeline File `pipelines().files().delete(FileDeleteParamsparams, RequestOptionsrequestOptions = RequestOptions.none())` **delete** `/api/v1/pipelines/{pipeline_id}/files/{file_id}` Delete a file from a pipeline. 
### Parameters - `FileDeleteParams params` - `String pipelineId` - `Optional fileId` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.files.FileDeleteParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); FileDeleteParams params = FileDeleteParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .fileId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .build(); client.pipelines().files().delete(params); } } ``` ## List Pipeline Files2 `FileListPage pipelines().files().list(FileListParamsparams = FileListParams.none(), RequestOptionsrequestOptions = RequestOptions.none())` **get** `/api/v1/pipelines/{pipeline_id}/files2` List files for a pipeline with optional filtering, sorting, and pagination. ### Parameters - `FileListParams params` - `Optional pipelineId` - `Optional dataSourceId` - `Optional fileNameContains` - `Optional limit` - `Optional offset` - `Optional onlyManuallyUploaded` - `Optional orderBy` - `Optional> statuses` Filter by file statuses - `NOT_STARTED("NOT_STARTED")` - `IN_PROGRESS("IN_PROGRESS")` - `SUCCESS("SUCCESS")` - `ERROR("ERROR")` - `CANCELLED("CANCELLED")` ### Returns - `class PipelineFile:` A file associated with a pipeline. - `String id` Unique identifier for the pipeline file. - `String pipelineId` The ID of the pipeline that the file is associated with. - `Optional configHash` Hashes for the configuration of the pipeline. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional createdAt` When the pipeline file was created. - `Optional customMetadata` Custom metadata for the file. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional dataSourceId` The ID of the data source that the file belongs to. 
- `Optional externalFileId` The ID of the file in the external system. - `Optional fileId` The ID of the file. - `Optional fileSize` Size of the file in bytes. - `Optional fileType` File type (e.g. pdf, docx, etc.). - `Optional indexedPageCount` The number of pages that have been indexed for this file. - `Optional lastModifiedAt` The last modified time of the file. - `Optional name` Name of the file. - `Optional permissionInfo` Permission information for the file. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional projectId` The ID of the project that the file belongs to. - `Optional resourceInfo` Resource information for the file. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional status` Status of the pipeline file. - `NOT_STARTED("NOT_STARTED")` - `IN_PROGRESS("IN_PROGRESS")` - `SUCCESS("SUCCESS")` - `ERROR("ERROR")` - `CANCELLED("CANCELLED")` - `Optional statusUpdatedAt` The last time the status was updated. - `Optional updatedAt` When the pipeline file was last updated. 
### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.files.FileListPage; import com.llamacloud_prod.api.models.pipelines.files.FileListParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); FileListPage page = client.pipelines().files().list("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` #### Response ```json { "files": [ { "id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "pipeline_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "config_hash": { "foo": { "foo": "bar" } }, "created_at": "2019-12-27T18:11:19.117Z", "custom_metadata": { "foo": { "foo": "bar" } }, "data_source_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "external_file_id": "external_file_id", "file_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "file_size": 0, "file_type": "file_type", "indexed_page_count": 0, "last_modified_at": "2019-12-27T18:11:19.117Z", "name": "name", "permission_info": { "foo": { "foo": "bar" } }, "project_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "resource_info": { "foo": { "foo": "bar" } }, "status": "NOT_STARTED", "status_updated_at": "2019-12-27T18:11:19.117Z", "updated_at": "2019-12-27T18:11:19.117Z" } ], "limit": 0, "offset": 0, "total_count": 0 } ``` ## Domain Types ### Pipeline File - `class PipelineFile:` A file associated with a pipeline. - `String id` Unique identifier for the pipeline file. - `String pipelineId` The ID of the pipeline that the file is associated with. - `Optional configHash` Hashes for the configuration of the pipeline. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional createdAt` When the pipeline file was created. - `Optional customMetadata` Custom metadata for the file. 
- `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional dataSourceId` The ID of the data source that the file belongs to. - `Optional externalFileId` The ID of the file in the external system. - `Optional fileId` The ID of the file. - `Optional fileSize` Size of the file in bytes. - `Optional fileType` File type (e.g. pdf, docx, etc.). - `Optional indexedPageCount` The number of pages that have been indexed for this file. - `Optional lastModifiedAt` The last modified time of the file. - `Optional name` Name of the file. - `Optional permissionInfo` Permission information for the file. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional projectId` The ID of the project that the file belongs to. - `Optional resourceInfo` Resource information for the file. - `class UnionMember0:` - `List` - `String` - `double` - `boolean` - `Optional status` Status of the pipeline file. - `NOT_STARTED("NOT_STARTED")` - `IN_PROGRESS("IN_PROGRESS")` - `SUCCESS("SUCCESS")` - `ERROR("ERROR")` - `CANCELLED("CANCELLED")` - `Optional statusUpdatedAt` The last time the status was updated. - `Optional updatedAt` When the pipeline file was last updated. # Metadata ## Import Pipeline Metadata `MetadataCreateResponse pipelines().metadata().create(MetadataCreateParamsparams, RequestOptionsrequestOptions = RequestOptions.none())` **put** `/api/v1/pipelines/{pipeline_id}/metadata` Import metadata for a pipeline. 
### Parameters - `MetadataCreateParams params` - `Optional pipelineId` - `String uploadFile` ### Returns - `class MetadataCreateResponse:` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.metadata.MetadataCreateParams; import com.llamacloud_prod.api.models.pipelines.metadata.MetadataCreateResponse; import java.io.ByteArrayInputStream; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); MetadataCreateParams params = MetadataCreateParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .uploadFile(new ByteArrayInputStream("Example data".getBytes())) .build(); MetadataCreateResponse metadata = client.pipelines().metadata().create(params); } } ``` #### Response ```json { "foo": "string" } ``` ## Delete Pipeline Files Metadata `pipelines().metadata().deleteAll(MetadataDeleteAllParamsparams = MetadataDeleteAllParams.none(), RequestOptionsrequestOptions = RequestOptions.none())` **delete** `/api/v1/pipelines/{pipeline_id}/metadata` Delete metadata for all files in a pipeline. 
### Parameters - `MetadataDeleteAllParams params` - `Optional pipelineId` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.metadata.MetadataDeleteAllParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); client.pipelines().metadata().deleteAll("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` # Documents ## Create Batch Pipeline Documents `List pipelines().documents().create(DocumentCreateParams params, RequestOptions requestOptions = RequestOptions.none())` **post** `/api/v1/pipelines/{pipeline_id}/documents` Batch create documents for a pipeline. ### Parameters - `DocumentCreateParams params` - `Optional pipelineId` - `List body` - `Metadata metadata` - `String text` - `Optional id` - `Optional> excludedEmbedMetadataKeys` - `Optional> excludedLlmMetadataKeys` - `Optional> pagePositions` Indices in the CloudDocument.text where a new page begins, e.g., the second page starts at the index specified by page_positions[1].
### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.core.JsonValue; import com.llamacloud_prod.api.models.pipelines.documents.CloudDocument; import com.llamacloud_prod.api.models.pipelines.documents.CloudDocumentCreate; import com.llamacloud_prod.api.models.pipelines.documents.DocumentCreateParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DocumentCreateParams params = DocumentCreateParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .addBody(CloudDocumentCreate.builder() .metadata(CloudDocumentCreate.Metadata.builder() .putAdditionalProperty("foo", JsonValue.from("bar")) .build()) .text("text") .build()) .build(); List cloudDocuments = client.pipelines().documents().create(params); } } ``` #### Response ```json [ { "id": "id", "metadata": { "foo": "bar" }, "text": "text", "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ], "page_positions": [ 0 ], "status_metadata": { "foo": "bar" } } ] ``` ## Paginated List Pipeline Documents `DocumentListPage pipelines().documents().list(DocumentListParamsparams = DocumentListParams.none(), RequestOptionsrequestOptions = RequestOptions.none())` **get** `/api/v1/pipelines/{pipeline_id}/documents/paginated` Return a list of documents for a pipeline. ### Parameters - `DocumentListParams params` - `Optional pipelineId` - `Optional fileId` - `Optional limit` - `Optional onlyApiDataSourceDocuments` - `Optional onlyDirectUpload` - `Optional skip` - `Optional statusRefreshPolicy` - `CACHED("cached")` - `TTL("ttl")` ### Returns - `class CloudDocument:` Cloud document stored in S3. 
- `String id` - `Metadata metadata` - `String text` - `Optional> excludedEmbedMetadataKeys` - `Optional> excludedLlmMetadataKeys` - `Optional> pagePositions` indices in the CloudDocument.text where a new page begins. e.g. Second page starts at index specified by page_positions[1]. - `Optional statusMetadata` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.documents.DocumentListPage; import com.llamacloud_prod.api.models.pipelines.documents.DocumentListParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DocumentListPage page = client.pipelines().documents().list("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e"); } } ``` #### Response ```json { "documents": [ { "id": "id", "metadata": { "foo": "bar" }, "text": "text", "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ], "page_positions": [ 0 ], "status_metadata": { "foo": "bar" } } ], "limit": 0, "offset": 0, "total_count": 0 } ``` ## Get Pipeline Document `CloudDocument pipelines().documents().get(DocumentGetParamsparams, RequestOptionsrequestOptions = RequestOptions.none())` **get** `/api/v1/pipelines/{pipeline_id}/documents/{document_id}` Return a single document for a pipeline. ### Parameters - `DocumentGetParams params` - `String pipelineId` - `Optional documentId` ### Returns - `class CloudDocument:` Cloud document stored in S3. - `String id` - `Metadata metadata` - `String text` - `Optional> excludedEmbedMetadataKeys` - `Optional> excludedLlmMetadataKeys` - `Optional> pagePositions` indices in the CloudDocument.text where a new page begins. e.g. Second page starts at index specified by page_positions[1]. 
- `Optional statusMetadata` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.documents.CloudDocument; import com.llamacloud_prod.api.models.pipelines.documents.DocumentGetParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DocumentGetParams params = DocumentGetParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .documentId("document_id") .build(); CloudDocument cloudDocument = client.pipelines().documents().get(params); } } ``` #### Response ```json { "id": "id", "metadata": { "foo": "bar" }, "text": "text", "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ], "page_positions": [ 0 ], "status_metadata": { "foo": "bar" } } ``` ## Delete Pipeline Document `pipelines().documents().delete(DocumentDeleteParamsparams, RequestOptionsrequestOptions = RequestOptions.none())` **delete** `/api/v1/pipelines/{pipeline_id}/documents/{document_id}` Delete a document from a pipeline; runs async (vectors first, then MongoDB record). 
### Parameters - `DocumentDeleteParams params` - `String pipelineId` - `Optional documentId` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.documents.DocumentDeleteParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DocumentDeleteParams params = DocumentDeleteParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .documentId("document_id") .build(); client.pipelines().documents().delete(params); } } ``` ## Get Pipeline Document Status `ManagedIngestionStatusResponse pipelines().documents().getStatus(DocumentGetStatusParams params, RequestOptions requestOptions = RequestOptions.none())` **get** `/api/v1/pipelines/{pipeline_id}/documents/{document_id}/status` Return the ingestion status of a single document for a pipeline. ### Parameters - `DocumentGetStatusParams params` - `String pipelineId` - `Optional documentId` ### Returns - `class ManagedIngestionStatusResponse:` - `Status status` Status of the ingestion. - `NOT_STARTED("NOT_STARTED")` - `IN_PROGRESS("IN_PROGRESS")` - `SUCCESS("SUCCESS")` - `ERROR("ERROR")` - `PARTIAL_SUCCESS("PARTIAL_SUCCESS")` - `CANCELLED("CANCELLED")` - `Optional deploymentDate` Date of the deployment. - `Optional effectiveAt` When the status is effective. - `Optional> error` List of errors that occurred during ingestion. - `String jobId` ID of the job that failed. - `String message` Message describing the error that occurred during ingestion. - `Step step` Name of the job that failed. - `MANAGED_INGESTION("MANAGED_INGESTION")` - `DATA_SOURCE("DATA_SOURCE")` - `FILE_UPDATER("FILE_UPDATER")` - `PARSE("PARSE")` - `TRANSFORM("TRANSFORM")` - `INGESTION("INGESTION")` - `METADATA_UPDATE("METADATA_UPDATE")` - `Optional jobId` ID of the latest job.
### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.ManagedIngestionStatusResponse; import com.llamacloud_prod.api.models.pipelines.documents.DocumentGetStatusParams; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DocumentGetStatusParams params = DocumentGetStatusParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .documentId("document_id") .build(); ManagedIngestionStatusResponse managedIngestionStatusResponse = client.pipelines().documents().getStatus(params); } } ``` #### Response ```json { "status": "NOT_STARTED", "deployment_date": "2019-12-27T18:11:19.117Z", "effective_at": "2019-12-27T18:11:19.117Z", "error": [ { "job_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", "message": "message", "step": "MANAGED_INGESTION" } ], "job_id": "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e" } ``` ## Sync Pipeline Document `JsonValue pipelines().documents().sync(DocumentSyncParamsparams, RequestOptionsrequestOptions = RequestOptions.none())` **post** `/api/v1/pipelines/{pipeline_id}/documents/{document_id}/sync` Sync a specific document for a pipeline. 
### Parameters - `DocumentSyncParams params` - `String pipelineId` - `Optional documentId` ### Returns - `class DocumentSyncResponse:` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.documents.DocumentSyncParams; import com.llamacloud_prod.api.models.pipelines.documents.DocumentSyncResponse; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DocumentSyncParams params = DocumentSyncParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .documentId("document_id") .build(); DocumentSyncResponse response = client.pipelines().documents().sync(params); } } ``` #### Response ```json {} ``` ## List Pipeline Document Chunks `List pipelines().documents().getChunks(DocumentGetChunksParamsparams, RequestOptionsrequestOptions = RequestOptions.none())` **get** `/api/v1/pipelines/{pipeline_id}/documents/{document_id}/chunks` Return a list of chunks for a pipeline document. 
### Parameters - `DocumentGetChunksParams params` - `String pipelineId` - `Optional documentId` ### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.models.pipelines.documents.DocumentGetChunksParams; import com.llamacloud_prod.api.models.pipelines.documents.TextNode; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DocumentGetChunksParams params = DocumentGetChunksParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .documentId("document_id") .build(); List textNodes = client.pipelines().documents().getChunks(params); } } ``` #### Response ```json [ { "class_name": "class_name", "embedding": [ 0 ], "end_char_idx": 0, "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ], "extra_info": { "foo": "bar" }, "id_": "id_", "metadata_seperator": "metadata_seperator", "metadata_template": "metadata_template", "mimetype": "mimetype", "relationships": { "foo": { "node_id": "node_id", "class_name": "class_name", "hash": "hash", "metadata": { "foo": "bar" }, "node_type": "1" } }, "start_char_idx": 0, "text": "text", "text_template": "text_template" } ] ``` ## Upsert Batch Pipeline Documents `List pipelines().documents().upsert(DocumentUpsertParams params, RequestOptions requestOptions = RequestOptions.none())` **put** `/api/v1/pipelines/{pipeline_id}/documents` Batch create or update documents for a pipeline. ### Parameters - `DocumentUpsertParams params` - `Optional pipelineId` - `List body` - `Metadata metadata` - `String text` - `Optional id` - `Optional> excludedEmbedMetadataKeys` - `Optional> excludedLlmMetadataKeys` - `Optional> pagePositions` Indices in the CloudDocument.text where a new page begins, e.g., the second page starts at the index specified by page_positions[1].
### Example ```java package com.llamacloud_prod.api.example; import com.llamacloud_prod.api.client.LlamaCloudClient; import com.llamacloud_prod.api.client.okhttp.LlamaCloudOkHttpClient; import com.llamacloud_prod.api.core.JsonValue; import com.llamacloud_prod.api.models.pipelines.documents.CloudDocument; import com.llamacloud_prod.api.models.pipelines.documents.CloudDocumentCreate; import com.llamacloud_prod.api.models.pipelines.documents.DocumentUpsertParams; import java.util.List; public final class Main { private Main() {} public static void main(String[] args) { LlamaCloudClient client = LlamaCloudOkHttpClient.fromEnv(); DocumentUpsertParams params = DocumentUpsertParams.builder() .pipelineId("182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e") .addBody(CloudDocumentCreate.builder() .metadata(CloudDocumentCreate.Metadata.builder() .putAdditionalProperty("foo", JsonValue.from("bar")) .build()) .text("text") .build()) .build(); List<CloudDocument> cloudDocuments = client.pipelines().documents().upsert(params); } } ``` #### Response ```json [ { "id": "id", "metadata": { "foo": "bar" }, "text": "text", "excluded_embed_metadata_keys": [ "string" ], "excluded_llm_metadata_keys": [ "string" ], "page_positions": [ 0 ], "status_metadata": { "foo": "bar" } } ] ``` ## Domain Types ### Cloud Document - `class CloudDocument:` Cloud document stored in S3. - `String id` - `Metadata metadata` - `String text` - `Optional<List<String>> excludedEmbedMetadataKeys` - `Optional<List<String>> excludedLlmMetadataKeys` - `Optional<List<Long>> pagePositions` indices in the CloudDocument.text where a new page begins. e.g. Second page starts at index specified by page_positions[1]. - `Optional statusMetadata` ### Cloud Document Create - `class CloudDocumentCreate:` Create a new cloud document. - `Metadata metadata` - `String text` - `Optional<String> id` - `Optional<List<String>> excludedEmbedMetadataKeys` - `Optional<List<String>> excludedLlmMetadataKeys` - `Optional<List<Long>> pagePositions` indices in the CloudDocument.text where a new page begins. e.g. Second page starts at index specified by page_positions[1].
### Text Node - `class TextNode:` Provided for backward compatibility. - `Optional<String> className` - `Optional<List<Double>> embedding` Embedding of the node. - `Optional<Long> endCharIdx` End char index of the node. - `Optional<List<String>> excludedEmbedMetadataKeys` Metadata keys that are excluded from text for the embed model. - `Optional<List<String>> excludedLlmMetadataKeys` Metadata keys that are excluded from text for the LLM. - `Optional extraInfo` A flat dictionary of metadata fields. - `Optional<String> id` Unique ID of the node. - `Optional<String> metadataSeperator` Separator between metadata fields when converting to string. - `Optional<String> metadataTemplate` Template for how metadata is formatted, with {key} and {value} placeholders. - `Optional<String> mimetype` MIME type of the node content. - `Optional relationships` A mapping of relationships to other node information. - `class RelatedNodeInfo:` - `String nodeId` - `Optional<String> className` - `Optional<String> hash` - `Optional metadata` - `Optional nodeType` - `_1("1")` - `_2("2")` - `_3("3")` - `_4("4")` - `_5("5")` - `List<RelatedNodeInfo>` - `String nodeId` - `Optional<String> className` - `Optional<String> hash` - `Optional metadata` - `Optional nodeType` - `_1("1")` - `_2("2")` - `_3("3")` - `_4("4")` - `_5("5")` - `Optional<Long> startCharIdx` Start char index of the node. - `Optional<String> text` Text content of the node. - `Optional<String> textTemplate` Template for how text is formatted, with {content} and {metadata_str} placeholders.