Source code for venice_ai.resources.embeddings

"""
Venice AI Embeddings API resources.

This module provides classes for interacting with the Venice AI Embeddings API,
allowing clients to generate embeddings from text or token inputs. These embeddings
are vector representations of text that capture semantic meaning and can be used
for tasks such as semantic search, clustering, and classification.
"""

from typing import List, Literal, Optional, Sequence, TypedDict, Union, Any, Dict, TYPE_CHECKING
import httpx # For creating dummy response objects

from .._resource import APIResource, AsyncAPIResource
from ..exceptions import InvalidRequestError
from venice_ai.types.embeddings import CreateEmbeddingRequest, EmbeddingList

if TYPE_CHECKING:
    from .._client import VeniceClient
    from .._async_client import AsyncVeniceClient


class Embeddings(APIResource):
    """
    Provides access to text embedding generation operations.

    This class manages synchronous embedding operations through the Venice AI API.
    Embeddings are vector representations of text that capture semantic meaning and
    can be used for various natural language processing tasks such as semantic
    search, clustering, classification, and similarity analysis.

    :param client: The Venice AI client instance used to make API requests.
    :type client: venice_ai._client.VeniceClient
    """

    # Largest number of items accepted in a list-valued ``input``; enforced
    # client-side in :meth:`create` before any request is sent.
    _MAX_INPUT_ITEMS = 2048

    def _invalid_request(self, message: str) -> InvalidRequestError:
        """
        Build an ``InvalidRequestError`` for a client-side validation failure.

        No HTTP call is made. A dummy ``POST embeddings`` request and a dummy
        400 response carrying ``{"error": {"message": message}}`` are created
        so the exception receives the ``request``, ``response`` and ``body``
        arguments it is constructed with.

        :param message: Human-readable description of the validation failure.
        :type message: str
        :return: The exception instance, ready to be raised by the caller.
        :rtype: venice_ai.exceptions.InvalidRequestError
        """
        dummy_request = httpx.Request(
            "POST", str(self._client._base_url.join("embeddings"))
        )
        dummy_response = httpx.Response(
            400,
            request=dummy_request,
            json={"error": {"message": message}},
        )
        return InvalidRequestError(
            message,
            request=dummy_request,
            response=dummy_response,
            body=dummy_response.json(),
        )

    def create(
        self,
        *,
        model: str,  # type: ignore
        input: Union[str, List[str], List[int], List[List[int]]],  # type: ignore
        dimensions: Optional[int] = None,
        encoding_format: Optional[Literal["float", "base64"]] = None,
        user: Optional[str] = None,
    ) -> EmbeddingList:
        """
        Generates embeddings for input text(s).

        This method sends a request to the Venice AI API to generate vector
        embeddings for the provided text or token inputs using the specified
        model. The embeddings can be used for semantic search, clustering,
        classification, and other NLP tasks.

        :param model: The ID of the embedding model to use. Available models can
            be retrieved using the models API. Example: ``'text-embedding-bge-m3'``.
        :type model: str
        :param input: The input text(s) to generate embeddings for. Can be a single
            string, a list of strings for batch processing, a list of token
            integers, or a list of token lists. For batch processing, all inputs
            will be processed together in a single API call. A list may contain
            at most 2048 items.
        :type input: Union[str, List[str], List[int], List[List[int]]]
        :param dimensions: The number of dimensions for the output embeddings. If
            not specified, uses the model's default dimensionality. Some models
            support reducing dimensions for efficiency.
        :type dimensions: Optional[int]
        :param encoding_format: The format for the returned embeddings. Defaults to
            ``'float'`` for numerical arrays. Use ``'base64'`` for base64-encoded
            string representation.
        :type encoding_format: Optional[Literal["float", "base64"]]
        :param user: A unique identifier representing your end-user. This parameter
            is supported for compatibility with OpenAI clients but is discarded by
            the Venice API and does not affect the response.
        :type user: Optional[str]

        :return: A response object containing the generated embeddings and usage
            data. The response includes an array of embedding objects, each
            containing the vector representation and associated metadata.
        :rtype: :class:`~venice_ai.types.embeddings.EmbeddingList`

        :raises venice_ai.exceptions.InvalidRequestError: If parameter values are
            invalid. An empty ``model``, an empty ``input``, and an ``input`` list
            with more than 2048 items are rejected client-side before any request
            is sent.
        :raises venice_ai.exceptions.AuthenticationError: If the API key is invalid
            or missing.
        :raises venice_ai.exceptions.PermissionDeniedError: If access to the
            specified model is denied.
        :raises venice_ai.exceptions.NotFoundError: If the specified model is not
            found.
        :raises venice_ai.exceptions.RateLimitError: If rate limits are exceeded.
        :raises venice_ai.exceptions.APIError: For other API-related errors.

        **Examples:**

        Generate an embedding for a single string:

        .. code-block:: python

            from venice_ai import VeniceClient

            client = VeniceClient(api_key="your-api-key")

            response = client.embeddings.create(
                model="text-embedding-bge-m3",
                input="The quick brown fox jumps over the lazy dog."
            )
            embedding = response.data[0].embedding
            print(f"Embedding dimensions: {len(embedding)}")
            print(f"First 5 dimensions: {embedding[:5]}")

        Generate embeddings for multiple strings (batch processing):

        .. code-block:: python

            inputs = [
                "First sentence for embedding.",
                "Second sentence for embedding.",
                "Third sentence for embedding."
            ]
            batch_response = client.embeddings.create(
                model="text-embedding-bge-m3",
                input=inputs
            )
            for i, data_item in enumerate(batch_response.data):
                print(f"Embedding for '{inputs[i]}' (first 3 dims): {data_item.embedding[:3]}")
            print(f"Total tokens used: {batch_response.usage.total_tokens}")

        Using optional parameters:

        .. code-block:: python

            response = client.embeddings.create(
                model="text-embedding-bge-m3",
                input="Sample text for embedding",
                dimensions=512,            # Reduce dimensions if supported
                encoding_format="base64",  # Get base64-encoded embeddings
                user="user-123"            # Accepted for OpenAI compatibility
            )
        """
        if not model:
            raise self._invalid_request(
                "model parameter is required and cannot be empty."
            )

        if not input:  # Handles empty string and empty list
            raise self._invalid_request("input cannot be empty.")

        # Validate array length constraint (checked once; strings are exempt).
        if isinstance(input, list) and len(input) > self._MAX_INPUT_ITEMS:
            raise self._invalid_request(
                f"input array must have {self._MAX_INPUT_ITEMS} or fewer items, "
                f"but got {len(input)} items."
            )

        # Required fields first, then only the optional fields the caller set,
        # so the payload never carries explicit nulls.
        body: Dict[str, Any] = {"model": model, "input": input}
        optional_fields = {
            "dimensions": dimensions,
            "encoding_format": encoding_format,
            "user": user,
        }
        body.update(
            {key: value for key, value in optional_fields.items() if value is not None}
        )

        # Make the API request and return the response
        return self._client.post("embeddings", json_data=body)
class AsyncEmbeddings(AsyncAPIResource):
    """
    Provides access to text embedding generation operations (asynchronous).

    This class manages asynchronous embedding operations through the Venice AI
    API. It provides the same functionality as the synchronous
    :class:`Embeddings` class but uses async/await patterns for non-blocking
    operations. Embeddings are vector representations of text that capture
    semantic meaning and can be used for various natural language processing
    tasks.

    :param client: The async Venice AI client instance used to make API requests.
    :type client: venice_ai._async_client.AsyncVeniceClient
    """

    async def create(
        self,
        *,
        model: str,  # type: ignore
        input: Union[str, List[str], List[int], List[List[int]]],  # type: ignore
        dimensions: Optional[int] = None,
        encoding_format: Optional[Literal["float", "base64"]] = None,
        user: Optional[str] = None,
    ) -> EmbeddingList:
        """
        Generates embeddings for input text(s) asynchronously.

        Sends an asynchronous request to the Venice AI API to produce vector
        embeddings for the given text or token inputs with the chosen model.
        The resulting vectors can be used for semantic search, clustering,
        classification, and other NLP tasks.

        :param model: The ID of the embedding model to use. Available models can
            be retrieved using the models API. Example: ``'text-embedding-bge-m3'``.
        :type model: str
        :param input: The input text(s) to generate embeddings for. Can be a single
            string, a list of strings for batch processing, a list of token
            integers, or a list of token lists. For batch processing, all inputs
            are sent together in a single API call.
        :type input: Union[str, List[str], List[int], List[List[int]]]
        :param dimensions: The number of dimensions for the output embeddings. If
            not specified, uses the model's default dimensionality. Some models
            support reducing dimensions for efficiency.
        :type dimensions: Optional[int]
        :param encoding_format: The format for the returned embeddings. Defaults to
            ``'float'`` for numerical arrays. Use ``'base64'`` for base64-encoded
            string representation.
        :type encoding_format: Optional[Literal["float", "base64"]]
        :param user: A unique identifier representing your end-user. This parameter
            is supported for compatibility with OpenAI clients but is discarded by
            the Venice API and does not affect the response.
        :type user: Optional[str]

        :return: A response object containing the generated embeddings and usage
            data. The response includes an array of embedding objects, each
            containing the vector representation and associated metadata.
        :rtype: :class:`~venice_ai.types.embeddings.EmbeddingList`

        :raises venice_ai.exceptions.InvalidRequestError: If parameter values are
            invalid (e.g., empty model or input, or an input list with more than
            2048 items, all of which are checked before any request is sent).
        :raises venice_ai.exceptions.AuthenticationError: If the API key is invalid
            or missing.
        :raises venice_ai.exceptions.PermissionDeniedError: If access to the
            specified model is denied.
        :raises venice_ai.exceptions.NotFoundError: If the specified model is not
            found.
        :raises venice_ai.exceptions.RateLimitError: If rate limits are exceeded.
        :raises venice_ai.exceptions.APIError: For other API-related errors.

        **Examples:**

        Generate an embedding for a single string:

        .. code-block:: python

            import asyncio
            from venice_ai import AsyncVeniceClient

            async def create_embedding():
                async with AsyncVeniceClient(api_key="your-api-key") as client:
                    response = await client.embeddings.create(
                        model="text-embedding-bge-m3",
                        input="The quick brown fox jumps over the lazy dog."
                    )
                    embedding = response.data[0].embedding
                    print(f"Embedding dimensions: {len(embedding)}")
                    print(f"First 5 dimensions: {embedding[:5]}")

            asyncio.run(create_embedding())

        Generate embeddings for multiple strings (batch processing):

        .. code-block:: python

            async def create_batch_embeddings():
                inputs = [
                    "First sentence for embedding.",
                    "Second sentence for embedding.",
                    "Third sentence for embedding."
                ]
                async with AsyncVeniceClient(api_key="your-api-key") as client:
                    batch_response = await client.embeddings.create(
                        model="text-embedding-bge-m3",
                        input=inputs
                    )
                    for i, data_item in enumerate(batch_response.data):
                        print(f"Embedding for '{inputs[i]}' (first 3 dims): {data_item.embedding[:3]}")
                    print(f"Total tokens used: {batch_response.usage.total_tokens}")

            asyncio.run(create_batch_embeddings())

        Using optional parameters:

        .. code-block:: python

            async def create_custom_embedding():
                async with AsyncVeniceClient(api_key="your-api-key") as client:
                    response = await client.embeddings.create(
                        model="text-embedding-bge-m3",
                        input="Sample text for embedding",
                        dimensions=512,            # Reduce dimensions if supported
                        encoding_format="base64",  # Get base64-encoded embeddings
                        user="user-123"            # Accepted for OpenAI compatibility
                    )

            asyncio.run(create_custom_embedding())
        """
        # Pick the first failing client-side check, in the same order the
        # checks have always run: model, then empty input, then list length.
        problem: Optional[str]
        if not model:
            problem = "model parameter is required and cannot be empty."
        elif not input:  # covers both the empty string and the empty list
            problem = "input cannot be empty."
        elif isinstance(input, list) and len(input) > 2048:
            problem = f"input array must have 2048 or fewer items, but got {len(input)} items."
        else:
            problem = None

        if problem is not None:
            # No HTTP call is made here: a dummy request and a dummy 400
            # response are built to pass to InvalidRequestError.
            fake_request = httpx.Request(
                "POST", str(self._client._base_url.join("embeddings"))
            )
            fake_response = httpx.Response(
                400,
                request=fake_request,
                json={"error": {"message": problem}},
            )
            raise InvalidRequestError(
                problem,
                request=fake_request,
                response=fake_response,
                body=fake_response.json(),
            )

        # Assemble every candidate field, then drop the ones left as None so
        # the payload contains only what the caller actually supplied.
        candidates: Dict[str, Any] = {
            "model": model,
            "input": input,
            "dimensions": dimensions,
            "encoding_format": encoding_format,
            "user": user,
        }
        body: Dict[str, Any] = {
            name: value for name, value in candidates.items() if value is not None
        }

        # Issue the request and hand back the client's response.
        return await self._client.post("embeddings", json_data=body)