Source code for venice_ai.resources.embeddings

"""
Venice AI Embeddings API resources.

This module provides classes for interacting with the Venice AI Embeddings API,
allowing clients to generate embeddings from text or token inputs. These embeddings
are vector representations of text that capture semantic meaning and can be used
for tasks such as semantic search, clustering, and classification.
"""

from typing import List, Literal, Optional, Sequence, TypedDict, Union, Any, Dict, TYPE_CHECKING
import httpx # For creating dummy response objects

from .._resource import APIResource, AsyncAPIResource
from ..exceptions import InvalidRequestError
from venice_ai.types.embeddings import CreateEmbeddingRequest, EmbeddingList

if TYPE_CHECKING:
    from .._client import VeniceClient
    from .._async_client import AsyncVeniceClient



[docs]
class Embeddings(APIResource):
    """
    Synchronous resource for generating text embeddings with the Venice AI API.

    An embedding is a vector representation of text that captures semantic
    meaning; typical uses are semantic search, clustering, classification and
    similarity analysis.

    :param client: The Venice AI client instance used to make API requests.
    :type client: venice_ai._client.VeniceClient
    """

    def _invalid_request(self, message: str) -> InvalidRequestError:
        """
        Build the error raised when an argument fails client-side validation.

        No HTTP call is made for these failures, so a placeholder ``POST``
        request to the ``embeddings`` URL and a matching ``400`` response
        carrying *message* are created to pass to the exception's constructor.

        :param message: Human-readable description of the validation failure.
        :type message: str
        :return: The exception, ready for the caller to ``raise``.
        :rtype: venice_ai.exceptions.InvalidRequestError
        """
        request = httpx.Request("POST", str(self._client._base_url.join("embeddings")))
        response = httpx.Response(400, request=request, json={"error": {"message": message}})
        return InvalidRequestError(message, request=request, response=response, body=response.json())

    def create(
        self,
        *,
        model: str, # type: ignore
        input: Union[str, List[str], List[int], List[List[int]]], # type: ignore
        dimensions: Optional[int] = None,
        encoding_format: Optional[Literal["float", "base64"]] = None,
        user: Optional[str] = None,
    ) -> EmbeddingList:
        """
        Generate embeddings for the given text or token input.

        The arguments are validated locally, assembled into a JSON body and
        sent with a ``POST`` to the ``embeddings`` endpoint. Optional arguments
        left as ``None`` are omitted from the request body.

        :param model: The ID of the embedding model to use. Available models can
            be retrieved using the models API. Example: ``'text-embedding-bge-m3'``.
            Must not be empty.
        :type model: str
        :param input: The input to embed: a single string, a list of strings
            (batch), a list of token integers, or a list of token lists. Must not
            be empty, and a list may contain at most 2048 items. A batch is sent
            in a single API call.
        :type input: Union[str, List[str], List[int], List[List[int]]]
        :param dimensions: The number of dimensions for the output embeddings.
            If not specified, the model's default dimensionality is used. Some
            models support reducing dimensions for efficiency.
        :type dimensions: Optional[int]
        :param encoding_format: The format of the returned embeddings:
            ``'float'`` (the default) for numerical arrays or ``'base64'`` for a
            base64-encoded string representation.
        :type encoding_format: Optional[Literal["float", "base64"]]
        :param user: A unique identifier representing your end-user. Accepted
            for compatibility with OpenAI clients; it is discarded by the Venice
            API and does not affect the response.
        :type user: Optional[str]

        :return: The generated embeddings and usage data. ``data`` holds one
            embedding object per input, each with its vector and metadata.
        :rtype: :class:`~venice_ai.types.embeddings.EmbeddingList`

        :raises venice_ai.exceptions.InvalidRequestError: Raised before any
            request is sent when ``model`` is empty, ``input`` is empty, or
            ``input`` is a list with more than 2048 items. The API may also
            reject other invalid values (e.g. an unsupported encoding format).
        :raises venice_ai.exceptions.AuthenticationError: If the API key is invalid
            or missing.
        :raises venice_ai.exceptions.PermissionDeniedError: If access to the specified
            model is denied.
        :raises venice_ai.exceptions.NotFoundError: If the specified model is not found.
        :raises venice_ai.exceptions.RateLimitError: If rate limits are exceeded.
        :raises venice_ai.exceptions.APIError: For other API-related errors.

        **Examples:**

        Generate an embedding for a single string:

        .. code-block:: python

            from venice_ai import VeniceClient

            client = VeniceClient(api_key="your-api-key")
            response = client.embeddings.create(
                model="text-embedding-bge-m3",
                input="The quick brown fox jumps over the lazy dog."
            )
            embedding = response.data[0].embedding
            print(f"Embedding dimensions: {len(embedding)}")
            print(f"First 5 dimensions: {embedding[:5]}")

        Generate embeddings for multiple strings (batch processing):

        .. code-block:: python

            inputs = [
                "First sentence for embedding.",
                "Second sentence for embedding.",
                "Third sentence for embedding."
            ]
            batch_response = client.embeddings.create(
                model="text-embedding-bge-m3",
                input=inputs
            )
            for i, data_item in enumerate(batch_response.data):
                print(f"Embedding for '{inputs[i]}' (first 3 dims): {data_item.embedding[:3]}")
            print(f"Total tokens used: {batch_response.usage.total_tokens}")

        Using optional parameters:

        .. code-block:: python

            response = client.embeddings.create(
                model="text-embedding-bge-m3",
                input="Sample text for embedding",
                dimensions=512,  # Reduce dimensions if supported
                encoding_format="base64",  # Get base64-encoded embeddings
                user="user-123"  # Accepted for OpenAI compatibility; ignored by Venice
            )

        """
        # Client-side validation: fail fast without spending a network round trip.
        if not model:
            raise self._invalid_request("model parameter is required and cannot be empty.")
        if not input:  # Covers both the empty string and the empty list.
            raise self._invalid_request("input cannot be empty.")
        if isinstance(input, list) and len(input) > 2048:
            raise self._invalid_request(
                f"input array must have 2048 or fewer items, but got {len(input)} items."
            )

        # Required fields first, then only the optional fields the caller set.
        body: Dict[str, Any] = {"model": model, "input": input}
        optional_fields = {
            "dimensions": dimensions,
            "encoding_format": encoding_format,
            "user": user,
        }
        body.update({key: value for key, value in optional_fields.items() if value is not None})

        return self._client.post("embeddings", json_data=body)





[docs]
class AsyncEmbeddings(AsyncAPIResource):
    """
    Asynchronous resource for generating text embeddings with the Venice AI API.

    Offers the same operation as the synchronous :class:`Embeddings` class, but
    ``create`` is a coroutine so the request does not block the event loop.
    An embedding is a vector representation of text that captures semantic
    meaning and can be used for various natural language processing tasks.

    :param client: The async Venice AI client instance used to make API requests.
    :type client: venice_ai._async_client.AsyncVeniceClient
    """

    def _invalid_request(self, message: str) -> InvalidRequestError:
        """
        Build the error raised when an argument fails client-side validation.

        No HTTP call is made for these failures, so a placeholder ``POST``
        request to the ``embeddings`` URL and a matching ``400`` response
        carrying *message* are created to pass to the exception's constructor.

        :param message: Human-readable description of the validation failure.
        :type message: str
        :return: The exception, ready for the caller to ``raise``.
        :rtype: venice_ai.exceptions.InvalidRequestError
        """
        request = httpx.Request("POST", str(self._client._base_url.join("embeddings")))
        response = httpx.Response(400, request=request, json={"error": {"message": message}})
        return InvalidRequestError(message, request=request, response=response, body=response.json())

    async def create(
        self,
        *,
        model: str, # type: ignore
        input: Union[str, List[str], List[int], List[List[int]]], # type: ignore
        dimensions: Optional[int] = None,
        encoding_format: Optional[Literal["float", "base64"]] = None,
        user: Optional[str] = None,
    ) -> EmbeddingList:
        """
        Generate embeddings for the given text or token input asynchronously.

        The arguments are validated locally, assembled into a JSON body and
        sent with an awaited ``POST`` to the ``embeddings`` endpoint. Optional
        arguments left as ``None`` are omitted from the request body.

        :param model: The ID of the embedding model to use. Available models can
            be retrieved using the models API. Example: ``'text-embedding-bge-m3'``.
            Must not be empty.
        :type model: str
        :param input: The input to embed: a single string, a list of strings
            (batch), a list of token integers, or a list of token lists. Must not
            be empty, and a list may contain at most 2048 items. A batch is sent
            in a single API call.
        :type input: Union[str, List[str], List[int], List[List[int]]]
        :param dimensions: The number of dimensions for the output embeddings.
            If not specified, the model's default dimensionality is used. Some
            models support reducing dimensions for efficiency.
        :type dimensions: Optional[int]
        :param encoding_format: The format of the returned embeddings:
            ``'float'`` (the default) for numerical arrays or ``'base64'`` for a
            base64-encoded string representation.
        :type encoding_format: Optional[Literal["float", "base64"]]
        :param user: A unique identifier representing your end-user. Accepted
            for compatibility with OpenAI clients; it is discarded by the Venice
            API and does not affect the response.
        :type user: Optional[str]

        :return: The generated embeddings and usage data. ``data`` holds one
            embedding object per input, each with its vector and metadata.
        :rtype: :class:`~venice_ai.types.embeddings.EmbeddingList`

        :raises venice_ai.exceptions.InvalidRequestError: Raised before any
            request is sent when ``model`` is empty, ``input`` is empty, or
            ``input`` is a list with more than 2048 items. The API may also
            reject other invalid values (e.g. an unsupported encoding format).
        :raises venice_ai.exceptions.AuthenticationError: If the API key is invalid
            or missing.
        :raises venice_ai.exceptions.PermissionDeniedError: If access to the specified
            model is denied.
        :raises venice_ai.exceptions.NotFoundError: If the specified model is not found.
        :raises venice_ai.exceptions.RateLimitError: If rate limits are exceeded.
        :raises venice_ai.exceptions.APIError: For other API-related errors.

        **Examples:**

        Generate an embedding for a single string:

        .. code-block:: python

            import asyncio
            from venice_ai import AsyncVeniceClient

            async def create_embedding():
                async with AsyncVeniceClient(api_key="your-api-key") as client:
                    response = await client.embeddings.create(
                        model="text-embedding-bge-m3",
                        input="The quick brown fox jumps over the lazy dog."
                    )
                    embedding = response.data[0].embedding
                    print(f"Embedding dimensions: {len(embedding)}")
                    print(f"First 5 dimensions: {embedding[:5]}")

            asyncio.run(create_embedding())

        Generate embeddings for multiple strings (batch processing):

        .. code-block:: python

            async def create_batch_embeddings():
                inputs = [
                    "First sentence for embedding.",
                    "Second sentence for embedding.",
                    "Third sentence for embedding."
                ]
                async with AsyncVeniceClient(api_key="your-api-key") as client:
                    batch_response = await client.embeddings.create(
                        model="text-embedding-bge-m3",
                        input=inputs
                    )
                    for i, data_item in enumerate(batch_response.data):
                        print(f"Embedding for '{inputs[i]}' (first 3 dims): {data_item.embedding[:3]}")
                    print(f"Total tokens used: {batch_response.usage.total_tokens}")

            asyncio.run(create_batch_embeddings())

        Using optional parameters:

        .. code-block:: python

            async def create_custom_embedding():
                async with AsyncVeniceClient(api_key="your-api-key") as client:
                    response = await client.embeddings.create(
                        model="text-embedding-bge-m3",
                        input="Sample text for embedding",
                        dimensions=512,  # Reduce dimensions if supported
                        encoding_format="base64",  # Get base64-encoded embeddings
                        user="user-123"  # Accepted for OpenAI compatibility; ignored by Venice
                    )

            asyncio.run(create_custom_embedding())

        """
        # Client-side validation: fail fast without spending a network round trip.
        if not model:
            raise self._invalid_request("model parameter is required and cannot be empty.")
        if not input:  # Covers both the empty string and the empty list.
            raise self._invalid_request("input cannot be empty.")
        if isinstance(input, list) and len(input) > 2048:
            raise self._invalid_request(
                f"input array must have 2048 or fewer items, but got {len(input)} items."
            )

        # Required fields first, then only the optional fields the caller set.
        body: Dict[str, Any] = {"model": model, "input": input}
        optional_fields = {
            "dimensions": dimensions,
            "encoding_format": encoding_format,
            "user": user,
        }
        body.update({key: value for key, value in optional_fields.items() if value is not None})

        return await self._client.post("embeddings", json_data=body)