"""
Type definitions for Venice AI Audio API.
This module contains TypedDict definitions and Enums for request objects
in the Venice AI Audio API, covering the speech creation endpoint.
"""
from enum import Enum
from typing import List, Literal, Optional, TypedDict
__all__ = [
"Voice",
"ResponseFormat",
"CreateSpeechRequest",
"VoiceDetail",
"VoiceList",
]
[docs]
class Voice(str, Enum):
"""
Available voices for speech generation in the Venice AI Audio API.
This enumeration defines the complete set of voice options that can be used
when generating speech from text via the text-to-speech endpoint. Each voice
represents different speaker characteristics including gender, accent, and
vocal qualities. Voice names follow a pattern indicating language/region
and gender (e.g., ``af`` for American Female, ``am`` for American Male).
"""
AF_ALLOY = "af_alloy"
AF_AOEDE = "af_aoede"
AF_BELLA = "af_bella"
AF_HEART = "af_heart"
AF_JADZIA = "af_jadzia"
AF_JESSICA = "af_jessica"
AF_KORE = "af_kore"
AF_NICOLE = "af_nicole"
AF_NOVA = "af_nova"
AF_RIVER = "af_river"
AF_SARAH = "af_sarah"
AF_SKY = "af_sky"
AM_ADAM = "am_adam"
AM_ECHO = "am_echo"
AM_ERIC = "am_eric"
AM_FENRIR = "am_fenrir"
AM_LIAM = "am_liam"
AM_MICHAEL = "am_michael"
AM_ONYX = "am_onyx"
AM_PUCK = "am_puck"
AM_SANTA = "am_santa"
BF_ALICE = "bf_alice"
BF_EMMA = "bf_emma"
BF_LILY = "bf_lily"
BM_DANIEL = "bm_daniel"
BM_FABLE = "bm_fable"
BM_GEORGE = "bm_george"
BM_LEWIS = "bm_lewis"
ZF_XIAOBEI = "zf_xiaobei"
ZF_XIAONI = "zf_xiaoni"
ZF_XIAOXIAO = "zf_xiaoxiao"
ZF_XIAOYI = "zf_xiaoyi"
ZM_YUNJIAN = "zm_yunjian"
ZM_YUNXI = "zm_yunxi"
ZM_YUNXIA = "zm_yunxia"
ZM_YUNYANG = "zm_yunyang"
FF_SIWIS = "ff_siwis"
HF_ALPHA = "hf_alpha"
HF_BETA = "hf_beta"
HM_OMEGA = "hm_omega"
HM_PSI = "hm_psi"
IF_SARA = "if_sara"
IM_NICOLA = "im_nicola"
JF_ALPHA = "jf_alpha"
JF_GONGITSUNE = "jf_gongitsune"
JF_NEZUMI = "jf_nezumi"
JF_TEBUKURO = "jf_tebukuro"
JM_KUMO = "jm_kumo"
PF_DORA = "pf_dora"
PM_ALEX = "pm_alex"
PM_SANTA = "pm_santa"
EF_DORA = "ef_dora"
EM_ALEX = "em_alex"
EM_SANTA = "em_santa"
# Aliases for backward compatibility with existing code and tests
NOVA = AF_NOVA # Alias for AF_NOVA (American Female Nova voice)
ALLOY = AF_ALLOY # Alias for AF_ALLOY (American Female Alloy voice)
ONYX = AM_ONYX # Alias for AM_ONYX (American Male Onyx voice)
SHIMMER = AF_RIVER # Alias for AF_RIVER (approximation for legacy SHIMMER voice)
[docs]
class CreateSpeechRequest(TypedDict, total=False):
"""
Request parameters for creating speech audio from text input.
This TypedDict defines the structure for requests to the POST /audio/speech
endpoint, which converts text into spoken audio using specified voice
characteristics and output format. The request allows customization of
voice selection, audio format, playback speed, and user identification
for tracking purposes.
Attributes:
model: ID of the model to use for speech generation (e.g., "tts-kokoro").
input: The text to convert to speech. Maximum length varies by model.
voice: The voice to use for the generated audio. See :class:`~Voice` for available options.
response_format: Optional. The format to return the audio in. Defaults to "mp3".
See :class:`~ResponseFormat` for available formats.
speed: Optional. The speed of the generated audio. Select a value from 0.25 to 4.0.
Defaults to 1.0.
user: Optional. A unique identifier representing the end-user, which can help Venice AI
to monitor and detect abuse.
"""
model: str # Required: ID of the speech model to use for generation
input: str # Required: The text to convert to speech
voice: Voice # Required: Voice to use for the generated audio
response_format: ResponseFormat # Optional: Format of returned audio (defaults to "mp3")
speed: Optional[float] # Optional: Speed of the generated audio (0.25-4.0, defaults to 1.0)
user: Optional[str] # Optional: Unique identifier representing the end-user for monitoring
[docs]
class VoiceDetail(TypedDict):
"""
Detailed information about a single text-to-speech voice.
This TypedDict represents the structure of voice information returned by the
get_voices() method. It contains metadata about a voice including its unique
identifier, associated model, gender characteristics, and regional/language
information derived from the voice ID.
Attributes:
id: The unique identifier for the voice as provided by the API (e.g., "af_alloy", "zm_yunjian").
model_id: The ID of the TTS model this voice is associated with (e.g., "tts-kokoro").
gender: The perceived gender of the voice, parsed from the voice ID prefix.
"unknown" if the prefix is not recognized or ambiguous.
region_code: The raw two-letter prefix from the voice ID that typically indicates
region/language and gender (e.g., "af", "zm").
language: A descriptive name of the primary language associated with the voice,
derived from the region_code (e.g., "American English", "Mandarin Chinese").
accent: A descriptive name of the accent or locale associated with the voice,
derived from the region_code (e.g., "US", "Standard Chinese").
"""
id: str
model_id: str
gender: Optional[Literal["male", "female", "unknown"]]
region_code: Optional[str]
language: Optional[str]
accent: Optional[str]
[docs]
class VoiceList(TypedDict):
"""
A list of voice details with optional filtering metadata.
This TypedDict represents the structure returned by the get_voices() method,
containing a list of VoiceDetail objects along with metadata about any filters
that were applied to generate the list. This follows the standard API pattern
for list responses.
Attributes:
object: A string indicating the type of API object, always "list" for lists.
data: A list containing VoiceDetail objects.
model_id_filter: The model_id that was used to filter the voices, if any.
None if no model ID filter was applied.
gender_filter: The gender that was used to filter the voices, if any.
None if no gender filter was applied.
region_code_filter: The region_code (e.g., "af", "zm") that was used to filter
the voices, if any. None if no region code filter was applied.
"""
object: Literal["list"]
data: List[VoiceDetail]
model_id_filter: Optional[str]
gender_filter: Optional[Literal["male", "female", "unknown"]]
region_code_filter: Optional[str]