-
Notifications
You must be signed in to change notification settings - Fork 10
Model Config: DB-driven validator, seed sarvamai/elevenlabs/google #859
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
9b40427
d7e7169
85d31ec
06f94ef
3bc791a
4be5b8c
45e6b6a
5fa40d2
48d56ef
944efdd
e00df2e
e0f63d4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,147 @@ | ||
| """seed stt/tts model_config rows for google, sarvamai, elevenlabs | ||
|
|
||
| Revision ID: 063 | ||
| Revises: 062 | ||
| Create Date: 2026-05-19 00:00:00.000000 | ||
|
|
||
| """ | ||
|
|
||
| from alembic import op | ||
|
|
||
| # revision identifiers, used by Alembic. | ||
| revision = "063" | ||
| down_revision = "062" | ||
| branch_labels = None | ||
| depends_on = None | ||
|
|
||
|
|
||
# (provider, model_name) pairs for the rows inserted by step 6 of upgrade().
# NOTE(review): not referenced by the code visible in this revision — keep it
# in sync with the INSERT statement by hand, or confirm whether it is still needed.
SEEDED_MODELS = [
    ("google", "gemini-2.5-pro"),
    ("google", "gemini-3.1-pro-preview"),
    ("google", "gemini-3-flash-preview"),
    ("google", "gemini-2.5-flash"),
    ("google", "gemini-2.5-flash-preview-tts"),
    ("google", "gemini-2.5-pro-preview-tts"),
    ("sarvamai", "saaras:v3"),
    ("sarvamai", "bulbul:v3"),
    ("elevenlabs", "scribe_v2"),
    ("elevenlabs", "eleven_v3"),
]
|
|
||
|
|
||
def upgrade():
    """Convert model_config to enum-typed provider/completion_type and seed STT/TTS rows.

    Runs six steps in order: create the two enum types, convert ``provider``
    to the enum while adding ``completion_type``, backfill ``completion_type``
    from the modality arrays, make it NOT NULL, create indexes, and insert the
    google / sarvamai / elevenlabs seed rows.
    """
    # 1. Create enum types
    op.execute(
        "CREATE TYPE global.provider_enum AS ENUM ('openai', 'google', 'sarvamai', 'elevenlabs')"
    )
    op.execute("CREATE TYPE global.completion_type_enum AS ENUM ('text', 'stt', 'tts')")

    # 2. Alter provider column to use enum; add completion_type column
    # The column is added nullable here; NOT NULL is applied in step 4, after
    # the backfill. The USING cast fails if an existing provider value is not
    # one of the enum labels created in step 1.
    op.execute(
        """
        ALTER TABLE global.model_config
        ALTER COLUMN provider TYPE global.provider_enum
        USING provider::global.provider_enum,
        ADD COLUMN completion_type global.completion_type_enum
        """
    )

    # 3. Backfill completion_type for pre-existing rows (openai models seeded before this migration)
    # Rule: AUDIO on the input side only -> 'stt'; AUDIO on the output side
    # only -> 'tts'; anything else (AUDIO on both sides or on neither) -> 'text'.
    op.execute(
        """
        UPDATE global.model_config SET completion_type =
        CASE
        WHEN 'AUDIO' = ANY(input_modalities::text[]) AND NOT ('AUDIO' = ANY(output_modalities::text[])) THEN 'stt'::global.completion_type_enum
        WHEN 'AUDIO' = ANY(output_modalities::text[]) AND NOT ('AUDIO' = ANY(input_modalities::text[])) THEN 'tts'::global.completion_type_enum
        ELSE 'text'::global.completion_type_enum
        END
        WHERE completion_type IS NULL
        """
    )

    # 4. Set NOT NULL now that all rows are backfilled
    op.execute(
        "ALTER TABLE global.model_config ALTER COLUMN completion_type SET NOT NULL"
    )

    # 5. Add indexes
    # Two btree indexes for provider / completion_type / is_active filters,
    # and two GIN indexes on the modality columns.
    op.execute(
        "CREATE INDEX ix_model_config_provider_active ON global.model_config (provider, is_active)"
    )
    op.execute(
        "CREATE INDEX ix_model_config_provider_type_active ON global.model_config (provider, completion_type, is_active)"
    )
    op.execute(
        "CREATE INDEX ix_model_config_input_modalities ON global.model_config USING gin (input_modalities)"
    )
    op.execute(
        "CREATE INDEX ix_model_config_output_modalities ON global.model_config USING gin (output_modalities)"
    )

    # 6. Seed rows — pricing per 1M tokens (USD): response/batch = text i/o; audio = audio-modal i/o
    # sarvamai and elevenlabs rows are inserted with NULL pricing.
    # ON CONFLICT ... DO NOTHING leaves any existing (provider, model_name)
    # row untouched, including its completion_type from the step 3 backfill.
    op.execute(
        """
        INSERT INTO global.model_config
        (provider, model_name, completion_type, config, input_modalities, output_modalities, pricing, is_active, inserted_at, updated_at)
        VALUES
        ('google', 'gemini-2.5-pro', 'stt',
        '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}}',
        '{AUDIO}', '{TEXT}',
        '{"response": {"input_token_cost": 1.25, "output_token_cost": 10.0}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5.0}, "audio": {"input_token_cost": 3.5, "output_token_cost": 10.0}}',
        true, NOW(), NOW()),
        ('google', 'gemini-3.1-pro-preview', 'stt',
        '{"thinking_level": {"type": "enum", "default": "high", "options": ["low", "medium", "high"], "description": "Max reasoning depth before output. high = best quality, low = faster/cheaper."}}',
        '{AUDIO}', '{TEXT}',
        '{"response": {"input_token_cost": 2.0, "output_token_cost": 12.0}, "batch": {"input_token_cost": 1.0, "output_token_cost": 6.0}, "audio": {"input_token_cost": 3.5, "output_token_cost": 12.0}}',
        true, NOW(), NOW()),
        ('google', 'gemini-3-flash-preview', 'stt',
        '{"thinking_level": {"type": "enum", "default": "high", "options": ["minimal", "low", "medium", "high"], "description": "Max reasoning depth before output."}}',
        '{AUDIO}', '{TEXT}',
        '{"response": {"input_token_cost": 0.5, "output_token_cost": 3.0}, "batch": {"input_token_cost": 0.25, "output_token_cost": 1.5}, "audio": {"input_token_cost": 1.0, "output_token_cost": 3.0}}',
        true, NOW(), NOW()),
        ('google', 'gemini-2.5-flash', 'stt',
        '{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}}',
        '{AUDIO}', '{TEXT}',
        '{"response": {"input_token_cost": 0.3, "output_token_cost": 2.5}, "batch": {"input_token_cost": 0.15, "output_token_cost": 1.25}, "audio": {"input_token_cost": 1.0, "output_token_cost": 2.5}}',
        true, NOW(), NOW()),
        ('google', 'gemini-2.5-flash-preview-tts', 'tts',
        '{"voice": {"type": "enum", "default": "Kore", "options": ["Kore", "Orus", "Leda", "Charon"], "description": "TTS voice."}}',
        '{TEXT}', '{AUDIO}',
        '{"response": {"input_token_cost": 0.5, "output_token_cost": 10.0}, "batch": {"input_token_cost": 0.25, "output_token_cost": 5.0}, "audio": {"input_token_cost": 0.5, "output_token_cost": 10.0}}',
        true, NOW(), NOW()),
        ('google', 'gemini-2.5-pro-preview-tts', 'tts',
        '{"voice": {"type": "enum", "default": "Kore", "options": ["Kore", "Orus", "Leda", "Charon"], "description": "TTS voice."}}',
        '{TEXT}', '{AUDIO}',
        '{"response": {"input_token_cost": 1.0, "output_token_cost": 20.0}, "batch": {"input_token_cost": 0.5, "output_token_cost": 10.0}, "audio": {"input_token_cost": 1.0, "output_token_cost": 20.0}}',
        true, NOW(), NOW()),
        ('sarvamai', 'saaras:v3', 'stt',
        '{}', '{AUDIO}', '{TEXT}', NULL, true, NOW(), NOW()),
        ('sarvamai', 'bulbul:v3', 'tts',
        '{"voice": {"type": "enum", "default": "simran", "options": ["simran", "shubh", "roopa"], "description": "TTS voice."}}',
        '{TEXT}', '{AUDIO}', NULL, true, NOW(), NOW()),
        ('elevenlabs', 'scribe_v2', 'stt',
        '{}', '{AUDIO}', '{TEXT}', NULL, true, NOW(), NOW()),
        ('elevenlabs', 'eleven_v3', 'tts',
        '{"voice": {"type": "enum", "default": "Sarah", "options": ["Sarah", "George", "Callum", "Liam"], "description": "TTS voice."}}',
        '{TEXT}', '{AUDIO}', NULL, true, NOW(), NOW())
        ON CONFLICT (provider, model_name) DO NOTHING
        """
    )
|
|
||
|
|
||
def downgrade():
    """Revert revision 063 and remove rows for the providers it introduced.

    Order of operations:

    1. Delete every ``global.model_config`` row whose provider is
       ``sarvamai`` or ``elevenlabs``. The pre-063 application code only
       accepts ``openai`` / ``google``, so leaving these rows behind would
       hand the rolled-back app provider values it cannot handle.
    2. Drop the four indexes created by ``upgrade()``.
    3. Drop ``completion_type`` and turn ``provider`` back into ``varchar``.
    4. Drop the two enum types, which no column references any more.

    Google rows are kept: that provider was already supported before this
    revision, and ``upgrade()`` may have skipped rows that already existed
    (``ON CONFLICT DO NOTHING``), so they cannot be told apart from
    pre-existing data.
    """
    # Compare as text so the statement does not depend on the column still
    # being enum-typed at this point.
    # NOTE(review): this also removes sarvamai/elevenlabs rows added after the
    # upgrade, and assumes nothing references these rows by foreign key —
    # confirm before running against production data.
    op.execute(
        "DELETE FROM global.model_config "
        "WHERE provider::text IN ('sarvamai', 'elevenlabs')"
    )

    op.execute("DROP INDEX IF EXISTS global.ix_model_config_output_modalities")
    op.execute("DROP INDEX IF EXISTS global.ix_model_config_input_modalities")
    op.execute("DROP INDEX IF EXISTS global.ix_model_config_provider_type_active")
    op.execute("DROP INDEX IF EXISTS global.ix_model_config_provider_active")

    op.execute(
        """
        ALTER TABLE global.model_config
        DROP COLUMN completion_type,
        ALTER COLUMN provider TYPE varchar USING provider::varchar
        """
    )

    # The enum types can only be dropped once no column uses them.
    op.execute("DROP TYPE IF EXISTS global.completion_type_enum")
    op.execute("DROP TYPE IF EXISTS global.provider_enum")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,13 +1,23 @@ | ||
| from typing import Any, Literal | ||
|
|
||
| from fastapi import HTTPException | ||
| from sqlmodel import Session, select | ||
|
|
||
| from app.models import ModelConfig | ||
| from app.models.llm.request import ConfigBlob | ||
| from app.models.model_config import CompletionType | ||
|
|
||
# Provider names accepted by the model_config helpers in this module.
Provider = Literal["openai", "google", "sarvamai", "elevenlabs"]
|
|
||
|
|
||
def _normalize_provider(raw: str) -> str:
    """Strip a trailing '-native' so the name matches model_config providers.

    For example 'openai-native' becomes 'openai'; names without the suffix
    are returned unchanged.
    """
    return raw.removesuffix("-native")
|
|
||
|
|
||
| def list_active_model_configs( | ||
| session: Session, | ||
| provider: Literal["openai", "google"] | None = None, | ||
| provider: Provider | None = None, | ||
| skip: int = 0, | ||
| limit: int = 100, | ||
| ) -> tuple[list[ModelConfig], bool]: | ||
|
|
@@ -30,7 +40,7 @@ def list_active_model_configs( | |
|
|
||
| def list_all_active_model_configs( | ||
| session: Session, | ||
| provider: Literal["openai", "google"] | None = None, | ||
| provider: Provider | None = None, | ||
| ) -> list[ModelConfig]: | ||
| statement = select(ModelConfig).where(ModelConfig.is_active) | ||
|
|
||
|
|
@@ -42,7 +52,7 @@ def list_all_active_model_configs( | |
|
|
||
|
|
||
| def get_model_config( | ||
| session: Session, provider: Literal["openai", "google"], model_name: str | ||
| session: Session, provider: Provider, model_name: str | ||
| ) -> ModelConfig | None: | ||
| statement = select(ModelConfig).where( | ||
| ModelConfig.provider == provider, | ||
|
|
@@ -52,9 +62,109 @@ def get_model_config( | |
| return session.exec(statement).first() | ||
|
|
||
|
|
||
| def is_reasoning_model( | ||
| session: Session, provider: Literal["openai", "google"], model_name: str | ||
def list_supported_models(
    session: Session, provider: Provider, completion_type: CompletionType
) -> list[str]:
    """Return active model names for a provider + completion type, sorted by name.

    Args:
        session: Database session used to run the query.
        provider: Provider whose models are listed.
        completion_type: Completion type the models must be registered for.

    Returns:
        Model names in ascending order; empty if nothing matches.
    """
    stmt = (
        select(ModelConfig.model_name)
        .where(
            ModelConfig.provider == provider,
            ModelConfig.completion_type == completion_type,
            ModelConfig.is_active,
        )
        # This list is shown verbatim in a 400 error detail; without an
        # explicit ORDER BY the row order is unspecified and the message
        # could differ between identical requests.
        .order_by(ModelConfig.model_name)
    )
    return list(session.exec(stmt).all())
|
Comment on lines
+69
to
+74
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Order the supported-model list before surfacing it. This list is returned directly in the 400 detail on Lines 141-144. Without an `ORDER BY`, the order of the returned model names is not guaranteed, so the error message can vary between identical requests. Proposed fix: stmt = select(ModelConfig.model_name).where(
ModelConfig.provider == provider,
ModelConfig.completion_type == completion_type,
ModelConfig.is_active,
)
+ stmt = stmt.order_by(ModelConfig.model_name)
return list(session.exec(stmt).all())🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
def is_model_supported(
    session: Session,
    provider: Provider,
    completion_type: CompletionType,
    model_name: str,
) -> bool:
    """Tell whether an active row exists for this provider, model and completion type.

    Returns:
        True if at least one active model_config row matches all three
        values, False otherwise.
    """
    query = (
        select(ModelConfig.id)
        .where(ModelConfig.provider == provider)
        .where(ModelConfig.model_name == model_name)
        .where(ModelConfig.completion_type == completion_type)
        .where(ModelConfig.is_active)
    )
    # Only existence matters, so fetching the first matching id is enough.
    first_match = session.exec(query).first()
    return first_match is not None
|
|
||
|
|
||
def validate_blob_model_or_raise(session: Session, blob: ConfigBlob) -> None:
    """Validate a ConfigBlob's model (and TTS voice) against model_config.

    model_config is the source of truth for every provider and completion
    type. Blobs with no provider, and native configs (provider ending in
    '-native', which forward raw params to the provider), are not validated.

    Raises:
        HTTPException: 400 when the model is missing from params, is unknown
            for the provider, is not supported for the completion type, or
            (for 'tts') when the requested voice is not among the model's
            configured voice options.
    """
    completion = blob.completion
    raw_provider = completion.provider
    completion_type = completion.type

    # Nothing to check without a provider; native configs are exempt.
    if raw_provider is None or raw_provider.endswith("-native"):
        return

    provider = _normalize_provider(raw_provider)
    params = completion.params or {}

    model_name = params.get("model")
    if not model_name:
        raise HTTPException(
            status_code=400,
            detail=f"completion.params.model is required for provider='{raw_provider}'",
        )

    # First check: the (provider, model) pair must exist at all.
    model_row = get_model_config(
        session=session,
        provider=provider,  # type: ignore[arg-type]
        model_name=model_name,
    )
    if model_row is None:
        raise HTTPException(
            status_code=400,
            detail=f"Model '{model_name}' not found for provider='{provider}'.",
        )

    # Second check: the model must be active and registered for this type.
    supported = is_model_supported(
        session=session,
        provider=provider,  # type: ignore[arg-type]
        completion_type=completion_type,
        model_name=model_name,
    )
    if not supported:
        # Only looked up on failure, to tell the caller what would be accepted.
        allowed = list_supported_models(
            session=session,
            provider=provider,  # type: ignore[arg-type]
            completion_type=completion_type,
        )
        raise HTTPException(
            status_code=400,
            detail=(
                f"Model '{model_name}' is not supported for provider='{provider}' "
                f"type='{completion_type}'. Allowed: {allowed}"
            ),
        )

    # Voice validation applies to TTS only.
    if completion_type != "tts":
        return

    voice = params.get("voice")
    row_config = model_row.config
    voice_spec = row_config.get("voice") if isinstance(row_config, dict) else None
    allowed_voices = voice_spec.get("options") if isinstance(voice_spec, dict) else None

    # A missing voice, or a model without configured options, passes.
    if voice and allowed_voices and voice not in allowed_voices:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Voice '{voice}' is not supported for provider='{provider}' "
                f"model='{model_name}'. Allowed: {allowed_voices}"
            ),
        )
|
|
||
|
|
||
| def is_reasoning_model(session: Session, provider: Provider, model_name: str) -> bool: | ||
| """Return True if the model is configured with a reasoning `effort` control. | ||
|
|
||
| A model is considered reasoning-capable if its `config` JSON contains an | ||
|
|
@@ -69,7 +179,7 @@ def is_reasoning_model( | |
|
|
||
| def estimate_model_cost( | ||
| session: Session, | ||
| provider: Literal["openai", "google"], | ||
| provider: Provider, | ||
| model_name: str, | ||
| input_tokens: int, | ||
| output_tokens: int, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rollback still leaves post-063 providers behind.
`downgrade()` removes the enum/column changes, but it keeps the `sarvamai` and `elevenlabs` rows introduced by this revision. Pre-063 code only handled `openai`/`google`, so a rollback can still leave `global.model_config` unreadable to the older app layer. Either clean up providers introduced here during downgrade or mark the migration as intentionally irreversible. 🤖 Prompt for AI Agents