Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""add pending job monitoring indexes
Revision ID: 061
Revises: 060
Revision ID: 062
Revises: 061
Create Date: 2026-05-13 00:00:00.000000
"""
Expand Down
147 changes: 147 additions & 0 deletions backend/app/alembic/versions/063_seed_stt_tts_model_configs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
"""seed stt/tts model_config rows for google, sarvamai, elevenlabs

Revision ID: 063
Revises: 062
Create Date: 2026-05-19 00:00:00.000000

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "063"
down_revision = "062"
branch_labels = None
depends_on = None


# (provider, model_name) pairs matching the rows inserted by the seed INSERT
# in upgrade().
# NOTE(review): nothing in the visible part of this revision reads this list;
# presumably it is meant for cleanup or verification -- confirm or remove.
SEEDED_MODELS = [
    ("google", "gemini-2.5-pro"),
    ("google", "gemini-3.1-pro-preview"),
    ("google", "gemini-3-flash-preview"),
    ("google", "gemini-2.5-flash"),
    ("google", "gemini-2.5-flash-preview-tts"),
    ("google", "gemini-2.5-pro-preview-tts"),
    ("sarvamai", "saaras:v3"),
    ("sarvamai", "bulbul:v3"),
    ("elevenlabs", "scribe_v2"),
    ("elevenlabs", "eleven_v3"),
]


def upgrade():
    """Apply revision 063 to global.model_config.

    Runs, in order: create the provider / completion_type enum types, convert
    the provider column to the enum and add completion_type, backfill
    completion_type for existing rows, make it NOT NULL, add lookup indexes,
    and seed STT/TTS rows for google, sarvamai and elevenlabs.
    """
    # 1. Create enum types
    op.execute(
        "CREATE TYPE global.provider_enum AS ENUM ('openai', 'google', 'sarvamai', 'elevenlabs')"
    )
    op.execute("CREATE TYPE global.completion_type_enum AS ENUM ('text', 'stt', 'tts')")

    # 2. Alter provider column to use enum; add completion_type column
    # completion_type is added without NOT NULL here so existing rows can be
    # backfilled in step 3 before the constraint is applied in step 4.
    op.execute(
        """
ALTER TABLE global.model_config
ALTER COLUMN provider TYPE global.provider_enum
USING provider::global.provider_enum,
ADD COLUMN completion_type global.completion_type_enum
"""
    )

    # 3. Backfill completion_type for pre-existing rows (openai models seeded before this migration)
    # Rule: AUDIO in input but not output -> 'stt'; AUDIO in output but not
    # input -> 'tts'; everything else (including AUDIO on both sides) -> 'text'.
    op.execute(
        """
UPDATE global.model_config SET completion_type =
CASE
WHEN 'AUDIO' = ANY(input_modalities::text[]) AND NOT ('AUDIO' = ANY(output_modalities::text[])) THEN 'stt'::global.completion_type_enum
WHEN 'AUDIO' = ANY(output_modalities::text[]) AND NOT ('AUDIO' = ANY(input_modalities::text[])) THEN 'tts'::global.completion_type_enum
ELSE 'text'::global.completion_type_enum
END
WHERE completion_type IS NULL
"""
    )

    # 4. Set NOT NULL now that all rows are backfilled
    op.execute(
        "ALTER TABLE global.model_config ALTER COLUMN completion_type SET NOT NULL"
    )

    # 5. Add indexes
    # Two composite indexes on provider/active (with and without
    # completion_type), plus GIN indexes on the two modalities columns.
    op.execute(
        "CREATE INDEX ix_model_config_provider_active ON global.model_config (provider, is_active)"
    )
    op.execute(
        "CREATE INDEX ix_model_config_provider_type_active ON global.model_config (provider, completion_type, is_active)"
    )
    op.execute(
        "CREATE INDEX ix_model_config_input_modalities ON global.model_config USING gin (input_modalities)"
    )
    op.execute(
        "CREATE INDEX ix_model_config_output_modalities ON global.model_config USING gin (output_modalities)"
    )

    # 6. Seed rows — pricing per 1M tokens (USD): response/batch = text i/o; audio = audio-modal i/o
    # ON CONFLICT ... DO NOTHING leaves any already-present (provider, model_name)
    # row untouched, so an existing row keeps its backfilled completion_type.
    # The sarvamai and elevenlabs rows are seeded with NULL pricing.
    op.execute(
        """
INSERT INTO global.model_config
(provider, model_name, completion_type, config, input_modalities, output_modalities, pricing, is_active, inserted_at, updated_at)
VALUES
('google', 'gemini-2.5-pro', 'stt',
'{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}}',
'{AUDIO}', '{TEXT}',
'{"response": {"input_token_cost": 1.25, "output_token_cost": 10.0}, "batch": {"input_token_cost": 0.625, "output_token_cost": 5.0}, "audio": {"input_token_cost": 3.5, "output_token_cost": 10.0}}',
true, NOW(), NOW()),
('google', 'gemini-3.1-pro-preview', 'stt',
'{"thinking_level": {"type": "enum", "default": "high", "options": ["low", "medium", "high"], "description": "Max reasoning depth before output. high = best quality, low = faster/cheaper."}}',
'{AUDIO}', '{TEXT}',
'{"response": {"input_token_cost": 2.0, "output_token_cost": 12.0}, "batch": {"input_token_cost": 1.0, "output_token_cost": 6.0}, "audio": {"input_token_cost": 3.5, "output_token_cost": 12.0}}',
true, NOW(), NOW()),
('google', 'gemini-3-flash-preview', 'stt',
'{"thinking_level": {"type": "enum", "default": "high", "options": ["minimal", "low", "medium", "high"], "description": "Max reasoning depth before output."}}',
'{AUDIO}', '{TEXT}',
'{"response": {"input_token_cost": 0.5, "output_token_cost": 3.0}, "batch": {"input_token_cost": 0.25, "output_token_cost": 1.5}, "audio": {"input_token_cost": 1.0, "output_token_cost": 3.0}}',
true, NOW(), NOW()),
('google', 'gemini-2.5-flash', 'stt',
'{"temperature": {"type": "float", "default": 1.0, "min": 0.0, "max": 2.0, "description": "Controls randomness. Lower = more deterministic."}}',
'{AUDIO}', '{TEXT}',
'{"response": {"input_token_cost": 0.3, "output_token_cost": 2.5}, "batch": {"input_token_cost": 0.15, "output_token_cost": 1.25}, "audio": {"input_token_cost": 1.0, "output_token_cost": 2.5}}',
true, NOW(), NOW()),
('google', 'gemini-2.5-flash-preview-tts', 'tts',
'{"voice": {"type": "enum", "default": "Kore", "options": ["Kore", "Orus", "Leda", "Charon"], "description": "TTS voice."}}',
'{TEXT}', '{AUDIO}',
'{"response": {"input_token_cost": 0.5, "output_token_cost": 10.0}, "batch": {"input_token_cost": 0.25, "output_token_cost": 5.0}, "audio": {"input_token_cost": 0.5, "output_token_cost": 10.0}}',
true, NOW(), NOW()),
('google', 'gemini-2.5-pro-preview-tts', 'tts',
'{"voice": {"type": "enum", "default": "Kore", "options": ["Kore", "Orus", "Leda", "Charon"], "description": "TTS voice."}}',
'{TEXT}', '{AUDIO}',
'{"response": {"input_token_cost": 1.0, "output_token_cost": 20.0}, "batch": {"input_token_cost": 0.5, "output_token_cost": 10.0}, "audio": {"input_token_cost": 1.0, "output_token_cost": 20.0}}',
true, NOW(), NOW()),
('sarvamai', 'saaras:v3', 'stt',
'{}', '{AUDIO}', '{TEXT}', NULL, true, NOW(), NOW()),
('sarvamai', 'bulbul:v3', 'tts',
'{"voice": {"type": "enum", "default": "simran", "options": ["simran", "shubh", "roopa"], "description": "TTS voice."}}',
'{TEXT}', '{AUDIO}', NULL, true, NOW(), NOW()),
('elevenlabs', 'scribe_v2', 'stt',
'{}', '{AUDIO}', '{TEXT}', NULL, true, NOW(), NOW()),
('elevenlabs', 'eleven_v3', 'tts',
'{"voice": {"type": "enum", "default": "Sarah", "options": ["Sarah", "George", "Callum", "Liam"], "description": "TTS voice."}}',
'{TEXT}', '{AUDIO}', NULL, true, NOW(), NOW())
ON CONFLICT (provider, model_name) DO NOTHING
"""
    )


def downgrade():
    """Revert revision 063 on global.model_config.

    Deletes the rows for providers introduced by this revision, drops the
    indexes, removes completion_type, turns provider back into varchar and
    drops both enum types.
    """
    # Providers 'sarvamai' and 'elevenlabs' first appear in this revision; the
    # pre-063 application layer only accepted 'openai' / 'google', so leaving
    # these rows behind after a rollback would expose provider values the older
    # code cannot handle. The ::text cast keeps the comparison valid whether
    # the column is still the enum or already varchar.
    # Seeded google rows are deliberately kept: the seed INSERT uses
    # ON CONFLICT DO NOTHING, so a google row may predate this revision.
    op.execute(
        "DELETE FROM global.model_config "
        "WHERE provider::text IN ('sarvamai', 'elevenlabs')"
    )

    # Drop indexes in reverse order of creation.
    op.execute("DROP INDEX IF EXISTS global.ix_model_config_output_modalities")
    op.execute("DROP INDEX IF EXISTS global.ix_model_config_input_modalities")
    op.execute("DROP INDEX IF EXISTS global.ix_model_config_provider_type_active")
    op.execute("DROP INDEX IF EXISTS global.ix_model_config_provider_active")

    # The columns must stop using the enum types before the types are dropped.
    op.execute(
        """
        ALTER TABLE global.model_config
            DROP COLUMN completion_type,
            ALTER COLUMN provider TYPE varchar USING provider::varchar
        """
    )

    op.execute("DROP TYPE IF EXISTS global.completion_type_enum")
    op.execute("DROP TYPE IF EXISTS global.provider_enum")
Comment on lines +138 to +147
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Rollback still leaves post-063 providers behind.

downgrade() removes the enum/column changes, but it keeps the sarvamai and elevenlabs rows introduced by this revision. Pre-063 code only handled openai/google, so a rollback can still leave global.model_config unreadable to the older app layer. Either clean up providers introduced here during downgrade or mark the migration as intentionally irreversible.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@backend/app/alembic/versions/063_seed_stt_tts_model_configs.py` around lines
138 - 147, The downgrade currently drops the new enum types and column but
leaves rows with providers introduced in this revision, which will break older
code; update the downgrade() in this migration (the function containing the
op.execute calls) to either delete rows with provider IN
('sarvamai','elevenlabs') from global.model_config before reverting the
column/type changes or explicitly raise a RuntimeError/sa_exc to mark the
revision as irreversible; if choosing deletion, run an op.execute("DELETE FROM
global.model_config WHERE provider IN ('sarvamai','elevenlabs')") prior to
dropping/altering the provider enum and column so the pre-063 app can read the
table.

3 changes: 3 additions & 0 deletions backend/app/crud/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from sqlmodel import Session, and_, select

from app.core.util import now
from app.crud.model_config import validate_blob_model_or_raise
from app.models import (
Config,
ConfigCreate,
Expand Down Expand Up @@ -33,6 +34,8 @@ def create_or_raise(
"""
self._check_unique_name_or_raise(config_create.name)

validate_blob_model_or_raise(self.session, config_create.config_blob)

try:
config = Config(
name=config_create.name,
Expand Down
3 changes: 3 additions & 0 deletions backend/app/crud/config/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from sqlmodel import Session, and_, select

from app.core.util import now
from app.crud.model_config import validate_blob_model_or_raise
from app.models import (
Config,
ConfigVersion,
Expand Down Expand Up @@ -81,6 +82,8 @@ def create_or_raise(self, version_create: ConfigVersionUpdate) -> ConfigVersion:
)
raise HTTPException(status_code=400, detail=validation_errors)

validate_blob_model_or_raise(self.session, validated_blob)

try:
next_version = self._get_next_version(self.config_id)

Expand Down
122 changes: 116 additions & 6 deletions backend/app/crud/model_config.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
from typing import Any, Literal

from fastapi import HTTPException
from sqlmodel import Session, select

from app.models import ModelConfig
from app.models.llm.request import ConfigBlob
from app.models.model_config import CompletionType

Provider = Literal["openai", "google", "sarvamai", "elevenlabs"]


def _normalize_provider(raw: str) -> str:
"""Map NativeCompletionConfig providers (e.g. 'openai-native') to model_config provider names."""
return raw[: -len("-native")] if raw.endswith("-native") else raw


def list_active_model_configs(
session: Session,
provider: Literal["openai", "google"] | None = None,
provider: Provider | None = None,
skip: int = 0,
limit: int = 100,
) -> tuple[list[ModelConfig], bool]:
Expand All @@ -30,7 +40,7 @@ def list_active_model_configs(

def list_all_active_model_configs(
session: Session,
provider: Literal["openai", "google"] | None = None,
provider: Provider | None = None,
) -> list[ModelConfig]:
statement = select(ModelConfig).where(ModelConfig.is_active)

Expand All @@ -42,7 +52,7 @@ def list_all_active_model_configs(


def get_model_config(
session: Session, provider: Literal["openai", "google"], model_name: str
session: Session, provider: Provider, model_name: str
) -> ModelConfig | None:
statement = select(ModelConfig).where(
ModelConfig.provider == provider,
Expand All @@ -52,9 +62,109 @@ def get_model_config(
return session.exec(statement).first()


def is_reasoning_model(
session: Session, provider: Literal["openai", "google"], model_name: str
def list_supported_models(
    session: Session, provider: Provider, completion_type: CompletionType
) -> list[str]:
    """Return active model names for a provider + completion type, sorted by name.

    Args:
        session: Database session used to run the query.
        provider: Provider whose models are listed.
        completion_type: Completion type the models must be registered for.

    Returns:
        Model names in ascending order; empty if nothing matches.
    """
    stmt = (
        select(ModelConfig.model_name)
        .where(
            ModelConfig.provider == provider,
            ModelConfig.completion_type == completion_type,
            ModelConfig.is_active,
        )
        # Without an ORDER BY the database may return rows in any order; this
        # list is embedded in a 400 error detail, so keep it deterministic.
        .order_by(ModelConfig.model_name)
    )
    return list(session.exec(stmt).all())
Comment on lines +69 to +74
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Order the supported-model list before surfacing it.

This list is returned directly in the 400 detail on Lines 141-144. Without an ORDER BY, PostgreSQL can return models in different orders, which makes the API payload and any assertions against it flaky.

Proposed fix
     stmt = select(ModelConfig.model_name).where(
         ModelConfig.provider == provider,
         ModelConfig.completion_type == completion_type,
         ModelConfig.is_active,
     )
+    stmt = stmt.order_by(ModelConfig.model_name)
     return list(session.exec(stmt).all())
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@backend/app/crud/model_config.py` around lines 69 - 74, The query building up
stmt using select(ModelConfig.model_name).where(... ) returns rows in arbitrary
DB order; add an explicit ordering to the statement (e.g., order by
ModelConfig.model_name ascending) before calling session.exec(stmt).all() so the
returned list is deterministic; update the stmt construction that references
select(ModelConfig.model_name), ModelConfig.provider,
ModelConfig.completion_type and ModelConfig.is_active to include an
order_by(...) clause.



def is_model_supported(
    session: Session,
    provider: Provider,
    completion_type: CompletionType,
    model_name: str,
) -> bool:
    """Tell whether an active row exists for this provider, model and completion type.

    Returns True only when a model_config row matches all three values and is
    flagged active; False otherwise.
    """
    criteria = (
        ModelConfig.provider == provider,
        ModelConfig.model_name == model_name,
        ModelConfig.completion_type == completion_type,
        ModelConfig.is_active,
    )
    # Only the id is selected: existence is all that matters here.
    match = session.exec(select(ModelConfig.id).where(*criteria)).first()
    return match is not None


def validate_blob_model_or_raise(session: Session, blob: ConfigBlob) -> None:
    """Validate the model (and TTS voice) named in a ConfigBlob against model_config.

    Blobs with no provider, or with a "-native" provider, are accepted without
    any lookup. For every other provider the blob must name a model that has a
    model_config row and is active for the blob's completion type. For "tts"
    blobs, a supplied voice must be one of the options listed in the row's
    config, when that row lists any.

    Raises:
        HTTPException: status 400 when the model is missing, unknown, not
            supported for the completion type, or the voice is not allowed.
    """
    completion = blob.completion
    raw_provider = completion.provider
    completion_type = completion.type

    # No provider means nothing to check; native configs are exempt from
    # model_config validation.
    if raw_provider is None or raw_provider.endswith("-native"):
        return

    # After the guard above the name carries no "-native" suffix, so this call
    # returns it unchanged.
    provider = _normalize_provider(raw_provider)
    params = completion.params or {}

    model_name = params.get("model")
    if not model_name:
        raise HTTPException(
            status_code=400,
            detail=f"completion.params.model is required for provider='{raw_provider}'",
        )

    model_row = get_model_config(
        session=session,
        provider=provider,  # type: ignore[arg-type]
        model_name=model_name,
    )
    if model_row is None:
        raise HTTPException(
            status_code=400,
            detail=f"Model '{model_name}' not found for provider='{provider}'.",
        )

    supported = is_model_supported(
        session=session,
        provider=provider,  # type: ignore[arg-type]
        completion_type=completion_type,
        model_name=model_name,
    )
    if not supported:
        # Include the valid choices so the caller can correct the request.
        allowed = list_supported_models(
            session=session,
            provider=provider,  # type: ignore[arg-type]
            completion_type=completion_type,
        )
        raise HTTPException(
            status_code=400,
            detail=(
                f"Model '{model_name}' is not supported for provider='{provider}' "
                f"type='{completion_type}'. Allowed: {allowed}"
            ),
        )

    # Voice validation only applies to text-to-speech configs.
    if completion_type != "tts":
        return

    voice = params.get("voice")
    model_settings = model_row.config
    voice_spec = (
        model_settings.get("voice") if isinstance(model_settings, dict) else None
    )
    allowed_voices = voice_spec.get("options") if isinstance(voice_spec, dict) else None

    # An absent voice, or a row with no voice options, passes unchecked.
    if voice and allowed_voices and voice not in allowed_voices:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Voice '{voice}' is not supported for provider='{provider}' "
                f"model='{model_name}'. Allowed: {allowed_voices}"
            ),
        )


def is_reasoning_model(session: Session, provider: Provider, model_name: str) -> bool:
"""Return True if the model is configured with a reasoning `effort` control.

A model is considered reasoning-capable if its `config` JSON contains an
Expand All @@ -69,7 +179,7 @@ def is_reasoning_model(

def estimate_model_cost(
session: Session,
provider: Literal["openai", "google"],
provider: Provider,
model_name: str,
input_tokens: int,
output_tokens: int,
Expand Down
Loading
Loading