diff --git a/README.md b/README.md index 9bdb698..09fd9fc 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,9 @@ > *separate* set of definitions per team โ†’ it answers questions over an > incomplete database โ†’ it remembers every definition and conversation. -This is the **v4.1 rebuild** (see [`docs/discord_first_redesign_v4_1.md`](docs/discord_first_redesign_v4_1.md)). +๐Ÿ‘‰ **ํ”„๋กœ์ ํŠธ ์ „์ฒด ๊ทธ๋ฆผ(๋‹จ์ผ SSOT)**: [`docs/PROJECT.md`](docs/PROJECT.md) ยท **์ปจํŠธ๋ฆฌ๋ทฐํ„ฐ ํ•œ๋ˆˆ ๊ฐ€์ด๋“œ**: [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) + +This is the **v4.1 rebuild** (๋ฐฐ๊ฒฝ/์„ค๊ณ„ ์˜๋„: [`docs/discord_first_redesign_v4_1.md`](docs/discord_first_redesign_v4_1.md)). Where most text-to-SQL projects compete on *"generate better SQL,"* Lang2SQL competes on everything *around* the query: business-context learning, per-team semantics, robustness to messy databases, and memory. **Discord is the Phase 1 @@ -143,6 +145,8 @@ for the full architecture write-up. ## ๐Ÿค ๊ธฐ์—ฌํ•˜๊ธฐ +**์ฒ˜์Œ ๋ณด์‹œ๋Š” ๋ถ„์€ [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md)** โ€” ๋””๋ ‰ํ† ๋ฆฌยท๋ ˆ์ด์–ด ์ฑ…์ž„, ํ•œ ๋ฉ”์‹œ์ง€์˜ lifecycle, *์–ด๋””๋ฅผ ์ˆ˜์ •ํ•˜๋ฉด ์ข‹์„์ง€* ๊ฐ€ ํ•œ๊ณณ์— ์ •๋ฆฌ๋ผ ์žˆ์Šต๋‹ˆ๋‹ค. + ```bash git clone https://github.com/CausalInferenceLab/lang2sql.git cd lang2sql diff --git a/dev/create_faiss.py b/dev/create_faiss.py deleted file mode 100644 index 6547d41..0000000 --- a/dev/create_faiss.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -dev/create_faiss.py - -CSV ํŒŒ์ผ์—์„œ ํ…Œ์ด๋ธ”๊ณผ ์ปฌ๋Ÿผ ์ •๋ณด๋ฅผ ๋ถˆ๋Ÿฌ์™€ OpenAI ์ž„๋ฒ ๋”ฉ์œผ๋กœ ๋ฒกํ„ฐํ™”ํ•œ ๋’ค, -FAISS ์ธ๋ฑ์Šค๋ฅผ ์ƒ์„ฑํ•˜๊ณ  ๋กœ์ปฌ ๋””๋ ‰ํ† ๋ฆฌ์— ์ €์žฅํ•œ๋‹ค. 
- -ํ™˜๊ฒฝ ๋ณ€์ˆ˜: - OPEN_AI_KEY: OpenAI API ํ‚ค - OPEN_AI_EMBEDDING_MODEL: ์‚ฌ์šฉํ•  ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ์ด๋ฆ„ - -์ถœ๋ ฅ: - ์ง€์ •๋œ OUTPUT_DIR ๊ฒฝ๋กœ์— FAISS ์ธ๋ฑ์Šค ์ €์žฅ -""" - -import csv -import os -from collections import defaultdict - -from dotenv import load_dotenv -from langchain_community.vectorstores import FAISS -from langchain_openai import OpenAIEmbeddings - -load_dotenv() -# CSV ํŒŒ์ผ ๊ฒฝ๋กœ -CSV_PATH = "./dev/table_catalog.csv" -# .env์˜ VECTORDB_LOCATION๊ณผ ๋™์ผํ•˜๊ฒŒ ๋งž์ถ”์„ธ์š” -OUTPUT_DIR = "./dev/table_info_db" - -tables = defaultdict(lambda: {"desc": "", "columns": []}) -with open(CSV_PATH, newline="", encoding="utf-8") as f: - reader = csv.DictReader(f) - for row in reader: - t = row["table_name"].strip() - tables[t]["desc"] = row["table_description"].strip() - col = row["column_name"].strip() - col_desc = row["column_description"].strip() - tables[t]["columns"].append((col, col_desc)) - -docs = [] -for t, info in tables.items(): - cols = "\n".join([f"{c}: {d}" for c, d in info["columns"]]) - page = f"{t}: {info['desc']}\nColumns:\n {cols}" - from langchain.schema import Document - - docs.append(Document(page_content=page)) - -emb = OpenAIEmbeddings( - model=os.getenv("OPEN_AI_EMBEDDING_MODEL"), openai_api_key=os.getenv("OPEN_AI_KEY") -) -db = FAISS.from_documents(docs, emb) -os.makedirs(OUTPUT_DIR, exist_ok=True) -db.save_local(OUTPUT_DIR) -print(f"FAISS index saved to: {OUTPUT_DIR}") diff --git a/dev/create_pgvector.py b/dev/create_pgvector.py deleted file mode 100644 index 77edd9f..0000000 --- a/dev/create_pgvector.py +++ /dev/null @@ -1,54 +0,0 @@ -""" -dev/create_pgvector.py - -CSV ํŒŒ์ผ์—์„œ ํ…Œ์ด๋ธ”๊ณผ ์ปฌ๋Ÿผ ์ •๋ณด๋ฅผ ๋ถˆ๋Ÿฌ์™€ OpenAI ์ž„๋ฒ ๋”ฉ์œผ๋กœ ๋ฒกํ„ฐํ™”ํ•œ ๋’ค, -pgvector์— ์ ์žฌํ•œ๋‹ค. 
- -ํ™˜๊ฒฝ ๋ณ€์ˆ˜: - OPEN_AI_KEY: OpenAI API ํ‚ค - OPEN_AI_EMBEDDING_MODEL: ์‚ฌ์šฉํ•  ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ์ด๋ฆ„ - VECTORDB_LOCATION: pgvector ์—ฐ๊ฒฐ ๋ฌธ์ž์—ด - PGVECTOR_COLLECTION: pgvector ์ปฌ๋ ‰์…˜ ์ด๋ฆ„ -""" - -import csv -import os -from collections import defaultdict - -from dotenv import load_dotenv -from langchain.schema import Document -from langchain_openai import OpenAIEmbeddings -from langchain_postgres.vectorstores import PGVector - -load_dotenv() -# CSV ํŒŒ์ผ ๊ฒฝ๋กœ -CSV_PATH = "./dev/table_catalog.csv" -# .env์˜ VECTORDB_LOCATION๊ณผ ๋™์ผํ•˜๊ฒŒ ๋งž์ถ”์„ธ์š” -CONN = ( - os.getenv("VECTORDB_LOCATION") - or "postgresql://pgvector:pgvector@localhost:5432/postgres" -) -COLLECTION = os.getenv("PGVECTOR_COLLECTION", "table_info_db") - -tables = defaultdict(lambda: {"desc": "", "columns": []}) -with open(CSV_PATH, newline="", encoding="utf-8") as f: - reader = csv.DictReader(f) - for row in reader: - t = row["table_name"].strip() - tables[t]["desc"] = row["table_description"].strip() - col = row["column_name"].strip() - col_desc = row["column_description"] - tables[t]["columns"].append((col, col_desc)) - -docs = [] -for t, info in tables.items(): - cols = "\n".join([f"{c}: {d}" for c, d in info["columns"]]) - docs.append(Document(page_content=f"{t}: {info['desc']}\nColumns:\n {cols}")) - -emb = OpenAIEmbeddings( - model=os.getenv("OPEN_AI_EMBEDDING_MODEL"), openai_api_key=os.getenv("OPEN_AI_KEY") -) -PGVector.from_documents( - documents=docs, embedding=emb, connection=CONN, collection_name=COLLECTION -) -print(f"pgvector collection populated: {COLLECTION}") diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index 4b3dd4a..0000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -# 1. Base image -FROM python:3.12-slim-bullseye - -# 2. ์‹œ์Šคํ…œ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์„ค์น˜ -RUN apt-get update && apt-get install -y \ - build-essential \ - curl \ - git \ - libpq-dev \ - && rm -rf /var/lib/apt/lists/* - -# 3. 
uv ์„ค์น˜ -RUN pip install --no-cache-dir uv - -# 4. ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ ์„ค์ • -WORKDIR /app - -# 5. ์†Œ์Šค ์ฝ”๋“œ ๋ณต์‚ฌ ๋ฐ ์˜์กด์„ฑ ์„ค์น˜ -COPY pyproject.toml ./ -COPY . . -RUN uv pip install --system --upgrade pip setuptools wheel \ - && uv pip install --system . - -# 6. ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์„ค์ • -ENV PYTHONPATH=/app -ENV PYTHONUNBUFFERED=1 - -# 7. ํฌํŠธ ์„ค์ • -ENV STREAMLIT_SERVER_PORT=8501 - -# 8. ์‹คํ–‰ ๋ช…๋ น -CMD ["lang2sql", "run-streamlit"] diff --git a/docker/Dockerfile.dockerignore b/docker/Dockerfile.dockerignore deleted file mode 100644 index 44447fb..0000000 --- a/docker/Dockerfile.dockerignore +++ /dev/null @@ -1,10 +0,0 @@ -.git -__pycache__/ -*.pyc -*.pyo -*.pyd -*.db -*.log -venv/ -.env -docker/ diff --git a/docker/docker-compose-pgvector.yml b/docker/docker-compose-pgvector.yml deleted file mode 100644 index 8ad5e16..0000000 --- a/docker/docker-compose-pgvector.yml +++ /dev/null @@ -1,23 +0,0 @@ -# docker compose -f docker-compose-pgvector.yml up -# docker compose -f docker-compose-pgvector.yml down - -services: - pgvector: - image: pgvector/pgvector:pg17 - hostname: pgvector - container_name: pgvector - restart: always - ports: - - "5432:5432" - environment: - POSTGRES_USER: pgvector - POSTGRES_PASSWORD: pgvector - POSTGRES_DB: pgvector - TZ: Asia/Seoul - LANG: en_US.utf8 - volumes: - - pgvector_data:/var/lib/postgresql/data - - ./pgvector/init:/docker-entrypoint-initdb.d - -volumes: - pgvector_data: diff --git a/docker/docker-compose-postgres.yml b/docker/docker-compose-postgres.yml deleted file mode 100644 index 696f7e1..0000000 --- a/docker/docker-compose-postgres.yml +++ /dev/null @@ -1,23 +0,0 @@ -# docker compose -f docker-compose-postgres.yml up -# docker compose -f docker-compose-postgres.yml down - -services: - postgres: - image: postgres:15 - hostname: postgres - container_name: postgres - restart: always - ports: - - "5432:5432" - environment: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - POSTGRES_DB: postgres - TZ: 
Asia/Seoul - LANG: en_US.utf8 - volumes: - - postgres_data:/var/lib/postgresql/data - - ./postgres/init:/docker-entrypoint-initdb.d - -volumes: - postgres_data: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml deleted file mode 100644 index 115575a..0000000 --- a/docker/docker-compose.yml +++ /dev/null @@ -1,34 +0,0 @@ -services: - streamlit: - hostname: streamlit - container_name: streamlit - build: - context: .. - dockerfile: docker/Dockerfile - ports: - - "8501:8501" - volumes: - - ../:/app - env_file: - - ../.env - environment: - - STREAMLIT_SERVER_PORT=8501 - - DATABASE_URL=postgresql://pgvector:pgvector@localhost:5432/streamlit - depends_on: - - pgvector - - pgvector: - image: pgvector/pgvector:pg17 - hostname: pgvector - container_name: pgvector - environment: - POSTGRES_USER: pgvector - POSTGRES_PASSWORD: pgvector - POSTGRES_DB: streamlit - ports: - - "5432:5432" - volumes: - - pgdata:/var/lib/postgresql/data - -volumes: - pgdata: diff --git a/docker/pgvector/init/001_create_database.sql b/docker/pgvector/init/001_create_database.sql deleted file mode 100644 index 2173146..0000000 --- a/docker/pgvector/init/001_create_database.sql +++ /dev/null @@ -1,2 +0,0 @@ -CREATE DATABASE lang2sql; -CREATE DATABASE test; diff --git a/docker/pgvector/init/002_create_user_and_grant.sql b/docker/pgvector/init/002_create_user_and_grant.sql deleted file mode 100644 index 8da26fb..0000000 --- a/docker/pgvector/init/002_create_user_and_grant.sql +++ /dev/null @@ -1,5 +0,0 @@ -CREATE USER lang2sql WITH PASSWORD 'lang2sqlpassword'; -GRANT ALL PRIVILEGES ON DATABASE lang2sql TO lang2sql; - -CREATE USER test WITH PASSWORD 'testpassword'; -GRANT ALL PRIVILEGES ON DATABASE test TO test; diff --git a/docker/postgres/init/001_create_database.sql b/docker/postgres/init/001_create_database.sql deleted file mode 100644 index 2173146..0000000 --- a/docker/postgres/init/001_create_database.sql +++ /dev/null @@ -1,2 +0,0 @@ -CREATE DATABASE lang2sql; -CREATE DATABASE 
test; diff --git a/docker/postgres/init/002_create_user_and_grant.sql b/docker/postgres/init/002_create_user_and_grant.sql deleted file mode 100644 index 8da26fb..0000000 --- a/docker/postgres/init/002_create_user_and_grant.sql +++ /dev/null @@ -1,5 +0,0 @@ -CREATE USER lang2sql WITH PASSWORD 'lang2sqlpassword'; -GRANT ALL PRIVILEGES ON DATABASE lang2sql TO lang2sql; - -CREATE USER test WITH PASSWORD 'testpassword'; -GRANT ALL PRIVILEGES ON DATABASE test TO test; diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..b7fc58f --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,245 @@ +# Architecture โ€” ๊ธฐ์—ฌ์ž์šฉ ํ•œ๋ˆˆ ๊ฐ€์ด๋“œ + +์ด ๋ฌธ์„œ๋Š” *์ฒ˜์Œ ๋ณด๋Š” ์‚ฌ๋žŒ๋„ 10๋ถ„ ์•ˆ์— ์–ด๋”” ๋ฌด์—‡์ด ์žˆ๋Š”์ง€ / ์–ด๋””๋ฅผ ์†๋Œ€๋ฉด ์ข‹์€์ง€* ์•Œ ์ˆ˜ ์žˆ๋„๋ก ์“ฐ์—ฌ์กŒ์Šต๋‹ˆ๋‹ค. ์ƒ์„ธ ์„ค๊ณ„ ์˜๋„๋Š” [`docs/discord_first_redesign_v4_1.md`](./discord_first_redesign_v4_1.md)์— ์žˆ์Šต๋‹ˆ๋‹ค. + +--- + +## 1. ํ•œ ๋ˆˆ์— ๋ณด๋Š” ์•„ํ‚คํ…์ฒ˜ + +``` + USER (Discord / CLI / ํ–ฅํ›„ SlackยทWeb) + โ”‚ + โ–ผ +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ frontends/ โ† ์ž…๋ ฅ ๋ฐ›๊ณ  ์ถœ๋ ฅ ๋ณด๋‚ด๊ธฐ (transport) โ”‚ +โ”‚ discord/ cli/ slack/(๋นˆ) web/(๋นˆ) โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ–ผ (์ธํ„ฐ๋ž™์…˜ โ†’ Identity) +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ tenancy/ ContextConcierge โ† *์กฐ๋ฆฝ์ * โ”‚ +โ”‚ ์š”์ฒญ๋งˆ๋‹ค HarnessContext๋ฅผ ํ•˜๋‚˜ ๋งŒ๋“ค์–ด ๋„˜๊น€ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ–ผ (ctx = LLM+tools+session+...) 
+โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ harness/ agent_loop โ”‚ +โ”‚ system prompt โ†’ LLM โ†’ tool ํ˜ธ์ถœ โ†’ ๋‹ค์Œ ํ„ด/์ข…๋ฃŒ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ–ผ (๋„๊ตฌ๊ฐ€ ctx์˜ ํฌํŠธ๋ฅผ ํ˜ธ์ถœ) +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ semantic/(โ˜…โ‘ฃ)โ”‚safety/(โ˜…โ‘ )โ”‚memory/(โ˜…โ‘ก)โ”‚ingest/(โ˜…โ‘ข)โ”‚ โ† 4๊ธฐ๋‘ฅ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ”‚ + โ–ผ (๋ชจ๋‘ ํฌํŠธ(Protocol)๋กœ ์™ธ๋ถ€์™€ ๋ถ„๋ฆฌ) +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ adapters/ ์™ธ๋ถ€ ์‹œ์Šคํ…œ๊ณผ์˜ ๋งˆ์ง€๋ง‰ ํ•œ ์ค„ โ”‚ +โ”‚ llm/openai_ ยท llm/fake โ”‚ +โ”‚ db/sqlalchemy_explorer ยท db/d1_explorer ยท db/postgres_explorer โ”‚ +โ”‚ storage/sqlite_store ยท storage/sqlite_semantic โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +ํ•ต์‹ฌ ์›์น™: **๋กœ์ง์€ ํฌํŠธ(์ถ”์ƒ)์—๋งŒ ์˜์กด, ์–ด๋Œ‘ํ„ฐ(๊ตฌ์ฒด)๋Š” ๊ฐ€์žฅ์ž๋ฆฌ์—๋งŒ**. ๊ทธ๋ž˜์„œ ์ƒˆ LLMยท์ƒˆ DBยท์ƒˆ frontend๋ฅผ *๊ธฐ์กด ์ฝ”๋“œ ์•ˆ ๊ฑด๋“œ๋ฆฌ๊ณ * ๋ผ์šธ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +--- + +## 2. ์™œ ์ด๋Ÿฐ ๊ตฌ์กฐ? 
โ€” 4๊ธฐ๋‘ฅ (ํ•ด๊ฒฐํ•˜๋ ค๋Š” ๋ฌธ์ œ) + +| โ˜… | ์ด๋ฆ„ | ํ’€๋ ค๋Š” ํ˜„์‹ค ๋ฌธ์ œ | ํ•ต์‹ฌ ํŒŒ์ผ | +|---|---|---|---| +| โ‘  | **Safety pipeline** | SQL์ด *์‹ค์ˆ˜๋กœ/์•…์˜๋กœ* DB๋ฅผ ๋ง์น˜๋Š” ์ผ | [`src/lang2sql/safety/`](../src/lang2sql/safety/) | +| โ‘ก | **Memory 3์ถ•** | ๋ด‡์ด ์–ด์ œ ํ•œ ์–˜๊ธฐยท์ •์˜๋ฅผ *๊ธฐ์–ต ๋ชป ํ•จ* | [`src/lang2sql/memory/`](../src/lang2sql/memory/) | +| โ‘ข | **Ingestion ๋งคํŠธ๋ฆญ์Šค** | ๋น„์ฆˆ๋‹ˆ์Šค ์ •์˜๋ฅผ *์‚ฌ๋žŒ์ด ์ผ์ผ์ด* ์ž…๋ ฅํ•ด์•ผ ํ•จ | [`src/lang2sql/ingestion/`](../src/lang2sql/ingestion/) | +| โ‘ฃ | **Semantic federation** | ๊ฐ™์€ *"ํ™œ์„ฑ ์‚ฌ์šฉ์ž"* ๊ฐ€ ํŒ€๋งˆ๋‹ค ์˜๋ฏธ ๋‹ค๋ฆ„ | [`src/lang2sql/semantic/`](../src/lang2sql/semantic/) | + +์ž์„ธํ•œ ๋ฐฐ๊ฒฝ์€ redesign ๋ฌธ์„œ ยง3์„ ์ฐธ๊ณ . + +--- + +## 3. ๋””๋ ‰ํ† ๋ฆฌยท๋ ˆ์ด์–ด ๊ฐ€์ด๋“œ + +> ์˜์กด ๋ฐฉํ–ฅ: `frontends โ†’ tenancy โ†’ harness โ†’ semantic/safety/memory/ingestion/tools โ†’ core โ† adapters` +> `core/`๋Š” ๋ˆ„๊ตฌ๋„ ์˜์กดํ•˜์ง€ ์•Š๋Š” *์ˆœ์ˆ˜* ์˜์—ญ(ํƒ€์ž…+ํฌํŠธ). ์ƒˆ ๋ชจ๋“ˆ ์ถ”๊ฐ€ ์‹œ ์ด ๋ฐฉํ–ฅ์„ ๊นจ์ง€ ์•Š๊ฒŒ. + +### `src/lang2sql/core/` โ€” ์ˆœ์ˆ˜ ํƒ€์ž… + ํฌํŠธ (โ˜… ์†๋Œ€์ง€ ๋งˆ์„ธ์š”) +์‹œ์Šคํ…œ ์ „์ฒด์˜ *์–ดํœ˜*๊ฐ€ ๋ชจ์—ฌ ์žˆ์Šต๋‹ˆ๋‹ค. ์™ธ๋ถ€ ์˜์กด 0, I/O 0. 
+- [`types.py`](../src/lang2sql/core/types.py) โ€” `Message`, `ToolCall`, `ToolResult`, `Completion`, `Role` +- [`identity.py`](../src/lang2sql/core/identity.py) โ€” `Identity`, `Scope`, federation์˜ `scope_chain()` ์ˆœ์„œ (narrowโ†’wide) +- [`ports/`](../src/lang2sql/core/ports/) โ€” 11๊ฐœ Protocol: `LLMPort`, `ExplorerPort`, `ToolPort`, `SafetyLayerPort`, `SafetyPipelinePort`, `StorePort`, `RecallPort`, `ExtractorPort` (memory), `SourcePort`, `DocExtractorPort`, `ScopeResolverPort`, `FrontendPort`, `SecretsPort`, `SessionStorePort`, `AuditPort` + +### `src/lang2sql/harness/` โ€” ์—์ด์ „ํŠธ ํ•œ ํ„ด์˜ ์—”์ง„ +- [`context.py`](../src/lang2sql/harness/context.py) โ€” `HarnessContext` (llm + tools + safety + explorer + scope_resolver + session ํ•œ ๋‹ค๋ฐœ) +- [`session.py`](../src/lang2sql/harness/session.py) โ€” ๋Œ€ํ™” transcript +- [`loop.py`](../src/lang2sql/harness/loop.py) โ€” `agent_loop`: system prompt โ†’ LLM โ†’ tool ํ˜ธ์ถœ โ†’ ๋‹ค์Œ ํ„ด +- [`tool_registry.py`](../src/lang2sql/harness/tool_registry.py) โ€” ์ด๋ฆ„โ†’๋„๊ตฌ dispatch +- [`system_prompt.py`](../src/lang2sql/harness/system_prompt.py) โ€” ์‹œ๋ฉ˜ํ‹ฑ + ์Šคํ‚ค๋งˆ ์ฃผ์ž… + +### `src/lang2sql/semantic/` โ€” ์‹œ๋ฉ˜ํ‹ฑ ๋ ˆ์ด์–ด + federation (โ˜…โ‘ฃ) +- [`types.py`](../src/lang2sql/semantic/types.py) โ€” `SemanticEntry` (METRIC/DIMENSION/RELATIONSHIP/RULE) +- [`layer.py`](../src/lang2sql/semantic/layer.py) โ€” `SemanticLayer.render()` (์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ๋กœ ๋“ค์–ด๊ฐ) +- [`scoped_layer.py`](../src/lang2sql/semantic/scoped_layer.py) โ€” *๊ฐ€์žฅ ๊ตฌ์ฒด์  scope๊ฐ€ ์Šน๋ฆฌ*ํ•˜๋Š” merge +- [`store.py`](../src/lang2sql/semantic/store.py) โ€” in-memory store +- [`sql_composer.py`](../src/lang2sql/semantic/sql_composer.py) โ€” metric ์ด๋ฆ„ โ†’ ์ •์˜ ํŽผ์น˜๊ธฐ (V1 ์ตœ์†Œ) + +### `src/lang2sql/safety/` โ€” Read-only ๊ฒŒ์ดํŠธ (โ˜…โ‘ ) +- [`pipeline.py`](../src/lang2sql/safety/pipeline.py) โ€” layer๋ฅผ ์ˆœ์„œ๋Œ€๋กœ ํ†ต๊ณผ, *์ฒซ ๋น„-PASS์—์„œ ์ฐจ๋‹จ* +- 
[`layers/whitelist.py`](../src/lang2sql/safety/layers/whitelist.py) โ€” SELECT/WITH๋งŒ ํ†ต๊ณผ, DML ํ‚ค์›Œ๋“œ fail-closed +- [`layers/timeout.py`](../src/lang2sql/safety/layers/timeout.py) โ€” ์‹คํ–‰ timeout config +- [`tests/test_safety.py`](../tests/test_safety.py) โ€” **12๊ฐœ ํšŒ๊ท€ ์ผ€์ด์Šค** (๋จธ์ง€ ๊ฒŒ์ดํŠธ) + +### `src/lang2sql/memory/` โ€” Hermes 3์ถ• (โ˜…โ‘ก) +- [`stores/in_memory.py`](../src/lang2sql/memory/stores/in_memory.py) โ€” Where +- [`recall/inject_all.py`](../src/lang2sql/memory/recall/inject_all.py) โ€” What +- [`extractors/manual.py`](../src/lang2sql/memory/extractors/manual.py) โ€” How new +- [`service.py`](../src/lang2sql/memory/service.py) โ€” ์…‹์„ ๋ฌถ์Œ + +### `src/lang2sql/ingestion/` โ€” ๋ฌธ์„œ โ†’ ์‹œ๋ฉ˜ํ‹ฑ ํ›„๋ณด (โ˜…โ‘ข) +- [`sources/file_source.py`](../src/lang2sql/ingestion/sources/file_source.py) โ€” ์–ด๋””์„œ +- [`extractors/llm_extractor.py`](../src/lang2sql/ingestion/extractors/llm_extractor.py) โ€” ์–ด๋–ป๊ฒŒ ์ถ”์ถœ +- [`pipeline.py`](../src/lang2sql/ingestion/pipeline.py) โ€” Source ร— Extractor matrix + +### `src/lang2sql/tools/` โ€” ์—์ด์ „ํŠธ๊ฐ€ ๋ถ€๋ฅด๋Š” capability +6๊ฐœ ๋„๊ตฌ (๋ชจ๋‘ ctx-aware, async): +- [`run_sql.py`](../src/lang2sql/tools/run_sql.py) โ€” safety ํ†ต๊ณผ ํ›„ explorer๋กœ ์‹คํ–‰ +- [`explore_schema.py`](../src/lang2sql/tools/explore_schema.py) โ€” ํ…Œ์ด๋ธ”/์ปฌ๋Ÿผ introspection +- [`define_metric.py`](../src/lang2sql/tools/define_metric.py) โ€” scope-aware ์ •์˜ ์“ฐ๊ธฐ +- [`remember.py`](../src/lang2sql/tools/remember.py) โ€” fact ์ €์žฅ +- [`ask_user.py`](../src/lang2sql/tools/ask_user.py) โ€” ๋ชจํ˜ธํ•˜๋ฉด ์‚ฌ์šฉ์ž์—๊ฒŒ ์งˆ๋ฌธ +- [`ingest_doc.py`](../src/lang2sql/tools/ingest_doc.py) โ€” ๋ฌธ์„œ โ†’ ํ›„๋ณด ์ œ์•ˆ +- [`__init__.py: build_default_tools`](../src/lang2sql/tools/__init__.py) โ€” ์–ด์…ˆ๋ธ”๋ฆฌ + +### `src/lang2sql/tenancy/` โ€” ์กฐ๋ฆฝ์  +- [`concierge.py`](../src/lang2sql/tenancy/concierge.py) โ€” *์œ ์ผํ•˜๊ฒŒ* ๊ตฌ์ฒด ํด๋ž˜์Šค๋ฅผ import ํ•˜๋Š” ๊ณณ. ์š”์ฒญ๋งˆ๋‹ค `HarnessContext` ๋งŒ๋“ฆ. 
+- [`scope_resolver.py`](../src/lang2sql/tenancy/scope_resolver.py) โ€” `ScopeResolverPort` ๊ตฌํ˜„ (semantic ์œ„) +- [`encrypted_secrets.py`](../src/lang2sql/tenancy/encrypted_secrets.py) โ€” `cryptography.Fernet` ์‹ค ์•”ํ˜ธํ™” + +### `src/lang2sql/adapters/` โ€” ์™ธ๋ถ€ ์‹œ์Šคํ…œ๊ณผ์˜ ๋งˆ์ง€๋ง‰ ์ค„ +- `llm/openai_.py` โ€” urllib ๊ธฐ๋ฐ˜ OpenAI tool-calling +- `llm/fake.py` โ€” ์˜คํ”„๋ผ์ธ ํ…Œ์ŠคํŠธ์šฉ ๊ฒฐ์ •์  LLM +- `db/sqlalchemy_explorer.py` โ€” **DSN๋งŒ ๋ฐ”๊พธ๋ฉด Postgres/MySQL/Snowflake/BigQuery/DuckDB ๋‹ค ์ปค๋ฒ„** +- `db/d1_explorer.py` โ€” Cloudflare D1 (HTTP API, urllib) +- `db/factory.py` โ€” `build_explorer(connection)` scheme ๋ผ์šฐํŒ… +- `db/postgres_explorer.py` โ€” V1 stub (psycopg ๋ฏธ์„ค์น˜ ํ™˜๊ฒฝ์šฉ) +- `storage/sqlite_store.py` โ€” `AuditPort` + `SessionStorePort` + kv +- `storage/sqlite_semantic.py` โ€” ์‹œ๋ฉ˜ํ‹ฑ ์ •์˜ ์˜์†ํ™” + +### `src/lang2sql/frontends/` โ€” ์‚ฌ์šฉ์ž ์ธํ„ฐํŽ˜์ด์Šค +- [`discord/bot.py`](../src/lang2sql/frontends/discord/bot.py) โ€” **์œ ์ผํ•˜๊ฒŒ** `discord.py`๋ฅผ import +- [`discord/commands.py`](../src/lang2sql/frontends/discord/commands.py) โ€” ์ˆœ์ˆ˜ ํ•ธ๋“ค๋Ÿฌ (discord ๋น„์˜์กด, ํ…Œ์ŠคํŠธ ๊ฐ€๋Šฅ) +- [`discord/setup_wizard.py`](../src/lang2sql/frontends/discord/setup_wizard.py) โ€” `/setup` Modal/Select +- [`discord/session_router.py`](../src/lang2sql/frontends/discord/session_router.py) โ€” discord ID โ†’ `Identity` +- [`discord/render.py`](../src/lang2sql/frontends/discord/render.py) โ€” >50ํ–‰์ด๋ฉด CSV ์ฒจ๋ถ€ +- [`cli/app.py`](../src/lang2sql/frontends/cli/app.py) โ€” ๊ฐœ๋ฐœ์šฉ CLI + +--- + +## 4. ํ•œ ๋ฉ”์‹œ์ง€์˜ lifecycle (๋””์Šค์ฝ”๋“œ ๋ฉ˜์…˜ ํ•œ ๋ฒˆ ๋”ฐ๋ผ๊ฐ€๊ธฐ) + +``` +1. ์‚ฌ์šฉ์ž: "@lang2sql-test ์ด๋ฒˆ ๋‹ฌ ๋งค์ถœ ์•Œ๋ ค์ค˜" +2. discord/bot.py: on_message โ†’ _message_context()๋กœ (guild_id, channel_id, user_id) ๋ฝ‘์Œ +3. session_router.to_identity() โ†’ Identity(...) +4. CommandHandlers.query(identity, "์ด๋ฒˆ ๋‹ฌ ๋งค์ถœ ์•Œ๋ ค์ค˜") +5. 
ContextConcierge.build_context(identity) + - secrets์—์„œ ๊ธธ๋“œ๋ณ„ db_dsn ์žˆ๋‚˜? โ†’ ์žˆ์œผ๋ฉด build_explorer๋กœ ๊ทธ DB ์‚ฌ์šฉ (์บ์‹œ) + - SqliteStore์—์„œ ์„ธ์…˜ ๋กœ๋“œ (์—†์œผ๋ฉด ์ƒˆ๋กœ) + - build_default_tools()๋กœ ToolRegistry ์ฑ„์›€ + - HarnessContext ๋ฐ˜ํ™˜ +6. agent_loop(ctx, "์ด๋ฒˆ ๋‹ฌ ๋งค์ถœ ์•Œ๋ ค์ค˜") + - system_prompt: ์‹œ๋ฉ˜ํ‹ฑ effective_layer + ์Šคํ‚ค๋งˆ ์ฃผ์ž… + - LLM(GPT-4.1-mini): "run_sql ๋„๊ตฌ๋ฅผ ๋ถ€๋ฅด์„ธ์š”" ์‘๋‹ต + - tools.dispatch("run_sql", {sql: "SELECT ..."}, ctx) + โ†’ safety.evaluate(sql) โ†’ PASS + โ†’ explorer.execute(sql) โ†’ ํ–‰๋“ค ๋ฐ˜ํ™˜ + - ๊ฒฐ๊ณผ messages์— ์ถ”๊ฐ€, LLM ๋‹ค์‹œ ํ˜ธ์ถœ โ†’ ์ตœ์ข… ๋‹ต๋ณ€ +7. concierge.store.save(session_key, ctx.session) โ† ์„ธ์…˜ ์˜์†ํ™” +8. render_answer(answer) โ†’ OutboundMessage +9. interaction.followup.send(...) โ†’ Discord์— ๋‹ต +``` + +--- + +## 5. ์–ด๋””๋ฅผ ์ˆ˜์ •ํ•˜๋ฉด ์ข‹์„๊นŒ โ€” Extension Points + +๊ธฐ์—ฌ PR์„ ๋ฐ›๊ธฐ ๊ฐ€์žฅ ์‰ฌ์šด ์ง€์ ๋“ค. ์ „๋ถ€ *๊ธฐ์กด ์ฝ”๋“œ ์•ˆ ๊ฑด๋“œ๋ฆฌ๊ณ  ์ถ”๊ฐ€๋งŒ ํ•˜๋ฉด ๋ฉ๋‹ˆ๋‹ค*. + +### LLM ์ถ”๊ฐ€ (์˜ˆ: Anthropic Claude, NIM) +1. `src/lang2sql/adapters/llm/_.py` ์ƒˆ๋กœ ์ž‘์„ฑ, `LLMPort` ๊ตฌํ˜„ +2. `tenancy/concierge.py: _default_llm()`์— ๋ถ„๊ธฐ ์ถ”๊ฐ€ +3. tests/ ์— `test__adapter.py` + +### ์ƒˆ DB ์ง€์› +SQLAlchemy ์ง€์› DB๋ผ๋ฉด: +1. `pyproject.toml`์˜ `[project.optional-dependencies]`์— extra ์ถ”๊ฐ€ +2. ๋. `SqlAlchemyExplorer`๊ฐ€ DSN์œผ๋กœ ์•Œ์•„์„œ ์ฒ˜๋ฆฌ + +SQLAlchemy ๋ฏธ์ง€์› (์˜ˆ: ์ž์ฒด HTTP API): +1. `adapters/db/_explorer.py`์— `ExplorerPort` ๊ตฌํ˜„ +2. `adapters/db/factory.py`์˜ `build_explorer`์— scheme ๋ถ„๊ธฐ +3. `adapters/db/dsn_builder.py`์— `build_()` + `FIELD_SCHEMA[]` +4. tests/ + +### ์ƒˆ safety layer (์˜ˆ: AST ์ •๋ฐ€ ๊ฒ€์ฆ, ํ•จ์ˆ˜ ์ฐจ๋‹จ, EXPLAIN ๋น„์šฉ) +1. `safety/layers/.py`์— `SafetyLayerPort` ๊ตฌํ˜„ +2. `safety/pipeline.py`์˜ `SafetyPipeline` ๊ธฐ๋ณธ layers ๋ชฉ๋ก์— ๋ผ์šฐ๊ฑฐ๋‚˜, ์˜ต์…”๋„๋กœ ๋…ธ์ถœ +3. 
tests/test_safety.py์— ํšŒ๊ท€ ์ผ€์ด์Šค ์ถ”๊ฐ€ + +### ๋” ๋˜‘๋˜‘ํ•œ memory recall (์˜ˆ: ํ‚ค์›Œ๋“œ, ๋ฒกํ„ฐ) +1. `memory/recall/.py`์— `RecallPort` ๊ตฌํ˜„ +2. concierge์—์„œ ์˜ต์…˜์œผ๋กœ ์„ ํƒ ๊ฐ€๋Šฅํ•˜๊ฒŒ +3. tests/ + +### ์ƒˆ ingestion source (์˜ˆ: URL, Notion MCP) +1. `ingestion/sources/.py`์— `SourcePort` ๊ตฌํ˜„ +2. ingestion ๋„๊ตฌ ํ๋ฆ„์ด ์ž๋™ ๋งคํŠธ๋ฆญ์Šค์ด๋ฏ€๋กœ ์ถ”๊ฐ€ ์ฝ”๋“œ ๊ฑฐ์˜ ์—†์Œ + +### ์ƒˆ frontend (์˜ˆ: Slack, Web) +1. `frontends//` ๋””๋ ‰ํ† ๋ฆฌ์— transport ์ž‘์„ฑ +2. `commands.py`๋Š” ๊ทธ๋Œ€๋กœ ์žฌ์‚ฌ์šฉ (discord ๋น„์˜์กด์ด๋ผ) +3. `core/ports/frontend.py`์˜ `FrontendPort` ์ธํ„ฐํŽ˜์ด์Šค ๋”ฐ๋ฅด๊ธฐ + +### ์ƒˆ ๋„๊ตฌ (์˜ˆ: visualize, write_code) +1. `tools/.py`์— `ToolPort` ๊ตฌํ˜„ (spec + run) +2. `tools/__init__.py: build_default_tools()`์— ์ถ”๊ฐ€ +3. tests/ + +--- + +## 6. ๋น ๋ฅธ ๊ธฐ์—ฌ ์‹œ์ž‘ (5๋ถ„) + +```bash +git clone https://github.com/CausalInferenceLab/Lang2SQL.git +cd Lang2SQL +uv sync # ๊ธฐ๋ณธ deps +.venv/bin/pytest -q # 106 ํ…Œ์ŠคํŠธ ํ†ต๊ณผ ํ™•์ธ +.venv/bin/python bench/ecommerce_demo.py # federation + safety ๋กœ์ปฌ ๋ฐ๋ชจ +``` + +๋ธŒ๋žœ์น˜ โ†’ ์ฝ”๋“œ + ํ…Œ์ŠคํŠธ โ†’ PR. CI๋Š” ๋”ฐ๋กœ ์—†์œผ๋‹ˆ *๋กœ์ปฌ์—์„œ pytest ํ™•์ธ ํ›„ PR*. + +--- + +## 7. 
์ฝ”๋“œ ์ปจ๋ฒค์…˜ (์ž‘์€ ์•ฝ์†) + +| ๊ทœ์น™ | ์ด์œ  | +|---|---| +| **ํฌํŠธ๋Š” `typing.Protocol`** (`runtime_checkable` ๊ถŒ์žฅ) | ๋•ํƒ€์ดํ•‘ + isinstance ๊ฐ€๋Šฅ | +| **์–ด๋Œ‘ํ„ฐ์˜ engine/connection์€ lazy** | ๋ผ์šฐํŒ… ๋‹จ๊ณ„์—์„œ ๋“œ๋ผ์ด๋ฒ„ ๋ฏธ์„ค์น˜์—ฌ๋„ OK | +| **blocking ํ˜ธ์ถœ์€ `asyncio.to_thread`** | discord ์ด๋ฒคํŠธ ๋ฃจํ”„ ๋ง‰์ง€ ์•Š๊ธฐ | +| **frontends/discord์—์„œ `discord.py` import๋Š” `bot.py`ยท`setup_wizard.py`๋งŒ** | ๋กœ์ง์ธต์€ ์œ ๋‹›ํ…Œ์ŠคํŠธ ๊ฐ€๋Šฅํ•ด์•ผ ํ•จ | +| **์ƒˆ ํ™˜๊ฒฝ๋ณ€์ˆ˜๋Š” `.env.example`์—๋„ ๋ฌธ์„œํ™”** | ์‹ ๊ทœ ์ปจํŠธ๋ฆฌ๋ทฐํ„ฐ ์นœํ™” | +| **ํ…Œ์ŠคํŠธ๋Š” ํ† ํฐ/๋„คํŠธ์›Œํฌ ์—†์ด๋„ ํ†ต๊ณผํ•ด์•ผ ํ•จ** | `FakeLLM` / mock transport ํ™œ์šฉ | +| **ํฌํŠธ(`core/ports/`)๋Š” ๊ฑฐ์˜ frozen** | ๋ณ€๊ฒฝ์€ ๋ชจ๋“  ์–ด๋Œ‘ํ„ฐ/๊ตฌํ˜„์— ์˜ํ–ฅ โ€” ์ •๋ง ํ•„์š”ํ•œ์ง€ ํ•œ ๋ฒˆ ๋” ๊ณ ๋ฏผ | + +--- + +## 8. ๋” ๊นŠ์ด ๋ณด๊ณ  ์‹ถ๋‹ค๋ฉด + +- [`docs/discord_first_redesign_v4_1.md`](./discord_first_redesign_v4_1.md) โ€” *์™œ ์ด๋ ‡๊ฒŒ ๋งŒ๋“ค์—ˆ๋‚˜* (์žฅ๋ฌธ) +- [`docs/discord_first_redesign_v4_2.md`](./discord_first_redesign_v4_2.md) โ€” ํ™•์ • ์ปจ์…‰ ์š”์•ฝ (๋‹จ๋ฌธ) +- [`docs/DEPLOY.md`](./DEPLOY.md) โ€” Discord ๋ด‡ ์šด์˜ +- [`bench/ecommerce_demo.py`](../bench/ecommerce_demo.py) โ€” federation/safety ๋ผ์ด๋ธŒ ๋ฐ๋ชจ +- ํ…Œ์ŠคํŠธ๊ฐ€ ์‚ฌ์‹ค์ƒ ์‚ฌ์–‘์„œ โ€” `tests/test_*.py`๋ฅผ *๋ชจ๋“ˆ๋ณ„ ๊ฐ€์ด๋“œ*๋กœ ํ™œ์šฉ + +--- + +์งˆ๋ฌธ/์ œ์•ˆ์€ [Discord](https://discord.gg/EPurkHVtp2) ๋˜๋Š” GitHub Issues ํ™˜์˜. diff --git a/docs/BaseComponent_ko.md b/docs/BaseComponent_ko.md deleted file mode 100644 index 534be30..0000000 --- a/docs/BaseComponent_ko.md +++ /dev/null @@ -1,248 +0,0 @@ -# BaseComponent - -`BaseComponent`๋Š” **define-by-run(์ˆœ์ˆ˜ ํŒŒ์ด์ฌ ์ œ์–ด)** ์ฒ ํ•™์„ ์œ ์ง€ํ•˜๋ฉด์„œ๋„, ์ปดํฌ๋„ŒํŠธ ์‹คํ–‰์„ **๊ด€์ธก ๊ฐ€๋Šฅ(observable)** ํ•˜๊ฒŒ ๋งŒ๋“ค๊ธฐ ์œ„ํ•œ **์„ ํƒ์ (opt-in) ํ‘œ์ค€ ๋ ˆ์ด์–ด**์ž…๋‹ˆ๋‹ค. - -* ํŒŒ์ดํ”„๋ผ์ธ์€ ๊ทธ๋ƒฅ ํ•จ์ˆ˜/์ฝœ๋Ÿฌ๋ธ”๋งŒ์œผ๋กœ๋„ ์ถฉ๋ถ„ํžˆ ๋™์ž‘ํ•ฉ๋‹ˆ๋‹ค. 
-* `BaseComponent`๋Š” ๊ทธ ์œ„์— **์ถ”์ (hooks), ์—๋Ÿฌ ํ‘œ์ค€ํ™”, ์ด๋ฆ„/ํ˜•์‹ ํ†ต์ผ**์„ ์–น์–ด์ฃผ๋Š” ์—ญํ• ์„ ํ•ฉ๋‹ˆ๋‹ค. - -์ฆ‰, **ํ•„์ˆ˜๋Š” ์•„๋‹ˆ์ง€๋งŒ**, ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ/ํŒ€ ๋‹จ์œ„ ๊ฐœ๋ฐœ์—์„œ "์šด์˜ ๊ฐ€๋Šฅํ•œ ํ˜•ํƒœ"๋กœ ๋งŒ๋“ค๊ณ  ์‹ถ์„ ๋•Œ ์œ ์šฉํ•ฉ๋‹ˆ๋‹ค. - ---- - -## ์™œ ํ•„์š”ํ•œ๊ฐ€? - -### 1) ๊ด€์ธก์„ฑ(Tracing)์„ "๊ทธ๋ž˜ํ”„ ์—”์ง„ ์—†์ด" ์–ป๊ธฐ ์œ„ํ•ด - -Lang2SQL์€ ๊ทธ๋ž˜ํ”„ ์—”์ง„์„ ๊ฐ•์ œํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. ๋Œ€์‹ : - -* ์‚ฌ์šฉ์ž๋Š” Python `if/for/while`๋กœ ์ œ์–ดํ•œ๋‹ค. -* ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋Š” ๊ด€์ธก์„ฑ์€ **hook ์ด๋ฒคํŠธ**๋กœ ์ œ๊ณตํ•œ๋‹ค. - -`BaseComponent`๋Š” ๊ฐ ์ปดํฌ๋„ŒํŠธ ์‹คํ–‰์˜ `start/end/error`๋ฅผ ์ด๋ฒคํŠธ๋กœ ๋‚จ๊น๋‹ˆ๋‹ค. - -### 2) ์—๋Ÿฌ๋ฅผ "๋„๋ฉ”์ธ ์นœํ™”์ ์œผ๋กœ" ์ •๋ฆฌํ•˜๊ธฐ ์œ„ํ•ด - -ํ˜„์‹ค์—์„œ๋Š” `ValueError`, `KeyError`, ์™ธ๋ถ€ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์˜ˆ์™ธ ๋“ฑ์ด ์„ž์—ฌ์„œ ์˜ฌ๋ผ์˜ต๋‹ˆ๋‹ค. - -`BaseComponent`๋Š”: - -* `Lang2SQLError`(ValidationError, IntegrationMissingError ๋“ฑ)๋Š” **๊ทธ๋Œ€๋กœ ์œ ์ง€** -* ๊ทธ ์™ธ ์˜ˆ์™ธ๋Š” `ComponentError`๋กœ **ํ‘œ์ค€ ๋ž˜ํ•‘**(+ ์›์ธ ์˜ˆ์™ธ๋ฅผ `cause`๋กœ ๋ณด์กด) - -โ†’ ์‚ฌ์šฉ์ž/์šด์˜์ž ๊ด€์ ์—์„œ "์–ด๋””์„œ ํ„ฐ์กŒ๋Š”์ง€"๊ฐ€ ๋ถ„๋ช…ํ•ด์ง‘๋‹ˆ๋‹ค. - -### 3) "์ปดํฌ๋„ŒํŠธ ๋‹จ์œ„ ํ‘œ์ค€"์„ ๋งŒ๋“ค๊ธฐ ์œ„ํ•ด - -๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์ œ๊ณต ์ปดํฌ๋„ŒํŠธ๋ฅผ ๋ชจ๋‘ BaseComponent ๊ธฐ๋ฐ˜์œผ๋กœ ๋งŒ๋“ค๋ฉด: - -* ๋กœ๊ทธ/ํŠธ๋ ˆ์ด์Šค์˜ ํฌ๋งท์ด ํ†ต์ผ -* ํ…Œ์ŠคํŠธ/๋””๋ฒ„๊น… ๊ฒฝํ—˜์ด ์ผ์ • -* ๋ฌธ์„œ/ํƒ€์ž… ํžŒํŠธ๊ฐ€ ์ผ๊ด€ - ---- - -## BaseComponent๊ฐ€ ์ œ๊ณตํ•˜๋Š” API - -### ์ƒ์„ฑ์ž - -```python -BaseComponent(name: str | None = None, hook: TraceHook | None = None) -``` - -* `name`: ์ด๋ฒคํŠธ์— ์ฐํž ์ปดํฌ๋„ŒํŠธ ์ด๋ฆ„ (๊ธฐ๋ณธ๊ฐ’: ํด๋ž˜์Šค๋ช…) -* `hook`: ์ด๋ฒคํŠธ ์ˆ˜์‹ ์ž. ๊ธฐ๋ณธ๊ฐ’์€ `NullHook()` (์•„๋ฌด๊ฒƒ๋„ ํ•˜์ง€ ์•Š์Œ) - -### ๊ตฌํ˜„ํ•ด์•ผ ํ•˜๋Š” ๊ฒƒ: `_run()` - -์„œ๋ธŒํด๋ž˜์Šค๋Š” `_run()`์„ ๊ตฌํ˜„ํ•ฉ๋‹ˆ๋‹ค. ์ธ์ž ํƒ€์ž…๊ณผ ๋ฐ˜ํ™˜ ํƒ€์ž…์€ ๊ฐ ์ปดํฌ๋„ŒํŠธ์— ๋งž๊ฒŒ ์ž์œ ๋กญ๊ฒŒ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค. 
- -```python -class MyRetriever(BaseComponent): - def __init__(self, catalog: list, **kwargs): - super().__init__(**kwargs) - self._catalog = catalog - - def _run(self, query: str) -> list[dict]: - # ๋น„์ฆˆ๋‹ˆ์Šค ๋กœ์ง - return [t for t in self._catalog if query in t["description"]] -``` - -### ํ˜ธ์ถœ: `run()` / `__call__` - -`comp.run(query)` ๋˜๋Š” `comp(query)`๋ฅผ ํ˜ธ์ถœํ•˜๋ฉด ๋‚ด๋ถ€์ ์œผ๋กœ ์•„๋ž˜๋ฅผ ์ž๋™ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค. - -* `component.run start ์ด๋ฒคํŠธ ๋ฐœํ–‰` -* `self._run(...)` ์‹คํ–‰ -* ์„ฑ๊ณต ์‹œ `end ์ด๋ฒคํŠธ` + `duration_ms` -* ์‹คํŒจ ์‹œ `error ์ด๋ฒคํŠธ` - - * ๋„๋ฉ”์ธ ์˜ˆ์™ธ(`Lang2SQLError`)๋Š” ๊ทธ๋Œ€๋กœ raise - * ๊ทธ ์™ธ ์˜ˆ์™ธ๋Š” `ComponentError`๋กœ ๋ž˜ํ•‘ํ•ด์„œ raise - ---- - -## ํƒ€์ž… ์ธ์ž ํŒจํ„ด - -Lang2SQL์˜ ์ปดํฌ๋„ŒํŠธ๋Š” **๋ช…์‹œ์  ํƒ€์ž… ์ธ์ž**๋ฅผ ๋ฐ›๊ณ , **๋ช…์‹œ์  ํƒ€์ž… ๊ฒฐ๊ณผ**๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค. - -```python -# ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ๋‚ด์žฅ ์ปดํฌ๋„ŒํŠธ ์‹œ๊ทธ๋‹ˆ์ฒ˜ ์˜ˆ์‹œ -KeywordRetriever._run(query: str) -> list[CatalogEntry] -SQLGenerator._run(query: str, schemas: list[CatalogEntry], context: str = "") -> str -SQLExecutor._run(sql: str) -> list[dict] -``` - -### ๊ตฌ์„ฑ(config)์€ `__init__`์—, ์š”์ฒญ๋ณ„ ๋ฐ์ดํ„ฐ๋Š” `_run()` ์ธ์ž์— - -```python -class SQLGenerator(BaseComponent): - def __init__(self, llm: LLMPort, db_dialect: str = "default", **kwargs): - super().__init__(**kwargs) - self._llm = llm # ๊ณ ์ • ์„ค์ • - self._dialect = db_dialect - - def _run(self, query: str, schemas: list[CatalogEntry], context: str = "") -> str: - # ์š”์ฒญ๋งˆ๋‹ค ๋‹ฌ๋ผ์ง€๋Š” ๊ฐ’์€ _run() ์ธ์ž๋กœ ๋ฐ›๋Š”๋‹ค - ... -``` - ---- - -## ์–ธ์ œ BaseComponent๋ฅผ ์“ฐ๋Š”๊ฐ€? 
- -### BaseComponent๋ฅผ ์“ฐ๋Š” ๊ฒŒ ์ข‹์€ ๊ฒฝ์šฐ - -* ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ๊ธฐ๋ณธ ์ œ๊ณต ์ปดํฌ๋„ŒํŠธ(retriever/generator/executor) -* ํŒ€/์ œํ’ˆ ํ™˜๊ฒฝ์—์„œ **๊ด€์ธก์„ฑ(ํŠธ๋ ˆ์ด์‹ฑ)์ด ํ•„์š”ํ•œ ๊ฒฝ์šฐ** -* ์˜ˆ์™ธ ํ‘œ์ค€ํ™”๊ฐ€ ์ค‘์š”ํ•œ ๊ฒฝ์šฐ(์šด์˜/ํ…Œ์ŠคํŠธ/๋””๋ฒ„๊น…) - -### BaseComponent ์—†์ด ํ•จ์ˆ˜๋กœ ๋‘๋Š” ๊ฒŒ ์ข‹์€ ๊ฒฝ์šฐ - -* `policy`, `eval`, metric ๊ณ„์‚ฐ์ฒ˜๋Ÿผ **์ˆœ์ˆ˜ ํ•จ์ˆ˜ ์„ฑ๊ฒฉ**์ด ๊ฐ•ํ•œ ๋กœ์ง -* "์œ ์ €๊ฐ€ ๋น ๋ฅด๊ฒŒ ๋ถ™์—ฌ ๋„ฃ์–ด ์“ฐ๋Š”" ์ดˆ๊ฒฝ๋Ÿ‰ ์ปค์Šคํ…€ ๋กœ์ง -* ์‹คํ–‰ ๋‹จ์œ„๊ฐ€ ๋„ˆ๋ฌด ์ž‘์•„ ์ด๋ฒคํŠธ๊ฐ€ ๊ณผ๋„ํ•ด์ง€๋Š” ๊ฒฝ์šฐ - -์ฆ‰, **ํ•ต์‹ฌ ํŒŒ์ดํ”„๋ผ์ธ ์ถ•**์€ BaseComponent๋กœ ์žก๊ณ , -๊ทธ ์™ธ์˜ ์ž‘์€ ๋กœ์ง์€ ํ•จ์ˆ˜๋กœ ๋‘๋Š” ํ˜ผํ•ฉํ˜•์ด ๊ฐ€์žฅ ์ž์—ฐ์Šค๋Ÿฝ์Šต๋‹ˆ๋‹ค. - ---- - -## ์ปค์Šคํ…€ ์ปดํฌ๋„ŒํŠธ ์˜ˆ์‹œ - -```python -from lang2sql.core.base import BaseComponent - -class UpperCaseSQL(BaseComponent): - """SQL์„ ๋Œ€๋ฌธ์ž๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ํ›„์ฒ˜๋ฆฌ ์ปดํฌ๋„ŒํŠธ.""" - def _run(self, sql: str) -> str: - return sql.upper() - -upper = UpperCaseSQL() -print(upper.run("select 1")) # SELECT 1 -``` - -hook์„ ์ฃผ์ž…ํ•˜๋ฉด ์‹คํ–‰ ์ถ”์ ๋„ ์ž๋™์œผ๋กœ ๋ฉ๋‹ˆ๋‹ค: - -```python -from lang2sql import MemoryHook - -hook = MemoryHook() -upper = UpperCaseSQL(hook=hook) -upper.run("select 1") - -for e in hook.snapshot(): - print(e.component, e.phase, e.duration_ms) -# UpperCaseSQL start 0.0 -# UpperCaseSQL end 0.1 -``` - ---- - -## ํ›…(Tracing) ์‹œ์Šคํ…œ - -### Hook์ด๋ž€? - -์ปดํฌ๋„ŒํŠธ/ํ”Œ๋กœ์šฐ ์‹คํ–‰ ์‹œ์ ์— **์ด๋ฒคํŠธ(Event)** ๋ฅผ ๋ฐ›๋Š” ์ธํ„ฐํŽ˜์ด์Šค์ž…๋‹ˆ๋‹ค. - -* `start/end/error` ์‹œ์  ๊ธฐ๋ก -* ์†Œ์š” ์‹œ๊ฐ„(duration_ms) -* ์ž…๋ ฅ/์ถœ๋ ฅ ์š”์•ฝ(input_summary/output_summary) - -### ์–ด๋””์„œ ํ™•์ธํ•˜๋‚˜? - -๊ฐ€์žฅ ์‰ฌ์šด ๊ฑด `MemoryHook`์ž…๋‹ˆ๋‹ค. 
- -```python -from lang2sql import MemoryHook, HybridNL2SQL - -hook = MemoryHook() -pipeline = HybridNL2SQL(catalog=catalog, llm=llm, db=db, embedding=embedding, hook=hook) -pipeline.run("์ง€๋‚œ๋‹ฌ ๋งค์ถœ") - -for e in hook.snapshot(): - print(e.phase, e.component, e.duration_ms, e.error) -``` - -### ์šด์˜์šฉ ๊ด€์ธก์„ฑ์€ ์–ด๋””์„œ ์ œ์–ดํ•˜๋‚˜? - -์šด์˜์—์„œ๋Š” `MemoryHook` ๋Œ€์‹  ๋‹ค์Œ์ด ์ผ๋ฐ˜์ ์ž…๋‹ˆ๋‹ค. - -* ๋กœ๊ทธ๋กœ ํ˜๋ฆฌ๋Š” Hook (stdout / JSON log) -* APM/Tracing์œผ๋กœ ๋ณด๋‚ด๋Š” Hook (OpenTelemetry span ๋“ฑ) -* ํ•„ํ„ฐ๋ง Hook (ํŠน์ • ์ปดํฌ๋„ŒํŠธ๋งŒ ์ƒ˜ํ”Œ๋ง) - -ํ•ต์‹ฌ์€: **๊ด€์ธก์„ฑ์€ hook ๊ตฌํ˜„์ฒด์—์„œ ์ œ์–ด**ํ•˜๊ณ , ํŒŒ์ดํ”„๋ผ์ธ/์ปดํฌ๋„ŒํŠธ ์ฝ”๋“œ๋Š” ์ตœ๋Œ€ํ•œ "๋น„์ฆˆ๋‹ˆ์Šค ๋กœ์ง"๋งŒ ๊ฐ–๋„๋ก ๋ถ„๋ฆฌํ•ฉ๋‹ˆ๋‹ค. - ---- - -## ์ค‘์ฒฉ(์„œ๋ธŒํ”Œ๋กœ์šฐ/๋ž˜ํ•‘)ํ•˜๋ฉด ํŠธ๋ ˆ์ด์‹ฑ์ด ๊นจ์ง€๋‚˜? - -"๊นจ์ง„๋‹ค"๊ธฐ๋ณด๋‹ค๋Š” **์ด๋ฒคํŠธ๊ฐ€ ๋” ๋งŽ์ด ์ฐํž™๋‹ˆ๋‹ค.** - -* `flow_b` ์•ˆ์— `flow_a`๋ฅผ step์œผ๋กœ ๋„ฃ์œผ๋ฉด - - * `flow_b` ์ด๋ฒคํŠธ 2๊ฐœ(์‹œ์ž‘/๋) - * `flow_a` ์ด๋ฒคํŠธ 2๊ฐœ(์‹œ์ž‘/๋) - * `a1/a2` ์ปดํฌ๋„ŒํŠธ ์ด๋ฒคํŠธ๋„ ๊ฐ๊ฐ ์ฐํž˜(์ปดํฌ๋„ŒํŠธ๊ฐ€ BaseComponent๋ผ๋ฉด) - -์ด๊ฒŒ ์‹ซ๋‹ค๋ฉด ๋‘ ๊ฐ€์ง€ ์„ ํƒ์ง€๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค. - -1. **์ƒ์œ„ ๋ ˆ๋ฒจ(Flow)๋งŒ ํŠธ๋ ˆ์ด์‹ฑํ•˜๊ณ  ๋‚ด๋ถ€๋Š” ํ•จ์ˆ˜๋กœ ๋‘”๋‹ค** -2. **Hook์—์„œ ํ•„ํ„ฐ๋ง/์ƒ˜ํ”Œ๋งํ•œ๋‹ค** (์˜ˆ: component ์ด๋ฆ„ prefix๋กœ ์ œ์™ธ) - -์ถ”๊ฐ€ ๋ฌธ๋ฒ• ์—†์ด ํ•ด๊ฒฐํ•˜๋ ค๋ฉด 2๋ฒˆ์ด ๊ฐ€์žฅ ํ˜„์‹ค์ ์ž…๋‹ˆ๋‹ค. - ---- - -## ๋ฒ ์ŠคํŠธ ํ”„๋ž™ํ‹ฐ์Šค - -### 1) ๊ตฌ์„ฑ(config)์€ `__init__`์—, ์š”์ฒญ๋ณ„ ๋ฐ์ดํ„ฐ๋Š” `_run()` ์ธ์ž์— - -๊ณ ์ • ์„ค์ •(๋ชจ๋ธ, ์นดํƒˆ๋กœ๊ทธ, DB ์—ฐ๊ฒฐ ๋“ฑ)์€ ์ƒ์„ฑ์ž์—์„œ ๋ฐ›๊ณ , -์š”์ฒญ๋งˆ๋‹ค ๋‹ฌ๋ผ์ง€๋Š” ๊ฐ’(์ฟผ๋ฆฌ, ์Šคํ‚ค๋งˆ ๋ชฉ๋ก ๋“ฑ)์€ `_run()` ์ธ์ž๋กœ ์ „๋‹ฌํ•ฉ๋‹ˆ๋‹ค. - -### 2) `_run()`์˜ ๋ฐ˜ํ™˜๊ฐ’์€ ๋ช…์‹œ์ ์œผ๋กœ - -๋ฐ˜ํ™˜ ํƒ€์ž…์„ ๋ช…ํ™•ํžˆ ์ •์˜ํ•˜๋ฉด Flow์—์„œ ์ปดํฌ๋„ŒํŠธ๋ฅผ ์กฐํ•ฉํ•  ๋•Œ ์•ˆ์ „ํ•ฉ๋‹ˆ๋‹ค. 
- -### 3) "์ž‘์€ ๋กœ์ง(policy/eval)์€ ๊ทธ๋ƒฅ ํ•จ์ˆ˜" - -* BaseComponent๋กœ ๊ฐ์‹ธ๋Š” ๊ฑด ์„ ํƒ -* ์šด์˜์—์„œ ๊ผญ ์ถ”์ ์ด ํ•„์š”ํ•  ๋•Œ๋งŒ ๊ฐ์‹ผ๋‹ค - ---- - -## FAQ - -### Q. "๊ทธ๋ƒฅ ํ•จ์ˆ˜๋งŒ ์จ๋„ ๋˜๋Š”๋ฐ ์™œ ๊ตณ์ด BaseComponent?" - -A. **์šด์˜/๋””๋ฒ„๊น…/ํ˜‘์—…์—์„œ** ์ฐจ์ด๊ฐ€ ํฝ๋‹ˆ๋‹ค. -๋ฌธ์ œ ๋‚ฌ์„ ๋•Œ "์–ด๋””์„œ, ์–ด๋–ค ์ž…๋ ฅ์œผ๋กœ, ์–ผ๋งˆ๋‚˜ ๊ฑธ๋ฆฌ๋‹ค, ์–ด๋–ค ์—๋Ÿฌ๋กœ" ํ„ฐ์กŒ๋Š”์ง€ ์ž๋™์œผ๋กœ ๋‚จ๋Š” ๊ฒŒ ํ•ต์‹ฌ ๊ฐ€์น˜์ž…๋‹ˆ๋‹ค. - -### Q. "BaseComponent๋ฅผ ์œ ์ €๊ฐ€ ์ง์ ‘ ์จ์•ผ ํ•˜๋‚˜?" - -A. ํ•„์ˆ˜ ์•„๋‹™๋‹ˆ๋‹ค. -์ดˆ๊ธ‰ ์œ ์ €๋Š” **ํ”„๋ฆฌ์…‹ Flow + ํ”„๋ฆฌ์…‹ ์ปดํฌ๋„ŒํŠธ**๋งŒ์œผ๋กœ ์ถฉ๋ถ„ํžˆ ์“ฐ๊ฒŒ ํ•˜๊ณ , -๊ณ ๊ธ‰/์šด์˜ ์œ ์ €์—๊ฒŒ BaseComponent/Hook์„ ์ œ๊ณตํ•˜๋Š” ๊ตฌ์„ฑ์ด ๊ฐ€์žฅ ์ž์—ฐ์Šค๋Ÿฝ์Šต๋‹ˆ๋‹ค. - ---- diff --git a/docs/BaseFlow_ko.md b/docs/BaseFlow_ko.md deleted file mode 100644 index 9ed9120..0000000 --- a/docs/BaseFlow_ko.md +++ /dev/null @@ -1,191 +0,0 @@ -# BaseFlow - -`BaseFlow`๋Š” Lang2SQL์—์„œ **define-by-run(์ˆœ์ˆ˜ ํŒŒ์ด์ฌ ์ œ์–ด)** ์ฒ ํ•™์„ ๊ตฌํ˜„ํ•˜๊ธฐ ์œ„ํ•œ "ํ”Œ๋กœ์šฐ์˜ ์ตœ์†Œ ์ถ”์ƒํ™”(minimal abstraction)"์ž…๋‹ˆ๋‹ค. - -* ํŒŒ์ดํ”„๋ผ์ธ์˜ **์ œ์–ด๊ถŒ(control-flow)** ์„ ํ”„๋ ˆ์ž„์›Œํฌ DSL์ด ์•„๋‹ˆ๋ผ **์‚ฌ์šฉ์ž ์ฝ”๋“œ(Python)** ๊ฐ€ ๊ฐ–์Šต๋‹ˆ๋‹ค. -* ๊ทธ๋ž˜ํ”„ ์—”์ง„์„ ๊ฐ•์ œํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. -* ๋Œ€์‹ , ์‹คํ–‰ ๋‹จ์œ„๋ฅผ `Flow`๋กœ ๋ฌถ๊ณ  **๊ด€์ธก์„ฑ(hooks)** ๊ณผ **์—๋Ÿฌ ๊ทœ์•ฝ**์„ ํ†ต์ผํ•ฉ๋‹ˆ๋‹ค. - ---- - -## ์™œ ํ•„์š”ํ•œ๊ฐ€? - -### 1) "์ œ์–ด๋Š” ํŒŒ์ด์ฌ์œผ๋กœ"๋ฅผ ์ง€ํ‚ค๊ธฐ ์œ„ํ•ด - -Text2SQL์€ ํ˜„์‹ค์ ์œผ๋กœ ๋‹ค์Œ ์ œ์–ด๊ฐ€ ์ž์ฃผ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. - -* ์žฌ์‹œ๋„ ๋ฃจํ”„ (`while`, `for`) -* ์กฐ๊ฑด ๋ถ„๊ธฐ (`if`, `match`) -* ๋ถ€๋ถ„ ํŒŒ์ดํ”„๋ผ์ธ(์„œ๋ธŒํ”Œ๋กœ์šฐ) ํ˜ธ์ถœ -* ์ •์ฑ…(policy) ๊ธฐ๋ฐ˜ ํ–‰๋™ ๊ฒฐ์ • - -`BaseFlow`๋Š” ์ด๋Ÿฐ ์ œ์–ด๋ฅผ **์‚ฌ์šฉ์ž๊ฐ€ Python์œผ๋กœ ์ง์ ‘ ์ž‘์„ฑ**ํ•˜๊ฒŒ ๋‘๊ณ , ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋Š” "์‹คํ–‰ ์ปจํ…Œ์ด๋„ˆ + ๊ด€์ธก์„ฑ"๋งŒ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค. 
- -### 2) ์š”์ฒญ ๋‹จ์œ„ ๊ด€์ธก์„ฑ(Flow-level tracing) - -์šด์˜/๋””๋ฒ„๊น…์—์„œ๋Š” "์ด ์š”์ฒญ ์ „์ฒด๊ฐ€ ์–ธ์ œ ์‹œ์ž‘ํ–ˆ๊ณ , ์–ด๋””์„œ ์‹คํŒจํ–ˆ๊ณ , ์–ผ๋งˆ๋‚˜ ๊ฑธ๋ ธ๋Š”์ง€"๊ฐ€ ๋จผ์ € ์ค‘์š”ํ•ฉ๋‹ˆ๋‹ค. - -`BaseFlow`๋Š” ๋‹ค์Œ ์ด๋ฒคํŠธ๋ฅผ ๋ฐœํ–‰ํ•ฉ๋‹ˆ๋‹ค. - -* `flow.run` start / end / error -* ์‹คํ–‰ ์‹œ๊ฐ„(`duration_ms`) - -โ†’ ์š”์ฒญ 1๊ฑด์„ **Flow ๋‹จ์œ„๋กœ ๋น ๋ฅด๊ฒŒ ํŒŒ์•…**ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. - ---- - -## BaseFlow๊ฐ€ ์ œ๊ณตํ•˜๋Š” API - -### 1) ๊ตฌํ˜„ํ•ด์•ผ ํ•˜๋Š” ๊ฒƒ: `_run()` - -```python -class MyFlow(BaseFlow): - def _run(self, query: str) -> list[dict]: - ... - return result -``` - -* Flow์˜ ๋ณธ์ฒด ๋กœ์ง์€ ์—ฌ๊ธฐ์— ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค. -* ์ œ์–ด๋Š” Python์œผ๋กœ ์ง์ ‘ ์ž‘์„ฑํ•ฉ๋‹ˆ๋‹ค. (`if/for/while`) -* ์ž…์ถœ๋ ฅ ํƒ€์ž…์€ ์ž์œ ๋กญ๊ฒŒ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค. - -### 2) ํ˜ธ์ถœ: `run()` / `__call__` - -```python -out = flow.run("์ง€๋‚œ๋‹ฌ ๋งค์ถœ") -# ๋˜๋Š” -out = flow("์ง€๋‚œ๋‹ฌ ๋งค์ถœ") -``` - -* ๋‚ด๋ถ€์ ์œผ๋กœ `_run(...)`์„ ํ˜ธ์ถœํ•ฉ๋‹ˆ๋‹ค. -* hook ์ด๋ฒคํŠธ๋ฅผ `start/end/error`๋กœ ๊ธฐ๋กํ•ฉ๋‹ˆ๋‹ค. - ---- - -## ์‚ฌ์šฉ ํŒจํ„ด - -### 1) ์ดˆ๊ธ‰: ํ”„๋ฆฌ์…‹ Flow๋กœ ๋ฐ”๋กœ ์‹คํ–‰ - -์ดˆ๊ธ‰ ์‚ฌ์šฉ์ž๋Š” ๋ณดํ†ต "๊ตฌ์„ฑ๋งŒ ํ•˜๊ณ  ์‹คํ–‰"ํ•˜๋ฉด ๋ฉ๋‹ˆ๋‹ค. - -```python -pipeline = BaselineNL2SQL(catalog=catalog, llm=llm, db=db) -rows = pipeline.run("์ง€๋‚œ๋‹ฌ ๋งค์ถœ") -``` - -### 2) ๊ณ ๊ธ‰: CustomFlow๋กœ ์ œ์–ด(while/if/policy) - -์ •์ฑ…/๋ฃจํ”„/์žฌ์‹œ๋„ ๊ฐ™์€ ์ œ์–ด๊ฐ€ ๋“ค์–ด์˜ค๋ฉด `BaseFlow`๋ฅผ ์ง์ ‘ ์ƒ์†ํ•ด ์ž‘์„ฑํ•˜๋Š” ๊ฒƒ์ด ๊ฐ€์žฅ ๊น”๋”ํ•ฉ๋‹ˆ๋‹ค. - -```python -class RetryFlow(BaseFlow): - def _run(self, query: str) -> str: - for _ in range(3): - schemas = retriever(query) - sql = generator(query, schemas) - if validator(sql): - return sql - return sql -``` - ---- - -## Hook(Tracing)์€ ์–ด๋””์„œ ํ™•์ธํ•˜๋‚˜? - -Flow๋„ hook์„ ๋ฐ›์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 
- -```python -from lang2sql import MemoryHook, BaselineNL2SQL - -hook = MemoryHook() -pipeline = BaselineNL2SQL(catalog=catalog, llm=llm, db=db, hook=hook) - -rows = pipeline.run("์ง€๋‚œ๋‹ฌ ๋งค์ถœ") - -for e in hook.snapshot(): - print(e.name, e.phase, e.component, e.duration_ms, e.error) -``` - -์šด์˜์—์„œ๋Š” `MemoryHook` ๋Œ€์‹  ๋กœ๊ทธ/OTel/ํ•„ํ„ฐ๋ง ํ›…์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค. -๊ด€์ธก์„ฑ ์ œ์–ด๋Š” **hook ๊ตฌํ˜„์ฒด์—์„œ** ๋‹ด๋‹นํ•˜๊ณ , Flow ์ฝ”๋“œ๋Š” ๋น„์ฆˆ๋‹ˆ์Šค ๋กœ์ง์— ์ง‘์ค‘ํ•˜๋„๋ก ๋ถ„๋ฆฌํ•ฉ๋‹ˆ๋‹ค. - ---- - -## (๊ด€๋ จ ๊ฐœ๋…) BaseFlow์™€ BaseComponent์˜ ๊ด€๊ณ„ - -* `BaseFlow`๋Š” "์–ด๋–ป๊ฒŒ ์‹คํ–‰ํ• ์ง€(์ œ์–ด/์กฐ๋ฆฝ)"๋ฅผ ๋‹ด๋‹นํ•ฉ๋‹ˆ๋‹ค. -* `BaseComponent`๋Š” "ํ•œ ๋‹จ๊ณ„์—์„œ ๋ฌด์—‡์„ ํ• ์ง€(์ž‘์—… ๋‹จ์œ„)"๋ฅผ ๋‹ด๋‹นํ•ฉ๋‹ˆ๋‹ค. - -์ผ๋ฐ˜์ ์œผ๋กœ: - -* **Flow๋Š” ์—ฌ๋Ÿฌ Component๋ฅผ ํ˜ธ์ถœ**ํ•ฉ๋‹ˆ๋‹ค. -* **์ „์šฉ Flow(BaselineNL2SQL ๋“ฑ)๋Š” Component ๊ฐ„ ์™€์ด์–ด๋ง์„ ๋‚ด๋ถ€์—์„œ ์ฒ˜๋ฆฌ**ํ•ฉ๋‹ˆ๋‹ค. - -์ฆ‰, **Flow๊ฐ€ ์ƒ์œ„ ๋ ˆ๋ฒจ ์˜ค์ผ€์ŠคํŠธ๋ ˆ์ด์…˜**, Component๊ฐ€ **์žฌ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ถ€ํ’ˆ**์ž…๋‹ˆ๋‹ค. - ---- - -## SequentialFlow์˜ ์•Œ๋ ค์ง„ ์ œํ•œ - -`SequentialFlow`๋Š” `value = step(value)` ๋‹จ์ผ ๊ฐ’ ์ „๋‹ฌ ๋ฐฉ์‹์œผ๋กœ ๋™์ž‘ํ•ฉ๋‹ˆ๋‹ค. -์ด ์„ค๊ณ„๋Š” ๋‹จ์ˆœํ•œ ๋ณ€ํ™˜ ์ฒด์ธ์—๋Š” ์ ํ•ฉํ•˜์ง€๋งŒ, NL2SQL ํŒŒ์ดํ”„๋ผ์ธ์—์„œ ๋‹ค์Œ ํ•œ๊ณ„๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค. - -### ๋ฌธ์ œ 1: ์ปจํ…์ŠคํŠธ ์†Œ์‹ค - -ํŒŒ์ดํ”„๋ผ์ธ์ด ์ง„ํ–‰๋˜๋ฉด์„œ ์ดˆ๊ธฐ ์ž…๋ ฅ(`query`)์ด ์ค‘๊ฐ„ ๋‹จ๊ณ„ ์ถœ๋ ฅ์œผ๋กœ ๋Œ€์ฒด๋˜์–ด ์‚ฌ๋ผ์ง‘๋‹ˆ๋‹ค. - -```python -flow.run("์ฃผ๋ฌธ ๋‚ด์—ญ ํ™•์ธ") -โ†“ -retriever("์ฃผ๋ฌธ ๋‚ด์—ญ ํ™•์ธ") โ†’ list[CatalogEntry] -โ†“ -generator(list[CatalogEntry]) # โ† ์—ฌ๊ธฐ์„œ original query๊ฐ€ ์—†์Œ -โ†“ -TypeError ๋˜๋Š” ์ž˜๋ชป๋œ ๊ฒฐ๊ณผ -``` - -### ๋ฌธ์ œ 2: ๋‹ค์ค‘ ์ธ์ž ์ปดํฌ๋„ŒํŠธ์™€ ํ˜ธํ™˜ ๋ถˆ๊ฐ€ - -`SQLGenerator._run(query, schemas)`์ฒ˜๋Ÿผ 2๊ฐœ ์ด์ƒ์˜ ์ธ์ž๋ฅผ ๋ฐ›๋Š” ์ปดํฌ๋„ŒํŠธ๋Š” -`SequentialFlow`์˜ ๋‹จ์ผ ๊ฐ’ ์ „๋‹ฌ๋กœ ์—ฐ๊ฒฐํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. 
- -```python -# โŒ ๋™์ž‘ํ•˜์ง€ ์•Š์Œ โ€” generator๋Š” (query, schemas) 2๊ฐœ ์ธ์ž๊ฐ€ ํ•„์š” -flow = SequentialFlow(steps=[retriever, generator, executor]) -flow.run("์ฃผ๋ฌธ ๋‚ด์—ญ") # TypeError: _run() missing 1 required positional argument: 'schemas' -``` - -### ํ•ด๊ฒฐ ๋ฐฉ๋ฒ• - -NL2SQL ํŒŒ์ดํ”„๋ผ์ธ์€ `SequentialFlow` ๋Œ€์‹  **์ „์šฉ Flow**๋ฅผ ์‚ฌ์šฉํ•˜์„ธ์š”. -์ „์šฉ Flow๋Š” ๋‚ด๋ถ€์—์„œ ๋‹ค์ค‘ ์ธ์ž ์™€์ด์–ด๋ง์„ ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ์ฒ˜๋ฆฌํ•ฉ๋‹ˆ๋‹ค. - -```python -# KeywordRetriever ๊ธฐ๋ฐ˜ -pipeline = BaselineNL2SQL(catalog=catalog, llm=llm, db=db) - -# Keyword + Vector ๊ธฐ๋ฐ˜ -pipeline = HybridNL2SQL(catalog=catalog, llm=llm, db=db, embedding=embedding) - -# Gate + ํ”„๋กœํŒŒ์ผ๋ง + ๋ณด๊ฐ• ํฌํ•จ ํ’€ ํŒŒ์ดํ”„๋ผ์ธ -pipeline = EnrichedNL2SQL(catalog=catalog, llm=llm, db=db, embedding=embedding) - -rows = pipeline.run("์ฃผ๋ฌธ ๋‚ด์—ญ") -``` - -`SequentialFlow`๋Š” ๋‹จ์ผ ๊ฐ’ ๋ณ€ํ™˜ ์ฒด์ธ(์˜ˆ: ํ…์ŠคํŠธ ์ „์ฒ˜๋ฆฌ, ๋‹จ๊ณ„๋ณ„ ํ•„ํ„ฐ๋ง)์— ์ ํ•ฉํ•ฉ๋‹ˆ๋‹ค. - ---- - -## FAQ - -### Q. BaseFlow๊ฐ€ ํ•„์ˆ˜์ธ๊ฐ€? - -A. Flow๋ผ๋Š” ๊ฐœ๋…์€ ์‚ฌ์‹ค์ƒ ํ•„์š”ํ•˜์ง€๋งŒ, **๋ชจ๋“  ์‚ฌ์šฉ์ž๊ฐ€ BaseFlow๋ฅผ ์ง์ ‘ ์ƒ์†ํ•  ํ•„์š”๋Š” ์—†์Šต๋‹ˆ๋‹ค.** - -* ์ดˆ๊ธ‰: ํ”„๋ฆฌ์…‹ Flow(`BaselineNL2SQL`, `HybridNL2SQL`, `EnrichedNL2SQL`)๋งŒ ์‚ฌ์šฉ -* ๊ณ ๊ธ‰: `BaseFlow`๋ฅผ ์ƒ์†ํ•ด์„œ ์ œ์–ด๋ฅผ ์ง์ ‘ ์ž‘์„ฑ - -### Q. Flow์˜ ๋ฐ˜ํ™˜ ํƒ€์ž…์€? - -A. `_run()`์˜ ์ž…์ถœ๋ ฅ ํƒ€์ž…์€ ์ž์œ ๋กญ์Šต๋‹ˆ๋‹ค. ์ปดํฌ๋„ŒํŠธ๋ผ๋ฆฌ ํ•ฉ์˜ํ•œ ํƒ€์ž…์„ ๊ทธ๋Œ€๋กœ ์‚ฌ์šฉํ•˜๋ฉด ๋ฉ๋‹ˆ๋‹ค. diff --git a/docs/Core_concept_ko.md b/docs/Core_concept_ko.md deleted file mode 100644 index 13b9f00..0000000 --- a/docs/Core_concept_ko.md +++ /dev/null @@ -1,99 +0,0 @@ -# Core Concepts - -Lang2SQL์€ "๊ทธ๋ž˜ํ”„ ์—”์ง„/DSL"์„ ๊ฐ•์ œํ•˜์ง€ ์•Š๊ณ , **์ˆœ์ˆ˜ Python ์ฝ”๋“œ๋กœ ํŒŒ์ดํ”„๋ผ์ธ์„ ์ œ์–ด**ํ•˜๋Š” define-by-run ์ฒ ํ•™์„ ๋”ฐ๋ฆ…๋‹ˆ๋‹ค. -๊ฐ ์ปดํฌ๋„ŒํŠธ๋Š” **๋ช…์‹œ์  ํƒ€์ž… ์ธ์ž**๋ฅผ ๋ฐ›๊ณ , ๋ช…์‹œ์  ํƒ€์ž… ๊ฒฐ๊ณผ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค. 
- ---- - -## 1) Define-by-run: ์ œ์–ด๋Š” Python์œผ๋กœ - -Lang2SQL์—์„œ ํŒŒ์ดํ”„๋ผ์ธ ์ œ์–ด๋Š” ํ”„๋ ˆ์ž„์›Œํฌ๊ฐ€ ์•„๋‹ˆ๋ผ **์‚ฌ์šฉ์ž ์ฝ”๋“œ๊ฐ€ ๊ฐ€์ง‘๋‹ˆ๋‹ค.** - -* ๋ถ„๊ธฐ: `if / match` -* ๋ฐ˜๋ณต/์žฌ์‹œ๋„: `for / while` -* ์กฐ๊ฑด๋ถ€ ์‹คํ–‰: policy ๊ธฐ๋ฐ˜ action -* ์„œ๋ธŒํ”Œ๋กœ์šฐ: flow๋ฅผ step์ฒ˜๋Ÿผ ํ˜ธ์ถœ - -์˜ˆ์‹œ: - -```python -retriever = KeywordRetriever(catalog=catalog) -generator = SQLGenerator(llm=llm, db_dialect="sqlite") - -while True: - schemas = retriever.run(query) - sql = generator.run(query, schemas) - if validator(sql): - break - -rows = executor.run(sql) -``` - -**ํ•ต์‹ฌ:** Lang2SQL์€ ์œ„ ํŒจํ„ด์„ "ํ”„๋ ˆ์ž„์›Œํฌ ๋ฌธ๋ฒ•"์œผ๋กœ ๋ฐ”๊พธ์ง€ ์•Š์Šต๋‹ˆ๋‹ค. -๊ทธ๋ƒฅ Python์œผ๋กœ ์“ฐ๋˜, ๊ฐ ์ปดํฌ๋„ŒํŠธ์˜ ์ž…์ถœ๋ ฅ์ด **ํƒ€์ž…์œผ๋กœ ๋ช…ํ™•ํžˆ ์ •์˜**๋˜์–ด ์žˆ์–ด ์•ˆ์ „ํ•˜๊ฒŒ ์กฐํ•ฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. - ---- - -## 2) ํƒ€์ž… ์ธ์ž ํŒจํ„ด - -Text2SQL ํŒŒ์ดํ”„๋ผ์ธ์€ ํ˜„์‹ค์ ์œผ๋กœ ๋‹จ๊ณ„๊ฐ€ ๋Š˜์–ด๋‚ฉ๋‹ˆ๋‹ค. - -* retriever 1๊ฐœ๊ฐ€ ์•„๋‹ˆ๋ผ 10๊ฐœ, 100๊ฐœ๊ฐ€ ๋  ์ˆ˜ ์žˆ์Œ -* ์ค‘๊ฐ„ ์‚ฐ์ถœ๋ฌผ(์„ ํƒ๋œ ํ…Œ์ด๋ธ”, ์ปจํ…์ŠคํŠธ, ํ›„๋ณด SQL, ๊ฒ€์ฆ ๊ฒฐ๊ณผ, ์ ์ˆ˜/๋ฉ”ํŠธ๋ฆญ)์ด ๋Š˜์–ด๋‚จ -* loop/branch๊ฐ€ ๋“ค์–ด๊ฐ€๋ฉด์„œ "์–ด๋–ค ๋‹จ๊ณ„์—์„œ ๋ฌด์—‡์ด ์ƒ์„ฑ๋˜์—ˆ๋Š”์ง€" ์ถ”์ ์ด ์–ด๋ ค์›Œ์ง - -Lang2SQL์€ ๊ฐ ์ปดํฌ๋„ŒํŠธ์˜ `_run()` ๋ฉ”์„œ๋“œ๊ฐ€ **๋ช…์‹œ์  ํƒ€์ž… ์ธ์ž๋ฅผ ๋ฐ›๊ณ  ํƒ€์ž… ๊ฒฐ๊ณผ๋ฅผ ๋ฐ˜ํ™˜**ํ•˜๋„๋ก ์„ค๊ณ„ํ•ฉ๋‹ˆ๋‹ค. - -``` -KeywordRetriever._run(query: str) -> list[CatalogEntry] -SQLGenerator._run(query: str, schemas: list[CatalogEntry], context: str) -> str -SQLExecutor._run(sql: str) -> list[dict] -``` - -์ด ๋ฐฉ์‹์˜ ์žฅ์ : - -* ๊ฐ ์ปดํฌ๋„ŒํŠธ์˜ ์ž…์ถœ๋ ฅ์ด ์ฝ”๋“œ์— ๋ช…ํ™•ํžˆ ๋“œ๋Ÿฌ๋‚จ -* IDE ์ž๋™์™„์„ฑ๊ณผ ํƒ€์ž… ์ฒดํฌ๋ฅผ ํ™œ์šฉํ•  ์ˆ˜ ์žˆ์Œ -* ์ปดํฌ๋„ŒํŠธ๋ฅผ ๋…๋ฆฝ์ ์œผ๋กœ ํ…Œ์ŠคํŠธํ•˜๊ธฐ ์‰ฌ์›€ - -### ์ปดํฌ๋„ŒํŠธ ๊ฐ„ ๋ฐ์ดํ„ฐ ์ „๋‹ฌ - -์ปดํฌ๋„ŒํŠธ ๊ฐ„ ์™€์ด์–ด๋ง์€ **์ „์šฉ Flow๊ฐ€ ๋‚ด๋ถ€์—์„œ ์ฒ˜๋ฆฌ**ํ•ฉ๋‹ˆ๋‹ค. 
- -```python -# BaselineNL2SQL._run() ๋‚ด๋ถ€ ๊ตฌํ˜„ -def _run(self, query: str) -> list[dict]: - schemas = self._retriever(query) # list[CatalogEntry] - sql = self._generator(query, schemas) # str - return self._executor(sql) # list[dict] -``` - -์‚ฌ์šฉ์ž ๊ด€์ ์—์„œ๋Š” Flow์˜ `run()` ํ•˜๋‚˜๋งŒ ํ˜ธ์ถœํ•˜๋ฉด ๋ฉ๋‹ˆ๋‹ค: - -```python -rows = pipeline.run("์ง€๋‚œ๋‹ฌ ๋งค์ถœ") -``` - ---- - -## 3) ์ปดํฌ๋„ŒํŠธ vs ํ”Œ๋กœ์šฐ - -| | BaseComponent | BaseFlow | -|---|---|---| -| ์—ญํ•  | ๋‹จ์ผ ์ž‘์—… ๋‹จ์œ„ (๊ฒ€์ƒ‰, ์ƒ์„ฑ, ์‹คํ–‰) | ์—ฌ๋Ÿฌ ์ปดํฌ๋„ŒํŠธ์˜ ์กฐํ•ฉ/์ œ์–ด | -| ๊ตฌํ˜„ | `_run()` ๋ฉ”์„œ๋“œ | `_run()` ๋ฉ”์„œ๋“œ | -| ๊ด€์ธก์„ฑ | `component.run` ์ด๋ฒคํŠธ | `flow.run` ์ด๋ฒคํŠธ | -| ์˜ˆ์‹œ | `KeywordRetriever`, `SQLGenerator` | `BaselineNL2SQL`, `HybridNL2SQL` | - -๋‘˜ ๋‹ค **`_run()`์— ๋น„์ฆˆ๋‹ˆ์Šค ๋กœ์ง**์„ ์ž‘์„ฑํ•˜๊ณ , `run()` / `__call__()` ํ˜ธ์ถœ ์‹œ ์ž๋™์œผ๋กœ hook ์ด๋ฒคํŠธ๋ฅผ ๋ฐœํ–‰ํ•ฉ๋‹ˆ๋‹ค. - ---- - -## ๊ถŒ์žฅ ๊ทœ์•ฝ ์š”์•ฝ - -* **์ œ์–ด๋Š” Python์œผ๋กœ ํ•œ๋‹ค** (define-by-run) -* **์ปดํฌ๋„ŒํŠธ์˜ ์ž…์ถœ๋ ฅ์€ ๋ช…์‹œ์  ํƒ€์ž… ์ธ์ž๋กœ ์ •์˜ํ•œ๋‹ค** (`_run(query: str) -> list[CatalogEntry]`) -* **๊ตฌ์„ฑ(config)์€ `__init__`์—, ์š”์ฒญ๋ณ„ ๋ฐ์ดํ„ฐ๋Š” `_run()` ์ธ์ž์—** -* policy/eval์ฒ˜๋Ÿผ ๊ด€์ธก์„ฑ์ด ๋ถˆํ•„์š”ํ•œ ๋กœ์ง์€ **์ˆœ์ˆ˜ ํ•จ์ˆ˜๋กœ ๋‘ฌ๋„ ๋œ๋‹ค** - ---- diff --git a/docs/Hook_and_exception_ko.md b/docs/Hook_and_exception_ko.md deleted file mode 100644 index ccc9c14..0000000 --- a/docs/Hook_and_exception_ko.md +++ /dev/null @@ -1,311 +0,0 @@ -# Hooks (Tracing) - -Lang2SQL์˜ hooks ์‹œ์Šคํ…œ์€ **๊ทธ๋ž˜ํ”„ ์—”์ง„ ์—†์ด๋„ ๊ด€์ธก์„ฑ(observability)์„ ์ œ๊ณต**ํ•˜๊ธฐ ์œ„ํ•œ ์ตœ์†Œ ๋ ˆ์ด์–ด์ž…๋‹ˆ๋‹ค. -Flow/Component ์‹คํ–‰ ๊ณผ์ •์—์„œ ์ด๋ฒคํŠธ๋ฅผ ๋ฐœํ–‰ํ•˜๊ณ , ์‚ฌ์šฉ์ž๋Š” hook ๊ตฌํ˜„์ฒด๋กœ ์ด๋ฅผ ์ˆ˜์ง‘/์ถœ๋ ฅ/์ „์†กํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. 
- -ํ•ต์‹ฌ ์ปจ์…‰์€ ๋‹จ ํ•˜๋‚˜์ž…๋‹ˆ๋‹ค: - -> **โ€œ์‹คํ–‰ ์ค‘ ๋ฌด์Šจ ์ผ์ด ์ผ์–ด๋‚ฌ๋Š”์ง€(Event)๋ฅผ hook์ด ๋ฐ›๋Š”๋‹ค.โ€** - ---- - -## Event - -`Event`๋Š” Flow/Component ์‹คํ–‰ ์ค‘ ๋ฐœ์ƒํ•œ โ€œ๊ด€์ธก ๋‹จ์œ„โ€์ž…๋‹ˆ๋‹ค. - -```py -@dataclass -class Event: - name: str # e.g., "component.run" / "flow.run" - component: str # e.g., "KeywordTableRetriever" / "SequentialFlow" - phase: Literal["start", "end", "error"] - ts: float # unix timestamp - duration_ms: Optional[float] = None - - input_summary: Optional[str] = None - output_summary: Optional[str] = None - error: Optional[str] = None - - data: dict[str, Any] = field(default_factory=dict) -``` - -### ํ•„๋“œ ์˜๋ฏธ - -* `name` - - * ์ด๋ฒคํŠธ ์ข…๋ฅ˜๋ฅผ ๋‚˜ํƒ€๋‚ด๋Š” ๋ฌธ์ž์—ด - * ์˜ˆ: `"component.run"`, `"flow.run"` -* `component` - - * ์ด๋ฒคํŠธ๋ฅผ ๋ฐœ์ƒ์‹œํ‚จ ์‹คํ–‰ ๋‹จ์œ„ ์ด๋ฆ„ - * ์˜ˆ: `"KeywordTableRetriever"`, `"SequentialFlow"` -* `phase` - - * `"start" | "end" | "error"` -* `ts` - - * ์ด๋ฒคํŠธ ๋ฐœ์ƒ ์‹œ๊ฐ„(Unix timestamp) -* `duration_ms` - - * `end/error`์—์„œ๋งŒ ์ฃผ๋กœ ์ฑ„์›€(์‹คํ–‰ ์‹œ๊ฐ„) -* `input_summary`, `output_summary` - - * ๋””๋ฒ„๊น…์„ ์œ„ํ•œ โ€œ์‚ฌ๋žŒ์ด ์ฝ๊ธฐ ์‰ฌ์šดโ€ ์š”์•ฝ ๋ฌธ์ž์—ด -* `error` - - * ์‹คํŒจ ์‹œ ์˜ค๋ฅ˜ ์š”์•ฝ ๋ฌธ์ž์—ด -* `data` - - * UI/ํ•„ํ„ฐ๋ง/ํ…Œ์ŠคํŠธ/์ถ”๊ฐ€ ๋ฉ”ํƒ€๋ฅผ ์œ„ํ•œ ๊ตฌ์กฐํ™” payload - * ๊ธฐ๋ณธ์€ ๋นˆ dict์ด๋ฉฐ, ํ•„์š”ํ•  ๋•Œ๋งŒ ์ฑ„์šฐ๋Š” ๊ฒƒ์„ ๊ถŒ์žฅํ•ฉ๋‹ˆ๋‹ค. - ---- - -## TraceHook - -`TraceHook`์€ ์ด๋ฒคํŠธ๋ฅผ ๋ฐ›๋Š” ์ธํ„ฐํŽ˜์ด์Šค์ž…๋‹ˆ๋‹ค. - -```py -class TraceHook(Protocol): - def on_event(self, event: Event) -> None: ... -``` - -* Lang2SQL์˜ Flow/Component๋Š” ์‹คํ–‰ ์‹œ์ ์— `hook.on_event(Event(...))` ํ˜•ํƒœ๋กœ ์ด๋ฒคํŠธ๋ฅผ ๋ฐœํ–‰ํ•ฉ๋‹ˆ๋‹ค. -* hook์€ **์˜ต์…˜**์ด๋ฉฐ, ์—†์œผ๋ฉด `NullHook`์ด ์‚ฌ์šฉ๋ฉ๋‹ˆ๋‹ค. - ---- - -## ๊ธฐ๋ณธ Hook ๊ตฌํ˜„์ฒด - -### NullHook - -```py -class NullHook: - def on_event(self, event: Event) -> None: - return -``` - -* ๊ธฐ๋ณธ๊ฐ’ -* ์•„๋ฌด ๊ฒƒ๋„ ํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค. 
-* hook ๋น„์šฉ์„ ์—†์• ๊ณ  ์‹ถ์„ ๋•Œ ํ•ญ์ƒ ์•ˆ์ „ํ•œ ๊ธฐ๋ณธ ๊ตฌํ˜„์ž…๋‹ˆ๋‹ค. - -### MemoryHook - -```py -class MemoryHook: - def __init__(self) -> None: - self.events: list[Event] = [] - - def on_event(self, event: Event) -> None: - self.events.append(event) - - def clear(self) -> None: - self.events.clear() - - def snapshot(self) -> list[Event]: - return list(self.events) -``` - -* ์ด๋ฒคํŠธ๋ฅผ ๋ฉ”๋ชจ๋ฆฌ์— ๋ˆ„์ ํ•ฉ๋‹ˆ๋‹ค. -* ํ…Œ์ŠคํŠธ/๋””๋ฒ„๊น…์— ๊ฐ€์žฅ ์œ ์šฉํ•ฉ๋‹ˆ๋‹ค. - -#### MemoryHook ์‚ฌ์šฉ ์˜ˆ์‹œ - -```python -from lang2sql.core.hooks import MemoryHook -from lang2sql.flows.baseline import SequentialFlow - -hook = MemoryHook() -flow = SequentialFlow(steps=[...], hook=hook) - -out = flow.run("์ง€๋‚œ๋‹ฌ ๋งค์ถœ") - -for e in hook.snapshot(): - print(e.name, e.phase, e.component, e.duration_ms, e.error) -``` - -#### clear()๋ฅผ ์œ ์ €๊ฐ€ ์ง์ ‘ ํ˜ธ์ถœํ•ด์•ผ ํ•˜๋‚˜? - -* ๋ณดํ†ต์€ **ํ…Œ์ŠคํŠธ์—์„œ๋งŒ** `clear()`๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. (์ผ€์ด์Šค ๊ฐ„ ์ด๋ฒคํŠธ ์„ž์ž„ ๋ฐฉ์ง€) -* ์ผ๋ฐ˜ ์‚ฌ์šฉ์ž๋Š” ๋ณดํ†ต โ€œ์š”์ฒญ 1ํšŒ โ†’ hook 1๊ฐœ ์ƒ์„ฑโ€ ํŒจํ„ด์œผ๋กœ ์ถฉ๋ถ„ํ•ฉ๋‹ˆ๋‹ค. - -์˜ˆ: - -```py -hook = MemoryHook() -out = flow.run_query("q") # ์—ฌ๊ธฐ์„œ๋งŒ ์“ฐ๊ณ  ๋ -events = hook.snapshot() -``` - ---- - -## ์œ ํ‹ธ ํ•จ์ˆ˜ - -### now() - -```py -def now() -> float: - return time.time() -``` - -* timestamp ์ƒ์„ฑ์— ์‚ฌ์šฉ๋ฉ๋‹ˆ๋‹ค. - -### ms() - -```py -def ms(start: float, end: float) -> float: - return (end - start) * 1000.0 -``` - -* duration(ms) ๊ณ„์‚ฐ์— ์‚ฌ์šฉ๋ฉ๋‹ˆ๋‹ค. - -### summarize() - -```py -def summarize(x: Any, max_len: int = 240) -> str: - ... -``` - -* repr(x)๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ์š”์•ฝ ๋ฌธ์ž์—ด์„ ๋งŒ๋“ค๊ณ  ๊ธธ์ด๋ฅผ ์ œํ•œํ•ฉ๋‹ˆ๋‹ค. -* ์ด๋ฒคํŠธ์˜ `input_summary/output_summary`์— ์‚ฌ์šฉ๋ฉ๋‹ˆ๋‹ค. - ---- - -## ์šด์˜(Production)์—์„œ๋Š” ์–ด๋–ป๊ฒŒ ์“ฐ๋‚˜? - -MemoryHook์€ ํ…Œ์ŠคํŠธ์šฉ์ž…๋‹ˆ๋‹ค. ์šด์˜์—์„œ๋Š” ๋ณดํ†ต ๋‹ค์Œ ํ˜•ํƒœ๋กœ ํ™•์žฅํ•ฉ๋‹ˆ๋‹ค. 
- -* `LoggingHook`: JSON ๋กœ๊ทธ๋กœ ๋‚จ๊ธฐ๊ธฐ -* `OTelHook`: OpenTelemetry span์œผ๋กœ ์ „์†ก -* `FilteringHook`: ํŠน์ • component๋งŒ ์ƒ˜ํ”Œ๋ง/ํ•„ํ„ฐ๋ง - -๊ด€์ธก์„ฑ ์ œ์–ด๋Š” **hook ๊ตฌํ˜„์ฒด์—์„œ** ํ•˜๊ณ , Flow/Component ๋กœ์ง์€ ๋น„์ฆˆ๋‹ˆ์Šค์— ์ง‘์ค‘ํ•˜๋Š” ๊ฒƒ์ด ๊ธฐ๋ณธ ์ฒ ํ•™์ž…๋‹ˆ๋‹ค. - ---- - -# Exceptions - -Lang2SQL ์˜ˆ์™ธ ์‹œ์Šคํ…œ์€ ๋‘ ๋ชฉํ‘œ๋ฅผ ๊ฐ€์ง‘๋‹ˆ๋‹ค. - -1. **๋„๋ฉ”์ธ ์—๋Ÿฌ๋Š” ๋„๋ฉ”์ธ ํƒ€์ž…์œผ๋กœ ์œ ์ง€**ํ•œ๋‹ค. -2. ์™ธ๋ถ€/์ผ๋ฐ˜ ์˜ˆ์™ธ๋Š” โ€œ์–ด๋””์„œ ํ„ฐ์กŒ๋Š”์ง€โ€๊ฐ€ ๋ณด์ด๋„๋ก **ํ‘œ์ค€ ๋ž˜ํ•‘**ํ•œ๋‹ค. - ---- - -## Lang2SQLError (Base) - -```py -class Lang2SQLError(Exception): - """Base error for lang2sql.""" -``` - -* Lang2SQL์—์„œ ๋ฐœ์ƒํ•˜๋Š” ๋ชจ๋“  ๋„๋ฉ”์ธ ์˜ˆ์™ธ์˜ ๋ฒ ์ด์Šค์ž…๋‹ˆ๋‹ค. -* `BaseComponent` / `BaseFlow`๋Š” ์ผ๋ฐ˜์ ์œผ๋กœ **Lang2SQLError๋Š” ๊ทธ๋Œ€๋กœ ๋‹ค์‹œ raise**ํ•ฉ๋‹ˆ๋‹ค. - ---- - -## IntegrationMissingError - -```py -class IntegrationMissingError(Lang2SQLError): - def __init__(self, integration: str, extra: str | None = None, hint: str | None = None): - ... -``` - -### ์–ธ์ œ ๋ฐœ์ƒ? - -* ์„ ํƒ์  ์˜์กด์„ฑ(optional integration)์ด ํ•„์š”ํ•œ๋ฐ ์„ค์น˜๋˜์–ด ์žˆ์ง€ ์•Š์„ ๋•Œ - -์˜ˆ: - -* `faiss` retriever๋ฅผ ์“ฐ๋Š”๋ฐ `faiss`๊ฐ€ ์„ค์น˜๋˜์–ด ์žˆ์ง€ ์•Š์Œ - -### ๋ฉ”์‹œ์ง€ ํŠน์ง• - -* `extra`๊ฐ€ ์žˆ์œผ๋ฉด ์„ค์น˜ ํžŒํŠธ๋ฅผ ํฌํ•จํ•ฉ๋‹ˆ๋‹ค. - -์˜ˆ ๋ฉ”์‹œ์ง€: - -* `Missing optional integration: faiss. Install with: pip install 'lang2sql[faiss]'` - ---- - -## ValidationError - -```py -class ValidationError(Lang2SQLError): - pass -``` - -### ์–ธ์ œ ๋ฐœ์ƒ? - -* SQL ๊ฒ€์ฆ ์‹คํŒจ, ์ •์ฑ…์ƒ ๊ธˆ์ง€ ์ฟผ๋ฆฌ, ์Šคํ‚ค๋งˆ ๋ถˆ์ผ์น˜ ๋“ฑ -* โ€œ์œ ์ € ์ž…๋ ฅ/์ƒ์„ฑ ๊ฒฐ๊ณผ๊ฐ€ ์œ ํšจํ•˜์ง€ ์•Š๋‹คโ€์— ํ•ด๋‹นํ•˜๋Š” ์—๋Ÿฌ๋ฅผ ๋‹ด๋Š” ๋Œ€ํ‘œ ๋„๋ฉ”์ธ ์˜ˆ์™ธ - ---- - -## ContractError - -```py -class ContractError(Lang2SQLError): - """Raised when a component violates a required call/return contract.""" - pass -``` - -### ์–ธ์ œ ๋ฐœ์ƒ? 
- -* Lang2SQL์ด ์š”๊ตฌํ•˜๋Š” ํ˜ธ์ถœ/๋ฐ˜ํ™˜ ๊ณ„์•ฝ์„ ์œ„๋ฐ˜ํ–ˆ์„ ๋•Œ -* ์˜ˆ: `_run()`์ด ๋ฐ˜๋“œ์‹œ ๋ฐ˜ํ™˜ํ•ด์•ผ ํ•˜๋Š” ํƒ€์ž…๊ณผ ๋‹ค๋ฅธ ๊ฐ’์„ ๋ฐ˜ํ™˜ - -์ด ์—๋Ÿฌ๋Š” โ€œ์‚ฌ์šฉ์ž ์ฝ”๋“œ ๋ฒ„๊ทธ๋ฅผ ๋นจ๋ฆฌ ๋ฐœ๊ฒฌ(fail-fast)โ€ํ•˜๊ธฐ ์œ„ํ•œ ํƒ€์ž…์ž…๋‹ˆ๋‹ค. - ---- - -## ComponentError - -```py -class ComponentError(Lang2SQLError): - def __init__(self, component: str, message: str, *, cause: Exception | None = None): - self.component = component - self.cause = cause - super().__init__(f"[{component}] {message}") -``` - -### ๋ชฉ์  - -* โ€œ์ผ๋ฐ˜ ์˜ˆ์™ธ(ValueError, KeyError ๋“ฑ)โ€๋ฅผ ๋„๋ฉ”์ธ ๋ ˆ์ด์–ด๋กœ ๋Œ์–ด์˜ฌ ๋•Œ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค. -* ์–ด๋–ค ์ปดํฌ๋„ŒํŠธ์—์„œ ํ„ฐ์กŒ๋Š”์ง€ ์‹๋ณ„ ๊ฐ€๋Šฅํ•˜๊ฒŒ ๋งŒ๋“ญ๋‹ˆ๋‹ค. - -### cause - -* ์›๋ณธ ์˜ˆ์™ธ๋ฅผ ๋ณด์กดํ•ฉ๋‹ˆ๋‹ค. -* ํ…Œ์ŠคํŠธ/๋””๋ฒ„๊น…์—์„œ error chain์„ ํ™•์ธํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. - ---- - -## ์˜ˆ์™ธ๊ฐ€ Flow/Component์—์„œ ์–ด๋–ป๊ฒŒ ์ฒ˜๋ฆฌ๋˜๋‚˜? - -(ํ˜„์žฌ BaseComponent ์„ค๊ณ„ ๊ธฐ์ค€) - -* `Lang2SQLError` ๊ณ„์—ด - - * ๊ทธ๋Œ€๋กœ ์ด๋ฒคํŠธ์— ๊ธฐ๋กํ•˜๊ณ  ๊ทธ๋Œ€๋กœ raise -* ๊ทธ ์™ธ ๋ชจ๋“  ์˜ˆ์™ธ - - * ์ด๋ฒคํŠธ์— ๊ธฐ๋กํ•˜๊ณ  `ComponentError(..., cause=e)`๋กœ ๋ž˜ํ•‘ํ•˜์—ฌ raise - -์ฆ‰: - -* **๋„๋ฉ”์ธ ์˜ˆ์™ธ๋Š” โ€œ์ •์ƒ์ ์ธ ์‹คํŒจโ€๋กœ ์ทจ๊ธ‰** -* **์ผ๋ฐ˜ ์˜ˆ์™ธ๋Š” โ€œ๋ฒ„๊ทธ/์˜ˆ์ƒ ๋ฐ– ์‹คํŒจโ€๋กœ ํ‘œ์ค€ํ™”** - ---- - -## ๊ถŒ์žฅ ์‚ฌ์šฉ ๊ฐ€์ด๋“œ - -* โ€œ์‚ฌ์šฉ์ž ์ž…๋ ฅ/์ •์ฑ…/๊ฒ€์ฆ ์‹คํŒจโ€๋Š” `ValidationError` -* โ€œ์˜์กด์„ฑ ์„ค์น˜ ๋ฌธ์ œโ€๋Š” `IntegrationMissingError` -* โ€œ๊ณ„์•ฝ ์œ„๋ฐ˜(๋ฐ˜ํ™˜ ํƒ€์ž…/ํ˜ธ์ถœ ๊ทœ์•ฝ)โ€์€ `ContractError` -* โ€œ์™ธ๋ถ€ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ/์˜ˆ์ƒ ๋ฐ– ์˜ˆ์™ธโ€๋Š” `ComponentError`๋กœ ๋ž˜ํ•‘๋˜์–ด ์˜ฌ๋ผ์˜ค๋Š” ๊ฒƒ์„ ๊ธฐ๋ณธ์œผ๋กœ ํ•ฉ๋‹ˆ๋‹ค. 
- ---- diff --git a/docs/PROJECT.md b/docs/PROJECT.md new file mode 100644 index 0000000..7b71dd8 --- /dev/null +++ b/docs/PROJECT.md @@ -0,0 +1,152 @@ +# Lang2SQL — 프로젝트 SSOT + +> *"질문하면 SQL 짜주는 봇이 아니라, 현실의 messy함에 견디는 분석 에이전트."* + +이 문서는 이 프로젝트가 *무엇이고, 왜 존재하며, 지금 어디까지 와 있는지*를 **단일하게** 설명합니다. 다른 모든 문서·README·디자인노트는 이 문서를 참조하거나 보충합니다. + +--- + +## 1. 한 줄 정체성 + +**Lang2SQL**은 *문서로 비즈니스 맥락을 학습하고, 팀별로 시멘틱이 분기되고, 불완전한 DB에서도 답하고, 모든 정의·대화를 기억하는* 오픈소스 분석 에이전트입니다. Phase 1 인터페이스는 **Discord**. + +--- + +## 2. 왜 존재하는가 + +Vanna AI(~20k★), Wren AI(~12k★), SQLCoder 같은 Text-to-SQL 오픈소스들은 *질문→SQL 파이프라인* 자체는 이미 잘 풉니다. "더 좋은 SQL 생성"은 모델 fine-tuning 싸움이고, 그 영역엔 들어가지 않습니다.
+ +대신 *실무에 넣어보면 진짜 막히는* **현실의 지저분함 4가지**를 다룹니다: + +| 약점 | 기존 처리 | 우리 해결 | +|---|---|---| +| DB 메타데이터가 비어 있다 | Vanna: 학습 데이터 의존 | ★① **DB 강건성**: safety pipeline + 자동 보강 (V1.5) | +| 봇이 어제 한 얘기를 못 기억한다 | 대부분 stateless | ★② **Hermes 기억**: 3축 분리(Store/Recall/Extractor) | +| 비즈니스 정의를 사람이 일일이 입력 | Wren: MDL 수동 | ★③ **Ingestion 매트릭스**: 문서 → 시멘틱 후보 | +| 같은 *"활성 사용자"*가 팀마다 다르다 | Wren: 단일 MDL → 충돌 | ★④ **Semantic federation**: git-like 분기, 가장 구체적 scope 승리 | + +이 4가지는 *비즈니스마다 다르기 때문에 벤치마크가 안 나오는 영역* → 그래서 오픈소스가 안 건드림 → **그래서 기회**. + +--- + +## 3. 무엇을 다르게 하는가 — 4기둥 + +| 기둥 | 한 줄 | 자세히 | +|---|---|---| +| **★① Safety pipeline** | 모든 SQL이 통과해야 하는 *공항 보안 검색대* | layer를 줄 세우는 패턴 — 새 검사(예: AST 검증, 함수 차단)는 한 칸 끼우기 | +| **★② Memory 3축** | Store/Recall/Extractor 각각 독립 진화 | V1엔 in-memory/inject-all/manual, V1.5엔 SQLite/keyword/auto | +| **★③ Ingestion matrix** | Source × Extractor 자유 조합 | 파일×LLM이 V1, URL/Notion/DDL은 V1.5+ | +| **★④ Semantic federation** | git처럼 팀별 정의 분기, *가장 구체적이 승리* | 충돌이 사라짐. Wren의 "한 회사 한 MDL"이 못 푸는 영역 | + +**핵심 메타원칙**: 모든 외부 시스템 의존성을 *포트(Protocol)*로 추상화. *어댑터*는 가장자리에만. 그래서 새 LLM / 새 DB / 새 frontend 추가가 *기존 코드 안 건드리고 끼우기*로 끝남. + +--- + +## 4.
지금 어디까지 와 있는가 — 정직한 현황 + +### ✅ V1 완료 (master에서 동작) +- **core 포트 11종** — 모든 외부 의존을 Protocol로 추상화 +- **harness** — agent_loop(LLM → tool → 다음 턴), Session, HarnessContext +- **★①~★④ 4기둥** 최소 구현 — safety 12 회귀, memory 3축, ingestion 매트릭스, federation 3-scope +- **도구 6종** — run_sql · explore_schema · define_metric · remember · ask_user · ingest_doc +- **Discord 프론트엔드** — 6개 슬래시 명령 + `/setup` 위저드 (비개발자 DSN-free flow) + bot.py +- **영속화** — SQLite 시멘틱 store + Fernet 실암호화 secrets +- **DB 어댑터** — `SqlAlchemyExplorer` 1개로 Postgres/MySQL/Snowflake/BigQuery/DuckDB 커버 + Cloudflare D1 HTTP 어댑터 + `build_explorer(DSN)` 자동 라우팅 +- **106개 자동화 테스트** (safety 회귀 12 포함) +- **bench 데모** — federation + safety 라이브 시연 (`bench/ecommerce_demo.py`) + +### ⚠️ Stub / 미검증 +| 항목 | 상태 | +|---|---| +| PostgreSQL 실 연결 | psycopg 어댑터는 있음. 실 PG 테스트 미수행 | +| 메타데이터 자동 보강 (★①의 핵심 차별점) | V1.5 | +| 키워드/벡터 recall | V1.5/V2 | +| LLM 자동 fact 추출 | V1.5 | +| `/semantic diff`, `/semantic promote` | V1.5 | +| URL/Notion 문서 입력 | V1.5/V2 | +| Slack/Web frontend | Phase 2/3 | +| Audit hash chain | V2 | + +--- + +## 5.
로드맵 + + ``` +V1 ✅ 골격 + 4기둥 최소 + Discord 어댑터 + 영속화 ← 지금 +V1.5 → 메타데이터 자동 보강(★①) + 키워드 recall + + LLM 자동 fact 추출 + /semantic diff·promote + + URL/DDL ingestion + 회귀 강화 +V2 → 벡터 recall + 비용 게이트(EXPLAIN) + Notion MCP + + 외부 git semantic 동기화 + Slack frontend +V2.5 → PostgreSQL 멀티인스턴스 + branch fork/merge UI + + Web frontend +``` + +각 단계의 디테일은 [`docs/discord_first_redesign_v4_1.md`](./discord_first_redesign_v4_1.md) §3. + +--- + +## 6. 빠른 시작 + +```bash +git clone https://github.com/CausalInferenceLab/Lang2SQL.git +cd Lang2SQL +uv sync # 기본 deps +.venv/bin/pytest -q # 106 테스트 +.venv/bin/python bench/ecommerce_demo.py # federation + safety 데모 +``` + +Discord 봇 운영: [`docs/DEPLOY.md`](./DEPLOY.md) + +--- + +## 7. 아키텍처 & 기여 + +- **아키텍처 한눈 가이드 + 어디 손대면 좋은지**: [`docs/ARCHITECTURE.md`](./ARCHITECTURE.md) +- **PR 작성 형식**: [`docs/pull_request_guidelines.md`](./pull_request_guidelines.md) +- **브랜치 전략**: [`docs/branch_guidelines.md`](./branch_guidelines.md) + +기여 PR을 가장 받기 쉬운 지점들 (자세한 위치/방법은 ARCHITECTURE.md §5): +- 새 LLM 어댑터 (`adapters/llm/.py`) +- 새 safety layer (`safety/layers/.py`) +- 새 memory recall 전략 (`memory/recall/.py`) +- 새 ingestion source (`ingestion/sources/.py`) +- 새 frontend (`frontends//`) +- 새 도구 (`tools/.py`) + +--- + +## 8. 핵심 설계 결정 (왜 이 길을 택했나) + +| 결정 | 이유 | +|---|---| +| **백지 재작성** (LangGraph/Streamlit 파이프라인 → ports & adapters 에이전트) | 파이프라인 위에 4기둥을 얹는 것보다, 4기둥을 *전제*로 새로 짓는 게 깔끔함 | +| **포트 & 어댑터** (콘센트와 가전) | V1엔 단순 구현 1개씩, 어댑터 추가는 기존 코드 안 건드림.
*연구실 코드가 곧 제품 layer* | +| **Discord 1급 frontend, 나머지는 추상** | "오픈소스 분석 봇"의 자연스러운 거주지. Slack/Web은 어댑터 추가 | +| **"강건성"을 두 축으로 분리** (DB ★① + 시멘틱 ★④) | 실무에선 후자가 더 자주 터진다는 발견. 학계엔 ★①, 거버넌스엔 ★④ | +| **federation = git-like 분기** | "한 회사 한 정의"는 조직 현실과 충돌. 각자 scope에서 살게 함 | +| **Read-only를 fail-closed로 강제** | safety pipeline의 whitelist는 SELECT/WITH 외 BLOCK. DROP/INSERT가 모델 환각으로 새는 사고 방지 | +| **stdlib → 필요 시 lean dep** | 초기엔 의존성 0(urllib OpenAI 어댑터), V1.5에서 cryptography/discord.py만 핀 | + +--- + +## 9. 프로젝트 메타 + +- **License**: [MIT](https://opensource.org/licenses/MIT) +- **운영**: [가짜연구소](https://pseudo-lab.com/) 인과추론팀 +- **커뮤니티**: [Discord](https://discord.gg/EPurkHVtp2) +- **이슈/기능 요청**: [GitHub Issues](https://github.com/CausalInferenceLab/Lang2SQL/issues) +- **백업**: 옛 v0.3 아키텍처는 `archive/pre-v4.1-rebuild` 태그로 복원 가능 + +--- + +## 10. 변천 (간단) + +| 시기 | 사건 | +|---|---| +| ~v0.3 | LangGraph + Streamlit 파이프라인 (질문→retrieval→gate→generation→execution) | +| 2026 봄 | **방향 전환**: Vanna/Wren도 이미 잘 푸는 영역에서 경쟁 그만, "현실 robustness"로 이동 | +| 2026-05 | v4.1 plan 확정 → ports & adapters로 백지 재작성 (PR #227–#230) | +| (지금) | V1 master 안착.
๋‹ค์Œ์€ V1.5 โ€” โ˜…โ‘ ์˜ *์ง„์งœ ์ฐจ๋ณ„์ *์ธ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์ž๋™ ๋ณด๊ฐ• | + +โ€” *"๋” ๋˜‘๋˜‘ํ•œ SQL ์ƒ์„ฑ๊ธฐ๊ฐ€ ์•„๋‹ˆ๋ผ, ํ˜„์‹ค์˜ messyํ•จ์— ๊ฒฌ๋””๋Š” ๋„๊ตฌ."* diff --git a/docs/discord_first_redesign.md b/docs/discord_first_redesign.md deleted file mode 100644 index 0363975..0000000 --- a/docs/discord_first_redesign.md +++ /dev/null @@ -1,611 +0,0 @@ -# Lang2SQL โ€” Discord-First ์žฌ์„ค๊ณ„ ๋ช…์„ธ - -> **์ž‘์„ฑ์ผ**: 2026-05-18 -> **๊ฒฐ์ •์ž**: ryan@brain-crew.com -> **์ƒํƒœ**: ๊ณจ๊ฒฉ ์Šน์ธ. ยง10 ์ž”์—ฌ ๊ฒฐ์ • ํ•ญ๋ชฉ ๋ณด์™„ ํ›„ Week 1 ์ฐฉ์ˆ˜. -> **๋ฒ”์œ„**: ํ˜„ `lang2sql/` ํŠธ๋ฆฌ๋ฅผ ๋น„์šฐ๊ณ  ๋ณธ ๋ฌธ์„œ๋Œ€๋กœ ์ƒˆ๋กœ ์ฑ„์šด๋‹ค. - ---- - -## 0. ์ œํ’ˆ ํ•œ ์ค„๊ณผ ํƒ€์ด๋ธŒ๋ ˆ์ด์ปค - -> **"Discord์—์„œ ์“ฐ๋Š” read-only ยท audit-by-default SQL ์—์ด์ „ํŠธ. -> DB๋Š” ์ ˆ๋Œ€ ๋ณ€ํ•˜์ง€ ์•Š๊ณ , ๋ชจ๋“  ์ฟผ๋ฆฌ๋Š” ์˜ˆ์‚ฐ/์‹œ๊ฐ„/ํ–‰ ํ•œ๋„ ์•ˆ์—์„œ ๋ˆ๋‹ค. -> ๋Œ€ํ™” ๋งฅ๋ฝ์€ ๋Š๊ฒจ๋„ ์˜์†๋œ๋‹ค."** - -์„ค๊ณ„ ์ถฉ๋Œ ์‹œ ์šฐ์„ ์ˆœ์œ„: - -1. **Discord UX๊ฐ€ ๋‹จ์ˆœํ•œ๊ฐ€?** -2. **DB๋Š” ์•ˆ ๊นจ์ง€๋Š”๊ฐ€?** -3. **๋งฅ๋ฝ์€ ๋ณด์กด๋˜๋Š”๊ฐ€?** - -์ด 3๊ฐ€์ง€์— ๋ถ€์ •์œผ๋กœ ๋‹ตํ•˜๋Š” ์–ด๋–ค ์ถ”์ƒํ™”๋„ ๋„์ž…ํ•˜์ง€ ์•Š๋Š”๋‹ค. - ---- - -## 1. ํ™•์ •๋œ ๊ธฐ์ˆ  ๊ฒฐ์ • - -| # | ํ•ญ๋ชฉ | ์„ ํƒ | ๊ทผ๊ฑฐ | -|---|---|---|---| -| 1 | Discord lib | **`discord.py` 2.x** | ๊ฐ€์žฅ ์„ฑ์ˆ™ยท์•ˆ์ •. ๋ฌธ์„œ/์˜ˆ์ œ ํ’๋ถ€. ๋น„๋™๊ธฐ native. | -| 2 | DB ์—”์ง„ ํƒ€๊นƒ (V1) | **PostgreSQL** | EXPLAINยทstatement_timeoutยทRO roleยทsslmode ๋“ฑ ๋ชจ๋“  safety ๊ธฐ๋ฒ•์ด ์ •์งํ•˜๊ฒŒ ๋™์ž‘. MySQL/SQLite/BigQuery๋Š” V2 ์ดํ›„. | -| 3 | ์„ธ์…˜ ์˜์†ํ™” | **Hermes-style write-through** | ๋งค ๋ฉ”์‹œ์ง€ยท๋งค ๋„๊ตฌ ํ˜ธ์ถœ/๊ฒฐ๊ณผยท๋งค pauseยท๋งค audit ํ•ญ๋ชฉ์„ ์ฆ‰์‹œ ๋””์Šคํฌ์— flush. ๋ด‡ ์žฌ์‹œ์ž‘/๋ฐฐํฌ/๋„คํŠธ์›Œํฌ ๋‹จ์ ˆ ํ›„ ๋‹ค์‹œ ๋ถ™์–ด๋„ ๋™์ผ ๋งฅ๋ฝ ๋ณต๊ตฌ. | -| 4 | ์„ธ์…˜ + ์ž๊ฒฉ์ฆ๋ช… ์Šคํ† ์–ด | **AES-GCM ์•”ํ˜ธํ™”๋œ SQLite ๋‹จ์ผ ํŒŒ์ผ** | ํŠธ๋ผ์ด์–ผ ๋‹จ๊ณ„ ๋ฌด๋ฃŒยท๋ฌด์˜์กด. JSON1์œผ๋กœ conversation ์ €์žฅ. ์™ธ๋ถ€ secret manager๋Š” V2 ์˜ต์…˜. 
| -| 5 | LLM ๋ฐฑ์—”๋“œ | **OpenAI + NVIDIA NIM ๋“€์–ผ ์–ด๋Œ‘ํ„ฐ** | NIM endpoint๊ฐ€ OpenAI ํ˜ธํ™˜(`/v1/chat/completions`) โ†’ ์–ด๋Œ‘ํ„ฐ ๊ฑฐ์˜ ๊ทธ๋Œ€๋กœ ์žฌ์‚ฌ์šฉ. ์Šฌ๋ž˜์‹œ ๋ช…๋ น `/model` ๋กœ ์œ ์ € ์ „ํ™˜. | -| 6 | Go TUI | **์œ ์ง€ (๊ฐœ๋ฐœ ๋„๊ตฌ)** | Discord ๋ถˆ๊ฐ€ ํ™˜๊ฒฝ fallback. `serve.py` NDJSON ์ธํ„ฐํŽ˜์ด์Šค ์œ ์ง€. | -| 7 | ์ฐจํŠธ ์ถœ๋ ฅ | **PNG ์ฒจ๋ถ€ + ์ž๋™ ํŽ˜์ด์ง€๋„ค์ด์…˜** | matplotlib + Pillow. Discord ์ฒจ๋ถ€ ํ•œ๋„(๋ฌด๋ฃŒ 8MB/๋ฉ”์‹œ์ง€, ์ž„๋ฒ ๋“œ 10๊ฐœ)์— ๋งž์ถฐ ์ž˜๋ผ ๋ณด๋ƒ„. | -| 8 | ํ˜ธ์ŠคํŒ… (ํŠธ๋ผ์ด์–ผ) | **์†Œํ˜• VPS** | ยง1.1 ์ฐธ์กฐ. Cloudflare Workers ๋ถ€์ ํ•ฉ. | - -### 1.1 ํ˜ธ์ŠคํŒ… โ€” "Cloudflare 5์ฒœ์›" ๊ฑด์— ๋Œ€ํ•œ ์†”์งํ•œ ๋‹ต - -`discord.py` ๋Š” Discord WebSocket gateway์— **๊ณ„์† ๋ถ™์–ด ์žˆ์–ด์•ผ ํ•˜๋Š” ์žฅ๊ธฐ ์‹คํ–‰ Python ํ”„๋กœ์„ธ์Šค**์ž…๋‹ˆ๋‹ค. Cloudflare Workers์˜ edge runtime์€ ์งง์€ HTTP ์š”์ฒญ-์‘๋‹ต ๋ชจ๋ธ์ด๋ผ ๋ด‡ ๋ณธ์ฒด๋ฅผ ๋ชป ์˜ฌ๋ฆฝ๋‹ˆ๋‹ค. (D1/KV/R2 ๊ฐ™์€ backing store๋กœ๋Š” ํ™œ์šฉ ๊ฐ€๋Šฅํ•˜์ง€๋งŒ V1 ๋ฒ”์œ„ ๋ฐ–.) - -**์ €๋ ดยทํŠธ๋ผ์ด์–ผ ๊ถŒ์žฅ ์˜ต์…˜** - -| ์˜ต์…˜ | ๋น„์šฉ | ๋ฉ”๋ชจ | -|---|---|---| -| **Oracle Cloud Always Free** | $0 (๋ฌด๊ธฐํ•œ) | ARM Ampere 4์ฝ”์–ด/24GB. ๊ฐ€์„ฑ๋น„ ์ตœ๊ฐ•. ๊ฐ€์ž… ์‹œ ์นด๋“œ ๊ฒ€์ฆ. | -| **fly.io shared-cpu-1x** | $0 ~ $5/์›” | 256MB๋กœ ์‹œ์ž‘ ๊ฐ€๋Šฅ. ํ•œ๊ตญ์—์„œ ๊ฐ€๊นŒ์šด region(NRT/SIN). | -| **Hetzner CX11** | โ‚ฌ4.5/์›” (~6,500์›) | ์•ˆ์ •ยท๋น ๋ฆ„. ์œ ๋Ÿฝ region. | -| **๋ณธ์ธ PC + 24h on** | $0 | ๋ฐ๋ชจ ๋‹จ๊ณ„ ์ถฉ๋ถ„. ๋™์  IP๋ฉด ์ด์ฃผ ๊ถŒ์žฅ. | - -**๋น„์ถ”**: Cloudflare Workers(๋Ÿฐํƒ€์ž„ ๋น„ํ˜ธํ™˜), Heroku ๋ฌด๋ฃŒ(ํ์ง€), Render ๋ฌด๋ฃŒ(60s sleep์œผ๋กœ ๋ด‡ ๋Š๊น€). - -### 1.2 "ํƒ€๊นƒ = ๊ฐ€์งœ์—ฐ๊ตฌ์†Œ ๋””์Šค์ฝ”๋“œ?" ์ •๋ฆฌ - -์งˆ๋ฌธํ•˜์‹  *"์ด ํƒ€๊นƒ์ด ๊ฐ€์งœ์—ฐ๊ตฌ์†Œ ๋””์Šค์ฝ”๋“œ?"* ์— ๋Œ€ํ•ด: ๋ฌธ์„œ์—์„œ ์“ด "ํƒ€๊นƒ"์€ **์ง€์›ํ•  DB ์—”์ง„ ์ข…๋ฅ˜**(PG vs MySQL vs BQ)๋ฅผ ์˜๋ฏธํ•œ ๊ฒƒ์ด๊ณ , **๋ด‡์ด ์ดˆ๋Œ€๋  ๋””์Šค์ฝ”๋“œ ์ปค๋ฎค๋‹ˆํ‹ฐ**์™€๋Š” ๋ณ„๊ฐœ์ž…๋‹ˆ๋‹ค. 
- -- **DB ์—”์ง„ ํƒ€๊นƒ**: PostgreSQL (ํ™•์ •) -- **๋ฐฐํฌ ์ปค๋ฎค๋‹ˆํ‹ฐ**: ๊ฐ€์งœ์—ฐ๊ตฌ์†Œ ๋””์Šค์ฝ”๋“œ๋ผ๋ฉด ๊ทธ์ชฝ ๊ธธ๋“œ์— ๋ด‡์„ ์ดˆ๋Œ€ํ•˜๋Š” ๋ณ„๊ฐœ ๊ฒฐ์ •. V1 ์ฝ”๋“œ๋Š” ์–ด๋–ค ๊ธธ๋“œ์—๋“  ๋™์ž‘ํ•˜๋„๋ก ์ž‘์„ฑ. - -### 1.3 "์ž๊ฒฉ์ฆ๋ช… ์ €์žฅ ์œ„์น˜" ํ’€์ด - -`/connect` ๋กœ ์ž…๋ ฅ๋ฐ›๋Š” **DB ์‚ฌ์šฉ์ž๋ช…ยท๋น„๋ฐ€๋ฒˆํ˜ธ๋ฅผ ์–ด๋””์— ์ €์žฅํ•˜๋А๋ƒ**์˜ ์งˆ๋ฌธ์ž…๋‹ˆ๋‹ค. - -| ์˜ต์…˜ | ๋น„์šฉ | ๊ฐ•๋„ | ์ถ”์ฒœ ์‹œ์  | -|---|---|---|---| -| **๋กœ์ปฌ AES-GCM SQLite** | $0 | ๋งˆ์Šคํ„ฐํ‚ค(env) + per-row IV | **V1 โ€” ์ฑ„ํƒ** | -| AWS Secrets Manager | ~$0.40/secret/์›” | ๊ฐ•ํ•จ | ๋‹ค์ค‘ ์ธ์Šคํ„ด์ŠคยทSOC2 | -| GCP Secret Manager | ~$0.06/secret/์›” | ๊ฐ•ํ•จ | GCP ํ™˜๊ฒฝ์ผ ๋•Œ | -| HashiCorp Vault | self-host | ๊ฐ•ํ•จ | ์‚ฌ๋‚ด ์šด์˜ | - -V1์€ **๋กœ์ปฌ ์•”ํ˜ธํ™” SQLite**. ๋งˆ์Šคํ„ฐํ‚ค๋Š” `LANG2SQL_MASTER_KEY` env๋กœ ์ฃผ์ž…. ํ‚ค ๋ถ„์‹ค ์‹œ secrets๋งŒ ๋ฌดํšจํ™”๋˜๊ณ  conversation/audit์€ ๊ทธ๋Œ€๋กœ (๋ถ„๋ฆฌ ์ €์žฅ). - ---- - -## 2. ์ƒ์œ„ ์•„ํ‚คํ…์ฒ˜ - -``` - โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” - โ”‚ DISCORD GATEWAY โ”‚ - โ”‚ - DM (private analytics) โ”‚ - โ”‚ - Guild channel / thread โ”‚ - โ”‚ - Slash commands / modals โ”‚ - โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ - โ”‚ events - โ–ผ -โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” -โ”‚ DISCORD ADAPTER (1๊ธ‰) โ”‚ -โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ -โ”‚ โ”‚ Onboarding โ”‚ โ”‚ SessionRouterโ”‚ โ”‚ Streaming Renderer โ”‚ โ”‚ -โ”‚ โ”‚ /connect โ”‚ โ”‚ key ๊ฒฐ์ • โ”‚ โ”‚ ํ† ํฐ โ†’ message edits โ”‚ โ”‚ -โ”‚ โ”‚ modal+test โ”‚ โ”‚ โ”‚ โ”‚ rows โ†’ PNG (ํŽ˜์ด์ง€) โ”‚ โ”‚ -โ”‚ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ -โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ -โ”‚ โ”‚ Interactive โ”‚ โ”‚ Permissions โ”‚ โ”‚ DiscordRateLimit โ”‚ โ”‚ -โ”‚ โ”‚ ask_user / โ”‚ โ”‚ guild-admin/ โ”‚ โ”‚ message edit throttle โ”‚ โ”‚ -โ”‚ โ”‚ show_plan โ”‚ โ”‚ owner/user โ”‚ โ”‚ per-user / per-guild โ”‚ โ”‚ -โ”‚ โ”‚ โ†’ buttons โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ -โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ -โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ - โ”‚ ctx = ContextConcierge.build(session_key, principal) - โ–ผ -โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” -โ”‚ TENANCY LAYER โ”‚ -โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ -โ”‚ โ”‚TenantRegistryโ”‚ โ”‚ExplorerCache โ”‚ โ”‚ EncryptedStore (AES-GCM) โ”‚ โ”‚ -โ”‚ โ”‚keyโ†’DBSpec โ”‚ โ”‚LRU engines โ”‚ โ”‚ secrets + conversation + โ”‚ โ”‚ -โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ audit (๋‹จ์ผ SQLite) โ”‚ โ”‚ -โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ -โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ -โ”‚ โ”‚ ContextConcierge โ€” session_key + principal โ†’ HarnessContext โ”‚ โ”‚ -โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ -โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ - โ–ผ ctx -โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” -โ”‚ HARNESS KERNEL โ”‚ -โ”‚ agent_loop ยท system_prompt(per turn) ยท ToolRegistry(ctx ์ฃผ์ž…) โ”‚ -โ”‚ Session(live layer + pending_call + write-through) ยท Hooks(pub/sub) โ”‚ -โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ - โ”‚ ports โ”‚ โ”‚ - โ–ผ โ–ผ โ–ผ -โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” -โ”‚ SAFETY LAYER (์ œํ’ˆ์˜ moat) โ”‚ -โ”‚ L0 connect (RO role, TLS) โ†’ L1 stmt gate (sqlglot AST) โ”‚ -โ”‚ L2 cost gate (EXPLAIN) โ†’ L3 runtime (timeout/row/byte) โ”‚ -โ”‚ L4 audit (append-only) โ†’ L5 rate limit (token bucket) โ”‚ 
-โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ - โ–ผ โ–ผ โ–ผ -โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” -โ”‚ OUTBOUND ADAPTERS โ”‚ -โ”‚ LLM (openai / nvidia-nim) ยท DB (sqlalchemy-postgres) โ”‚ -โ”‚ Secrets (encrypted-sqlite) ยท SessionStore (encrypted-sqlite) โ”‚ -โ”‚ AuditSink (sqlite, file) โ”‚ -โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ -``` - -๊ทœ์น™: ํ™”์‚ดํ‘œ๋Š” **์•ˆ์ชฝ์œผ๋กœ๋งŒ**. Kernel/Safety๋Š” ์–ด๋Œ‘ํ„ฐ๋ฅผ importํ•˜์ง€ ์•Š๋Š”๋‹ค. ์ƒˆ frontendยท์ƒˆ LLMยท์ƒˆ DBยท์ƒˆ session store๋Š” ์–ด๋Œ‘ํ„ฐ ํ•œ ์žฅ ์ถ”๊ฐ€๋กœ ๋. - ---- - -## 3. ์„ธ์…˜ ์ „๋žต โ€” Hermes-style ์˜์†ํ™” - -### 3.1 session_key ์ •์ฑ… - -| Discord ์ปจํ…์ŠคํŠธ | session_key | DB credential ์ถœ์ฒ˜ | ๋ฉค๋ฒ„ ๊ฐ€์‹œ์„ฑ | ์šฉ๋„ | -|---|---|---|---|---| -| **๋ด‡๊ณผ์˜ DM** | `dm:{user_id}` | ์œ ์ € ๋ณธ์ธ์˜ ๋น„๋ฐ€์ €์žฅ์†Œ ํ•ญ๋ชฉ | ๋ณธ์ธ๋งŒ | **๊ฐœ์ธ ๋ถ„์„ (๊ธฐ๋ณธ ๊ฒฝ๋กœ)** | -| **๊ธธ๋“œ ์ฑ„๋„(๋ฉ”์ธ)** | `chan:{guild_id}:{channel_id}` | ๊ธธ๋“œ ์–ด๋“œ๋ฏผ์ด ๋“ฑ๋กํ•œ ๊ณต์šฉ DB | ์ฑ„๋„ ๋ฉค๋ฒ„ ์ „์› | ํŒ€ ๊ณต์šฉ ๋Œ€์‹œ๋ณด๋“œ ์งˆ์˜ | -| **๊ธธ๋“œ ์Šค๋ ˆ๋“œ** | `thr:{guild_id}:{channel_id}:{thread_id}` | ์ƒ์œ„ ์ฑ„๋„๊ณผ ๋™์ผ | ์Šค๋ ˆ๋“œ ์ฐธ์—ฌ์ž | ๋ณ‘๋ ฌ ์กฐ์‚ฌ (1์กฐ์‚ฌ = 1์Šค๋ ˆ๋“œ) | - -**์›์น™**: -- ๋ฏผ๊ฐ DB ์ž๊ฒฉ์ฆ๋ช…์€ **DM์—์„œ๋งŒ ๋ฐ›์Œ**. ์ฑ„๋„ ๋ฉ”์‹œ์ง€๋Š” ์˜๊ตฌํžˆ history์— ๋‚จ๊ณ  ๋‹ค๋ฅธ ๋ฉค๋ฒ„์—๊ฒŒ ๋ณด์ž„. -- ์œ„ 3๊ฐ€์ง€ ์™ธ์˜ session_key๋Š” ์‚ฌ์šฉ ๊ธˆ์ง€. ๋ณ€ํ˜•์ด ๋Š˜๋ฉด "๋ˆ„๊ฐ€ ๋ฌด์—‡์„ ๋ณด๋Š”์ง€" ์ถ”์  ๋ถˆ๊ฐ€. 
-- **principal** (= 누가 보낸 메시지인가) 은 session_key와 별개로 매 요청 기록. 공용 채널에서도 audit log에 *"이 쿼리는 누가 시켰는지"* 가 남음. - -### 3.2 영속화 메커니즘 (write-through) - -``` -User msg ──▶ Discord adapter ──▶ session.append_message(msg) - │ - ├─▶ in-memory state update - └─▶ store.write(session_id, msg) ← 동기, 즉시 - │ - ▼ - AES-GCM SQLite (append-only segment) - -Tool call/result ──▶ same path, immediate flush -Pause(ask_user) ──▶ pending_call snapshot, immediate flush -LLM stream delta ──▶ buffered in-mem, flush at chunk boundary -``` - -**보장**: 봇 프로세스가 임의 시점에 죽어도 *디스크에 마지막으로 flush된 상태까지는* 안전. 재시작 시: - -1. `SessionStore.iter_active(since=now-30d)` 로 미완 세션 로드 -2. `pending_call` 이 있는 세션은 *사용자 답을 기다리는 상태*로 복원 -3. Discord 어댑터가 DM·채널·스레드별로 reattach -4. 
์‚ฌ์šฉ์ž๊ฐ€ ๋‹ค์Œ ๋ฉ”์‹œ์ง€๋ฅผ ๋ณด๋‚ด๋ฉด **๋Š๊น€ ์—†์ด ์ด์–ด์ง** โ€” ์ด๊ฒŒ Hermes-style ํ•ต์‹ฌ - -**์ €์žฅ ๋ชจ๋ธ** (SQLite ์Šคํ‚ค๋งˆ): - -```sql --- ์„ธ์…˜ ๋ฉ”ํƒ€ -CREATE TABLE sessions ( - id TEXT PRIMARY KEY, -- session_key - principal TEXT NOT NULL, -- ๋งˆ์ง€๋ง‰ ๋ฐœํ™” ์œ ์ € - kind TEXT NOT NULL, -- 'dm' | 'channel' | 'thread' - db_spec_id TEXT, -- secrets.id ์ฐธ์กฐ - created_at TIMESTAMP NOT NULL, - updated_at TIMESTAMP NOT NULL, - pending_call TEXT, -- JSON | NULL - closed_at TIMESTAMP -- NULL = active -); - --- ๋ฉ”์‹œ์ง€ append-only ๋กœ๊ทธ -CREATE TABLE messages ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - session_id TEXT REFERENCES sessions(id), - ts TIMESTAMP NOT NULL, - role TEXT NOT NULL, -- system|user|assistant|tool_result - content TEXT, -- JSON or text - tool_calls TEXT, -- JSON | NULL - tool_call_id TEXT, - principal TEXT -); -CREATE INDEX ON messages(session_id, id); - --- ์•”ํ˜ธํ™” ์ž๊ฒฉ์ฆ๋ช… -CREATE TABLE secrets ( - id TEXT PRIMARY KEY, -- e.g. "user:123:default" - owner TEXT NOT NULL, -- discord user id - label TEXT, -- "prod-pg" ๊ฐ™์€ ํ‘œ์‹œ๋ช… - ciphertext BLOB NOT NULL, -- AES-GCM - iv BLOB NOT NULL, - tag BLOB NOT NULL, - created_at TIMESTAMP NOT NULL, - last_used_at TIMESTAMP -); - --- ๊ฐ์‚ฌ ๋กœ๊ทธ (append-only) -CREATE TABLE audit ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - ts TIMESTAMP NOT NULL, - session_id TEXT, - principal TEXT, - kind TEXT, -- 'sql' | 'tool' | 'pause' | 'safety_block' - payload TEXT, -- JSON - duration_ms INTEGER, - status TEXT -- 'ok' | 'error' | 'blocked' -); -``` - -### 3.3 ๋™์‹œ์„ฑ - -``` -์ „์—ญ concurrency = ๋ฌด์ œํ•œ (asyncio) -per-session = FIFO 1 (๊ฐ™์€ conversation์— ๋™์‹œ LLM ํ˜ธ์ถœ ๊ธˆ์ง€) -per-user rate limit = token bucket (LLM 20/min, DB 60/min) -per-guild rate limit = token bucket (DB 200/min) -LLM stream = ์„ธ์…˜๋งˆ๋‹ค ๋…๋ฆฝ (ํ•œ ์‚ฌ์šฉ์ž๊ฐ€ ํญ์ฃผํ•ด๋„ ๋‹ค๋ฅธ ์‚ฌ์šฉ์ž ์˜ํ–ฅ X) -``` - -์ด ๋ชจ๋ธ์ด๋ฉด **์‚ฌ์šฉ์ž 1000๋ช…๋„ ๋‹จ์ผ ํ”„๋กœ์„ธ์Šค ์ฒ˜๋ฆฌ ๊ฐ€๋Šฅ**. 
ํ•œ ์œ ์ €๊ฐ€ ํญ์ฃผํ•ด๋„ ๋‹ค๋ฅธ ์œ ์ €๋Š” ์˜ํ–ฅ ์—†์Œ. - ---- - -## 4. DB ์—ฐ๊ฒฐ ์˜จ๋ณด๋”ฉ (Discord-native) - -``` -์‚ฌ์šฉ์ž Bot (DM) Tenancy Layer - โ”‚ โ”‚ โ”‚ - โ”‚โ”€โ”€ /connect โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–ถโ”‚ โ”‚ - โ”‚ โ”‚ Modal ์ถœ๋ ฅ: โ”‚ - โ”‚ โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ - โ”‚ โ”‚ โ”‚ label: "prod-pg" โ”‚ โ”‚ - โ”‚ โ”‚ โ”‚ host: โ”‚ โ”‚ - โ”‚ โ”‚ โ”‚ port: 5432 โ”‚ โ”‚ - โ”‚ โ”‚ โ”‚ database: โ”‚ โ”‚ - โ”‚ โ”‚ โ”‚ user: โ”‚ โ”‚ - โ”‚ โ”‚ โ”‚ password: [hide] โ”‚ โ”‚ - โ”‚ โ”‚ โ”‚ schema: (opt) โ”‚ โ”‚ - โ”‚ โ”‚ โ”‚ sslmode: require โ”‚ โ”‚ - โ”‚ โ”‚ โ”‚ โ˜‘ RO ๊ณ„์ •์ธ๊ฐ€์š”? โ”‚ โ”‚ - โ”‚ โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ - โ”‚โ”€โ”€ submit โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–ถโ”‚ โ”‚ - โ”‚ โ”‚โ”€โ”€ build url โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”‚ - โ”‚ โ”‚โ”€โ”€ encrypt (AES-GCM) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–ถโ”‚ secrets.put - โ”‚ โ”‚โ”€โ”€ test SELECT 1 โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–ถโ”‚ Explorer ์ž„์‹œ - โ”‚ โ”‚โ”€โ”€ probe RO (BEGIN; CREATE TABLE __probe; ROLLBACK) - โ”‚ โ”‚ - ์‹คํŒจํ•˜๋ฉด โœ… RO ํ™•์ • โ”‚ - โ”‚ โ”‚ - ์„ฑ๊ณตํ•˜๋ฉด โš ๏ธ ๊ฒฝ๊ณ  + ๋ช…์‹œ์  confirm - โ”‚ โ”‚โ”€โ”€ SHOW TABLES โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”‚ - โ”‚ โ”‚โ—€โ”€โ”€ ok, 17 tables โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”‚ - โ”‚โ—€โ”€โ”€ ephemeral embed โ”€โ”€โ”€โ”‚ โ”‚ - โ”‚ โœ… Connected to โ”‚ โ”‚ - โ”‚ "prod-pg" (17 tbls) โ”‚ โ”‚ - โ”‚ Try: "tables ๋ณด์—ฌ์ค˜"โ”‚ โ”‚ - โ”‚ โ”‚ โ”‚ - โ”‚โ”€โ”€ "tables ๋ณด์—ฌ์ค˜" โ”€โ”€โ”€โ–ถโ”‚ โ”‚ - โ”‚ โ”‚โ”€โ”€ agent_loop(ctx) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ–ถโ”‚ kernel -``` - -**ํ•ต์‹ฌ ๊ฒฐ์ •**: -- **Modal๋กœ๋งŒ** ์ž๊ฒฉ์ฆ๋ช… ์ž…๋ ฅ (์Šฌ๋ž˜์‹œ ๋ช…๋ น ์ธ์ž๋กœ URL ๋ฐ›์ง€ ์•Š์Œ) โ†’ ๋น„๋ฐ€๋ฒˆํ˜ธ๊ฐ€ ์ฑ„๋„ ๋กœ๊ทธ์— ์•ˆ ๋‚จ์Œ. -- **์ž๋™ RO ๊ฒ€์ฆ**: `BEGIN; CREATE TABLE __probe(x int); ROLLBACK;` ์‹œ๋„ โ†’ ์‹คํŒจํ•˜๋ฉด RO ํ™•์ •, ์„ฑ๊ณตํ•˜๋ฉด ๊ฒฝ๊ณ . -- **`db_url`์€ ์ ˆ๋Œ€ ํ‰๋ฌธ ์ €์žฅ ์•ˆ ํ•จ**. 
AES-GCM 암호화 후 SQLite secrets 테이블에 저장. -- **여러 DB 등록 가능**: `/connections` 로 목록, `/use