-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup_project.py
More file actions
124 lines (98 loc) · 3.74 KB
/
setup_project.py
File metadata and controls
124 lines (98 loc) · 3.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""
RAG System Setup and Initialization Script.
Run this script to set up the project environment:
python setup_project.py
"""
import os
import sys
from pathlib import Path
def check_env_file():
    """Ensure a ``.env`` file exists in the current working directory.

    When ``.env`` is missing, it is created as a byte-for-byte copy of
    ``.env.template``. An existing ``.env`` is never modified.

    Returns:
        bool: True if ``.env`` already existed or was created from the
        template; False if ``.env`` is missing and no ``.env.template``
        is available to copy from.
    """
    env_path = Path(".env")
    env_template_path = Path(".env.template")

    if env_path.exists():
        print("✅ .env file already exists")
        return True

    if not env_template_path.exists():
        print("❌ .env.template file not found")
        return False

    print("📝 Creating .env file from template...")
    # Copy raw bytes rather than decoded text: a text round-trip depends on
    # the platform's default encoding (and may rewrite line endings), so a
    # template containing bytes outside that encoding would fail or be altered.
    env_path.write_bytes(env_template_path.read_bytes())
    print("✅ .env file created from template")
    print("⚠️ Please edit .env and add your API keys before running the application")
    return True
def check_dependencies():
    """Check that every required third-party package can be imported.

    Each required module is imported on its own so that all failing
    packages are reported in a single run, instead of stopping at the
    first one that fails.

    Returns:
        bool: True if every required module imported successfully;
        False if at least one import raised ImportError.
    """
    # Imported locally so this function does not depend on the module's
    # top-level import block.
    import importlib

    required_modules = (
        "langchain",
        "pinecone",
        "google.generativeai",
        "streamlit",
    )

    import_errors = []
    for module_name in required_modules:
        try:
            importlib.import_module(module_name)
        except ImportError as e:
            # Keep going: collect every failure so the user sees the full list.
            import_errors.append(e)

    if not import_errors:
        print("✅ All dependencies are installed")
        return True

    for e in import_errors:
        print(f"❌ Missing dependency: {e}")
    print("\nInstall dependencies with:")
    print(" pip install -r requirements.txt")
    return False
def create_sample_docs_directory():
    """Create ``sample_docs/`` with a starter ``sample.txt`` when it is absent.

    If a ``sample_docs`` path already exists in the current working
    directory, nothing is created or modified and a notice is printed.
    Otherwise the directory is created and a sample text document is
    written into it for testing.
    """
    docs_dir = Path("sample_docs")

    # Nothing to do when the path is already present.
    if docs_dir.exists():
        print("✅ sample_docs directory already exists")
        return

    docs_dir.mkdir()
    print("✅ Created sample_docs directory")

    # Seed the new directory with a small document to test against.
    starter_text = """This is a sample document for testing the RAG system.
The RAG (Retrieval-Augmented Generation) system combines document retrieval with
language model generation to provide accurate, context-grounded answers.
Key features of this system:
1. Document Upload - Support for .txt, .pdf, and .docx files
2. Intelligent Chunking - Automatic text splitting for better context
3. Vector Embeddings - Using Google Gemini for semantic understanding
4. Pinecone Storage - Scalable vector database
5. Conversational AI - Google Gemini LLM with RAG
You can upload your own documents and ask questions about them.
The system will search through your documents and provide answers
based only on the content you upload, preventing hallucination."""
    (docs_dir / "sample.txt").write_text(starter_text)
    print("✅ Created sample.txt in sample_docs/")
def main():
    """Run the setup steps in order and print a summary of what to do next.

    Steps: verify the .env file, verify dependencies, then create the
    sample documents directory. The first failing check stops the run.

    Returns:
        bool: False if the .env check or the dependency check fails;
        True once all steps have run.
    """
    rule = "=" * 50

    print(f"\n{rule}")
    print("RAG System - Project Setup")
    print(f"{rule}\n")

    # Step 1: environment file
    env_ready = check_env_file()
    if not env_ready:
        print("\n⚠️ Setup incomplete - please create .env file manually")
        return False
    print()

    # Step 2: third-party dependencies
    deps_ready = check_dependencies()
    if not deps_ready:
        print("\n⚠️ Setup incomplete - please install dependencies")
        return False
    print()

    # Step 3: sample documents
    create_sample_docs_directory()

    # Closing banner and follow-up instructions, one print per entry.
    closing_lines = (
        f"\n{rule}",
        "✅ Setup complete!",
        rule,
        "\nNext steps:",
        "1. Edit .env file with your API keys:",
        " - GOOGLE_API_KEY",
        " - PINECONE_API_KEY",
        " - PINECONE_ENVIRONMENT",
        "\n2. Initialize Pinecone index:",
        " python main.py init",
        "\n3. Start the Streamlit app:",
        " streamlit run app.py",
        "\nOr use the CLI to process documents:",
        " python main.py process sample_docs/",
        f"\n{rule}\n",
    )
    for line in closing_lines:
        print(line)
    return True
if __name__ == "__main__":
    # Map main()'s boolean result onto the process exit status:
    # 0 when setup succeeded, 1 otherwise.
    exit_status = 0 if main() else 1
    sys.exit(exit_status)