nanoagent/setup_test_db.py

223 lines
7.0 KiB
Python

#!/usr/bin/env python3
"""
Setup test database with sample data for demo purposes.
Usage:
# Start PostgreSQL (Docker)
docker run -d --name pg-analyzer-demo \
-e POSTGRES_PASSWORD=demo \
-e POSTGRES_DB=shop \
-p 5432:5432 postgres:15
# Setup test data
export PG_CONNECTION_STRING="postgresql://postgres:demo@localhost:5432/shop"
python setup_test_db.py
# Run demo
python demo.py
"""
import os
import sys
import psycopg2
from psycopg2.extras import execute_values
from datetime import datetime, timedelta
import random
def create_tables(conn):
"""Create test tables."""
cursor = conn.cursor()
cursor.execute("""
DROP TABLE IF EXISTS order_items CASCADE;
DROP TABLE IF EXISTS orders CASCADE;
DROP TABLE IF EXISTS products CASCADE;
DROP TABLE IF EXISTS users CASCADE;
CREATE TABLE users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) UNIQUE NOT NULL,
name VARCHAR(100),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
last_login TIMESTAMP,
country VARCHAR(50)
);
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(200) NOT NULL,
category VARCHAR(50),
price DECIMAL(10,2) NOT NULL,
stock_quantity INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id),
total_amount DECIMAL(10,2) NOT NULL,
status VARCHAR(20) DEFAULT 'pending',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
shipping_country VARCHAR(50)
);
CREATE TABLE order_items (
id SERIAL PRIMARY KEY,
order_id INTEGER REFERENCES orders(id),
product_id INTEGER REFERENCES products(id),
quantity INTEGER NOT NULL,
unit_price DECIMAL(10,2) NOT NULL
);
""")
conn.commit()
cursor.close()
print("✅ Tables created")
def insert_sample_data(conn):
"""Insert realistic sample data."""
cursor = conn.cursor()
# Sample data
countries = ['USA', 'UK', 'Canada', 'Germany', 'France', 'Japan', 'Australia']
categories = ['Electronics', 'Clothing', 'Books', 'Home', 'Sports']
statuses = ['completed', 'completed', 'completed', 'pending', 'cancelled'] # 60/20/20 split
# Generate users
users_data = []
for i in range(100):
email = f"user{i+1}@example.com"
name = f"User {i+1}"
created = datetime.now() - timedelta(days=random.randint(1, 365))
last_login = created + timedelta(days=random.randint(0, 100)) if random.random() > 0.3 else None
country = random.choice(countries)
users_data.append((email, name, created, last_login, country))
execute_values(cursor, """
INSERT INTO users (email, name, created_at, last_login, country)
VALUES %s
""", users_data)
# Generate products
products_data = [
('Wireless Headphones', 'Electronics', 89.99, 150),
('Running Shoes', 'Sports', 129.50, 80),
('Python Programming Book', 'Books', 45.00, 200),
('Coffee Maker', 'Home', 79.99, 45),
('Yoga Mat', 'Sports', 35.00, 120),
('Smart Watch', 'Electronics', 249.99, 60),
('Winter Jacket', 'Clothing', 189.00, 40),
('Desk Lamp', 'Home', 34.99, 90),
('Novel Collection', 'Books', 65.00, 75),
('Tennis Racket', 'Sports', 159.00, 30),
('Bluetooth Speaker', 'Electronics', 59.99, 110),
('Sneakers', 'Clothing', 95.00, 65),
('Cookware Set', 'Home', 149.99, 25),
('Science Fiction Set', 'Books', 55.00, 85),
('Basketball', 'Sports', 29.99, 150),
]
execute_values(cursor, """
INSERT INTO products (name, category, price, stock_quantity)
VALUES %s
""", products_data)
# Generate orders (500 orders)
orders_data = []
for i in range(500):
user_id = random.randint(1, 100)
total = round(random.uniform(25, 500), 2)
status = random.choice(statuses)
created = datetime.now() - timedelta(days=random.randint(1, 180))
country = random.choice(countries)
orders_data.append((user_id, total, status, created, country))
execute_values(cursor, """
INSERT INTO orders (user_id, total_amount, status, created_at, shipping_country)
VALUES %s
""", orders_data)
# Generate order items (2-3 items per order)
items_data = []
for order_id in range(1, 501):
num_items = random.randint(1, 3)
for _ in range(num_items):
product_id = random.randint(1, 15)
quantity = random.randint(1, 5)
unit_price = round(random.uniform(15, 250), 2)
items_data.append((order_id, product_id, quantity, unit_price))
execute_values(cursor, """
INSERT INTO order_items (order_id, product_id, quantity, unit_price)
VALUES %s
""", items_data)
conn.commit()
cursor.close()
print(f"✅ Inserted: 100 users, 15 products, 500 orders, {len(items_data)} order items")
def verify_data(conn):
"""Print summary of created data."""
cursor = conn.cursor()
print("\n📊 Test Database Summary:")
print("-" * 50)
cursor.execute("SELECT COUNT(*) FROM users")
print(f"Users: {cursor.fetchone()[0]:,}")
cursor.execute("SELECT COUNT(*) FROM products")
print(f"Products: {cursor.fetchone()[0]:,}")
cursor.execute("SELECT COUNT(*) FROM orders")
print(f"Orders: {cursor.fetchone()[0]:,}")
cursor.execute("SELECT COUNT(*) FROM order_items")
print(f"Order Items: {cursor.fetchone()[0]:,}")
cursor.execute("SELECT status, COUNT(*) FROM orders GROUP BY status")
print("\nOrder Status Distribution:")
for row in cursor.fetchall():
print(f"{row[0]}: {row[1]}")
cursor.execute("SELECT category, COUNT(*) FROM products GROUP BY category")
print("\nProduct Categories:")
for row in cursor.fetchall():
print(f"{row[0]}: {row[1]}")
cursor.close()
def main():
conn_str = os.environ.get("PG_CONNECTION_STRING")
if not conn_str:
print("Error: PG_CONNECTION_STRING not set")
print("\nExample:")
print(" export PG_CONNECTION_STRING=\"postgresql://postgres:demo@localhost:5432/shop\"")
sys.exit(1)
try:
conn = psycopg2.connect(conn_str)
print("🔌 Connected to database\n")
create_tables(conn)
insert_sample_data(conn)
verify_data(conn)
print("\n✅ Test database setup complete!")
print(f"\nConnection string: {conn_str}")
print("\nNext: Run 'python demo.py' to analyze the data")
conn.close()
except Exception as e:
print(f"❌ Error: {e}")
sys.exit(1)
if __name__ == "__main__":
main()