# trading/quant_strategy_backtest_demo.py

# =============================================================================
# Quantitative Trading — Strategy Development & Backtesting Demo
# 量化交易 — 策略开发与回测演示
# =============================================================================
#
# 本文件是数据管道 (quant_data_pipeline_demo.py) 的续集。
# This file is the sequel to the data pipeline demo.
#
# Topics covered / 涵盖主题:
#   1. Technical Indicators  技术指标  (MA, RSI, MACD, Bollinger Bands)
#   2. Signal Generation     信号生成  (entry & exit rules)
#   3. Two Demo Strategies   两个示范策略:
#        A. Dual Moving Average Crossover  双均线金叉死叉策略
#        B. RSI Mean Reversion             RSI 均值回归策略
#   4. Vectorized Backtest Engine  向量化回测引擎
#   5. Performance Metrics         绩效指标
#        (Sharpe, Sortino, Max Drawdown, Win Rate …)
#   6. Visualization               可视化
#
# Prerequisites / 前置条件:
#   pip install numpy pandas matplotlib scipy
#
# Running / 运行方式:
#   python quant_strategy_backtest_demo.py
# =============================================================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Font fallback chain so the CJK labels used in the charts can be rendered;
# unicode_minus is disabled so minus signs are drawn with the ASCII hyphen.
plt.rcParams.update({
    'font.sans-serif': ['WenQuanYi Zen Hei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})

np.random.seed(42)

# Start-up banner.
_BANNER_RULE = "=" * 70
for _banner_line in (
    _BANNER_RULE,
    "  量化交易策略开发与回测演示",
    "  Quantitative Trading: Strategy Development & Backtesting Demo",
    _BANNER_RULE,
):
    print(_banner_line)


# =============================================================================
# SECTION 0: Synthetic Price Data  合成价格数据
# -----------------------------------------------------------------------------
# We simulate a single stock using Geometric Brownian Motion (几何布朗运动),
# the classical model that underlies the Black-Scholes formula.
#
# GBM formula:
#   dS = μ·S·dt + σ·S·dW
#
# Discrete form (what we actually compute each day):
#   S_t = S_{t-1} · exp( (μ - σ²/2)·dt + σ·√dt·ε )
#
# where:
#   μ  = drift / 年化漂移率 (expected annual return)
#   σ  = volatility / 年化波动率
#   dt = 1/252   (one trading day as a fraction of a year)
#   ε  ~ N(0,1) (standard normal random shock / 标准正态随机扰动)
# =============================================================================

def generate_price_series(
    n_days: int = 1500,
    mu: float = 0.10,       # annualized drift (expected return)
    sigma: float = 0.25,    # annualized volatility
    s0: float = 100.0,      # price level the path is scaled from
    seed: int = 42,
) -> pd.Series:
    """
    Generate a synthetic daily close series via Geometric Brownian Motion.

    Each day applies the discrete GBM step
    ``S_t = S_{t-1} * exp((mu - sigma**2 / 2) * dt + sigma * sqrt(dt) * eps)``
    with ``dt = 1/252`` and ``eps ~ N(0, 1)``.

    Args:
        n_days: Number of daily observations to generate.
        mu: Annualized drift.
        sigma: Annualized volatility.
        s0: Starting level; the first returned value is ``s0`` after one
            day's shock, not ``s0`` itself.
        seed: Seed for the random shocks.

    Returns:
        A Series named ``"close"`` indexed by business days starting at
        2019-01-02.
    """
    # A private RandomState yields the same stream as the legacy
    # np.random.seed(seed) + np.random.randn(...) pair, but does not reseed
    # the process-wide generator as a hidden side effect of calling this.
    rng = np.random.RandomState(seed)
    dt = 1.0 / 252                                       # one trading day in years
    epsilon = rng.randn(n_days)                          # daily standard-normal shocks
    log_returns = (mu - 0.5 * sigma ** 2) * dt + sigma * np.sqrt(dt) * epsilon
    # Cumulative sum of log returns == cumulative product of gross returns.
    prices = s0 * np.exp(np.cumsum(log_returns))

    dates = pd.bdate_range(start="2019-01-02", periods=n_days)
    return pd.Series(prices, index=dates, name="close")


# Simulated close series shared by every section below.
price = generate_price_series()
_price_low, _price_high = price.min(), price.max()
print(f"\n[数据] 生成模拟股票价格: {len(price)} 个交易日")
print(f"       价格区间: {_price_low:.2f} ~ {_price_high:.2f}")


# =============================================================================
# SECTION 1: Technical Indicators  技术指标
# -----------------------------------------------------------------------------
# Technical indicators transform raw price/volume data into signals.
# 技术指标将原始价格/成交量数据转化为交易信号。
#
# They are divided into two broad families:
# 主要分为两大类:
#
#   ① Trend-following indicators  趋势跟随指标
#       → Moving Averages (MA), MACD
#       → Work well in trending markets (趋势市中效果好)
#
#   ② Oscillators / Mean-reversion indicators  震荡/均值回归指标
#       → RSI, Bollinger Bands
#       → Work well in range-bound / choppy markets (震荡市中效果好)
# =============================================================================

# ── 1-A  Simple Moving Average  简单移动平均线 (SMA) ──────────────────────────
#
# SMA_n(t) = (P_{t} + P_{t-1} + … + P_{t-n+1}) / n
#
# The SMA smooths out daily noise to reveal the underlying trend.
# SMA 平滑日内噪音，揭示潜在趋势。
# A longer window → smoother, but lags more behind recent price action.
# 窗口越长 → 越平滑，但对价格变化的反应越滞后。

def sma(prices: pd.Series, window: int) -> pd.Series:
    """Return the simple moving average of *prices* over a trailing *window*.

    The first ``window - 1`` entries are NaN because the window is not yet full.
    """
    trailing = prices.rolling(window)
    return trailing.mean()


# ── 1-B  Exponential Moving Average  指数移动平均线 (EMA) ───────────────────
#
# EMA gives MORE weight to recent prices (recent data matters more).
# EMA 给予近期价格更高权重（近期数据更重要）。
#
# EMA_t = α · P_t + (1 - α) · EMA_{t-1}
# where  α = 2 / (n + 1)   (smoothing factor / 平滑因子)
#
# EMA reacts faster than SMA to price changes.
# EMA 对价格变动的反应比 SMA 更灵敏。

def ema(prices: pd.Series, span: int) -> pd.Series:
    """Return the exponential moving average of *prices*.

    Uses the recursive form (``adjust=False``) with smoothing factor
    ``alpha = 2 / (span + 1)``, seeded with the first observation.
    """
    smoother = prices.ewm(span=span, adjust=False)
    return smoother.mean()


# ── 1-C  RSI  相对强弱指数 (Relative Strength Index) ─────────────────────────
#
# RSI measures the speed and magnitude of recent price changes.
# RSI 衡量近期价格变动的速度和幅度。
#
# Formula:
#   RS  = average_gain / average_loss  (over last n days)
#   RSI = 100 - 100 / (1 + RS)
#
# Interpretation / 指标解读:
#   RSI > 70  →  Overbought  超买  (price may be due for a pullback / 价格可能回调)
#   RSI < 30  →  Oversold   超卖  (price may be due for a bounce  / 价格可能反弹)
#   RSI = 50  →  Neutral    中性

def rsi(prices: pd.Series, window: int = 14) -> pd.Series:
    """
    Compute the Relative Strength Index with Wilder smoothing.

    ``RS = avg_gain / avg_loss`` and ``RSI = 100 - 100 / (1 + RS)``, where the
    averages are exponential with ``alpha = 1 / window``.

    Args:
        prices: Price series.
        window: Smoothing length ``n``.

    Returns:
        RSI values in [0, 100]. Entries are NaN until ``window`` price
        changes have been observed (i.e. the first ``window`` rows), and also
        where both the average gain and average loss are zero.
    """
    delta = prices.diff()                      # day-over-day price change
    gain = delta.clip(lower=0)                 # up-moves, 0 on down days
    loss = -delta.clip(upper=0)                # down-moves as positive numbers

    # alpha = 1/n is Wilder's smoothing (same as an EMA with span 2n - 1).
    # min_periods masks the warm-up: without it the recursion is seeded from a
    # single price change, so the first RSI values are exactly 0 or 100 and
    # would be read as extreme oversold/overbought signals.
    avg_gain = gain.ewm(alpha=1.0 / window, adjust=False, min_periods=window).mean()
    avg_loss = loss.ewm(alpha=1.0 / window, adjust=False, min_periods=window).mean()

    rs = avg_gain / avg_loss                   # inf when there were no losses -> RSI 100
    return 100 - (100 / (1 + rs))


# ── 1-D  MACD  指数平滑异同移动平均线 ────────────────────────────────────────
#
# MACD reveals the relationship between two EMAs.
# MACD 揭示两条 EMA 之间的关系。
#
# Components / 构成:
#   MACD Line  MACD线  = EMA(12) - EMA(26)   (fast minus slow / 快线减慢线)
#   Signal Line 信号线 = EMA(9) of MACD Line  (trigger line / 触发线)
#   Histogram  柱状图  = MACD Line - Signal Line
#
# Trading rules / 交易规则:
#   MACD crosses above Signal  →  Bullish (金叉, buy signal  / 买入信号)
#   MACD crosses below Signal  →  Bearish (死叉, sell signal / 卖出信号)

def macd(prices: pd.Series,
         fast: int = 12, slow: int = 26, signal: int = 9
         ) -> pd.DataFrame:
    """
    Compute the MACD line, its signal line, and the histogram.

    Returns a DataFrame with columns:
      ``macd``      - EMA(fast) minus EMA(slow)
      ``signal``    - EMA(signal) of the MACD line
      ``histogram`` - MACD line minus signal line
    """
    out = pd.DataFrame({"macd": ema(prices, fast) - ema(prices, slow)})
    out["signal"] = ema(out["macd"], signal)
    out["histogram"] = out["macd"] - out["signal"]
    return out


# ── 1-E  Bollinger Bands  布林带 ─────────────────────────────────────────────
#
# Bollinger Bands place upper/lower envelopes around a moving average.
# 布林带在移动平均线上下各画一条"包络线"。
#
# Formula:
#   Middle Band  中轨  = SMA(n)
#   Upper Band   上轨  = SMA(n) + k·σ_n     (k = 2 by default / 默认 k=2)
#   Lower Band   下轨  = SMA(n) - k·σ_n
#
# where σ_n is the rolling standard deviation / 滚动标准差
#
# When price touches the lower band → oversold area (超卖区域)
# When price touches the upper band → overbought area (超买区域)
# Band width (带宽) contracts before explosive moves (波动收窄常预示突破)

def bollinger_bands(prices: pd.Series, window: int = 20, k: float = 2.0
                    ) -> pd.DataFrame:
    """
    Compute Bollinger Bands around a simple moving average.

    Returns a DataFrame with columns ``upper``, ``mid``, ``lower`` and
    ``pct_b``. ``pct_b`` locates the price inside the band: 0 at the lower
    band, 1 at the upper band.
    """
    center = sma(prices, window)                        # middle band
    rolling_std = prices.rolling(window).std()          # rolling standard deviation
    bands = pd.DataFrame({
        "upper": center + k * rolling_std,
        "mid": center,
        "lower": center - k * rolling_std,
    })
    band_width = bands["upper"] - bands["lower"]
    bands["pct_b"] = (prices - bands["lower"]) / band_width
    return bands


# Compute all indicators on our simulated price series
# 对模拟价格序列计算所有指标
sma20 = sma(price, 20)                               # 20-day SMA
sma60 = sma(price, 60)                               # 60-day SMA (longer trend)
rsi14 = rsi(price, 14)                               # 14-day RSI
macd_df = macd(price)                                # MACD (12/26/9)
bb = bollinger_bands(price, window=20, k=2.0)

# Report the first date on which each indicator has a value.
print("\n[指标] 技术指标计算完成:")
for _label, _series in (
    ("SMA20  ", sma20),
    ("SMA60  ", sma60),
    ("RSI14  ", rsi14),
    ("MACD   ", macd_df['macd']),
    ("BollingerBands", bb['mid']),
):
    print(f"  {_label} — 首个有效值日期: {_series.first_valid_index().date()}")


# =============================================================================
# SECTION 2: Strategy A — Dual Moving Average Crossover
#            策略 A — 双均线金叉/死叉策略
# -----------------------------------------------------------------------------
# One of the oldest and most intuitive trend-following strategies.
# 最古老也最直观的趋势跟随策略之一。
#
# Logic / 逻辑:
#   Golden Cross (金叉): short MA crosses ABOVE long MA  → BUY  (做多)
#   Death Cross  (死叉): short MA crosses BELOW long MA  → SELL (平仓)
#
# Rationale / 原理:
#   When the short-term average rises above the long-term average, it signals
#   that recent momentum is stronger than the historical trend → bullish.
#   短期均线上穿长期均线，意味着近期动能强于历史趋势 → 看涨。
#
# Parameters / 参数:
#   SHORT_WINDOW = 20  (fast line / 快线)
#   LONG_WINDOW  = 60  (slow line / 慢线)
# =============================================================================

SHORT_WIN = 20   # fast moving-average window
LONG_WIN = 60    # slow moving-average window

ma_short = sma(price, SHORT_WIN)
ma_long = sma(price, LONG_WIN)

# -- Signal generation --------------------------------------------------------
# The signal is a position, not an event: 1 while the fast MA sits above the
# slow MA (long), 0 otherwise (flat). The position is therefore held until the
# relationship between the two averages reverses. During the slow MA's warm-up
# the comparison involves NaN and evaluates to False, i.e. flat.
raw_signal = ma_short.gt(ma_long).astype(int)

# Trade on the previous day's signal so today's position never depends on
# today's close (avoids lookahead bias). The first day has no prior signal
# and is set to flat.
ma_signal = raw_signal.shift(1).fillna(0)

_days_long = int(ma_signal.sum())
_days_flat = int((ma_signal == 0).sum())
print("\n[策略A] 双均线信号生成完成")
print(f"  多头持仓天数 (Signal=1): {_days_long} 天")
print(f"  空仓天数     (Signal=0): {_days_flat} 天")


# =============================================================================
# SECTION 3: Strategy B — RSI Mean Reversion
#            策略 B — RSI 均值回归策略
# -----------------------------------------------------------------------------
# This is a contrarian strategy: buy when the market seems "too weak",
# sell when it seems "too strong".
# 这是一个逆势策略：市场"跌过头"时买入，"涨过头"时卖出。
#
# Logic / 逻辑:
#   RSI drops below oversold level (超卖线, default 30)  →  BUY  signal
#   RSI rises above overbought level (超买线, default 70) →  SELL signal
#
# This exploits mean reversion (均值回归): extreme prices tend to revert.
# 利用均值回归特性：极端价格倾向于回归均值。
#
# Risk / 风险:
#   In a strong trend, RSI can stay oversold/overbought for long stretches.
#   在强趋势中，RSI 可以长时间停留在超卖/超买区域，造成连续亏损。
# =============================================================================

RSI_OVERSOLD  = 30   # oversold threshold, passed to rsi_signal as `oversold`
RSI_OVERBOUGHT = 70  # overbought threshold, passed to rsi_signal as `overbought`

def rsi_signal(rsi_series: pd.Series,
               oversold: float = 30,
               overbought: float = 70) -> pd.Series:
    """
    Turn an RSI series into a long/short/flat position series.

    State machine, evaluated bar by bar starting from the second bar:
      flat  -> long   when RSI < oversold
      flat  -> short  when RSI > overbought
      long  -> flat   when RSI > 50
      short -> flat   when RSI < 50

    A bar whose RSI is NaN is reported as flat (0) without changing the
    internal state. The first bar is always 0.

    Returns a float Series on the same index with values
    +1 (long), -1 (short) or 0 (flat).
    """
    levels = rsi_series.tolist()
    held = [0.0] * len(levels)
    state = 0   # position carried from bar to bar

    for idx, level in enumerate(levels[1:], start=1):
        if pd.isna(level):
            continue                       # slot stays 0; state is untouched

        if state == 0:
            # Entries are only considered while flat.
            if level < oversold:
                state = 1
            elif level > overbought:
                state = -1
        elif state == 1:
            # Long exits once RSI is back above the neutral line.
            if level > 50:
                state = 0
        elif state == -1:
            # Short exits once RSI is back below the neutral line.
            if level < 50:
                state = 0

        held[idx] = float(state)

    return pd.Series(held, index=rsi_series.index, dtype=float)


rsi_pos = rsi_signal(rsi14, RSI_OVERSOLD, RSI_OVERBOUGHT)

# Lag the position by one bar so each day's position is decided from the
# previous day's RSI (avoids lookahead bias); the first day is flat.
rsi_signal_shifted = rsi_pos.shift(1).fillna(0)

print("\n[策略B] RSI信号生成完成")
for _label, _state in (
    ("多头持仓天数 (Signal=+1)", 1),
    ("空头持仓天数 (Signal=-1)", -1),
    ("空仓天数     (Signal= 0)", 0),
):
    print(f"  {_label}: {int((rsi_signal_shifted == _state).sum())} 天")


# =============================================================================
# SECTION 4: Vectorized Backtest Engine  向量化回测引擎
# -----------------------------------------------------------------------------
# A backtest (回测) simulates how a strategy would have performed
# on historical data. It is the primary tool for validating a strategy
# before risking real money.
# 回测是在历史数据上模拟策略表现的工具，是真实投资前验证策略的主要手段。
#
# Two main backtest styles / 两种主要回测方式:
#
#   ① Vectorized backtest  向量化回测
#       - Compute all positions & P&L as array operations at once (numpy/pandas)
#       - Very fast; good for strategy exploration
#       - 所有仓位和盈亏一次性用数组运算计算，速度极快，适合策略探索
#
#   ② Event-driven backtest  事件驱动回测
#       - Simulate time step-by-step, reacting to each market event
#       - More realistic (handles fills, slippage, latency, order queuing)
#       - 逐笔模拟市场事件，更真实（考虑成交、滑点、延迟等），速度较慢
#
# We use the vectorized approach here for clarity and speed.
# 此处使用向量化方式，兼顾清晰度和速度。
#
# Cost model  交易成本模型:
#   - Commission (佣金): charged each time you trade (per trade)
#   - Slippage   (滑点): the difference between the expected fill price and
#                        the actual fill price (price moves against you)
#   We approximate both as a percentage of the trade value.
#   两者合并近似为交易金额的固定比例。
# =============================================================================

class VectorizedBacktester:
    """
    A simple vectorized backtesting engine for one price series.

    Model:
      * ``signal`` is the position held over each bar (+1 long, -1 short,
        0 flat); the bar's strategy return is ``signal * close-to-close return``.
      * The engine does NOT lag the signal. Callers must shift it themselves
        (e.g. ``signal.shift(1)``) to avoid lookahead bias.
      * Cost is ``|change in signal| * cost_per_trade`` on the bar where the
        position changes, so a flat -> long -> flat round trip costs
        ``2 * cost_per_trade`` and a direct long -> short flip costs the same
        in a single bar.
      * A non-zero position on the very first bar is not charged an entry
        cost (there is no previous bar to diff against).
      * No leverage: the position is 100% of capital when in a trade.

    The backtest runs in the constructor; results are stored as attributes
    (``daily_ret``, ``strat_ret``, ``equity``, ``equity_bh``, ``drawdown``,
    ``n_trades``, ``total_cost``).
    """

    def __init__(
        self,
        prices: pd.Series,
        signal: pd.Series,
        cost_per_trade: float = 0.001,  # one-way cost as a fraction of traded value
        initial_capital: float = 1_000_000.0,
        name: str = "Strategy",
    ):
        """
        Args:
            prices: Close prices.
            signal: Position series; reindexed onto ``prices.index`` with
                missing entries treated as flat (0).
            cost_per_trade: One-way cost (commission + slippage) as a
                fraction of traded value.
            initial_capital: Starting account value for the equity curves.
            name: Label used in the printed report.
        """
        self.prices = prices
        self.signal = signal.reindex(prices.index).fillna(0)
        self.cost_per_trade = cost_per_trade
        self.initial_capital = initial_capital
        self.name = name
        self._run()

    def _run(self):
        """Compute returns, costs, equity curves and drawdown in one pass."""
        prices = self.prices
        signal = self.signal

        # Close-to-close return; the first bar has no predecessor and is 0.
        daily_ret = prices.pct_change().fillna(0)

        # Gross strategy return = position * market return.
        strat_ret_gross = signal * daily_ret

        # Size of the position change on each bar (> 0 means a trade happened).
        position_change = signal.diff().fillna(0).abs()
        cost = position_change * self.cost_per_trade

        strat_ret_net = strat_ret_gross - cost

        # Equity curves: compound (1 + daily return) from the initial capital.
        equity = self.initial_capital * (1 + strat_ret_net).cumprod()
        equity_bh = self.initial_capital * (1 + daily_ret).cumprod()  # buy & hold

        # Drawdown: fractional distance below the running peak (always <= 0).
        rolling_max = equity.cummax()
        drawdown = (equity - rolling_max) / rolling_max

        # Keep results for metrics() and plotting.
        self.daily_ret    = daily_ret
        self.strat_ret    = strat_ret_net
        self.equity       = equity
        self.equity_bh    = equity_bh
        self.drawdown     = drawdown
        self.n_trades     = int((position_change > 0).sum())
        self.total_cost   = cost.sum()

    def metrics(self) -> dict:
        """
        Compute performance metrics and return them as formatted strings.

        Keys (unchanged, bilingual): total return, CAGR, annualized
        volatility, Sharpe, Sortino, max drawdown, Calmar, daily win rate,
        profit factor, number of trades, total cost.

        Conventions: 252 trading days per year, risk-free rate 0.
        Win rate is the fraction of *days* with a positive return, so flat
        days count as non-wins.
        """
        r = self.strat_ret
        eq = self.equity
        n  = len(r)
        years = n / 252.0          # approximate sample length in years

        total_return = (eq.iloc[-1] / self.initial_capital) - 1

        # CAGR = (end / start) ** (1 / years) - 1
        cagr = (1 + total_return) ** (1 / years) - 1

        daily_vol = r.std()
        ann_vol = daily_vol * np.sqrt(252)

        # Sharpe = mean daily excess return / daily std, annualized by sqrt(252).
        risk_free = 0.0
        daily_target = risk_free / 252
        sharpe = (r.mean() - daily_target) / daily_vol * np.sqrt(252) if daily_vol > 0 else 0

        # Sortino penalizes downside risk only. Downside deviation is the
        # root-mean-square of below-target returns taken over ALL
        # observations (above-target days contribute 0). Taking the sample
        # std of just the losing days instead measures their dispersion, not
        # their size: it is 0 when every loss is equal and shrinks as losses
        # become uniformly large.
        shortfall = (r - daily_target).clip(upper=0)
        downside_dev = np.sqrt((shortfall ** 2).mean()) * np.sqrt(252)
        sortino_excess = cagr - risk_free
        if downside_dev > 0:
            sortino = sortino_excess / downside_dev
        else:
            # No below-target day: unbounded for a profitable strategy
            # (same convention as profit_factor below), otherwise 0.
            sortino = np.inf if sortino_excess > 0 else 0.0

        max_dd = self.drawdown.min()           # most negative drawdown

        # Calmar = CAGR / |max drawdown|
        calmar = cagr / abs(max_dd) if max_dd != 0 else 0

        win_rate = (r > 0).mean()

        # Profit factor = sum of positive returns / |sum of negative returns|
        gross_profit = r[r > 0].sum()
        gross_loss   = abs(r[r < 0].sum())
        profit_factor = gross_profit / gross_loss if gross_loss > 0 else np.inf

        return {
            "总收益率   Total Return":    f"{total_return:.2%}",
            "年化收益率 CAGR":            f"{cagr:.2%}",
            "年化波动率 Ann. Volatility":  f"{ann_vol:.2%}",
            "夏普比率  Sharpe Ratio":     f"{sharpe:.3f}",
            "索提诺比率 Sortino Ratio":   f"{sortino:.3f}",
            "最大回撤  Max Drawdown":     f"{max_dd:.2%}",
            "卡玛比率  Calmar Ratio":     f"{calmar:.3f}",
            "胜率      Win Rate":         f"{win_rate:.2%}",
            "盈亏比    Profit Factor":    f"{profit_factor:.3f}",
            "交易次数  # Trades":         str(self.n_trades),
            "总成本    Total Cost":       f"{self.total_cost:.4%}",
        }

    def print_metrics(self):
        """Print the metrics() dictionary as an aligned report."""
        print(f"\n{'=' * 55}")
        print(f"  策略绩效报告 / Performance Report: {self.name}")
        print(f"{'=' * 55}")
        for k, v in self.metrics().items():
            print(f"  {k:<35} {v}")
        print(f"{'=' * 55}")


# =============================================================================
# SECTION 5: Run Backtests  执行回测
# =============================================================================

# ── Strategy A: MA Crossover  双均线策略 ──────────────────────────────────────
# Strategy A: MA crossover (+1 = long, 0 = flat), 0.1% one-way cost.
bt_ma = VectorizedBacktester(
    price,
    ma_signal,
    cost_per_trade=0.001,
    name="双均线策略 (MA Crossover 20/60)",
)

# Strategy B: RSI mean reversion (+1 = long, -1 = short, 0 = flat).
bt_rsi = VectorizedBacktester(
    price,
    rsi_signal_shifted,
    cost_per_trade=0.001,
    name="RSI均值回归策略 (RSI Mean Reversion 14)",
)

# Benchmark: buy & hold, i.e. always long with no trading cost. A strategy has
# to beat this to justify its extra complexity and transaction costs.
_always_long = pd.Series(1, index=price.index, dtype=float)
bt_bh = VectorizedBacktester(
    price,
    _always_long,
    cost_per_trade=0.0,
    name="Buy & Hold 基准 (买入持有)",
)

for _backtest in (bt_ma, bt_rsi, bt_bh):
    _backtest.print_metrics()


# =============================================================================
# SECTION 6: Visualization  可视化
# =============================================================================

# One tall figure with six full-width panels stacked vertically
# (the grid has two columns, but every panel spans both).
fig = plt.figure(figsize=(16, 22))
gs  = gridspec.GridSpec(6, 2, figure=fig, hspace=0.45, wspace=0.3)

# -- Panel 1: price with the two moving averages and the long periods --------
ax1 = fig.add_subplot(gs[0, :])   # span full width
ax1.plot(price, color="#1f77b4", linewidth=1, label="价格 Price")
ax1.plot(ma_short, color="orange", linewidth=1.2, label=f"SMA{SHORT_WIN} (快线)")
ax1.plot(ma_long,  color="red",    linewidth=1.2, label=f"SMA{LONG_WIN}  (慢线)")

# Shade the full price range on every bar where strategy A is long.
ax1.fill_between(
    price.index, price.min(), price.max(),
    where=(ma_signal == 1).values,
    alpha=0.12, color="green", label="多头持仓区间 Long Period"
)
ax1.set_title("策略A — 双均线信号  (MA Crossover Signals)", fontsize=13, fontweight="bold")
ax1.legend(loc="upper left", fontsize=8)
ax1.set_ylabel("价格 Price")
ax1.grid(alpha=0.3)

# -- Panel 2: RSI with threshold lines and shaded extreme zones --------------
ax2 = fig.add_subplot(gs[1, :])
ax2.plot(rsi14, color="purple", linewidth=1)
ax2.axhline(RSI_OVERBOUGHT, color="red",   linestyle="--", linewidth=1, label=f"超买线 {RSI_OVERBOUGHT}")
ax2.axhline(RSI_OVERSOLD,   color="green", linestyle="--", linewidth=1, label=f"超卖线 {RSI_OVERSOLD}")
ax2.axhline(50,             color="gray",  linestyle=":",  linewidth=0.8)
# Fill between the RSI curve and the threshold only where the threshold is breached.
ax2.fill_between(rsi14.index, RSI_OVERSOLD, rsi14,
                  where=(rsi14 < RSI_OVERSOLD), alpha=0.25, color="green",
                  label="超卖区域 Oversold")
ax2.fill_between(rsi14.index, rsi14, RSI_OVERBOUGHT,
                  where=(rsi14 > RSI_OVERBOUGHT), alpha=0.25, color="red",
                  label="超买区域 Overbought")
ax2.set_ylim(0, 100)
ax2.set_title(f"策略B指标 — RSI({14})  均值回归信号  (RSI Mean Reversion)", fontsize=13, fontweight="bold")
ax2.set_ylabel("RSI")
ax2.legend(loc="upper left", fontsize=8, ncol=2)
ax2.grid(alpha=0.3)

# -- Panel 3: Bollinger Bands around the price -------------------------------
ax3 = fig.add_subplot(gs[2, :])
ax3.plot(price,      color="#1f77b4", linewidth=1,   label="价格 Price")
ax3.plot(bb["mid"],  color="orange",  linewidth=1.2, label="中轨 Middle (SMA20)")
ax3.plot(bb["upper"],color="red",     linewidth=1,   linestyle="--", label="上轨 Upper (+2σ)")
ax3.plot(bb["lower"],color="green",   linewidth=1,   linestyle="--", label="下轨 Lower (-2σ)")
ax3.fill_between(price.index, bb["upper"], bb["lower"], alpha=0.07, color="blue")
ax3.set_title("布林带 (Bollinger Bands 20, 2σ)", fontsize=13, fontweight="bold")
ax3.set_ylabel("价格 Price")
ax3.legend(loc="upper left", fontsize=8, ncol=2)
ax3.grid(alpha=0.3)

# -- Panel 4: MACD line, signal line and histogram ---------------------------
ax4 = fig.add_subplot(gs[3, :])
ax4.plot(macd_df["macd"],   color="blue",   linewidth=1,   label="MACD线 (DIF)")
ax4.plot(macd_df["signal"], color="orange", linewidth=1,   label="信号线 (DEA)")
# One color per bar: green for a non-negative histogram value, red otherwise.
colors = ["green" if v >= 0 else "red" for v in macd_df["histogram"]]
ax4.bar(macd_df.index, macd_df["histogram"], color=colors, alpha=0.5, width=1, label="柱状图 Histogram")
ax4.axhline(0, color="black", linewidth=0.8)
ax4.set_title("MACD (12/26/9) — 趋势确认指标  (Trend Confirmation)", fontsize=13, fontweight="bold")
ax4.set_ylabel("MACD")
ax4.legend(loc="upper left", fontsize=8)
ax4.grid(alpha=0.3)

# -- Panel 5: equity curves of both strategies vs. buy & hold ----------------
ax5 = fig.add_subplot(gs[4, :])
ax5.plot(bt_ma.equity,   color="blue",   linewidth=1.5, label="策略A: 双均线  MA Crossover")
ax5.plot(bt_rsi.equity,  color="purple", linewidth=1.5, label="策略B: RSI 均值回归  RSI Reversion")
ax5.plot(bt_bh.equity,   color="gray",   linewidth=1.2, linestyle="--", label="基准: 买入持有  Buy & Hold")
ax5.set_title("净值曲线对比  (Equity Curve Comparison)", fontsize=13, fontweight="bold")
ax5.set_ylabel("账户价值  Portfolio Value (元)")
ax5.legend(loc="upper left", fontsize=9)
# Tick labels show the account value in units of 10,000.
ax5.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f"¥{x/1e4:.0f}万"))
ax5.grid(alpha=0.3)

# -- Panel 6: drawdown curves (filled from the curve up to 0) ----------------
ax6 = fig.add_subplot(gs[5, :])
ax6.fill_between(bt_ma.drawdown.index,  bt_ma.drawdown,  0, alpha=0.5, color="blue",   label="策略A")
ax6.fill_between(bt_rsi.drawdown.index, bt_rsi.drawdown, 0, alpha=0.5, color="purple", label="策略B")
ax6.fill_between(bt_bh.drawdown.index,  bt_bh.drawdown,  0, alpha=0.3, color="gray",   label="Buy & Hold")
ax6.set_title("回撤曲线  (Drawdown Curves)", fontsize=13, fontweight="bold")
ax6.set_ylabel("回撤幅度  Drawdown")
ax6.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f"{x:.0%}"))
ax6.legend(loc="lower left", fontsize=9)
ax6.grid(alpha=0.3)

plt.suptitle(
    "量化交易策略开发与回测演示\nQuantitative Trading: Strategy Development & Backtesting",
    fontsize=15, fontweight="bold", y=1.005,
)
# Save before show().
plt.savefig("strategy_backtest_demo.png", dpi=120, bbox_inches="tight")
plt.show()
print("\n[图表] 已保存至 strategy_backtest_demo.png")


# =============================================================================
# SECTION 7: Walk-Forward Validation  滚动前向验证
# -----------------------------------------------------------------------------
# A critical warning for all new quant traders / 对所有量化新手的重要警告:
#
# In-sample overfitting (样本内过拟合) is the #1 trap in backtesting.
# 样本内过拟合是回测中最大的陷阱。
#
# If you test 100 different parameter sets on the same data and pick the best,
# that "best" result will almost certainly NOT hold out of sample.
# 如果在同一份数据上测试100组参数并选最好的，这个"最优"结果在样本外几乎必然失效。
# This is called data snooping bias / 数据窥探偏差 or p-hacking.
#
# Walk-Forward Validation (滚动前向验证) helps guard against this:
# ┌──────────────────────────────────────────────────────────────────────┐
# │ Window 1:  [TRAIN period 1] → optimize params → TEST on period 1+   │
# │ Window 2:  [TRAIN period 2] → optimize params → TEST on period 2+   │
# │  …repeat, always training on past, testing on future                │
# │  始终用过去数据训练，用未来数据测试                                          │
# └──────────────────────────────────────────────────────────────────────┘
# Only report the concatenated OUT-OF-SAMPLE test results.
# 只汇报样本外（OOS）的测试结果。
#
# Below: a simplified version — we just split into train / test (80/20).
# 下面是简化版：直接按 80/20 切分训练集和测试集。
# =============================================================================

TRAIN_RATIO = 0.8
split_idx = int(len(price) * TRAIN_RATIO)
split_date = price.index[split_idx]

# Chronological split: the first 80% of bars is the training set,
# the remainder is the out-of-sample test set.
price_train = price.iloc[:split_idx]
price_test  = price.iloc[split_idx:]

print(f"\n{'=' * 55}")
print(f"  滚动前向验证 / Walk-Forward Split")
print(f"{'=' * 55}")
print(f"  训练期 Train: {price_train.index[0].date()} → {price_train.index[-1].date()}  ({len(price_train)} 天)")
print(f"  测试期 Test : {price_test.index[0].date()}  → {price_test.index[-1].date()}  ({len(price_test)} 天)")

# -- Optimize MA windows on the TRAIN set (exhaustive grid search) ------------
# The grids are defined once so the printed search space cannot drift from
# the values actually searched.
SHORT_GRID = [5, 10, 15, 20, 30]
LONG_GRID = [30, 40, 50, 60, 80, 100]
_short_txt = "[" + ",".join(str(w) for w in SHORT_GRID) + "]"
_long_txt = "[" + ",".join(str(w) for w in LONG_GRID) + "]"

print("\n[优化] 在训练集上搜索最优均线参数...")
print(f"  (搜索空间: short={_short_txt}, long={_long_txt})")

best_sharpe = -np.inf
best_short  = SHORT_WIN
best_long   = LONG_WIN
results_grid = []

for sw in SHORT_GRID:
    for lw in LONG_GRID:
        if sw >= lw:
            continue   # the fast window must be shorter than the slow one
        ma_s = sma(price_train, sw)
        ma_l = sma(price_train, lw)
        # Lagged by one bar, same convention as strategy A.
        sig  = (ma_s > ma_l).astype(int).shift(1).fillna(0)
        bt   = VectorizedBacktester(price_train, sig, cost_per_trade=0.001, name="grid")
        m    = bt.metrics()
        # metrics() returns formatted strings; parse the Sharpe back to float
        # (3-decimal precision, so near-ties go to the first combination).
        sharpe_val = float(m["夏普比率  Sharpe Ratio"])
        results_grid.append({"short": sw, "long": lw, "sharpe": sharpe_val})
        if sharpe_val > best_sharpe:
            best_sharpe = sharpe_val
            best_short  = sw
            best_long   = lw

print(f"\n  最优参数 (训练集 in-sample): short={best_short}, long={best_long}")
print(f"  训练集夏普比率 In-sample Sharpe: {best_sharpe:.3f}")

# -- Apply the selected windows to the TEST set -------------------------------
# The moving averages are computed on the full series and then sliced to the
# test period. They only look backwards, so this uses no future data, and the
# parameters were chosen on the training set alone. Computing them on
# price_test in isolation would leave the first `best_long` test days as NaN
# warm-up (forced flat), discarding a large part of the out-of-sample window.
_ma_s_full = sma(price, best_short)
_ma_l_full = sma(price, best_long)
_sig_full  = (_ma_s_full > _ma_l_full).astype(int).shift(1).fillna(0)

ma_s_test = _ma_s_full.iloc[split_idx:]
ma_l_test = _ma_l_full.iloc[split_idx:]
sig_test  = _sig_full.iloc[split_idx:]

bt_test   = VectorizedBacktester(price_test, sig_test, cost_per_trade=0.001,
                                  name=f"MA({best_short}/{best_long}) — 测试集 OOS")
bt_test.print_metrics()

print("\n" + "=" * 55)
print("  ⚠️  注意 / WARNING:")
print("  训练集(in-sample)夏普 通常高于 测试集(out-of-sample)夏普")
print("  In-sample Sharpe is typically HIGHER than out-of-sample.")
print("  夏普衰减 (Sharpe decay) 是策略过拟合的典型信号。")
print("  Sharpe decay is a classic sign of overfitting.")
print("=" * 55)


# =============================================================================
# SECTION 8: Return Distribution Analysis  收益率分布分析
# -----------------------------------------------------------------------------
# Before trusting your Sharpe ratio, check if the return distribution
# violates the normality assumption.
# 在相信夏普比率之前，检验收益率分布是否违背正态假设。
#
# Real returns typically show:
# 真实收益率通常呈现:
#   Fat tails (厚尾 / leptokurtosis):  extreme events more frequent than normal
#   Negative skew (负偏态):            crashes are larger than rallies
#
# A high Sharpe ratio on a fat-tailed distribution can be misleading.
# 厚尾分布下的高夏普比率可能具有误导性。
# =============================================================================

fig2, axes = plt.subplots(1, 2, figsize=(14, 5))

# -- Left panel: daily return histograms with a normal overlay ---------------
ax = axes[0]
r_ma  = bt_ma.strat_ret.dropna()
r_bh  = bt_bh.strat_ret.dropna()

ax.hist(r_bh, bins=80, alpha=0.5, color="gray",  density=True, label="Buy & Hold")
ax.hist(r_ma, bins=80, alpha=0.5, color="blue",  density=True, label="策略A: MA Crossover")

# Normal density with the buy & hold mean and std, for visual comparison.
x_range = np.linspace(r_bh.min(), r_bh.max(), 300)
ax.plot(x_range, stats.norm.pdf(x_range, r_bh.mean(), r_bh.std()),
        color="red", linewidth=1.5, linestyle="--", label="正态分布 Normal Dist.")
ax.set_title("收益率分布  (Return Distribution)", fontsize=12)
ax.set_xlabel("日收益率  Daily Return")
ax.set_ylabel("频率密度  Density")
ax.legend(fontsize=8)
ax.grid(alpha=0.3)

# Shape statistics of strategy A's daily returns.
print(f"\n[分布] 策略A — 日收益率统计:")
print(f"  偏度 Skewness : {r_ma.skew():.3f}  (负值=左尾更厚 fat left tail)")
print(f"  峰度 Kurtosis : {r_ma.kurtosis():.3f}  (>0 表示厚尾 fat tails vs normal)")

# -- Right panel: year x month heatmap of compounded monthly returns ---------
ax = axes[1]
# Group by calendar month via a monthly period rather than resample("M"):
# the "M" offset alias is deprecated in recent pandas, while the period
# frequency "M" is not.
_month_key = bt_ma.strat_ret.index.to_period("M")
monthly = bt_ma.strat_ret.groupby(_month_key).apply(lambda x: (1 + x).prod() - 1)
monthly_df = pd.DataFrame({
    "year":  monthly.index.year,
    "month": monthly.index.month,
    "ret":   monthly.values,
})
pivot = monthly_df.pivot(index="year", columns="month", values="ret")
# Force all 12 month columns (missing ones become NaN) so the rename below
# cannot fail with a length mismatch when the sample does not cover every
# calendar month.
pivot = pivot.reindex(columns=range(1, 13))
pivot.columns = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]

import matplotlib.colors as mcolors
# Diverging red-white-green map, clipped at +/-15%.
cmap = mcolors.LinearSegmentedColormap.from_list("rg", ["#d73027","#ffffff","#1a9850"])
im = ax.imshow(pivot.values, cmap=cmap, aspect="auto",
                vmin=-0.15, vmax=0.15)
ax.set_xticks(range(12))
ax.set_xticklabels(pivot.columns, fontsize=8)
ax.set_yticks(range(len(pivot.index)))
ax.set_yticklabels(pivot.index, fontsize=9)
# Annotate each populated cell; switch to white text on strongly colored cells.
for i in range(len(pivot.index)):
    for j in range(12):
        v = pivot.values[i, j]
        if not np.isnan(v):
            ax.text(j, i, f"{v:.1%}", ha="center", va="center", fontsize=6,
                    color="black" if abs(v) < 0.08 else "white")
ax.set_title("策略A月度收益热力图\n(Monthly Return Heatmap)", fontsize=11)
plt.colorbar(im, ax=ax, format=plt.FuncFormatter(lambda x, _: f"{x:.0%}"))

plt.tight_layout()
plt.savefig("return_distribution.png", dpi=120, bbox_inches="tight")
plt.show()
print("[图表] 已保存至 return_distribution.png")


# =============================================================================
# SECTION 9: Summary & Next Steps  总结与后续
# =============================================================================

# Closing recap: the workflow steps covered by this demo and suggested
# follow-up topics, framed by two 70-character rules.
_rule = "=" * 70
_summary_text = f"""
{_rule}
  总结 Summary
{_rule}
  本 Demo 演示了量化策略开发与回测的完整流程：

  ① 技术指标计算  Technical Indicators
     SMA / EMA / RSI / MACD / Bollinger Bands

  ② 信号生成  Signal Generation
     策略A: 双均线金叉死叉 (MA Crossover) — 趋势跟随
     策略B: RSI 超买超卖  (RSI Reversion) — 均值回归

  ③ 向量化回测引擎  Vectorized Backtester
     考虑了交易成本(佣金+滑点)和前视偏差(lookahead bias)

  ④ 绩效指标  Performance Metrics
     Sharpe / Sortino / Max Drawdown / Calmar / Win Rate / Profit Factor

  ⑤ 前向验证  Walk-Forward Validation
     训练集优化参数 → 测试集验证 → 防止过拟合

  ⑥ 收益率分布  Return Distribution
     偏度/峰度检验，月度热力图

  下一步学习方向  Next Steps:
  ──────────────────────────────────────────────────────────────
  • 因子选股策略 (Alpha Factor Models) — Fama-French, Momentum
  • 组合优化     (Portfolio Optimization) — Mean-Variance, Risk Parity
  • 事件驱动回测 (Event-Driven Backtesting) — more realistic execution
  • 机器学习信号 (ML-based Signals) — XGBoost, LSTM for return prediction
  • 风险管理     (Risk Management) — Position sizing, Stop-loss, VaR
{_rule}
"""
print(_summary_text)