trading/quant_strategy_backtest_dem...

912 lines
42 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# =============================================================================
# Quantitative Trading — Strategy Development & Backtesting Demo
# 量化交易 — 策略开发与回测演示
# =============================================================================
#
# 本文件是数据管道 (quant_data_pipeline_demo.py) 的续集。
# This file is the sequel to the data pipeline demo.
#
# Topics covered / 涵盖主题:
# 1. Technical Indicators 技术指标 (MA, RSI, MACD, Bollinger Bands)
# 2. Signal Generation 信号生成 (entry & exit rules)
# 3. Two Demo Strategies 两个示范策略:
# A. Dual Moving Average Crossover 双均线金叉死叉策略
# B. RSI Mean Reversion RSI 均值回归策略
# 4. Vectorized Backtest Engine 向量化回测引擎
# 5. Performance Metrics 绩效指标
# (Sharpe, Sortino, Max Drawdown, Win Rate …)
# 6. Visualization 可视化
#
# Prerequisites / 前置条件:
# pip install numpy pandas matplotlib scipy
#
# Running / 运行方式:
# python quant_strategy_backtest_demo.py
# =============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from scipy import stats
import warnings
# Silence all library warnings so the demo's console output stays readable.
warnings.filterwarnings('ignore')

# Font fallback chain (CJK-capable fonts first) for the Chinese chart labels,
# and disable the Unicode minus glyph for tick labels.
plt.rcParams.update({
    'font.sans-serif': ['WenQuanYi Zen Hei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})

# Seed the global NumPy RNG so repeated runs are reproducible.
np.random.seed(42)

# Opening banner.
_banner_rule = "=" * 70
print(_banner_rule)
print(" 量化交易策略开发与回测演示")
print(" Quantitative Trading: Strategy Development & Backtesting Demo")
print(_banner_rule)
# =============================================================================
# SECTION 0: Synthetic Price Data 合成价格数据
# -----------------------------------------------------------------------------
# We simulate a single stock using Geometric Brownian Motion (几何布朗运动),
# the classical model that underlies the Black-Scholes formula.
#
# GBM formula:
# dS = μ·S·dt + σ·S·dW
#
# Discrete form (what we actually compute each day):
# S_t = S_{t-1} · exp( (μ - σ²/2)·dt + σ·√dt·ε )
#
# where:
# μ = drift / 年化漂移率 (expected annual return)
# σ = volatility / 年化波动率
# dt = 1/252 (one trading day as a fraction of a year)
# ε ~ N(0,1) (standard normal random shock / 标准正态随机扰动)
# =============================================================================
def generate_price_series(
    n_days: int = 1500,
    mu: float = 0.10,      # annualized drift (expected return)
    sigma: float = 0.25,   # annualized volatility
    s0: float = 100.0,     # price level the path is compounded from
    seed: int = 42,
) -> pd.Series:
    """
    Generate a synthetic daily close series via Geometric Brownian Motion.

    Each day applies the discrete GBM step
    ``S_t = S_{t-1} * exp((mu - sigma**2 / 2) * dt + sigma * sqrt(dt) * eps)``
    with ``dt = 1/252`` and ``eps ~ N(0, 1)``.

    Args:
        n_days: Number of business days to simulate.
        mu: Annualized drift.
        sigma: Annualized volatility.
        s0: Starting level; the first returned price already includes the
            first day's shock, i.e. it is ``s0 * exp(r_1)`` rather than ``s0``.
        seed: Seed for the random shocks. The same seed always yields the
            same path.

    Returns:
        A ``pd.Series`` named ``"close"`` indexed by ``n_days`` business days
        starting at 2019-01-02.
    """
    # Use a private generator instead of reseeding the process-wide RNG, so
    # calling this function does not disturb other code using np.random.
    # RandomState(seed) yields the same stream as np.random.seed(seed).
    rng = np.random.RandomState(seed)

    dt = 1.0 / 252                      # one trading day as a fraction of a year
    epsilon = rng.randn(n_days)         # daily standard-normal shocks
    log_returns = (mu - 0.5 * sigma ** 2) * dt + sigma * np.sqrt(dt) * epsilon

    # Summing log returns and exponentiating compounds them into a price path.
    prices = s0 * np.exp(np.cumsum(log_returns))

    dates = pd.bdate_range(start="2019-01-02", periods=n_days)
    return pd.Series(prices, index=dates, name="close")
# Build the demo price path with the default GBM parameters and report its
# length and price range.
price = generate_price_series()
_n_obs = len(price)
_low, _high = price.min(), price.max()
print(f"\n[数据] 生成模拟股票价格: {_n_obs} 个交易日")
print(f" 价格区间: {_low:.2f} ~ {_high:.2f}")
# =============================================================================
# SECTION 1: Technical Indicators 技术指标
# -----------------------------------------------------------------------------
# Technical indicators transform raw price/volume data into signals.
# 技术指标将原始价格/成交量数据转化为交易信号。
#
# They are divided into two broad families:
# 主要分为两大类:
#
# ① Trend-following indicators 趋势跟随指标
# → Moving Averages (MA), MACD
# → Work well in trending markets (趋势市中效果好)
#
# ② Oscillators / Mean-reversion indicators 震荡/均值回归指标
# → RSI, Bollinger Bands
# → Work well in range-bound / choppy markets (震荡市中效果好)
# =============================================================================
# ── 1-A Simple Moving Average 简单移动平均线 (SMA) ──────────────────────────
#
# SMA_n(t) = (P_{t} + P_{t-1} + … + P_{t-n+1}) / n
#
# The SMA smooths out daily noise to reveal the underlying trend.
# SMA 平滑每日价格噪音，揭示潜在趋势。
# A longer window → smoother, but lags more behind recent price action.
# 窗口越长 → 越平滑,但对价格变化的反应越滞后。
def sma(prices: pd.Series, window: int) -> pd.Series:
    """Return the simple moving average of ``prices`` over ``window`` rows.

    The result has the same index as ``prices``; entries before the rolling
    window is full are NaN.
    """
    rolling_view = prices.rolling(window=window)
    return rolling_view.mean()
# ── 1-B Exponential Moving Average 指数移动平均线 (EMA) ───────────────────
#
# EMA gives MORE weight to recent prices (recent data matters more).
# EMA 给予近期价格更高权重(近期数据更重要)。
#
# EMA_t = α · P_t + (1 - α) · EMA_{t-1}
# where α = 2 / (n + 1) (smoothing factor / 平滑因子)
#
# EMA reacts faster than SMA to price changes.
# EMA 对价格变动的反应比 SMA 更灵敏。
def ema(prices: pd.Series, span: int) -> pd.Series:
    """Return the exponential moving average of ``prices``.

    Uses the recursive form ``EMA_t = a * P_t + (1 - a) * EMA_{t-1}`` with
    ``a = 2 / (span + 1)`` (``adjust=False``), seeded with the first price.
    """
    smoother = prices.ewm(span=span, adjust=False)
    return smoother.mean()
# ── 1-C RSI 相对强弱指数 (Relative Strength Index) ─────────────────────────
#
# RSI measures the speed and magnitude of recent price changes.
# RSI 衡量近期价格变动的速度和幅度。
#
# Formula:
# RS = average_gain / average_loss (over last n days)
# RSI = 100 - 100 / (1 + RS)
#
# Interpretation / 指标解读:
# RSI > 70 → Overbought 超买 (price may be due for a pullback / 价格可能回调)
# RSI < 30 → Oversold 超卖 (price may be due for a bounce / 价格可能反弹)
# RSI = 50 → Neutral 中性
def rsi(prices: pd.Series, window: int = 14) -> pd.Series:
    """
    Compute the Relative Strength Index with Wilder-style smoothing.

    Daily price changes are split into gains and losses, each is smoothed
    with a recursive exponential average using ``alpha = 1 / window``, and
    the ratio ``RS = avg_gain / avg_loss`` is mapped to
    ``RSI = 100 - 100 / (1 + RS)``.

    Args:
        prices: Price series.
        window: Smoothing length ``n``; the smoothing factor is ``1 / n``.

    Returns:
        RSI values on the index of ``prices``. The first entry is NaN because
        no price change exists for it. No further warm-up period is masked.
    """
    change = prices.diff()
    ups = change.clip(lower=0)        # keep only the up-moves
    downs = -change.clip(upper=0)     # magnitude of the down-moves

    # alpha = 1/n matches an EMA with span = 2n - 1.
    alpha = 1.0 / window
    smoothed_up = ups.ewm(alpha=alpha, adjust=False).mean()
    smoothed_down = downs.ewm(alpha=alpha, adjust=False).mean()

    relative_strength = smoothed_up / smoothed_down
    return 100 - (100 / (1 + relative_strength))
# ── 1-D MACD 指数平滑异同移动平均线 ────────────────────────────────────────
#
# MACD reveals the relationship between two EMAs.
# MACD 揭示两条 EMA 之间的关系。
#
# Components / 构成:
# MACD Line MACD线 = EMA(12) - EMA(26) (fast minus slow / 快线减慢线)
# Signal Line 信号线 = EMA(9) of MACD Line (trigger line / 触发线)
# Histogram 柱状图 = MACD Line - Signal Line
#
# Trading rules / 交易规则:
# MACD crosses above Signal → Bullish (金叉, buy signal / 买入信号)
# MACD crosses below Signal → Bearish (死叉, sell signal / 卖出信号)
def macd(prices: pd.Series,
         fast: int = 12, slow: int = 26, signal: int = 9
         ) -> pd.DataFrame:
    """
    Compute the MACD line, its signal line and the histogram.

    Args:
        prices: Price series.
        fast: Span of the fast EMA.
        slow: Span of the slow EMA.
        signal: Span of the EMA applied to the MACD line.

    Returns:
        DataFrame with columns ``macd`` (fast EMA minus slow EMA),
        ``signal`` (EMA of ``macd``) and ``histogram`` (``macd - signal``).
    """
    macd_line = ema(prices, fast) - ema(prices, slow)
    signal_line = ema(macd_line, signal)
    columns = {
        "macd": macd_line,
        "signal": signal_line,
        "histogram": macd_line - signal_line,
    }
    return pd.DataFrame(columns)
# ── 1-E Bollinger Bands 布林带 ─────────────────────────────────────────────
#
# Bollinger Bands place upper/lower envelopes around a moving average.
# 布林带在移动平均线上下各画一条"包络线"。
#
# Formula:
# Middle Band 中轨 = SMA(n)
# Upper Band 上轨 = SMA(n) + k·σ_n (k = 2 by default / 默认 k=2)
# Lower Band 下轨 = SMA(n) - k·σ_n
#
# where σ_n is the rolling standard deviation / 滚动标准差
#
# When price touches the lower band → oversold area (超卖区域)
# When price touches the upper band → overbought area (超买区域)
# Band width (带宽) contracts before explosive moves (波动收窄常预示突破)
def bollinger_bands(prices: pd.Series, window: int = 20, k: float = 2.0
                    ) -> pd.DataFrame:
    """
    Compute Bollinger Bands and the %B position indicator.

    Args:
        prices: Price series.
        window: Length of the rolling mean / standard deviation.
        k: Band half-width in rolling standard deviations.

    Returns:
        DataFrame with columns ``upper``, ``mid``, ``lower`` and ``pct_b``.
        ``pct_b`` is ``(price - lower) / (upper - lower)``: 0 at the lower
        band and 1 at the upper band.
    """
    center = sma(prices, window)
    rolling_std = prices.rolling(window).std()
    upper_band = center + k * rolling_std
    lower_band = center - k * rolling_std

    # Position of the price inside the band.
    band_position = (prices - lower_band) / (upper_band - lower_band)

    return pd.DataFrame({
        "upper": upper_band, "mid": center, "lower": lower_band, "pct_b": band_position
    })
# Compute every indicator on the simulated price series.
sma20 = sma(price, 20)       # 20-day SMA
sma60 = sma(price, 60)       # 60-day SMA (longer trend)
rsi14 = rsi(price, 14)       # 14-day RSI
macd_df = macd(price)        # MACD (12/26/9)
bb = bollinger_bands(price, window=20, k=2.0)

# Report the first date on which each indicator has a usable (non-NaN) value.
print("\n[指标] 技术指标计算完成:")
_first_valid_report = (
    (" SMA20 — 首个有效值日期: ", sma20),
    (" SMA60 — 首个有效值日期: ", sma60),
    (" RSI14 — 首个有效值日期: ", rsi14),
    (" MACD — 首个有效值日期: ", macd_df['macd']),
    (" BollingerBands — 首个有效值日期: ", bb['mid']),
)
for _prefix, _series in _first_valid_report:
    print(f"{_prefix}{_series.first_valid_index().date()}")
# =============================================================================
# SECTION 2: Strategy A — Dual Moving Average Crossover
# 策略 A — 双均线金叉/死叉策略
# -----------------------------------------------------------------------------
# One of the oldest and most intuitive trend-following strategies.
# 最古老也最直观的趋势跟随策略之一。
#
# Logic / 逻辑:
# Golden Cross (金叉): short MA crosses ABOVE long MA → BUY (做多)
# Death Cross (死叉): short MA crosses BELOW long MA → SELL (平仓)
#
# Rationale / 原理:
# When the short-term average rises above the long-term average, it signals
# that recent momentum is stronger than the historical trend → bullish.
# 短期均线上穿长期均线,意味着近期动能强于历史趋势 → 看涨。
#
# Parameters / 参数:
# SHORT_WIN = 20 (fast line / 快线)
# LONG_WIN = 60 (slow line / 慢线)
# =============================================================================
# Window lengths of the fast and slow moving averages.
SHORT_WIN, LONG_WIN = 20, 60

ma_short, ma_long = (sma(price, _win) for _win in (SHORT_WIN, LONG_WIN))

# Signal generation.
#
# The signal is a held position: 1 means long, 0 means out of the market.
# raw_signal is 1 on every day the fast MA is above the slow MA and 0
# otherwise (including the warm-up days where either MA is still NaN).
# The position is kept until the relation flips.
raw_signal = ma_short.gt(ma_long).astype(int)

# Lag by one bar so today's position only depends on yesterday's averages
# (avoids lookahead bias); the first bar has no prior signal and is flat.
ma_signal = raw_signal.shift(1).fillna(0)

_long_days = int(ma_signal.sum())
_flat_days = int((ma_signal == 0).sum())
print("\n[策略A] 双均线信号生成完成")
print(f" 多头持仓天数 (Signal=1): {_long_days}")
print(f" 空仓天数 (Signal=0): {_flat_days}")
# =============================================================================
# SECTION 3: Strategy B — RSI Mean Reversion
# 策略 B — RSI 均值回归策略
# -----------------------------------------------------------------------------
# This is a contrarian strategy: buy when the market seems "too weak",
# sell when it seems "too strong".
# 这是一个逆势策略:市场"跌过头"时买入,"涨过头"时卖出。
#
# Logic / 逻辑:
# RSI drops below oversold level (超卖线, default 30) → BUY signal
# RSI rises above overbought level (超买线, default 70) → SELL signal
#
# This exploits mean reversion (均值回归): extreme prices tend to revert.
# 利用均值回归特性:极端价格倾向于回归均值。
#
# Risk / 风险:
# In a strong trend, RSI can stay oversold/overbought for long stretches.
# 在强趋势中，RSI 可以长时间停留在超卖/超买区域，造成连续亏损。
# =============================================================================
RSI_OVERSOLD = 30     # oversold threshold
RSI_OVERBOUGHT = 70   # overbought threshold


def rsi_signal(rsi_series: pd.Series,
               oversold: float = 30,
               overbought: float = 70) -> pd.Series:
    """
    Turn an RSI series into a long / short / flat position series.

    Rules, evaluated once per observation in order:
      * flat and RSI < ``oversold``    -> enter long  (+1)
      * flat and RSI > ``overbought``  -> enter short (-1)
      * long and RSI > 50              -> exit to flat (0)
      * short and RSI < 50             -> exit to flat (0)

    A NaN observation is reported as flat (0) for that row, but the position
    state is carried through it, so an open position resumes on the next
    valid observation.

    Args:
        rsi_series: RSI values.
        oversold: Level below which a long position is opened.
        overbought: Level above which a short position is opened.

    Returns:
        Float Series on the index of ``rsi_series`` with values in
        {+1, 0, -1}. The positions are NOT lagged; shift them before
        using them for trading to avoid lookahead.
    """
    values = rsi_series.to_numpy(dtype=float)
    # Writing into a NumPy buffer avoids one slow ``Series.iloc`` assignment
    # per row; the buffer starts at 0 so skipped (NaN) rows remain flat.
    positions = np.zeros(len(values), dtype=float)

    state = 0  # current position: +1 long, -1 short, 0 flat
    # Iterate from the very first observation so a valid leading RSI value is
    # evaluated like any other (previously index 0 was silently skipped).
    for i, level in enumerate(values):
        if np.isnan(level):
            continue

        if state == 0:
            # Entry rules.
            if level < oversold:
                state = 1
            elif level > overbought:
                state = -1
        elif state == 1 and level > 50:
            # Long exits once RSI has recovered above the neutral line.
            state = 0
        elif state == -1 and level < 50:
            # Short exits once RSI has fallen back below the neutral line.
            state = 0

        positions[i] = state

    return pd.Series(positions, index=rsi_series.index)
rsi_pos = rsi_signal(rsi14, RSI_OVERSOLD, RSI_OVERBOUGHT)

# Lag the position by one bar so each day trades on the previous day's RSI
# (avoids lookahead bias); the first bar has no prior signal and is flat.
rsi_signal_shifted = rsi_pos.shift(1).fillna(0)

# Number of days spent long (+1), short (-1) and flat (0).
_days_in_state = {
    _state: int(rsi_signal_shifted.eq(_state).sum()) for _state in (1, -1, 0)
}
print("\n[策略B] RSI信号生成完成")
print(f" 多头持仓天数 (Signal=+1): {_days_in_state[1]}")
print(f" 空头持仓天数 (Signal=-1): {_days_in_state[-1]}")
print(f" 空仓天数 (Signal= 0): {_days_in_state[0]}")
# =============================================================================
# SECTION 4: Vectorized Backtest Engine 向量化回测引擎
# -----------------------------------------------------------------------------
# A backtest (回测) simulates how a strategy would have performed
# on historical data. It is the primary tool for validating a strategy
# before risking real money.
# 回测是在历史数据上模拟策略表现的工具,是真实投资前验证策略的主要手段。
#
# Two main backtest styles / 两种主要回测方式:
#
# ① Vectorized backtest 向量化回测
# - Compute all positions & P&L as array operations at once (numpy/pandas)
# - Very fast; good for strategy exploration
# - 所有仓位和盈亏一次性用数组运算计算,速度极快,适合策略探索
#
# ② Event-driven backtest 事件驱动回测
# - Simulate time step-by-step, reacting to each market event
# - More realistic (handles fills, slippage, latency, order queuing)
# - 逐笔模拟市场事件,更真实(考虑成交、滑点、延迟等),速度较慢
#
# We use the vectorized approach here for clarity and speed.
# 此处使用向量化方式,兼顾清晰度和速度。
#
# Cost model 交易成本模型:
# - Commission (佣金): charged each time you trade (per trade)
# - Slippage (滑点): the difference between the expected fill price and
# the actual fill price (price moves against you)
# We approximate both as a percentage of the trade value.
# 两者合并近似为交易金额的固定比例。
# =============================================================================
class VectorizedBacktester:
    """
    Minimal vectorized backtesting engine for a single instrument.

    Assumptions:
      * ``signal`` is the position held on each bar (+1 long, -1 short,
        0 flat). It must ALREADY be lagged by the caller (e.g. ``shift(1)``)
        so it only uses information available before the bar; this class
        does not shift it.
      * Each bar earns ``signal * close-to-close return``.
      * Every change of position is charged ``cost_per_trade`` per unit of
        position changed: a flat -> long -> flat round trip costs
        2 x ``cost_per_trade`` and a long -> short flip costs
        2 x ``cost_per_trade`` on that bar. A position already open on the
        first bar is charged as an entry from flat.
      * No leverage: the position is 100% of capital whenever non-zero.

    Attributes set by the run:
      daily_ret   close-to-close return of the instrument
      strat_ret   strategy return after costs
      equity      strategy equity curve (starts from ``initial_capital``)
      equity_bh   buy-and-hold equity curve of the instrument
      drawdown    fractional distance of ``equity`` from its running peak (<= 0)
      n_trades    number of bars on which the position changed
      total_cost  sum of the per-bar cost fractions
    """

    def __init__(
        self,
        prices: pd.Series,
        signal: pd.Series,
        cost_per_trade: float = 0.001,         # one-way cost (commission + slippage)
        initial_capital: float = 1_000_000.0,  # starting capital
        name: str = "Strategy",
    ):
        """Store the inputs, align ``signal`` to ``prices`` and run the backtest.

        Dates missing from ``signal`` are treated as flat (0).
        """
        self.prices = prices
        self.signal = signal.reindex(prices.index).fillna(0)
        self.cost_per_trade = cost_per_trade
        self.initial_capital = initial_capital
        self.name = name
        self._run()

    def _run(self):
        """Compute returns, costs, equity curves and drawdown."""
        prices = self.prices
        signal = self.signal

        # Daily close-to-close return; the first bar has no predecessor -> 0.
        daily_ret = prices.pct_change().fillna(0)

        # Gross strategy return = position x market return.
        strat_ret_gross = signal * daily_ret

        # Size of the position change on each bar. ``diff()`` is NaN only on
        # the first bar; filling it with the signal itself treats the first
        # bar as a move from flat, so an initial position pays its entry cost.
        position_change = signal.diff().fillna(signal).abs()

        # Cost is proportional to the amount of position traded.
        cost = position_change * self.cost_per_trade

        # Net strategy return.
        strat_ret_net = strat_ret_gross - cost

        # Equity curves: compound (1 + return) from the initial capital.
        equity = self.initial_capital * (1 + strat_ret_net).cumprod()
        equity_bh = self.initial_capital * (1 + daily_ret).cumprod()  # buy & hold benchmark

        # Drawdown: fractional distance from the running peak (always <= 0).
        rolling_max = equity.cummax()
        drawdown = (equity - rolling_max) / rolling_max

        # Store results for later analysis.
        self.daily_ret = daily_ret
        self.strat_ret = strat_ret_net
        self.equity = equity
        self.equity_bh = equity_bh
        self.drawdown = drawdown
        self.n_trades = int((position_change > 0).sum())
        self.total_cost = cost.sum()

    # ── Performance metrics ──────────────────────────────────────────────────
    #
    # No single number captures the full picture, so several are reported:
    #   Total Return   total gain over the sample
    #   CAGR           annualized compounded growth rate
    #   Sharpe Ratio   return per unit of total risk
    #   Sortino Ratio  return per unit of downside risk only
    #   Max Drawdown   worst peak-to-trough decline
    #   Calmar Ratio   CAGR / |Max Drawdown|
    #   Win Rate       fraction of days with positive P&L
    #   Profit Factor  total profit / total loss
    def metrics(self) -> dict:
        """Compute the performance metrics.

        Returns:
            Dict mapping a bilingual metric label to its value formatted as
            a string (percentages with ``%``, ratios with three decimals).
        """
        r = self.strat_ret
        eq = self.equity
        n = len(r)
        years = n / 252.0  # approximate sample length in years

        # Total return.
        total_return = (eq.iloc[-1] / self.initial_capital) - 1

        # CAGR = (EndValue / StartValue)^(1/years) - 1
        cagr = (1 + total_return) ** (1 / years) - 1

        # Annualized volatility.
        daily_std = r.std()
        ann_vol = daily_std * np.sqrt(252)

        # Sharpe = mean daily excess return / daily std x sqrt(252).
        # The risk-free rate is taken as 0 for simplicity.
        risk_free = 0.0
        sharpe = (r.mean() - risk_free / 252) / daily_std * np.sqrt(252) if daily_std > 0 else 0

        # Sortino: like Sharpe but only penalizes downside volatility. Here
        # the numerator is CAGR and the denominator is the annualized std of
        # the negative daily returns. A NaN denominator (fewer than two
        # negative days) fails the ``> 0`` check and yields 0.
        downside = r[r < 0]
        downside_std = downside.std() * np.sqrt(252) if len(downside) > 0 else 1e-9
        sortino = (cagr - risk_free) / downside_std if downside_std > 0 else 0

        # Maximum drawdown: most negative value of the drawdown series.
        max_dd = self.drawdown.min()

        # Calmar = CAGR / |Max Drawdown|.
        calmar = cagr / abs(max_dd) if max_dd != 0 else 0

        # Win rate: share of ALL days with a positive return; flat days count
        # as non-winning days.
        win_rate = (r > 0).mean()

        # Profit factor = sum of positive returns / |sum of negative returns|.
        gross_profit = r[r > 0].sum()
        gross_loss = abs(r[r < 0].sum())
        profit_factor = gross_profit / gross_loss if gross_loss > 0 else np.inf

        return {
            "总收益率 Total Return": f"{total_return:.2%}",
            "年化收益率 CAGR": f"{cagr:.2%}",
            "年化波动率 Ann. Volatility": f"{ann_vol:.2%}",
            "夏普比率 Sharpe Ratio": f"{sharpe:.3f}",
            "索提诺比率 Sortino Ratio": f"{sortino:.3f}",
            "最大回撤 Max Drawdown": f"{max_dd:.2%}",
            "卡玛比率 Calmar Ratio": f"{calmar:.3f}",
            "胜率 Win Rate": f"{win_rate:.2%}",
            "盈亏比 Profit Factor": f"{profit_factor:.3f}",
            "交易次数 # Trades": str(self.n_trades),
            "总成本 Total Cost": f"{self.total_cost:.4%}",
        }

    def print_metrics(self):
        """Pretty-print the performance report to stdout."""
        print(f"\n{'=' * 55}")
        print(f" 策略绩效报告 / Performance Report: {self.name}")
        print(f"{'=' * 55}")
        for k, v in self.metrics().items():
            print(f" {k:<35} {v}")
        print(f"{'=' * 55}")
# =============================================================================
# SECTION 5: Run Backtests 执行回测
# =============================================================================
# ── Strategy A: MA Crossover 双均线策略 ──────────────────────────────────────
# Strategy A: MA crossover (signal is +1 long / 0 flat), 0.1% cost per trade.
bt_ma = VectorizedBacktester(
    price, ma_signal,
    cost_per_trade=0.001, name="双均线策略 (MA Crossover 20/60)",
)

# Strategy B: RSI mean reversion (signal is +1 long / -1 short / 0 flat).
bt_rsi = VectorizedBacktester(
    price, rsi_signal_shifted,
    cost_per_trade=0.001, name="RSI均值回归策略 (RSI Mean Reversion 14)",
)

# Benchmark: buy & hold. Always long, no trading costs. A strategy must beat
# this to justify its extra complexity and transaction costs.
_always_long = pd.Series(1, index=price.index, dtype=float)
bt_bh = VectorizedBacktester(
    price, _always_long,
    cost_per_trade=0.0, name="Buy & Hold 基准 (买入持有)",
)

# Print the three reports in the same order they were built.
for _report in (bt_ma, bt_rsi, bt_bh):
    _report.print_metrics()
# =============================================================================
# SECTION 6: Visualization 可视化
# =============================================================================
# Build one tall figure with six full-width panels stacked vertically:
# price + MAs, RSI, Bollinger Bands, MACD, equity curves, drawdowns.
# The figure is saved to strategy_backtest_demo.png and then shown.
fig = plt.figure(figsize=(16, 22))
gs = gridspec.GridSpec(6, 2, figure=fig, hspace=0.45, wspace=0.3)
# ── Plot 1: price with both moving averages and the long periods ─────────────
ax1 = fig.add_subplot(gs[0, :]) # span full width
ax1.plot(price, color="#1f77b4", linewidth=1, label="价格 Price")
ax1.plot(ma_short, color="orange", linewidth=1.2, label=f"SMA{SHORT_WIN} (快线)")
ax1.plot(ma_long, color="red", linewidth=1.2, label=f"SMA{LONG_WIN} (慢线)")
# Shade, over the full price range, the days on which strategy A is long.
ax1.fill_between(
price.index, price.min(), price.max(),
where=(ma_signal == 1).values,
alpha=0.12, color="green", label="多头持仓区间 Long Period"
)
ax1.set_title("策略A — 双均线信号 (MA Crossover Signals)", fontsize=13, fontweight="bold")
ax1.legend(loc="upper left", fontsize=8)
ax1.set_ylabel("价格 Price")
ax1.grid(alpha=0.3)
# ── Plot 2: RSI with overbought / oversold thresholds and the 50 midline ─────
ax2 = fig.add_subplot(gs[1, :])
ax2.plot(rsi14, color="purple", linewidth=1)
ax2.axhline(RSI_OVERBOUGHT, color="red", linestyle="--", linewidth=1, label=f"超买线 {RSI_OVERBOUGHT}")
ax2.axhline(RSI_OVERSOLD, color="green", linestyle="--", linewidth=1, label=f"超卖线 {RSI_OVERSOLD}")
ax2.axhline(50, color="gray", linestyle=":", linewidth=0.8)
# Fill the area between the RSI curve and a threshold only where the
# threshold is breached.
ax2.fill_between(rsi14.index, RSI_OVERSOLD, rsi14,
where=(rsi14 < RSI_OVERSOLD), alpha=0.25, color="green",
label="超卖区域 Oversold")
ax2.fill_between(rsi14.index, rsi14, RSI_OVERBOUGHT,
where=(rsi14 > RSI_OVERBOUGHT), alpha=0.25, color="red",
label="超买区域 Overbought")
ax2.set_ylim(0, 100)
ax2.set_title(f"策略B指标 — RSI({14}) 均值回归信号 (RSI Mean Reversion)", fontsize=13, fontweight="bold")
ax2.set_ylabel("RSI")
ax2.legend(loc="upper left", fontsize=8, ncol=2)
ax2.grid(alpha=0.3)
# ── Plot 3: price inside its Bollinger Bands ─────────────────────────────────
ax3 = fig.add_subplot(gs[2, :])
ax3.plot(price, color="#1f77b4", linewidth=1, label="价格 Price")
ax3.plot(bb["mid"], color="orange", linewidth=1.2, label="中轨 Middle (SMA20)")
ax3.plot(bb["upper"],color="red", linewidth=1, linestyle="--", label="上轨 Upper (+2σ)")
ax3.plot(bb["lower"],color="green", linewidth=1, linestyle="--", label="下轨 Lower (-2σ)")
ax3.fill_between(price.index, bb["upper"], bb["lower"], alpha=0.07, color="blue")
ax3.set_title("布林带 (Bollinger Bands 20, 2σ)", fontsize=13, fontweight="bold")
ax3.set_ylabel("价格 Price")
ax3.legend(loc="upper left", fontsize=8, ncol=2)
ax3.grid(alpha=0.3)
# ── Plot 4: MACD line, signal line and histogram ─────────────────────────────
ax4 = fig.add_subplot(gs[3, :])
ax4.plot(macd_df["macd"], color="blue", linewidth=1, label="MACD线 (DIF)")
ax4.plot(macd_df["signal"], color="orange", linewidth=1, label="信号线 (DEA)")
# One bar color per observation: green for non-negative, red for negative.
colors = ["green" if v >= 0 else "red" for v in macd_df["histogram"]]
ax4.bar(macd_df.index, macd_df["histogram"], color=colors, alpha=0.5, width=1, label="柱状图 Histogram")
ax4.axhline(0, color="black", linewidth=0.8)
ax4.set_title("MACD (12/26/9) — 趋势确认指标 (Trend Confirmation)", fontsize=13, fontweight="bold")
ax4.set_ylabel("MACD")
ax4.legend(loc="upper left", fontsize=8)
ax4.grid(alpha=0.3)
# ── Plot 5: equity curves of both strategies against buy & hold ──────────────
ax5 = fig.add_subplot(gs[4, :])
ax5.plot(bt_ma.equity, color="blue", linewidth=1.5, label="策略A: 双均线 MA Crossover")
ax5.plot(bt_rsi.equity, color="purple", linewidth=1.5, label="策略B: RSI 均值回归 RSI Reversion")
ax5.plot(bt_bh.equity, color="gray", linewidth=1.2, linestyle="--", label="基准: 买入持有 Buy & Hold")
ax5.set_title("净值曲线对比 (Equity Curve Comparison)", fontsize=13, fontweight="bold")
ax5.set_ylabel("账户价值 Portfolio Value (元)")
ax5.legend(loc="upper left", fontsize=9)
# NOTE(review): tick values are divided by 1e4 while the axis label says
# "元" and the tick text carries no unit suffix — confirm the intended unit.
ax5.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f"¥{x/1e4:.0f}"))
ax5.grid(alpha=0.3)
# ── Plot 6: drawdown curves (filled from the curve up to zero) ───────────────
ax6 = fig.add_subplot(gs[5, :])
ax6.fill_between(bt_ma.drawdown.index, bt_ma.drawdown, 0, alpha=0.5, color="blue", label="策略A")
ax6.fill_between(bt_rsi.drawdown.index, bt_rsi.drawdown, 0, alpha=0.5, color="purple", label="策略B")
ax6.fill_between(bt_bh.drawdown.index, bt_bh.drawdown, 0, alpha=0.3, color="gray", label="Buy & Hold")
ax6.set_title("回撤曲线 (Drawdown Curves)", fontsize=13, fontweight="bold")
ax6.set_ylabel("回撤幅度 Drawdown")
ax6.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f"{x:.0%}"))
ax6.legend(loc="lower left", fontsize=9)
ax6.grid(alpha=0.3)
# Figure-level title, then write the PNG before displaying the window.
plt.suptitle(
"量化交易策略开发与回测演示\nQuantitative Trading: Strategy Development & Backtesting",
fontsize=15, fontweight="bold", y=1.005,
)
plt.savefig("strategy_backtest_demo.png", dpi=120, bbox_inches="tight")
plt.show()
print("\n[图表] 已保存至 strategy_backtest_demo.png")
# =============================================================================
# SECTION 7: Walk-Forward Validation 滚动前向验证
# -----------------------------------------------------------------------------
# A critical warning for all new quant traders / 对所有量化新手的重要警告:
#
# In-sample overfitting (样本内过拟合) is the #1 trap in backtesting.
# 样本内过拟合是回测中最大的陷阱。
#
# If you test 100 different parameter sets on the same data and pick the best,
# that "best" result will almost certainly NOT hold out of sample.
# 如果在同一份数据上测试100组参数并选最好的，这个"最优"结果在样本外几乎必然失效。
# This is called data snooping bias / 数据窥探偏差 or p-hacking.
#
# Walk-Forward Validation (滚动前向验证) helps guard against this:
# ┌──────────────────────────────────────────────────────────────────────┐
# │ Window 1: [TRAIN period 1] → optimize params → TEST on period 1+ │
# │ Window 2: [TRAIN period 2] → optimize params → TEST on period 2+ │
# │ …repeat, always training on past, testing on future │
# │ 始终用过去数据训练,用未来数据测试 │
# └──────────────────────────────────────────────────────────────────────┘
# Only report the concatenated OUT-OF-SAMPLE test results.
# 只汇报样本外 (OOS) 的测试结果。
#
# Below: a simplified version — we just split into train / test (80/20).
# 下面是简化版:直接按 80/20 切分训练集和测试集。
# =============================================================================
# Chronological 80/20 split: the first part is used to pick parameters, the
# remainder is held out as the out-of-sample (OOS) test period.
TRAIN_RATIO = 0.8
split_idx = int(len(price) * TRAIN_RATIO)
split_date = price.index[split_idx]   # first date of the test period
price_train = price.iloc[:split_idx]
price_test = price.iloc[split_idx:]

print(f"\n{'=' * 55}")
print(" 滚动前向验证 / Walk-Forward Split")
print(f"{'=' * 55}")
# Start and end dates are separated by " ~ " (they used to be printed fused
# together, e.g. "2019-01-022023-08-07").
print(f" 训练期 Train: {price_train.index[0].date()} ~ {price_train.index[-1].date()} ({len(price_train)} 天)")
print(f" 测试期 Test : {price_test.index[0].date()} ~ {price_test.index[-1].date()} ({len(price_test)} 天)")

# ── Optimize the MA windows on the TRAIN set ─────────────────────────────────
#
# Plain grid search: evaluate every (short, long) combination and keep the
# one with the highest in-sample Sharpe ratio.
print("\n[优化] 在训练集上搜索最优均线参数...")
print(" (搜索空间: short=[5,10,15,20,30], long=[30,40,50,60,80,100])")
best_sharpe = -np.inf
best_short = SHORT_WIN
best_long = LONG_WIN
results_grid = []
for sw in [5, 10, 15, 20, 30]:
    for lw in [30, 40, 50, 60, 80, 100]:
        if sw >= lw:
            continue  # the short window must be strictly shorter than the long one
        ma_s = sma(price_train, sw)
        ma_l = sma(price_train, lw)
        # Lag by one bar so each day trades on the previous day's averages.
        sig = (ma_s > ma_l).astype(int).shift(1).fillna(0)
        bt = VectorizedBacktester(price_train, sig, cost_per_trade=0.001, name="grid")
        m = bt.metrics()
        # metrics() returns formatted strings; parse the Sharpe back to float.
        sharpe_val = float(m["夏普比率 Sharpe Ratio"])
        results_grid.append({"short": sw, "long": lw, "sharpe": sharpe_val})
        if sharpe_val > best_sharpe:
            best_sharpe = sharpe_val
            best_short = sw
            best_long = lw
print(f"\n 最优参数 (训练集 in-sample): short={best_short}, long={best_long}")
print(f" 训练集夏普比率 In-sample Sharpe: {best_sharpe:.3f}")

# ── Apply the best parameters to the TEST set ────────────────────────────────
#
# The moving averages are computed on the full history and THEN sliced to the
# test period. Each average only looks backwards and the signal is lagged by
# one bar, so no future data leaks in. Computing them on the test slice alone
# would leave the first `best_long` test bars without any signal.
_ma_s_full = sma(price, best_short)
_ma_l_full = sma(price, best_long)
ma_s_test = _ma_s_full.iloc[split_idx:]
ma_l_test = _ma_l_full.iloc[split_idx:]
sig_test = (_ma_s_full > _ma_l_full).astype(int).shift(1).fillna(0).iloc[split_idx:]
bt_test = VectorizedBacktester(price_test, sig_test, cost_per_trade=0.001,
                               name=f"MA({best_short}/{best_long}) — 测试集 OOS")
bt_test.print_metrics()

print("\n" + "=" * 55)
print(" ⚠️ 注意 / WARNING:")
print(" 训练集(in-sample)夏普 通常高于 测试集(out-of-sample)夏普")
print(" In-sample Sharpe is typically HIGHER than out-of-sample.")
print(" 夏普衰减 (Sharpe decay) 是策略过拟合的典型信号。")
print(" Sharpe decay is a classic sign of overfitting.")
print("=" * 55)
# =============================================================================
# SECTION 8: Return Distribution Analysis 收益率分布分析
# -----------------------------------------------------------------------------
# Before trusting your Sharpe ratio, check if the return distribution
# violates the normality assumption.
# 在相信夏普比率之前,检验收益率分布是否违背正态假设。
#
# Real returns typically show:
# 真实收益率通常呈现:
# Fat tails (厚尾 / leptokurtosis): extreme events more frequent than normal
# Negative skew (负偏态): crashes are larger than rallies
#
# A high Sharpe ratio on a fat-tailed distribution can be misleading.
# 厚尾分布下的高夏普比率可能具有误导性。
# =============================================================================
# Two-panel figure: daily-return histogram (left) and a year x month heatmap
# of strategy A's monthly returns (right). Saved to return_distribution.png.
fig2, axes = plt.subplots(1, 2, figsize=(14, 5))

# ── Left panel: daily-return histograms ──────────────────────────────────────
ax = axes[0]
r_ma = bt_ma.strat_ret.dropna()
r_bh = bt_bh.strat_ret.dropna()
ax.hist(r_bh, bins=80, alpha=0.5, color="gray", density=True, label="Buy & Hold")
ax.hist(r_ma, bins=80, alpha=0.5, color="blue", density=True, label="策略A: MA Crossover")

# Overlay a normal density with buy & hold's mean and standard deviation.
x_range = np.linspace(r_bh.min(), r_bh.max(), 300)
ax.plot(x_range, stats.norm.pdf(x_range, r_bh.mean(), r_bh.std()),
        color="red", linewidth=1.5, linestyle="--", label="正态分布 Normal Dist.")
ax.set_title("收益率分布 (Return Distribution)", fontsize=12)
ax.set_xlabel("日收益率 Daily Return")
ax.set_ylabel("频率密度 Density")
ax.legend(fontsize=8)
ax.grid(alpha=0.3)

# Shape statistics of strategy A's daily returns.
print(f"\n[分布] 策略A — 日收益率统计:")
print(f" 偏度 Skewness : {r_ma.skew():.3f} (负值=左尾更厚 fat left tail)")
print(f" 峰度 Kurtosis : {r_ma.kurtosis():.3f} (>0 表示厚尾 fat tails vs normal)")

# ── Right panel: monthly-return heatmap ──────────────────────────────────────
ax = axes[1]

# Compound daily returns within each calendar month. Grouping by monthly
# Period avoids the "M" resample offset alias, which newer pandas releases
# deprecate in favour of "ME".
_strat_ret = bt_ma.strat_ret
monthly = (1 + _strat_ret).groupby(_strat_ret.index.to_period("M")).prod() - 1
monthly_df = pd.DataFrame({
    "year": monthly.index.year,
    "month": monthly.index.month,
    "ret": monthly.values,
})

# Rows = years, columns = months. Reindex to all 12 months so the label
# assignment below is valid even when some month never occurs in the sample;
# missing cells become NaN and are left blank.
pivot = monthly_df.pivot(index="year", columns="month", values="ret")
pivot = pivot.reindex(columns=range(1, 13))
pivot.columns = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]

import matplotlib.colors as mcolors
# Diverging red -> white -> green map, clipped at +/-15%.
cmap = mcolors.LinearSegmentedColormap.from_list("rg", ["#d73027","#ffffff","#1a9850"])
im = ax.imshow(pivot.values, cmap=cmap, aspect="auto",
               vmin=-0.15, vmax=0.15)
ax.set_xticks(range(12))
ax.set_xticklabels(pivot.columns, fontsize=8)
ax.set_yticks(range(len(pivot.index)))
ax.set_yticklabels(pivot.index, fontsize=9)

# Annotate each populated cell; switch to white text on strongly colored cells.
for i in range(len(pivot.index)):
    for j in range(12):
        v = pivot.values[i, j]
        if not np.isnan(v):
            ax.text(j, i, f"{v:.1%}", ha="center", va="center", fontsize=6,
                    color="black" if abs(v) < 0.08 else "white")
ax.set_title("策略A月度收益热力图\n(Monthly Return Heatmap)", fontsize=11)
plt.colorbar(im, ax=ax, format=plt.FuncFormatter(lambda x, _: f"{x:.0%}"))

plt.tight_layout()
plt.savefig("return_distribution.png", dpi=120, bbox_inches="tight")
plt.show()
print("[图表] 已保存至 return_distribution.png")
# =============================================================================
# SECTION 9: Summary & Next Steps 总结与后续
# =============================================================================
# Closing recap: print the topics this demo covered and suggested follow-up
# topics, framed by 70-character rules.
print(f"""
{'=' * 70}
总结 Summary
{'=' * 70}
本 Demo 演示了量化策略开发与回测的完整流程:
① 技术指标计算 Technical Indicators
SMA / EMA / RSI / MACD / Bollinger Bands
② 信号生成 Signal Generation
策略A: 双均线金叉死叉 (MA Crossover) — 趋势跟随
策略B: RSI 超买超卖 (RSI Reversion) — 均值回归
③ 向量化回测引擎 Vectorized Backtester
考虑了交易成本(佣金+滑点)和前视偏差(lookahead bias)
④ 绩效指标 Performance Metrics
Sharpe / Sortino / Max Drawdown / Calmar / Win Rate / Profit Factor
⑤ 前向验证 Walk-Forward Validation
训练集优化参数 → 测试集验证 → 防止过拟合
⑥ 收益率分布 Return Distribution
偏度/峰度检验,月度热力图
下一步学习方向 Next Steps:
──────────────────────────────────────────────────────────────
• 因子选股策略 (Alpha Factor Models) — Fama-French, Momentum
• 组合优化 (Portfolio Optimization) — Mean-Variance, Risk Parity
• 事件驱动回测 (Event-Driven Backtesting) — more realistic execution
• 机器学习信号 (ML-based Signals) — XGBoost, LSTM for return prediction
• 风险管理 (Risk Management) — Position sizing, Stop-loss, VaR
{'=' * 70}
""")