# =============================================================================
# Quantitative Trading — Strategy Development & Backtesting Demo
# 量化交易 — 策略开发与回测演示
# =============================================================================
#
# 本文件是数据管道 (quant_data_pipeline_demo.py) 的续集。
# This file is the sequel to the data pipeline demo.
#
# Topics covered / 涵盖主题:
#   1. Technical Indicators    技术指标 (MA, RSI, MACD, Bollinger Bands)
#   2. Signal Generation       信号生成 (entry & exit rules)
#   3. Two Demo Strategies     两个示范策略:
#        A. Dual Moving Average Crossover   双均线金叉死叉策略
#        B. RSI Mean Reversion              RSI 均值回归策略
#   4. Vectorized Backtest Engine   向量化回测引擎
#   5. Performance Metrics     绩效指标
#        (Sharpe, Sortino, Max Drawdown, Win Rate …)
#   6. Visualization           可视化
#
# Prerequisites / 前置条件:
#   pip install numpy pandas matplotlib scipy
#
# Running / 运行方式:
#   python quant_strategy_backtest_demo.py
# =============================================================================
|
||
|
||
import warnings

import matplotlib.colors as mcolors
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
|
||
# Silence library warnings so the demo output stays readable.
warnings.filterwarnings('ignore')

# Font fallback chain able to render the Chinese labels used in the charts,
# and ASCII minus signs so negative tick labels render with those fonts.
plt.rcParams.update({
    'font.sans-serif': ['WenQuanYi Zen Hei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})

# Seed NumPy's global generator so every run is reproducible.
np.random.seed(42)

# Opening banner.
print(
    "=" * 70,
    " 量化交易策略开发与回测演示",
    " Quantitative Trading: Strategy Development & Backtesting Demo",
    "=" * 70,
    sep="\n",
)
|
||
|
||
|
||
# =============================================================================
|
||
# SECTION 0: Synthetic Price Data 合成价格数据
|
||
# -----------------------------------------------------------------------------
|
||
# We simulate a single stock using Geometric Brownian Motion (几何布朗运动),
|
||
# the classical model that underlies the Black-Scholes formula.
|
||
#
|
||
# GBM formula:
|
||
# dS = μ·S·dt + σ·S·dW
|
||
#
|
||
# Discrete form (what we actually compute each day):
|
||
# S_t = S_{t-1} · exp( (μ - σ²/2)·dt + σ·√dt·ε )
|
||
#
|
||
# where:
|
||
# μ = drift / 年化漂移率 (expected annual return)
|
||
# σ = volatility / 年化波动率
|
||
# dt = 1/252 (one trading day as a fraction of a year)
|
||
# ε ~ N(0,1) (standard normal random shock / 标准正态随机扰动)
|
||
# =============================================================================
|
||
|
||
def generate_price_series(
    n_days: int = 1500,
    mu: float = 0.10,      # annualised drift (expected return)
    sigma: float = 0.25,   # annualised volatility
    s0: float = 100.0,     # price level the path is scaled from
    seed: int = 42,
) -> pd.Series:
    """Simulate a daily closing-price path with Geometric Brownian Motion.

    Each day's log-return is ``(mu - sigma**2 / 2) * dt + sigma * sqrt(dt) * eps``
    with ``dt = 1/252`` and ``eps`` a standard-normal draw; prices are
    ``s0 * exp(cumulative log-return)``.

    Args:
        n_days: Number of business days to generate.
        mu: Annualised drift.
        sigma: Annualised volatility.
        s0: Scale of the path (the first value already includes day-one's return).
        seed: Seed passed to ``np.random.seed``. Note that this reseeds
            NumPy's *global* generator.

    Returns:
        Series named ``"close"`` indexed by business days starting 2019-01-02.
    """
    np.random.seed(seed)
    step = 1.0 / 252                       # one trading day as a year fraction
    shocks = np.random.randn(n_days)       # daily standard-normal shocks
    drift_term = (mu - 0.5 * sigma ** 2) * step
    diffusion_term = sigma * np.sqrt(step) * shocks
    path = s0 * np.exp(np.cumsum(drift_term + diffusion_term))

    return pd.Series(
        path,
        index=pd.bdate_range(start="2019-01-02", periods=n_days),
        name="close",
    )
|
||
|
||
|
||
# Build the simulated price series used by the rest of the script and
# report its length and price range.
price = generate_price_series()
print(
    f"\n[数据] 生成模拟股票价格: {len(price)} 个交易日",
    f" 价格区间: {price.min():.2f} ~ {price.max():.2f}",
    sep="\n",
)
|
||
|
||
|
||
# =============================================================================
|
||
# SECTION 1: Technical Indicators 技术指标
|
||
# -----------------------------------------------------------------------------
|
||
# Technical indicators transform raw price/volume data into signals.
|
||
# 技术指标将原始价格/成交量数据转化为交易信号。
|
||
#
|
||
# They are divided into two broad families:
|
||
# 主要分为两大类:
|
||
#
|
||
# ① Trend-following indicators 趋势跟随指标
|
||
# → Moving Averages (MA), MACD
|
||
# → Work well in trending markets (趋势市中效果好)
|
||
#
|
||
# ② Oscillators / Mean-reversion indicators 震荡/均值回归指标
|
||
# → RSI, Bollinger Bands
|
||
# → Work well in range-bound / choppy markets (震荡市中效果好)
|
||
# =============================================================================
|
||
|
||
# ── 1-A Simple Moving Average 简单移动平均线 (SMA) ──────────────────────────
|
||
#
|
||
# SMA_n(t) = (P_{t} + P_{t-1} + … + P_{t-n+1}) / n
|
||
#
|
||
# The SMA smooths out daily noise to reveal the underlying trend.
|
||
# SMA 平滑日内噪音,揭示潜在趋势。
|
||
# A longer window → smoother, but lags more behind recent price action.
|
||
# 窗口越长 → 越平滑,但对价格变化的反应越滞后。
|
||
|
||
def sma(prices: pd.Series, window: int) -> pd.Series:
    """Return the simple moving average of ``prices`` over ``window`` rows.

    The first ``window - 1`` entries are NaN because the rolling window is
    not yet full.
    """
    trailing = prices.rolling(window=window)
    return trailing.mean()
|
||
|
||
|
||
# ── 1-B Exponential Moving Average 指数移动平均线 (EMA) ───────────────────
|
||
#
|
||
# EMA gives MORE weight to recent prices (recent data matters more).
|
||
# EMA 给予近期价格更高权重(近期数据更重要)。
|
||
#
|
||
# EMA_t = α · P_t + (1 - α) · EMA_{t-1}
|
||
# where α = 2 / (n + 1) (smoothing factor / 平滑因子)
|
||
#
|
||
# EMA reacts faster than SMA to price changes.
|
||
# EMA 对价格变动的反应比 SMA 更灵敏。
|
||
|
||
def ema(prices: pd.Series, span: int) -> pd.Series:
    """Return the exponential moving average of ``prices``.

    Uses the recursive form ``EMA_t = a * P_t + (1 - a) * EMA_{t-1}`` with
    ``a = 2 / (span + 1)`` (``adjust=False``), seeded by the first price.
    """
    weighted = prices.ewm(span=span, adjust=False)
    return weighted.mean()
|
||
|
||
|
||
# ── 1-C RSI 相对强弱指数 (Relative Strength Index) ─────────────────────────
|
||
#
|
||
# RSI measures the speed and magnitude of recent price changes.
|
||
# RSI 衡量近期价格变动的速度和幅度。
|
||
#
|
||
# Formula:
|
||
# RS = average_gain / average_loss (over last n days)
|
||
# RSI = 100 - 100 / (1 + RS)
|
||
#
|
||
# Interpretation / 指标解读:
|
||
# RSI > 70 → Overbought 超买 (price may be due for a pullback / 价格可能回调)
|
||
# RSI < 30 → Oversold 超卖 (price may be due for a bounce / 价格可能反弹)
|
||
# RSI = 50 → Neutral 中性
|
||
|
||
def rsi(prices: pd.Series, window: int = 14) -> pd.Series:
    """Compute the Relative Strength Index with Wilder-style smoothing.

    Daily gains and losses are each smoothed with an exponential average of
    weight ``1 / window`` (``adjust=False``); ``RS`` is their ratio and
    ``RSI = 100 - 100 / (1 + RS)``.

    Args:
        prices: Price series.
        window: Smoothing length ``n`` (smoothing weight is ``1 / n``).

    Returns:
        RSI series on a 0-100 scale; the first entry is NaN because the
        first price has no previous value to difference against.
    """
    change = prices.diff()
    ups = change.clip(lower=0)        # up-moves only, down-moves become 0
    downs = -change.clip(upper=0)     # down-moves as positive magnitudes

    weight = 1.0 / window             # Wilder smoothing == EMA with alpha = 1/n
    smoothed_up = ups.ewm(alpha=weight, adjust=False).mean()
    smoothed_down = downs.ewm(alpha=weight, adjust=False).mean()

    strength = smoothed_up / smoothed_down
    return 100 - (100 / (1 + strength))
|
||
|
||
|
||
# ── 1-D MACD 指数平滑异同移动平均线 ────────────────────────────────────────
|
||
#
|
||
# MACD reveals the relationship between two EMAs.
|
||
# MACD 揭示两条 EMA 之间的关系。
|
||
#
|
||
# Components / 构成:
|
||
# MACD Line MACD线 = EMA(12) - EMA(26) (fast minus slow / 快线减慢线)
|
||
# Signal Line 信号线 = EMA(9) of MACD Line (trigger line / 触发线)
|
||
# Histogram 柱状图 = MACD Line - Signal Line
|
||
#
|
||
# Trading rules / 交易规则:
|
||
# MACD crosses above Signal → Bullish (金叉, buy signal / 买入信号)
|
||
# MACD crosses below Signal → Bearish (死叉, sell signal / 卖出信号)
|
||
|
||
def macd(prices: pd.Series,
         fast: int = 12, slow: int = 26, signal: int = 9
         ) -> pd.DataFrame:
    """Compute the MACD line, its signal line and the histogram.

    Args:
        prices: Price series.
        fast: Span of the fast EMA.
        slow: Span of the slow EMA.
        signal: Span of the EMA applied to the MACD line.

    Returns:
        DataFrame with columns ``macd`` (fast EMA minus slow EMA),
        ``signal`` (EMA of ``macd``) and ``histogram`` (``macd - signal``).
    """
    macd_line = ema(prices, fast) - ema(prices, slow)
    trigger = ema(macd_line, signal)
    columns = {
        "macd": macd_line,
        "signal": trigger,
        "histogram": macd_line - trigger,
    }
    return pd.DataFrame(columns)
|
||
|
||
|
||
# ── 1-E Bollinger Bands 布林带 ─────────────────────────────────────────────
|
||
#
|
||
# Bollinger Bands place upper/lower envelopes around a moving average.
|
||
# 布林带在移动平均线上下各画一条"包络线"。
|
||
#
|
||
# Formula:
|
||
# Middle Band 中轨 = SMA(n)
|
||
# Upper Band 上轨 = SMA(n) + k·σ_n (k = 2 by default / 默认 k=2)
|
||
# Lower Band 下轨 = SMA(n) - k·σ_n
|
||
#
|
||
# where σ_n is the rolling standard deviation / 滚动标准差
|
||
#
|
||
# When price touches the lower band → oversold area (超卖区域)
|
||
# When price touches the upper band → overbought area (超买区域)
|
||
# Band width (带宽) contracts before explosive moves (波动收窄常预示突破)
|
||
|
||
def bollinger_bands(prices: pd.Series, window: int = 20, k: float = 2.0
                    ) -> pd.DataFrame:
    """Compute Bollinger Bands and the %B position indicator.

    Args:
        prices: Price series.
        window: Rolling window length for the mean and standard deviation.
        k: Band half-width in rolling standard deviations.

    Returns:
        DataFrame with columns ``upper``, ``mid``, ``lower`` and ``pct_b``,
        where ``pct_b = (price - lower) / (upper - lower)`` (0 at the lower
        band, 1 at the upper band).
    """
    centre = sma(prices, window)
    half_width = k * prices.rolling(window).std()   # pandas default (sample) std
    top = centre + half_width
    bottom = centre - half_width
    position_in_band = (prices - bottom) / (top - bottom)
    return pd.DataFrame({
        "upper": top,
        "mid": centre,
        "lower": bottom,
        "pct_b": position_in_band,
    })
|
||
|
||
|
||
# Evaluate every indicator on the simulated price series.
sma20 = sma(price, 20)                          # 20-day SMA
sma60 = sma(price, 60)                          # 60-day SMA (longer trend)
rsi14 = rsi(price, 14)                          # 14-day RSI
macd_df = macd(price)                           # MACD (12/26/9)
bb = bollinger_bands(price, window=20, k=2.0)   # Bollinger Bands (20, 2)

# Report the first date on which each indicator has a non-NaN value.
print(
    "\n[指标] 技术指标计算完成:",
    f" SMA20 — 首个有效值日期: {sma20.first_valid_index().date()}",
    f" SMA60 — 首个有效值日期: {sma60.first_valid_index().date()}",
    f" RSI14 — 首个有效值日期: {rsi14.first_valid_index().date()}",
    f" MACD — 首个有效值日期: {macd_df['macd'].first_valid_index().date()}",
    f" BollingerBands — 首个有效值日期: {bb['mid'].first_valid_index().date()}",
    sep="\n",
)
|
||
|
||
|
||
# =============================================================================
|
||
# SECTION 2: Strategy A — Dual Moving Average Crossover
|
||
# 策略 A — 双均线金叉/死叉策略
|
||
# -----------------------------------------------------------------------------
|
||
# One of the oldest and most intuitive trend-following strategies.
|
||
# 最古老也最直观的趋势跟随策略之一。
|
||
#
|
||
# Logic / 逻辑:
|
||
# Golden Cross (金叉): short MA crosses ABOVE long MA → BUY (做多)
|
||
# Death Cross (死叉): short MA crosses BELOW long MA → SELL (平仓)
|
||
#
|
||
# Rationale / 原理:
|
||
# When the short-term average rises above the long-term average, it signals
|
||
# that recent momentum is stronger than the historical trend → bullish.
|
||
# 短期均线上穿长期均线,意味着近期动能强于历史趋势 → 看涨。
|
||
#
|
||
# Parameters / 参数:
|
||
# SHORT_WINDOW = 20 (fast line / 快线)
|
||
# LONG_WINDOW = 60 (slow line / 慢线)
|
||
# =============================================================================
|
||
|
||
SHORT_WIN = 20   # fast moving-average window
LONG_WIN = 60    # slow moving-average window

ma_short = sma(price, SHORT_WIN)
ma_long = sma(price, LONG_WIN)

# Position-style signal: 1 while the fast average sits above the slow one
# (long), 0 otherwise (flat). Comparisons against NaN during the warm-up
# evaluate to False, so the strategy starts flat.
raw_signal = ma_short.gt(ma_long).astype(int)

# Trade on yesterday's signal to avoid lookahead bias; the first day has no
# prior signal and is treated as flat.
ma_signal = raw_signal.shift(1).fillna(0)

print(
    "\n[策略A] 双均线信号生成完成",
    f" 多头持仓天数 (Signal=1): {int(ma_signal.sum())} 天",
    f" 空仓天数 (Signal=0): {int((ma_signal == 0).sum())} 天",
    sep="\n",
)
|
||
|
||
|
||
# =============================================================================
|
||
# SECTION 3: Strategy B — RSI Mean Reversion
|
||
# 策略 B — RSI 均值回归策略
|
||
# -----------------------------------------------------------------------------
|
||
# This is a contrarian strategy: buy when the market seems "too weak",
|
||
# sell when it seems "too strong".
|
||
# 这是一个逆势策略:市场"跌过头"时买入,"涨过头"时卖出。
|
||
#
|
||
# Logic / 逻辑:
|
||
# RSI drops below oversold level (超卖线, default 30) → BUY signal
|
||
# RSI rises above overbought level (超买线, default 70) → SELL signal
|
||
#
|
||
# This exploits mean reversion (均值回归): extreme prices tend to revert.
|
||
# 利用均值回归特性:极端价格倾向于回归均值。
|
||
#
|
||
# Risk / 风险:
|
||
# In a strong trend, RSI can stay oversold/overbought for long stretches.
|
||
# 在强趋势中,RSI 可以长时间停留在超卖/超买区域,造成连续亏损。
|
||
# =============================================================================
|
||
|
||
RSI_OVERSOLD = 30     # RSI below this level opens a long
RSI_OVERBOUGHT = 70   # RSI above this level opens a short


def rsi_signal(rsi_series: pd.Series,
               oversold: float = 30,
               overbought: float = 70) -> pd.Series:
    """Turn an RSI series into a long / short / flat position series.

    A small state machine walks the series from the second row onward:

    * flat and RSI < ``oversold``    -> go long  (+1)
    * flat and RSI > ``overbought``  -> go short (-1)
    * long and RSI > 50              -> exit to flat
    * short and RSI < 50             -> exit to flat

    Rows whose RSI is NaN are reported as flat (0) but do not reset the
    internal state, so an open position resumes on the next valid row.
    The first row is always 0.

    Returns:
        Float series on the same index with values in {+1, -1, 0}.
    """
    readings = rsi_series.to_numpy()
    states = np.zeros(len(readings), dtype=float)
    held = 0   # position currently carried by the state machine

    for day in range(1, len(readings)):
        level = readings[day]
        if pd.isna(level):
            continue   # leave this row at 0; `held` is deliberately untouched

        if held == 0:
            # Entries are only considered while flat.
            if level < oversold:
                held = 1
            elif level > overbought:
                held = -1
        elif held == 1:
            if level > 50:      # long exits once RSI is back above neutral
                held = 0
        elif level < 50:        # short exits once RSI is back below neutral
            held = 0

        states[day] = held

    return pd.Series(states, index=rsi_series.index, dtype=float)
|
||
|
||
|
||
rsi_pos = rsi_signal(rsi14, RSI_OVERSOLD, RSI_OVERBOUGHT)

# Lag the position by one day so each day trades on the previous day's RSI
# (avoids lookahead bias); the first day is flat.
rsi_signal_shifted = rsi_pos.shift(1).fillna(0)

print(
    "\n[策略B] RSI信号生成完成",
    f" 多头持仓天数 (Signal=+1): {int((rsi_signal_shifted == 1).sum())} 天",
    f" 空头持仓天数 (Signal=-1): {int((rsi_signal_shifted == -1).sum())} 天",
    f" 空仓天数 (Signal= 0): {int((rsi_signal_shifted == 0).sum())} 天",
    sep="\n",
)
|
||
|
||
|
||
# =============================================================================
|
||
# SECTION 4: Vectorized Backtest Engine 向量化回测引擎
|
||
# -----------------------------------------------------------------------------
|
||
# A backtest (回测) simulates how a strategy would have performed
|
||
# on historical data. It is the primary tool for validating a strategy
|
||
# before risking real money.
|
||
# 回测是在历史数据上模拟策略表现的工具,是真实投资前验证策略的主要手段。
|
||
#
|
||
# Two main backtest styles / 两种主要回测方式:
|
||
#
|
||
# ① Vectorized backtest 向量化回测
|
||
# - Compute all positions & P&L as array operations at once (numpy/pandas)
|
||
# - Very fast; good for strategy exploration
|
||
# - 所有仓位和盈亏一次性用数组运算计算,速度极快,适合策略探索
|
||
#
|
||
# ② Event-driven backtest 事件驱动回测
|
||
# - Simulate time step-by-step, reacting to each market event
|
||
# - More realistic (handles fills, slippage, latency, order queuing)
|
||
# - 逐笔模拟市场事件,更真实(考虑成交、滑点、延迟等),速度较慢
|
||
#
|
||
# We use the vectorized approach here for clarity and speed.
|
||
# 此处使用向量化方式,兼顾清晰度和速度。
|
||
#
|
||
# Cost model 交易成本模型:
|
||
# - Commission (佣金): charged each time you trade (per trade)
|
||
# - Slippage (滑点): the difference between the expected fill price and
|
||
# the actual fill price (price moves against you)
|
||
# We approximate both as a percentage of the trade value.
|
||
# 两者合并近似为交易金额的固定比例。
|
||
# =============================================================================
|
||
|
||
class VectorizedBacktester:
    """Minimal vectorized backtester for a single asset.

    The whole backtest is computed with array operations when the object is
    constructed; results are exposed as attributes (``daily_ret``,
    ``strat_ret``, ``equity``, ``equity_bh``, ``drawdown``, ``n_trades``,
    ``total_cost``).

    Assumptions:
        * ``signal`` is the position held on each day: +1 long, -1 short,
          0 flat. The engine does NOT lag the signal; callers are expected
          to pass a signal that is already shifted to avoid lookahead.
        * The position is 100% of capital, no leverage.
        * The book starts flat. Every change in position costs
          ``|change| * cost_per_trade`` (so entry plus exit costs
          ``2 * cost_per_trade`` and a long->short flip costs the same),
          and a non-zero position on the first row is charged as an entry.
    """

    def __init__(
        self,
        prices: pd.Series,
        signal: pd.Series,
        cost_per_trade: float = 0.001,          # one-way cost (commission + slippage)
        initial_capital: float = 1_000_000.0,
        name: str = "Strategy",
    ):
        """Store the inputs and immediately run the backtest.

        Args:
            prices: Price series; its index defines the backtest calendar.
            signal: Position series; re-indexed to ``prices.index`` with
                missing days treated as flat.
            cost_per_trade: Proportional cost charged per unit of position change.
            initial_capital: Starting equity.
            name: Label used in the printed report.
        """
        self.prices = prices
        self.signal = signal.reindex(prices.index).fillna(0)
        self.cost_per_trade = cost_per_trade
        self.initial_capital = initial_capital
        self.name = name
        self._run()

    def _run(self):
        """Compute returns, costs, equity curves and drawdown."""
        prices = self.prices
        signal = self.signal

        # Daily simple return of the asset (first day has no prior price -> 0).
        daily_ret = prices.pct_change().fillna(0)

        # Gross strategy return = position held that day x asset return.
        strat_ret_gross = signal * daily_ret

        # Size of each position change. diff() is NaN on the first row; fill
        # it with the opening position itself, i.e. the change from an
        # initially flat book, so the first entry is charged and counted.
        position_change = signal.diff().fillna(signal).abs()
        cost = position_change * self.cost_per_trade

        strat_ret_net = strat_ret_gross - cost

        # Compound daily returns into equity curves (strategy and buy & hold).
        equity = self.initial_capital * (1 + strat_ret_net).cumprod()
        equity_bh = self.initial_capital * (1 + daily_ret).cumprod()

        # Drawdown: fractional distance below the running peak (always <= 0).
        rolling_max = equity.cummax()
        drawdown = (equity - rolling_max) / rolling_max

        self.daily_ret = daily_ret
        self.strat_ret = strat_ret_net
        self.equity = equity
        self.equity_bh = equity_bh
        self.drawdown = drawdown
        self.n_trades = int((position_change > 0).sum())
        self.total_cost = cost.sum()

    def _compute_metrics(self) -> dict:
        """Return the raw (numeric) performance metrics keyed by short name."""
        r = self.strat_ret
        years = len(r) / 252.0   # sample length in (approximate) years

        total_return = (self.equity.iloc[-1] / self.initial_capital) - 1
        cagr = (1 + total_return) ** (1 / years) - 1

        daily_std = r.std()
        ann_vol = daily_std * np.sqrt(252)

        # Risk-free rate is taken as 0 for simplicity.
        risk_free = 0.0
        daily_rf = risk_free / 252
        sharpe = (r.mean() - daily_rf) / daily_std * np.sqrt(252) if daily_std > 0 else 0

        # Sortino: annualised excess return over the annualised target
        # downside deviation, sqrt(mean(min(r - target, 0)^2)), measured
        # over ALL days (days above target contribute zero).
        shortfall = (r - daily_rf).clip(upper=0)
        downside_dev = np.sqrt((shortfall ** 2).mean()) * np.sqrt(252)
        excess = cagr - risk_free
        if downside_dev > 0:
            sortino = excess / downside_dev
        else:
            # No downside at all: unbounded when profitable, else 0.
            sortino = np.inf if excess > 0 else 0

        max_dd = self.drawdown.min()   # most negative drawdown
        calmar = cagr / abs(max_dd) if max_dd != 0 else 0

        # Fraction of ALL days with a positive net return (flat days count
        # in the denominator).
        win_rate = (r > 0).mean()

        gross_profit = r[r > 0].sum()
        gross_loss = abs(r[r < 0].sum())
        profit_factor = gross_profit / gross_loss if gross_loss > 0 else np.inf

        return {
            "total_return": total_return,
            "cagr": cagr,
            "ann_vol": ann_vol,
            "sharpe": sharpe,
            "sortino": sortino,
            "max_dd": max_dd,
            "calmar": calmar,
            "win_rate": win_rate,
            "profit_factor": profit_factor,
        }

    def metrics(self) -> dict:
        """Return the performance report as a dict of formatted strings.

        Keys are the bilingual labels used in the printed report; values are
        pre-formatted (percentages / 3-decimal ratios), so numeric consumers
        must parse them.
        """
        raw = self._compute_metrics()
        return {
            "总收益率 Total Return": f"{raw['total_return']:.2%}",
            "年化收益率 CAGR": f"{raw['cagr']:.2%}",
            "年化波动率 Ann. Volatility": f"{raw['ann_vol']:.2%}",
            "夏普比率 Sharpe Ratio": f"{raw['sharpe']:.3f}",
            "索提诺比率 Sortino Ratio": f"{raw['sortino']:.3f}",
            "最大回撤 Max Drawdown": f"{raw['max_dd']:.2%}",
            "卡玛比率 Calmar Ratio": f"{raw['calmar']:.3f}",
            "胜率 Win Rate": f"{raw['win_rate']:.2%}",
            "盈亏比 Profit Factor": f"{raw['profit_factor']:.3f}",
            "交易次数 # Trades": str(self.n_trades),
            "总成本 Total Cost": f"{self.total_cost:.4%}",
        }

    def print_metrics(self):
        """Pretty-print the performance report to stdout."""
        print(f"\n{'=' * 55}")
        print(f" 策略绩效报告 / Performance Report: {self.name}")
        print(f"{'=' * 55}")
        for k, v in self.metrics().items():
            print(f" {k:<35} {v}")
        print(f"{'=' * 55}")
|
||
|
||
|
||
# =============================================================================
|
||
# SECTION 5: Run Backtests 执行回测
|
||
# =============================================================================
|
||
|
||
# ── Strategy A: MA Crossover 双均线策略 ──────────────────────────────────────
|
||
# Strategy A: moving-average crossover (signal is +1 long / 0 flat).
bt_ma = VectorizedBacktester(
    price,
    ma_signal,
    cost_per_trade=0.001,   # 0.1% per unit of position change
    name="双均线策略 (MA Crossover 20/60)",
)

# Strategy B: RSI mean reversion (signal is +1 long / -1 short / 0 flat).
bt_rsi = VectorizedBacktester(
    price,
    rsi_signal_shifted,
    cost_per_trade=0.001,
    name="RSI均值回归策略 (RSI Mean Reversion 14)",
)

# Benchmark: buy & hold — always long, with trading costs switched off.
always_long = pd.Series(1, index=price.index, dtype=float)
bt_bh = VectorizedBacktester(
    price,
    always_long,
    cost_per_trade=0.0,
    name="Buy & Hold 基准 (买入持有)",
)

# Print the three reports in the order: strategy A, strategy B, benchmark.
for report in (bt_ma, bt_rsi, bt_bh):
    report.print_metrics()
|
||
|
||
|
||
# =============================================================================
|
||
# SECTION 6: Visualization 可视化
|
||
# =============================================================================
|
||
|
||
def _finish_panel(ax, title, ylabel, **legend_kwargs):
    """Apply the title / y-label / legend / grid styling shared by all panels."""
    ax.set_title(title, fontsize=13, fontweight="bold")
    ax.set_ylabel(ylabel)
    ax.legend(**legend_kwargs)
    ax.grid(alpha=0.3)


# Six full-width panels stacked vertically.
fig = plt.figure(figsize=(16, 22))
gs = gridspec.GridSpec(6, 2, figure=fig, hspace=0.45, wspace=0.3)

# Panel 1: price with both moving averages; long periods shaded green.
ax1 = fig.add_subplot(gs[0, :])
ax1.plot(price, color="#1f77b4", linewidth=1, label="价格 Price")
ax1.plot(ma_short, color="orange", linewidth=1.2, label=f"SMA{SHORT_WIN} (快线)")
ax1.plot(ma_long, color="red", linewidth=1.2, label=f"SMA{LONG_WIN} (慢线)")
ax1.fill_between(
    price.index, price.min(), price.max(),
    where=(ma_signal == 1).values,
    alpha=0.12, color="green", label="多头持仓区间 Long Period"
)
_finish_panel(ax1, "策略A — 双均线信号 (MA Crossover Signals)", "价格 Price",
              loc="upper left", fontsize=8)

# Panel 2: RSI with threshold lines and shaded extreme zones.
ax2 = fig.add_subplot(gs[1, :])
ax2.plot(rsi14, color="purple", linewidth=1)
ax2.axhline(RSI_OVERBOUGHT, color="red", linestyle="--", linewidth=1, label=f"超买线 {RSI_OVERBOUGHT}")
ax2.axhline(RSI_OVERSOLD, color="green", linestyle="--", linewidth=1, label=f"超卖线 {RSI_OVERSOLD}")
ax2.axhline(50, color="gray", linestyle=":", linewidth=0.8)
ax2.fill_between(rsi14.index, RSI_OVERSOLD, rsi14,
                 where=(rsi14 < RSI_OVERSOLD), alpha=0.25, color="green",
                 label="超卖区域 Oversold")
ax2.fill_between(rsi14.index, rsi14, RSI_OVERBOUGHT,
                 where=(rsi14 > RSI_OVERBOUGHT), alpha=0.25, color="red",
                 label="超买区域 Overbought")
ax2.set_ylim(0, 100)
_finish_panel(ax2, f"策略B指标 — RSI({14}) 均值回归信号 (RSI Mean Reversion)", "RSI",
              loc="upper left", fontsize=8, ncol=2)

# Panel 3: price inside its Bollinger Bands.
ax3 = fig.add_subplot(gs[2, :])
ax3.plot(price, color="#1f77b4", linewidth=1, label="价格 Price")
ax3.plot(bb["mid"], color="orange", linewidth=1.2, label="中轨 Middle (SMA20)")
ax3.plot(bb["upper"], color="red", linewidth=1, linestyle="--", label="上轨 Upper (+2σ)")
ax3.plot(bb["lower"], color="green", linewidth=1, linestyle="--", label="下轨 Lower (-2σ)")
ax3.fill_between(price.index, bb["upper"], bb["lower"], alpha=0.07, color="blue")
_finish_panel(ax3, "布林带 (Bollinger Bands 20, 2σ)", "价格 Price",
              loc="upper left", fontsize=8, ncol=2)

# Panel 4: MACD line, signal line and a green/red histogram.
ax4 = fig.add_subplot(gs[3, :])
ax4.plot(macd_df["macd"], color="blue", linewidth=1, label="MACD线 (DIF)")
ax4.plot(macd_df["signal"], color="orange", linewidth=1, label="信号线 (DEA)")
# Green for non-negative bars, red otherwise (NaN compares False -> red).
colors = np.where(macd_df["histogram"] >= 0, "green", "red").tolist()
ax4.bar(macd_df.index, macd_df["histogram"], color=colors, alpha=0.5, width=1, label="柱状图 Histogram")
ax4.axhline(0, color="black", linewidth=0.8)
_finish_panel(ax4, "MACD (12/26/9) — 趋势确认指标 (Trend Confirmation)", "MACD",
              loc="upper left", fontsize=8)

# Panel 5: equity curves of both strategies against the benchmark.
ax5 = fig.add_subplot(gs[4, :])
ax5.plot(bt_ma.equity, color="blue", linewidth=1.5, label="策略A: 双均线 MA Crossover")
ax5.plot(bt_rsi.equity, color="purple", linewidth=1.5, label="策略B: RSI 均值回归 RSI Reversion")
ax5.plot(bt_bh.equity, color="gray", linewidth=1.2, linestyle="--", label="基准: 买入持有 Buy & Hold")
ax5.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f"¥{x/1e4:.0f}万"))
_finish_panel(ax5, "净值曲线对比 (Equity Curve Comparison)", "账户价值 Portfolio Value (元)",
              loc="upper left", fontsize=9)

# Panel 6: drawdown of each equity curve, filled down from zero.
ax6 = fig.add_subplot(gs[5, :])
ax6.fill_between(bt_ma.drawdown.index, bt_ma.drawdown, 0, alpha=0.5, color="blue", label="策略A")
ax6.fill_between(bt_rsi.drawdown.index, bt_rsi.drawdown, 0, alpha=0.5, color="purple", label="策略B")
ax6.fill_between(bt_bh.drawdown.index, bt_bh.drawdown, 0, alpha=0.3, color="gray", label="Buy & Hold")
ax6.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f"{x:.0%}"))
_finish_panel(ax6, "回撤曲线 (Drawdown Curves)", "回撤幅度 Drawdown",
              loc="lower left", fontsize=9)

plt.suptitle(
    "量化交易策略开发与回测演示\nQuantitative Trading: Strategy Development & Backtesting",
    fontsize=15, fontweight="bold", y=1.005,
)
plt.savefig("strategy_backtest_demo.png", dpi=120, bbox_inches="tight")
plt.show()
print("\n[图表] 已保存至 strategy_backtest_demo.png")
|
||
|
||
|
||
# =============================================================================
|
||
# SECTION 7: Walk-Forward Validation 滚动前向验证
|
||
# -----------------------------------------------------------------------------
|
||
# A critical warning for all new quant traders / 对所有量化新手的重要警告:
|
||
#
|
||
# In-sample overfitting (样本内过拟合) is the #1 trap in backtesting.
|
||
# 样本内过拟合是回测中最大的陷阱。
|
||
#
|
||
# If you test 100 different parameter sets on the same data and pick the best,
|
||
# that "best" result will almost certainly NOT hold out of sample.
|
||
# 如果在同一份数据上测试100组参数并选最好的,这个"最优"结果在样本外几乎必然失效。
|
||
# This is called data snooping bias / 数据窥探偏差 or p-hacking.
|
||
#
|
||
# Walk-Forward Validation (滚动前向验证) helps guard against this:
|
||
# ┌──────────────────────────────────────────────────────────────────────┐
|
||
# │ Window 1: [TRAIN period 1] → optimize params → TEST on period 1+ │
|
||
# │ Window 2: [TRAIN period 2] → optimize params → TEST on period 2+ │
|
||
# │ …repeat, always training on past, testing on future │
|
||
# │ 始终用过去数据训练,用未来数据测试 │
|
||
# └──────────────────────────────────────────────────────────────────────┘
|
||
# Only report the concatenated OUT-OF-SAMPLE test results.
|
||
# 只汇报样本外(OOS)的测试结果。
|
||
#
|
||
# Below: a simplified version — we just split into train / test (80/20).
|
||
# 下面是简化版:直接按 80/20 切分训练集和测试集。
|
||
# =============================================================================
|
||
|
||
# Chronological 80/20 split: parameters are chosen on the first part and
# evaluated once on the held-out tail.
TRAIN_RATIO = 0.8
split_idx = int(len(price) * TRAIN_RATIO)
split_date = price.index[split_idx]   # first date of the test period

price_train = price.iloc[:split_idx]
price_test = price.iloc[split_idx:]

print(f"\n{'=' * 55}")
print(f" 滚动前向验证 / Walk-Forward Split")
print(f"{'=' * 55}")
print(f" 训练期 Train: {price_train.index[0].date()} → {price_train.index[-1].date()} ({len(price_train)} 天)")
print(f" 测试期 Test : {price_test.index[0].date()} → {price_test.index[-1].date()} ({len(price_test)} 天)")

# Grid search on the TRAIN set only: try every (short, long) window pair
# and keep the one with the highest in-sample Sharpe ratio.
print("\n[优化] 在训练集上搜索最优均线参数...")
print(" (搜索空间: short=[5,10,15,20,30], long=[30,40,50,60,80,100])")

best_sharpe = -np.inf
best_short = SHORT_WIN
best_long = LONG_WIN
results_grid = []

for sw in [5, 10, 15, 20, 30]:
    for lw in [30, 40, 50, 60, 80, 100]:
        if sw >= lw:
            continue   # the fast window must be shorter than the slow one
        ma_s = sma(price_train, sw)
        ma_l = sma(price_train, lw)
        # Lag by one day so each day trades on the previous day's signal.
        sig = (ma_s > ma_l).astype(int).shift(1).fillna(0)
        bt = VectorizedBacktester(price_train, sig, cost_per_trade=0.001, name="grid")
        m = bt.metrics()
        # metrics() returns formatted strings; parse the Sharpe back to float.
        sharpe_val = float(m["夏普比率 Sharpe Ratio"])
        results_grid.append({"short": sw, "long": lw, "sharpe": sharpe_val})
        if sharpe_val > best_sharpe:
            best_sharpe = sharpe_val
            best_short = sw
            best_long = lw

print(f"\n 最优参数 (训练集 in-sample): short={best_short}, long={best_long}")
print(f" 训练集夏普比率 In-sample Sharpe: {best_sharpe:.3f}")

# Apply the chosen windows to the TEST period.
# The averages are computed on the FULL history and then sliced, so the
# first test days already have valid values. Recomputing them on the test
# slice alone would leave the first `best_long` days NaN and force the
# strategy flat for a large share of the out-of-sample window. There is no
# lookahead: an SMA at day t only uses prices up to t, and the signal is
# lagged one day before slicing.
ma_s_full = sma(price, best_short)
ma_l_full = sma(price, best_long)
ma_s_test = ma_s_full.iloc[split_idx:]
ma_l_test = ma_l_full.iloc[split_idx:]
sig_test = (ma_s_full > ma_l_full).astype(int).shift(1).fillna(0).iloc[split_idx:]

bt_test = VectorizedBacktester(price_test, sig_test, cost_per_trade=0.001,
                               name=f"MA({best_short}/{best_long}) — 测试集 OOS")
bt_test.print_metrics()

print("\n" + "=" * 55)
print(" ⚠️ 注意 / WARNING:")
print(" 训练集(in-sample)夏普 通常高于 测试集(out-of-sample)夏普")
print(" In-sample Sharpe is typically HIGHER than out-of-sample.")
print(" 夏普衰减 (Sharpe decay) 是策略过拟合的典型信号。")
print(" Sharpe decay is a classic sign of overfitting.")
print("=" * 55)
|
||
|
||
|
||
# =============================================================================
|
||
# SECTION 8: Return Distribution Analysis 收益率分布分析
|
||
# -----------------------------------------------------------------------------
|
||
# Before trusting your Sharpe ratio, check if the return distribution
|
||
# violates the normality assumption.
|
||
# 在相信夏普比率之前,检验收益率分布是否违背正态假设。
|
||
#
|
||
# Real returns typically show:
|
||
# 真实收益率通常呈现:
|
||
# Fat tails (厚尾 / leptokurtosis): extreme events more frequent than normal
|
||
# Negative skew (负偏态): crashes are larger than rallies
|
||
#
|
||
# A high Sharpe ratio on a fat-tailed distribution can be misleading.
|
||
# 厚尾分布下的高夏普比率可能具有误导性。
|
||
# =============================================================================
|
||
|
||
fig2, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: daily-return histograms of strategy A and buy & hold.
ax = axes[0]
r_ma = bt_ma.strat_ret.dropna()
r_bh = bt_bh.strat_ret.dropna()

ax.hist(r_bh, bins=80, alpha=0.5, color="gray", density=True, label="Buy & Hold")
ax.hist(r_ma, bins=80, alpha=0.5, color="blue", density=True, label="策略A: MA Crossover")

# Overlay a normal density fitted to the buy & hold returns for comparison.
x_range = np.linspace(r_bh.min(), r_bh.max(), 300)
ax.plot(x_range, stats.norm.pdf(x_range, r_bh.mean(), r_bh.std()),
        color="red", linewidth=1.5, linestyle="--", label="正态分布 Normal Dist.")
ax.set_title("收益率分布 (Return Distribution)", fontsize=12)
ax.set_xlabel("日收益率 Daily Return")
ax.set_ylabel("频率密度 Density")
ax.legend(fontsize=8)
ax.grid(alpha=0.3)

# Distribution statistics of strategy A's daily returns.
print(f"\n[分布] 策略A — 日收益率统计:")
print(f" 偏度 Skewness : {r_ma.skew():.3f} (负值=左尾更厚 fat left tail)")
print(f" 峰度 Kurtosis : {r_ma.kurtosis():.3f} (>0 表示厚尾 fat tails vs normal)")

# Right panel: year x month heatmap of strategy A's compounded returns.
ax = axes[1]
# Compound daily returns within each (year, month). Grouping on the index's
# calendar fields avoids the resample month-end alias, whose spelling
# differs between pandas versions ("M" vs "ME").
growth = 1 + bt_ma.strat_ret
monthly = (
    growth.groupby([growth.index.year, growth.index.month]).prod() - 1
).rename_axis(["year", "month"])
monthly_df = monthly.rename("ret").reset_index()
# Reindex to all 12 months so the month labels below always line up, even
# when some calendar month never occurs in the sample.
pivot = monthly_df.pivot(index="year", columns="month", values="ret").reindex(
    columns=list(range(1, 13))
)
pivot.columns = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]

# Red -> white -> green colour map, clipped at +/-15% per month.
cmap = mcolors.LinearSegmentedColormap.from_list("rg", ["#d73027","#ffffff","#1a9850"])
im = ax.imshow(pivot.values, cmap=cmap, aspect="auto",
               vmin=-0.15, vmax=0.15)
ax.set_xticks(range(12))
ax.set_xticklabels(pivot.columns, fontsize=8)
ax.set_yticks(range(len(pivot.index)))
ax.set_yticklabels(pivot.index, fontsize=9)
# Annotate every populated cell; switch to white text on strongly coloured cells.
for (i, j), v in np.ndenumerate(pivot.values):
    if not np.isnan(v):
        ax.text(j, i, f"{v:.1%}", ha="center", va="center", fontsize=6,
                color="black" if abs(v) < 0.08 else "white")
ax.set_title("策略A月度收益热力图\n(Monthly Return Heatmap)", fontsize=11)
plt.colorbar(im, ax=ax, format=plt.FuncFormatter(lambda x, _: f"{x:.0%}"))

plt.tight_layout()
plt.savefig("return_distribution.png", dpi=120, bbox_inches="tight")
plt.show()
print("[图表] 已保存至 return_distribution.png")
|
||
|
||
|
||
# =============================================================================
# SECTION 9: Summary & Next Steps 总结与后续
# =============================================================================

# Closing banner: recap of the six topics covered by this demo, followed by
# suggested follow-up topics. The three horizontal rules are 70 '=' characters.
print(f"""
{'=' * 70}
总结 Summary
{'=' * 70}
本 Demo 演示了量化策略开发与回测的完整流程:

① 技术指标计算 Technical Indicators
SMA / EMA / RSI / MACD / Bollinger Bands

② 信号生成 Signal Generation
策略A: 双均线金叉死叉 (MA Crossover) — 趋势跟随
策略B: RSI 超买超卖 (RSI Reversion) — 均值回归

③ 向量化回测引擎 Vectorized Backtester
考虑了交易成本(佣金+滑点)和前视偏差(lookahead bias)

④ 绩效指标 Performance Metrics
Sharpe / Sortino / Max Drawdown / Calmar / Win Rate / Profit Factor

⑤ 前向验证 Walk-Forward Validation
训练集优化参数 → 测试集验证 → 防止过拟合

⑥ 收益率分布 Return Distribution
偏度/峰度检验,月度热力图

下一步学习方向 Next Steps:
──────────────────────────────────────────────────────────────
• 因子选股策略 (Alpha Factor Models) — Fama-French, Momentum
• 组合优化 (Portfolio Optimization) — Mean-Variance, Risk Parity
• 事件驱动回测 (Event-Driven Backtesting) — more realistic execution
• 机器学习信号 (ML-based Signals) — XGBoost, LSTM for return prediction
• 风险管理 (Risk Management) — Position sizing, Stop-loss, VaR
{'=' * 70}
""")