# ============================================================================= # Quantitative Trading — Alpha Factor Research Demo # 量化交易 — Alpha 因子研究演示 # ============================================================================= # # Alpha 因子 (Alpha Factor) 是量化选股的核心工具。 # 它是一个数学公式,从市场数据中提取信号,预测哪些股票未来会跑赢大盘。 # # Alpha factors are the core tool of quantitative stock selection. # Each factor is a mathematical formula that extracts a signal from market data # to predict which stocks will outperform in the future. # # 研究流程 / Research Workflow: # §0 合成股票池 Synthetic Universe (50只股票 × 3年日线数据) # §1 因子构建 Factor Construction (5个经典因子) # §2 因子预处理 Factor Preprocessing (去极值、截面标准化、市值中性化) # §3 IC 分析 IC Analysis (信息系数 / 因子预测能力量化) # §4 分层回测 Quantile Analysis (五分位收益分层验证) # §5 因子合成 Factor Combination (等权 & IC加权合成因子) # §6 多空组合 Long-Short Portfolio (Top/Bottom 20% 多空策略) # §7 因子衰减 Factor Decay (IC 随持有期延长的衰减曲线) # §8 可视化 Visualization (9面板汇总图) # # 前置条件 / Prerequisites: # pip install numpy pandas matplotlib scipy # # 运行方式 / Run: # python quant_alpha_factor_demo.py # ============================================================================= import numpy as np import pandas as pd import matplotlib matplotlib.use('Agg') # 非交互模式,避免 GUI 阻塞 / non-interactive, prevents GUI block import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec from matplotlib.ticker import FuncFormatter from scipy import stats import warnings warnings.filterwarnings('ignore') # 中文字体配置 / Chinese font configuration plt.rcParams['font.sans-serif'] = ['WenQuanYi Zen Hei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans'] plt.rcParams['axes.unicode_minus'] = False np.random.seed(42) print("=" * 70) print(" 量化交易 Alpha 因子研究演示") print(" Quantitative Trading: Alpha Factor Research Demo") print("=" * 70) # ============================================================================= # §0 合成股票池 Synthetic Stock Universe # ----------------------------------------------------------------------------- # 真实因子研究需要几百到几千只股票的截面数据 (cross-sectional data)。 # 我们用"隐藏因子生成模型" (Hidden Factor Generative Model) 合成数据, # 确保我们构建的因子确实具有预测能力: # # Real factor research requires cross-sectional data of hundreds of stocks. # We use a Hidden Factor Generative Model to synthesize data, ensuring the # factors we build actually have genuine predictive power. # # 每只股票的日收益 = 市场分量 + 质量Alpha + 动量Alpha + 特质噪音 # stock_daily_return = market_component + quality_alpha + momentum_alpha + noise # # 其中: # market_component = β_i × r_market (系统性风险 / systematic risk) # quality_alpha = λ_q × quality_score_{i,t} (质量因子加载, 季度持续) # momentum_alpha = λ_m × momentum_score_{i,t} (动量因子加载, 月度持续) # noise ~ N(0, σ_idio) (特质风险 / idiosyncratic risk) # # 关键: quality_score 和 momentum_score 是"隐藏"的,我们无法直接观察。 # 但当我们从价格数据中计算因子时,会自然捕捉到这些隐藏信号。 # Key: these hidden scores are unobservable, but our computed factors will # naturally capture them — this is exactly what alpha factors do! # ============================================================================= N_STOCKS = 50 # 股票数量 / number of stocks N_DAYS = 756 # 交易日 ≈ 3 年 / trading days ≈ 3 years N_QUARTERS = N_DAYS // 63 + 2 # 季度数 / number of quarters N_MONTHS = N_DAYS // 21 + 2 # 月度数 / number of months # 交易日期序列 / Business-day date index dates = pd.bdate_range(start="2021-01-04", periods=N_DAYS) # 股票代码 (模拟A股格式) / Stock symbols (simulated A-share format) symbols = [f"{str(i).zfill(6)}.{'SH' if i % 2 == 0 else 'SZ'}" for i in range(1, N_STOCKS + 1)] # ── 0-A 隐藏质量因子(季度持续)/ Hidden Quality Factor (Quarterly Persistence) ── # # 质量因子 AR(1): quality_t = 0.90 × quality_{t-1} + shock # 季度自回归系数 0.90 → 年度持续性 ≈ 0.90^4 = 0.66(持续性更强) # Quarterly AR(1) = 0.90 → annual persistence ≈ 0.90^4 = 0.66 (more persistent) quality_quarterly = np.zeros((N_STOCKS, N_QUARTERS)) quality_quarterly[:, 0] = np.random.randn(N_STOCKS) for q in range(1, N_QUARTERS): # sqrt(1 - 0.90^2) ≈ 0.436 保持方差为 1 / keeps variance = 1 quality_quarterly[:, q] = (0.90 * quality_quarterly[:, q - 1] + 0.436 * np.random.randn(N_STOCKS)) # 季度 → 每日映射 / Map quarterly to daily quality_daily = np.zeros((N_DAYS, N_STOCKS)) for d in range(N_DAYS): q_idx = min(d // 63, N_QUARTERS - 1) quality_daily[d] = quality_quarterly[:, q_idx] # ── 0-B 隐藏动量因子(月度持续)/ Hidden Momentum Factor (Monthly Persistence) ── # # 动量因子 AR(1): momentum_t = 0.60 × momentum_{t-1} + shock # 月度自回归系数 0.60,意味着 3 个月后的持续性 ≈ 0.60^3 = 0.22 # Monthly AR(1) coefficient 0.60 → 3-month persistence ≈ 0.22 momentum_monthly = np.zeros((N_STOCKS, N_MONTHS)) momentum_monthly[:, 0] = np.random.randn(N_STOCKS) for m in range(1, N_MONTHS): # 月度 AR(1) = 0.80,年度持续性 ≈ 0.80^12 = 0.069(合理的动量衰减) # Monthly AR(1) = 0.80 → annual persistence ≈ 0.80^12 = 0.069 momentum_monthly[:, m] = (0.80 * momentum_monthly[:, m - 1] + 0.60 * np.random.randn(N_STOCKS)) momentum_daily = np.zeros((N_DAYS, N_STOCKS)) for d in range(N_DAYS): m_idx = min(d // 21, N_MONTHS - 1) momentum_daily[d] = momentum_monthly[:, m_idx] # ── 0-C 日收益率生成 / Daily Return Generation ──────────────────────────────── # # 日市场收益 (Market Daily Return): 年化μ=8%, σ=20% mkt_returns = np.random.normal(0.0003, 0.0126, N_DAYS) # 0.20/√252 ≈ 0.0126 # 每只股票的市场贝塔 β: 均匀分布在 [0.5, 1.5] # Each stock's market beta β: uniformly distributed in [0.5, 1.5] stock_betas = np.random.uniform(0.5, 1.5, N_STOCKS) # (N_DAYS, N_STOCKS): 每列是一只股票,每行是一个交易日 market_component = mkt_returns[:, np.newaxis] * stock_betas[np.newaxis, :] # 市场分量 quality_component = 0.00080 * quality_daily # 质量贡献:约 ±20% 年化 alpha momentum_component = 0.00050 * momentum_daily # 动量贡献:约 ±12% 年化 alpha idio_noise = np.random.normal(0, 0.0075, (N_DAYS, N_STOCKS)) # 特质噪音 # 日对数收益率 / Daily log returns log_returns = market_component + quality_component + momentum_component + idio_noise # ── 0-D 价格路径 / Price Paths ──────────────────────────────────────────────── # P_t = P_0 × exp( Σ log_return_s, s=1..t ) (价格路径 / price path) initial_prices = np.random.uniform(5.0, 100.0, N_STOCKS) # A股初始价格 prices_arr = initial_prices * np.exp(np.cumsum(log_returns, axis=0)) prices_df = pd.DataFrame(prices_arr, index=dates, columns=symbols) log_ret_df = pd.DataFrame(log_returns, index=dates, columns=symbols) simple_ret_df = prices_df.pct_change() # 简单收益率: r_t = P_t/P_{t-1} - 1 # ── 0-E 成交量生成 / Volume Generation ────────────────────────────────────── # 真实市场中,成交量与绝对收益率正相关(大涨大跌时换手更活跃)。 # In real markets, volume correlates with |return| (more trading on big moves). # # log(volume) = log(base) + 3×|log_return| + noise base_volumes = np.random.uniform(1e6, 5e7, N_STOCKS) # 基础日均成交量(股) vol_multiplier = np.exp(3.0 * np.abs(log_returns) + 0.3 * np.random.randn(N_DAYS, N_STOCKS)) volume_df = pd.DataFrame(base_volumes * vol_multiplier, index=dates, columns=symbols) # ── 0-F 模拟市值 / Simulated Market Capitalization ─────────────────────────── # 市值 (Market Cap) = 价格 × 流通股数 / Market Cap = Price × Float Shares float_shares = np.random.uniform(1e8, 1e10, N_STOCKS) # 流通股数(股) mktcap_df = prices_df * float_shares # 市值(元) log_mktcap_df = np.log(mktcap_df) # 对数市值(因子分析常用) market_series = pd.Series(mkt_returns, index=dates) # 市场收益率序列 print(f"\n[§0] 股票池生成完成 / Universe Generated") print(f" 股票数量 (Stocks): {N_STOCKS} 只") print(f" 交易日数 (Days): {N_DAYS} 天 {dates[0].date()} ~ {dates[-1].date()}") print(f" 价格区间 (Price): {prices_df.min().min():.2f} ~ {prices_df.max().max():.2f} 元") # ============================================================================= # §1 因子构建 Factor Construction # ----------------------------------------------------------------------------- # 我们构建 5 个来自学术文献的经典因子,涵盖不同的 Alpha 来源: # We build 5 classic factors from academic literature, covering different alpha sources: # # ┌──────┬────────────────────────────┬──────────────────────────────────────────┐ # │ 因子 │ 名称 │ 公式 & 来源 │ # ├──────┼────────────────────────────┼──────────────────────────────────────────┤ # │ MOM │ 动量 Momentum │ r(t-252, t-21) Jegadeesh & Titman 1993 │ # │ REV │ 短期反转 Reversal │ -r(t-21, t) Jegadeesh 1990 │ # │ LVOL │ 低波动 Low Volatility │ -std(r, 60D) Baker et al. 2011 │ # │ BAB │ 低贝塔 Betting vs Beta │ -β(60D) Frazzini & Pedersen 2014 │ # │ ILLIQ│ 非流动性 Amihud Illiquidity│ mean(|r|/V,20D) Amihud 2002 │ # └──────┴────────────────────────────┴──────────────────────────────────────────┘ # # 重要约定: 所有因子都以"因子值越高 → 预期未来收益越高"为正方向。 # Convention: higher factor value → expected higher future return (unified sign). # ============================================================================= print("\n[§1] 构建因子 / Constructing factors...") # ── 1-A 动量因子 (MOM) Momentum Factor ────────────────────────────────────── # # 来源 / Source: Jegadeesh & Titman (1993) # "Returns to Buying Winners and Selling Losers" # # 动量效应 (Momentum Effect): 过去 12 个月(跳过最近 1 个月)涨幅最大的股票, # 未来 3-12 个月通常继续跑赢市场。这是学术上记录最多的股票市场异象之一。 # # Momentum effect: stocks with highest 12M-1M returns tend to outperform # over the next 3-12 months. One of the most documented stock market anomalies. # # 为什么要跳过最近 1 个月?/ Why skip the last month? # 最近 1 个月的收益存在短期反转效应(买卖价差、做市商库存调整等微观结构原因), # 会污染中期动量信号,所以必须跳过。 # The last month exhibits short-term reversal due to bid-ask spread and # market-maker inventory rebalancing, contaminating medium-term momentum. # # 公式 / Formula: # MOM_t = (P_{t-21} / P_{t-252}) - 1 (从12M前 到 1M前 的累积收益) # MOM_t = (P_{t-21} / P_{t-252}) - 1 (cumulative return from 12M to 1M ago) MOM_LONG = 252 # 长回看窗口: 12个月 / lookback long: 12 months MOM_SHORT = 21 # 跳过窗口: 1个月 / skip window: 1 month factor_mom = prices_df.shift(MOM_SHORT) / prices_df.shift(MOM_LONG) - 1.0 print(f" ✓ MOM 动量因子: 有效值 {factor_mom.notna().mean().mean():.1%}") # ── 1-B 短期反转因子 (REV) Short-Term Reversal Factor ─────────────────────── # # 来源 / Source: Jegadeesh (1990) # "Evidence of Predictable Behavior of Security Returns" # # 反转效应 (Reversal Effect): 上个月大涨的股票,下个月往往小幅回调; # 大跌的股票往往小幅反弹。这是对短期"过度反应"的修正。 # # Reversal effect: last month's winners tend to reverse; losers tend to bounce. # This is correction of short-term overreaction. # # 注意: 取负号是因为月涨 → 因子值高 → 预期下跌(反转),我们统一正方向。 # Note: negative sign because high past return → expected to reverse → lower future return, # we flip to maintain convention: higher factor → higher expected return. # # 公式 / Formula: # REV_t = -(P_t / P_{t-21} - 1) (负1月收益 / negative 1-month return) factor_rev = -(prices_df / prices_df.shift(MOM_SHORT) - 1.0) print(f" ✓ REV 反转因子: 有效值 {factor_rev.notna().mean().mean():.1%}") # ── 1-C 低波动因子 (LVOL) Low Volatility Factor ──────────────────────────── # # 来源 / Source: Ang et al.(2006); Baker, Bradley & Wurgler (2011) # "Benchmarks as Limits to Arbitrage: Understanding the Low-Volatility Anomaly" # # 低波动异象 (Low Volatility Anomaly): 经典金融理论 CAPM 预测高风险 = 高收益。 # 但实证研究发现,低波动率的股票反而有更高的原始收益和风险调整后收益! # # Low-vol anomaly: contrary to CAPM, low-volatility stocks earn HIGHER # risk-adjusted and even raw returns than high-volatility stocks. # # 可能的解释 / Possible explanations: # ① 机构投资者受基准约束,被迫持有高贝塔股票 (benchmark constraints) # ② 投资者对"彩票式"高波动股票有偏好,导致其被高估 (lottery preference) # ③ 杠杆限制阻止套利者纠正定价偏差 (leverage constraints) # # 公式 / Formula: # LVOL_t = -σ(r_{t-60:t}) (负60日已实现波动率 / negative 60-day realized vol) VOL_WINDOW = 60 # 60 个交易日 ≈ 3 个月 / 60 trading days ≈ 3 months factor_lvol = -(simple_ret_df.rolling(VOL_WINDOW).std()) print(f" ✓ LVOL 低波动因子: 有效值 {factor_lvol.notna().mean().mean():.1%}") # ── 1-D 低贝塔因子 (BAB) Betting Against Beta ────────────────────────────── # # 来源 / Source: Frazzini & Pedersen (2014) "Betting Against Beta" # # BAB 异象: 做多低贝塔股票、做空高贝塔股票,可以获得持续的超额收益。 # 与低波动因子类似,但专注于对"市场系统性风险"的暴露,而非总波动率。 # # BAB anomaly: long low-beta, short high-beta → persistent excess return. # Similar to low-vol but focuses on market systematic risk exposure. # # 贝塔计算 / Beta computation: # β_i = Cov(r_i, r_market) / Var(r_market) (60日滚动窗口 / 60-day rolling) # # 向量化技巧: 利用 pandas rolling 方法计算所有股票的滚动协方差和市场方差 # Vectorized trick: use pandas rolling to compute rolling covariance and variance BETA_WINDOW = 60 # 60日滚动贝塔 / 60-day rolling beta # pandas rolling().cov(other) 计算每个时间点的滚动协方差 # pandas rolling().cov(other) computes rolling covariance at each time point rolling_var_mkt = market_series.rolling(BETA_WINDOW).var() # 市场方差 / market variance rolling_cov = log_ret_df.apply( lambda col: col.rolling(BETA_WINDOW).cov(market_series) # 逐列协方差 / per-stock cov ) rolling_beta_df = rolling_cov.div(rolling_var_mkt, axis=0) # β = cov / var factor_bab = -rolling_beta_df # 取负:低贝塔 → 高因子值 / flip: low-beta → high score print(f" ✓ BAB 低贝塔因子: 有效值 {factor_bab.notna().mean().mean():.1%}") # ── 1-E Amihud 非流动性因子 (ILLIQ) Amihud Illiquidity Factor ─────────────── # # 来源 / Source: Amihud (2002) "Illiquidity and Stock Returns" # # Amihud 非流动性指标 (Amihud Illiquidity Measure): # ILLIQ_{i,t} = (1/D) × Σ_{d=t-D+1}^{t} |r_{i,d}| / Volume_{i,d} # # 含义 (Interpretation): # "每单位成交量能引起多大的价格变动?" # "How much price movement per unit of trading volume?" # # 值越高 → 流动性越差 (higher → less liquid) # 流动性差的股票风险更高,投资者要求额外的"流动性溢价"(liquidity premium) # Illiquid stocks carry higher risk, investors demand extra liquidity premium # # 注: 本因子中,高 ILLIQ(流动性差)→ 预期高收益(溢价补偿)→ 符合正方向约定 # Note: high ILLIQ (illiquid) → expected high return (premium) → matches positive convention ILLIQ_WINDOW = 20 # 20日均值 / 20-day average # |日收益率| / 日成交量,再取20日滚动均值 # |daily return| / daily volume, then 20-day rolling mean price_impact = simple_ret_df.abs() / volume_df # 每日价格冲击 / daily price impact factor_illiq = price_impact.rolling(ILLIQ_WINDOW).mean() * 1e8 # 缩放 / scaling print(f" ✓ ILLIQ 非流动性因子: 有效值 {factor_illiq.notna().mean().mean():.1%}") # ── 1-F 汇总 / Consolidate ─────────────────────────────────────────────────── FACTORS = { "MOM" : factor_mom, # 动量 "REV" : factor_rev, # 短期反转 "LVOL" : factor_lvol, # 低波动 "BAB" : factor_bab, # 低贝塔 / 贝塔 "ILLIQ" : factor_illiq, # Amihud 非流动性 } print(f"\n 共构建 {len(FACTORS)} 个因子: {list(FACTORS.keys())}") # ============================================================================= # §2 因子预处理 Factor Preprocessing # ----------------------------------------------------------------------------- # 原始因子值不能直接用于分析,需要经过三步标准化预处理流程: # Raw factor values must go through a 3-step preprocessing pipeline: # # Step 1 ── 截面去极值 (Cross-Sectional Winsorization) # 在每个日期截面,将超出 [μ - 3σ, μ + 3σ] 范围的值截断。 # At each date, clip values outside [mean - 3σ, mean + 3σ]. # 为什么?极端异常值会主导相关系数计算,掩盖真实的因子信号。 # Why? Outliers dominate correlation calculations and mask the true signal. # # Step 2 ── 截面 Z-score 标准化 (Cross-Sectional Z-score) # z_{i,t} = (x_{i,t} - μ_t) / σ_t (在每个时间截面 t 上计算) # z_{i,t} = (x_{i,t} - μ_t) / σ_t (computed cross-sectionally at each date t) # 为什么?不同因子量纲不同(动量是%,波动率也是%,ILLIQ 是极小数), # 标准化后可以直接比较和合成。 # Why? Factors have different scales; Z-score enables fair comparison and combination. # # Step 3 ── 市值中性化 (Market Cap Neutralization) # 在每个截面,对 log(市值) 做 OLS 回归,用残差替换原因子值: # At each date, regress on log(mktcap) via OLS; use residuals as factor: # factor_neutral_i = factor_i - (α + β × log_mktcap_i) # 为什么?小市值效应 (Size Effect) 会污染其他因子。例如小市值股票往往同时具有 # 高动量、高波动率、低流动性,如果不剔除市值效应,因子实际上只是 # 在选小市值股票,而不是真正的动量/波动/流动性信号。 # Why? Size effect contaminates other factors — small caps often have high momentum, # high vol, and low liquidity simultaneously. Without neutralization, # factors simply pick small caps rather than true alpha signals. # ============================================================================= print("\n[§2] 因子预处理 / Factor Preprocessing...") def winsorize_cross_section(factor_df: pd.DataFrame, n_std: float = 3.0) -> pd.DataFrame: """ 截面去极值 / Cross-sectional winsorization. 在每个日期(行),将超出 n_std 个标准差的值截断到边界。 At each date (row), clip values that are more than n_std std devs from the mean. 向量化实现,逐行 clip / Vectorized via per-row clip applied with apply. """ mu = factor_df.mean(axis=1) # 每日截面均值 / daily cross-sectional mean sig = factor_df.std(axis=1) # 每日截面标准差 / daily cross-sectional std lower = (mu - n_std * sig).values[:, np.newaxis] # 广播形状 / broadcast shape upper = (mu + n_std * sig).values[:, np.newaxis] clipped = np.clip(factor_df.values, lower, upper) return pd.DataFrame(clipped, index=factor_df.index, columns=factor_df.columns) def zscore_cross_section(factor_df: pd.DataFrame) -> pd.DataFrame: """ 截面 Z-score 标准化 / Cross-sectional Z-score normalization. 使每个时间截面的因子均值=0, 标准差=1。 Makes each time cross-section have mean=0 and std=1. """ mu = factor_df.mean(axis=1) # 每日截面均值 (N_DAYS,) sig = factor_df.std(axis=1) # 每日截面标准差 (N_DAYS,) # sub/div 沿行方向广播 / broadcast along rows return factor_df.sub(mu, axis=0).div(sig.replace(0, np.nan), axis=0) def neutralize_mktcap(factor_df: pd.DataFrame, log_mktcap: pd.DataFrame) -> pd.DataFrame: """ 市值中性化 / Market cap neutralization via cross-sectional OLS. 在每个截面,用 OLS 将因子对 log(市值) 回归,取残差作为中性化因子。 At each cross-section, regress factor on log(mktcap) via OLS; keep residuals. residual_i = factor_i - (intercept + slope × log_mktcap_i) """ # 对齐列顺序 / Align columns y_arr = factor_df.values.copy() # (N_DAYS, N_STOCKS) x_arr = log_mktcap.reindex(columns=factor_df.columns).values # (N_DAYS, N_STOCKS) for t in range(len(factor_df)): y = y_arr[t] x = x_arr[t] mask = ~(np.isnan(y) | np.isnan(x)) if mask.sum() < 10: continue y_c, x_c = y[mask], x[mask] # 手动 OLS / manual OLS (faster than scipy on small arrays) xm, ym = x_c.mean(), y_c.mean() denom = np.dot(x_c - xm, x_c - xm) if denom < 1e-12: continue slope = np.dot(x_c - xm, y_c - ym) / denom intercept = ym - slope * xm y_arr[t][mask] = y_c - (intercept + slope * x_c) # 残差 / residuals return pd.DataFrame(y_arr, index=factor_df.index, columns=factor_df.columns) # 对所有因子应用预处理流水线 / Apply preprocessing pipeline to all factors FACTORS_PROCESSED = {} for name, factor in FACTORS.items(): step1 = winsorize_cross_section(factor) # ① 去极值 step2 = zscore_cross_section(step1) # ② Z-score step3 = neutralize_mktcap(step2, log_mktcap_df) # ③ 市值中性化 FACTORS_PROCESSED[name] = step3 print(f" ✓ {name:5s}: 去极值 → Z-score → 市值中性化 完成") print(" 预处理完成 / Preprocessing complete.") # ============================================================================= # §3 IC 分析 Information Coefficient Analysis # ----------------------------------------------------------------------------- # IC (Information Coefficient 信息系数) 是衡量因子预测能力的黄金标准。 # IC is the gold standard for measuring a factor's predictive power. # # 定义 / Definition: # IC_t = Spearman_Rank_Correlation( factor_{t}, forward_return_{t+H} ) # # 其中: # factor_{t} = 第 t 日截面因子值(50只股票,每只一个数) # forward_return_{t+H} = 从第 t 日起持有 H 日的未来收益(未来数据!) # H = 21 交易日 ≈ 1 个月(最常用的持有期) # # 用 Spearman 秩相关而非 Pearson 线性相关的原因: # ① 对极端值鲁棒 (robust to outliers) # ② 衡量单调关系(不需要线性)(measures monotonic, not necessarily linear, relationship) # # 为什么用 Spearman rank correlation instead of Pearson? # ① Robust to outliers # ② Measures monotonic relationship (no linearity assumption needed) # # IC 评价标准 / IC benchmark: # |IC均值| > 0.05 → 因子有效 / factor is effective # |IC均值| > 0.10 → 因子强 / factor is strong # ICIR > 0.50 → 因子稳定 / factor is stable # IC>0 比率 > 55% → 方向一致性好 / directionally consistent # # ICIR (IC Information Ratio): # ICIR = IC均值 / IC标准差 = IC_mean / IC_std # 类似夏普比率,衡量"每单位波动贡献多少稳定的预测能力" # Similar to Sharpe ratio — measures how much stable predictive power per unit of volatility # ============================================================================= print("\n[§3] IC 分析 / IC Analysis...") FORWARD_PERIOD = 21 # 持有期 H = 21 日 ≈ 1 个月 / holding period ≈ 1 month def compute_ic_series( factor_df: pd.DataFrame, forward_ret_df: pd.DataFrame, holding_period: int = 21, ) -> pd.Series: """ 计算因子的 IC 时间序列。 Compute the IC time series for a factor. 在每个日期 t,计算截面 Spearman 相关系数: At each date t, compute cross-sectional Spearman rank correlation: IC_t = Spearmanr( factor[t, all_stocks], forward_return[t+H, all_stocks] ) Parameters: factor_df : 预处理后的因子值 (date × stock) forward_ret_df : 收益率 DataFrame (date × stock) holding_period : 持有期(交易日)/ holding period in days Returns: ic_series : IC 值时间序列 / IC time series indexed by date """ # shift(-H): 把 t+H 的收益值移到第 t 行,使得 fwd_returns.loc[t] = return from t to t+H # shift(-H) aligns so that fwd_returns.loc[t] = the return earned from t to t+H fwd_returns = forward_ret_df.shift(-holding_period) ic_values, ic_dates = [], [] for date in factor_df.index[:-holding_period]: # 最后 H 行无未来收益 / no future return f_row = factor_df.loc[date].dropna() r_row = fwd_returns.loc[date, f_row.index].dropna() common = f_row.index.intersection(r_row.index) if len(common) < 10: # 至少 10 只股票 / need at least 10 stocks continue ic, _ = stats.spearmanr(f_row[common].values, r_row[common].values) ic_values.append(ic) ic_dates.append(date) return pd.Series(ic_values, index=ic_dates, name="IC") # 逐因子计算 IC 序列 / Compute IC series for each factor ic_series_dict = {} for name, factor in FACTORS_PROCESSED.items(): ic_series_dict[name] = compute_ic_series(factor, simple_ret_df, FORWARD_PERIOD) # ── IC 汇总统计表 / IC Summary Statistics Table ─────────────────────────────── print(f"\n {'因子':8s} {'IC均值':>8s} {'IC标准差':>9s} {'ICIR':>8s} {'IC>0比率':>9s} 评级") print(f" " + "─" * 64) ic_stats = {} for name, ic_s in ic_series_dict.items(): ic_mean = ic_s.mean() ic_std = ic_s.std() icir = ic_mean / ic_std if ic_std > 0 else 0.0 positive_rate = (ic_s > 0).mean() ic_stats[name] = { "IC Mean" : ic_mean, "IC Std" : ic_std, "ICIR" : icir, "Positive Rate": positive_rate, } # 评级: ★★=强, ★=有效, ○=弱 / rating if abs(ic_mean) > 0.08: rating = "★★ 强 Strong" elif abs(ic_mean) > 0.04: rating = "★ 有效 Effective" else: rating = "○ 弱 Weak" print(f" {name:8s} {ic_mean:+8.4f} {ic_std:9.4f} {icir:+8.4f} {positive_rate:9.1%} {rating}") print(f" " + "─" * 64) print(f" 评级标准: |IC均值|>0.08 ★★强 |IC均值|>0.04 ★有效 ICIR>0.5 稳定") # ============================================================================= # §4 分层回测 Quantile Return Analysis # ----------------------------------------------------------------------------- # 分层回测 (Quantile Analysis / Bucket Test) 是验证因子有效性的经典直观方法。 # Quantile analysis is the classic, intuitive way to validate a factor. # # 操作步骤 / Procedure: # ① 在每个调仓日 T,按因子值将全部股票从小到大排序 # ② 等分为 Q 组(常用 Q=5 五分位) # ③ 每组各构建等权组合 (equal-weight portfolio),持有到下次调仓日 T+21 # ④ 记录每组收益,重复直到回测结束 # # ① On each rebalance date T, rank all stocks by factor value # ② Split into Q equal-sized buckets (usually Q=5 quintiles) # ③ Each bucket forms an equal-weight portfolio, held until next rebalance T+21 # ④ Record each bucket's return, repeat until end of backtest # # 期望结果 / Expected result: # Q5 累积收益 > Q4 > Q3 > Q2 > Q1 # 即因子值越高,未来收益越高 —— 这是因子有效的最直观证明 # Higher factor value → higher future return = most intuitive proof of efficacy # ============================================================================= print("\n[§4] 分层回测 / Quantile Return Analysis...") N_QUANTILES = 5 # 五分位 / quintiles REBAL_PERIOD = 21 # 月度调仓 / monthly rebalancing # 选用 ICIR 最高的因子做分层演示 / Use factor with highest |ICIR| for demo best_factor_name = max(ic_stats, key=lambda n: abs(ic_stats[n]["ICIR"])) best_factor = FACTORS_PROCESSED[best_factor_name] print(f" 最强因子 (Best Factor): {best_factor_name} ICIR={ic_stats[best_factor_name]['ICIR']:+.3f}") def compute_quantile_returns( factor_df: pd.DataFrame, price_df: pd.DataFrame, n_quantiles: int = 5, rebal_period: int = 21, ) -> pd.DataFrame: """ 分层回测:每月调仓,计算各五分位等权组合的期间收益。 Quintile backtest: monthly rebalancing, equal-weight portfolio returns per bucket. Returns: DataFrame, columns=[Q1..Q5], index=rebalance dates, values=equal-weight return for that holding period """ # 调仓日序列 / Rebalance date sequence rebal_dates = price_df.index[::rebal_period] bucket_returns = {f"Q{q}": [] for q in range(1, n_quantiles + 1)} period_dates = [] for i in range(len(rebal_dates) - 1): t0 = rebal_dates[i] # 建仓日 / entry date t1 = rebal_dates[i + 1] # 平仓日 / exit date f_today = factor_df.loc[t0].dropna() if len(f_today) < n_quantiles * 3: # 股票太少则跳过 / skip if too few stocks continue # pd.qcut 按因子值等分为 Q 组 / split into Q equal-sized bins labels = pd.qcut( f_today.rank(method='first'), # 先按秩排名(避免重复值问题) n_quantiles, labels=[f"Q{q}" for q in range(1, n_quantiles + 1)] ) # 每组的持有期收益(等权平均)/ equal-weight return per group p0 = price_df.loc[t0] p1 = price_df.loc[t1] hold_ret = p1 / p0 - 1.0 for q in range(1, n_quantiles + 1): stocks_in_q = labels[labels == f"Q{q}"].index bucket_returns[f"Q{q}"].append(hold_ret[stocks_in_q].mean()) period_dates.append(t0) return pd.DataFrame(bucket_returns, index=period_dates) quantile_rets = compute_quantile_returns(best_factor, prices_df, N_QUANTILES, REBAL_PERIOD) quantile_cumret = (1 + quantile_rets).cumprod() # 累积净值 / cumulative NAV # 多空价差 (Long-Short Spread): Q5(多头)- Q1(空头) ls_spread = quantile_rets["Q5"] - quantile_rets["Q1"] ls_cumret = (1 + ls_spread).cumprod() periods_per_year = 252.0 / REBAL_PERIOD print(f"\n {'分组':8s} {'年化收益':>10s} {'累积收益':>10s}") print(f" " + "─" * 40) for q in range(1, N_QUANTILES + 1): ret_s = quantile_rets[f"Q{q}"] ann_ret = (1 + ret_s.mean()) ** periods_per_year - 1 cum_ret = quantile_cumret[f"Q{q}"].iloc[-1] - 1 print(f" Q{q} {ann_ret:+10.2%} {cum_ret:+10.2%}") ann_ls = (1 + ls_spread.mean()) ** periods_per_year - 1 print(f" L-S(Q5-Q1) {ann_ls:+9.2%} {ls_cumret.iloc[-1]-1:+10.2%}") print(f" " + "─" * 40) # ============================================================================= # §5 因子合成 Factor Combination # ----------------------------------------------------------------------------- # 单个因子的预测能力有限(IC 通常只有 0.05~0.10),因子合成可以: # Individual factors have limited predictive power (IC ≈ 0.05~0.10). # Factor combination can: # # ① 提升综合 ICIR(多个信号互补,减少因子特定噪音) # Improve composite ICIR (signals complement each other, reduce factor noise) # ② 覆盖更多 Alpha 来源(动量+质量+流动性的综合) # Cover more alpha sources (momentum + quality + liquidity combined) # ③ 降低单因子的"失效期"风险(某个风格因子可能在某段时间失效) # Reduce regime risk (individual factors can fail in certain market regimes) # # 方法1: 等权合成 (Equal-Weight Composite) # composite_EQ = (z_1 + z_2 + … + z_n) / n # 优点: 简单、稳健,不依赖历史 IC 估计 缺点: 不区分因子强弱 # Pro: simple, robust Con: treats all factors equally regardless of efficacy # # 方法2: IC 加权合成 (IC-Weighted Composite) # w_i = max(IC_mean_i, 0) / Σ max(IC_mean_j, 0) (只给正 IC 因子分配权重) # composite_ICW = Σ w_i × z_i # 优点: 给更强的因子更高权重 缺点: 历史 IC 可能不稳定(过拟合风险) # Pro: higher weight to stronger factors Con: historical IC may be unstable (overfitting) # ============================================================================= print("\n[§5] 因子合成 / Factor Combination...") # 只合成正 IC 的因子(负 IC 因子方向混乱,不宜纳入) # Only combine factors with positive IC (negative IC factors are directionally inconsistent) positive_ic_factors = { name: f for name, f in FACTORS_PROCESSED.items() if ic_stats[name]["IC Mean"] > 0 } print(f" IC均值为正的因子: {list(positive_ic_factors.keys())}") # ── 等权合成 / Equal-Weight Composite ───────────────────────────────────────── composite_eq = sum( f.reindex(columns=symbols).fillna(0) for f in positive_ic_factors.values() ) / len(positive_ic_factors) # ── IC 加权合成 / IC-Weighted Composite ─────────────────────────────────────── ic_weights_raw = {n: max(ic_stats[n]["IC Mean"], 0) for n in positive_ic_factors} total_w = sum(ic_weights_raw.values()) ic_weights = {n: w / total_w for n, w in ic_weights_raw.items()} composite_icw = sum( ic_weights[n] * f.reindex(columns=symbols).fillna(0) for n, f in positive_ic_factors.items() ) # 计算合成因子的 IC / Evaluate composite factor IC ic_eq = compute_ic_series(composite_eq, simple_ret_df, FORWARD_PERIOD) ic_icw = compute_ic_series(composite_icw, simple_ret_df, FORWARD_PERIOD) print(f"\n IC 权重分配 / IC Weights:") for name, w in ic_weights.items(): print(f" {name}: {w:.3f}") print(f"\n {'合成方法':22s} {'IC均值':>8s} {'ICIR':>8s}") print(f" " + "─" * 45) print(f" {'等权 Equal-Weight':22s} {ic_eq.mean():+8.4f} {ic_eq.mean()/ic_eq.std():+8.4f}") print(f" {'IC加权 IC-Weighted':22s} {ic_icw.mean():+8.4f} {ic_icw.mean()/ic_icw.std():+8.4f}") # ============================================================================= # §6 多空组合 Long-Short Portfolio # ----------------------------------------------------------------------------- # 多空组合 (Long-Short Portfolio) 是因子策略的实际收益实现形式。 # A long-short portfolio is how a factor strategy generates actual P&L. # # 构建逻辑 / Construction logic: # 做多 (Long) = 买入因子分最高的 Top 20% 股票(预期跑赢,做多享受上涨) # 做空 (Short) = 卖空因子分最低的 Bottom 20% 股票(预期跑输,做空赚下跌) # 多空价差 = 多头组合收益 - 空头组合收益(无论市场涨跌都能赚钱) # # Long = Buy top 20% stocks by factor score (expected to outperform) # Short = Sell short bottom 20% stocks (expected to underperform) # L-S = Long return − Short return (market-neutral, profits in any market) # # 注意 A 股限制 / A-share restriction: # A 股融券做空受到严格限制,本演示仅作为量化研究的理论演示。 # 在港股、美股等市场中,做空非常普遍。 # Short selling in A-shares is heavily restricted. This demo is theoretical. # Short selling is common and practical in HK/US markets. # ============================================================================= print("\n[§6] 多空组合 / Long-Short Portfolio...") TOP_PCTILE = 0.20 # 前后 20% / top & bottom 20% def compute_long_short_portfolio( factor_df: pd.DataFrame, price_df: pd.DataFrame, top_pct: float = 0.20, rebal_period: int = 21, ) -> dict: """ 构建多空组合,计算每个持仓期的多头、空头、多空价差收益。 Build a long-short portfolio; compute per-period long, short, L-S returns. Returns: dict with 'long', 'short', 'long_short' keys, each a pd.Series. """ rebal_dates = price_df.index[::rebal_period] long_rets, short_rets, ls_rets, ret_dates = [], [], [], [] for i in range(len(rebal_dates) - 1): t0, t1 = rebal_dates[i], rebal_dates[i + 1] f_today = factor_df.loc[t0].dropna() if len(f_today) < 10: continue n_top = max(1, int(len(f_today) * top_pct)) long_stocks = f_today.nlargest(n_top).index # Top 20% → 买入 short_stocks = f_today.nsmallest(n_top).index # Bottom 20% → 卖空 p0 = price_df.loc[t0] p1 = price_df.loc[t1] ret = p1 / p0 - 1.0 lr = ret[long_stocks].mean() sr = ret[short_stocks].mean() long_rets.append(lr) short_rets.append(sr) ls_rets.append(lr - sr) # 多空价差 / long-short spread ret_dates.append(t0) return { "long" : pd.Series(long_rets, index=ret_dates, name="Long"), "short" : pd.Series(short_rets, index=ret_dates, name="Short"), "long_short" : pd.Series(ls_rets, index=ret_dates, name="L-S"), } def compute_max_drawdown(returns: pd.Series) -> float: """最大回撤 / Maximum Drawdown""" cum = (1 + returns).cumprod() peak = cum.cummax() return ((cum - peak) / peak).min() def compute_sharpe(returns: pd.Series, ann_factor: float) -> float: """夏普比率 / Annualized Sharpe Ratio""" return returns.mean() / returns.std() * np.sqrt(ann_factor) if returns.std() > 0 else 0.0 ls_result = compute_long_short_portfolio(composite_icw, prices_df, TOP_PCTILE, REBAL_PERIOD) ls_equity = {k: (1 + v).cumprod() for k, v in ls_result.items()} ann_factor = periods_per_year print(f"\n {'组合':14s} {'年化收益':>10s} {'夏普比率':>10s} {'最大回撤':>10s}") print(f" " + "─" * 54) for name, rets in ls_result.items(): ann_ret = (1 + rets.mean()) ** ann_factor - 1 sharpe = compute_sharpe(rets, ann_factor) mdd = compute_max_drawdown(rets) print(f" {name:14s} {ann_ret:+10.2%} {sharpe:+10.3f} {mdd:+10.2%}") print(f" " + "─" * 54) # ============================================================================= # §7 因子衰减分析 Factor Decay Analysis # ----------------------------------------------------------------------------- # 因子衰减 (Factor Decay) 描述了因子预测能力随持有期增加而减弱的规律: # Factor decay describes how predictive power weakens as holding period grows: # # 短持有期 H=1D: IC 最高(信号最新鲜,预测力最强) # H 增大: IC 逐渐下降(信号被市场消化,噪音积累) # H=63D(1季度): IC 接近 0(信号已基本失效) # # 实践意义 / Practical implications: # ┌──────────────────┬──────────────────────────────────────────────┐ # │ 衰减类型 │ 适合持仓频率 │ # ├──────────────────┼──────────────────────────────────────────────┤ # │ 快速衰减 (Fast) │ 日频或周频换仓(换手率高,成本高!) │ # │ 慢速衰减 (Slow) │ 月频或季频换仓(成本效率更高,适合实盘) │ # └──────────────────┴──────────────────────────────────────────────┘ # ============================================================================= print("\n[§7] 因子衰减分析 / Factor Decay Analysis...") DECAY_HORIZONS = [1, 5, 10, 21, 42, 63] # 持有期(交易日)/ holding periods (days) decay_results = {} for fname in [best_factor_name, "REV"]: # 对比 "慢衰减" vs "快衰减" 因子 horizon_ics = [] for h in DECAY_HORIZONS: ic_h = compute_ic_series(FACTORS_PROCESSED[fname], simple_ret_df, h) horizon_ics.append(ic_h.mean()) decay_results[fname] = horizon_ics ic_str = " ".join([f"H={h}D: {v:+.4f}" for h, v in zip(DECAY_HORIZONS, horizon_ics)]) print(f" {fname}: {ic_str}") # ============================================================================= # §8 可视化 Visualization # ============================================================================= print("\n[§8] 生成可视化图表 / Generating visualization...") pct_fmt = FuncFormatter(lambda x, _: f"{x:.0%}") fig = plt.figure(figsize=(20, 24)) fig.suptitle( "Alpha 因子研究演示 | Alpha Factor Research Demo\n" "50只合成股票 × 3年日线数据 | 50 Synthetic Stocks × 3-Year Daily Data", fontsize=14, fontweight='bold', y=0.99 ) gs = gridspec.GridSpec(4, 3, figure=fig, hspace=0.50, wspace=0.35) # ── Panel 1: 股票池价格(归一化)/ Universe Prices (Normalized) ────────────── ax1 = fig.add_subplot(gs[0, 0]) norm_prices = prices_df / prices_df.iloc[0] # 归一化 / normalize to 1 for sym in symbols[:12]: ax1.plot(dates, norm_prices[sym], alpha=0.35, linewidth=0.7, color='#1f77b4') eq_index = norm_prices.mean(axis=1) # 等权指数 / equal-weight index ax1.plot(dates, eq_index, color='black', linewidth=2, label='等权指数 EW Index', zorder=5) ax1.axhline(1, color='gray', linewidth=0.6, linestyle='--') ax1.set_title("股票池价格(归一化)\nUniverse Prices (Normalized)", fontsize=10) ax1.set_ylabel("归一化价格") ax1.legend(fontsize=8) ax1.tick_params(axis='x', rotation=25, labelsize=8) ax1.xaxis.set_major_locator(plt.MaxNLocator(4)) # ── Panel 2: IC 时间序列(5因子)/ IC Time Series ───────────────────────────── ax2 = fig.add_subplot(gs[0, 1:]) ic_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd'] for (name, ic_s), color in zip(ic_series_dict.items(), ic_colors): rolling_ic = ic_s.rolling(20, min_periods=5).mean() # 20日滚动均值平滑 ax2.plot(ic_s.index, rolling_ic, label=name, color=color, linewidth=1.4) ax2.axhline(0, color='black', linewidth=0.8, linestyle='--') ax2.axhline(0.05, color='green', linewidth=0.8, linestyle=':', alpha=0.7) ax2.axhline(-0.05, color='red', linewidth=0.8, linestyle=':', alpha=0.7) ax2.set_title("因子 IC 时间序列(20日滚动均值)\nFactor IC Time Series (20D Rolling Mean)", fontsize=10) ax2.set_ylabel("IC") ax2.legend(loc='upper right', ncol=5, fontsize=8) ax2.tick_params(axis='x', rotation=25, labelsize=8) ax2.xaxis.set_major_locator(plt.MaxNLocator(5)) # ── Panel 3: IC 均值 & 误差棒 / IC Mean Bar Chart ───────────────────────────── ax3 = fig.add_subplot(gs[1, 0]) names_list = list(ic_stats.keys()) ic_means = [ic_stats[n]["IC Mean"] for n in names_list] ic_stds = [ic_stats[n]["IC Std"] for n in names_list] bar_colors3 = ['#2ca02c' if v > 0 else '#d62728' for v in ic_means] ax3.bar(names_list, ic_means, yerr=ic_stds, capsize=5, color=bar_colors3, alpha=0.8, error_kw={'linewidth': 1.5, 'ecolor': 'black'}) ax3.axhline(0, color='black', linewidth=0.8) ax3.axhline(0.05, color='green', linewidth=1.2, linestyle='--', alpha=0.7) ax3.axhline(-0.05, color='red', linewidth=1.2, linestyle='--', alpha=0.7) ax3.set_title("因子 IC 均值(误差棒=±1σ)\nIC Mean ± 1σ", fontsize=10) ax3.set_ylabel("IC 均值") ax3.tick_params(axis='x', labelsize=9) # ── Panel 4: ICIR 柱状图 / ICIR Bar Chart ────────────────────────────────────── ax4 = fig.add_subplot(gs[1, 1]) icirs = [ic_stats[n]["ICIR"] for n in names_list] bar_colors4 = ['#2ca02c' if v > 0 else '#d62728' for v in icirs] ax4.bar(names_list, icirs, color=bar_colors4, alpha=0.8) ax4.axhline(0, color='black', linewidth=0.8) ax4.axhline(0.5, color='green', linewidth=1.2, linestyle='--', alpha=0.7, label='ICIR=0.5') ax4.axhline(-0.5, color='red', linewidth=1.2, linestyle='--', alpha=0.7) ax4.set_title("因子 ICIR\nFactor ICIR (Mean IC / Std IC)", fontsize=10) ax4.set_ylabel("ICIR") ax4.legend(fontsize=8) ax4.tick_params(axis='x', labelsize=9) # ── Panel 5: 因子相关矩阵 / Factor Correlation Matrix ───────────────────────── ax5 = fig.add_subplot(gs[1, 2]) # 用各因子的截面均值时序来衡量因子间相关性 / Use cross-sectional mean timeseries factor_ts = pd.DataFrame({n: f.mean(axis=1) for n, f in FACTORS_PROCESSED.items()}) corr_matrix = factor_ts.corr() im5 = ax5.imshow(corr_matrix.values, cmap='RdYlGn', vmin=-1, vmax=1, aspect='auto') ax5.set_xticks(range(len(names_list))) ax5.set_yticks(range(len(names_list))) ax5.set_xticklabels(names_list, fontsize=9) ax5.set_yticklabels(names_list, fontsize=9) for i in range(len(names_list)): for j in range(len(names_list)): ax5.text(j, i, f"{corr_matrix.values[i, j]:.2f}", ha='center', va='center', fontsize=8, color='black' if abs(corr_matrix.values[i, j]) < 0.6 else 'white') plt.colorbar(im5, ax=ax5) ax5.set_title("因子相关矩阵\nFactor Correlation Matrix", fontsize=10) # ── Panel 6: 分层回测累积净值 / Quantile Cumulative Returns ─────────────────── ax6 = fig.add_subplot(gs[2, 0]) q_colors = plt.cm.RdYlGn(np.linspace(0.05, 0.95, N_QUANTILES)) for q in range(1, N_QUANTILES + 1): ax6.plot(quantile_cumret.index, quantile_cumret[f"Q{q}"], label=f"Q{q}", color=q_colors[q - 1], linewidth=1.5) ax6.plot(ls_cumret.index, ls_cumret, label='L-S', color='black', linewidth=2, linestyle='--') ax6.axhline(1, color='gray', linewidth=0.6, linestyle=':') ax6.set_title(f"分层回测({best_factor_name})\nQuantile Returns ({best_factor_name})", fontsize=10) ax6.set_ylabel("累积净值") ax6.legend(ncol=3, fontsize=8) ax6.yaxis.set_major_formatter(FuncFormatter(lambda x, _: f"{x:.1f}x")) ax6.tick_params(axis='x', rotation=25, labelsize=8) ax6.xaxis.set_major_locator(plt.MaxNLocator(4)) # ── Panel 7: 分位年化收益柱状图 / Quintile Annualized Return Bar ─────────────── ax7 = fig.add_subplot(gs[2, 1]) ann_q_rets = [(1 + quantile_rets[f"Q{q}"].mean()) ** periods_per_year - 1 for q in range(1, N_QUANTILES + 1)] bar_colors7 = plt.cm.RdYlGn(np.linspace(0.05, 0.95, N_QUANTILES)) bars7 = ax7.bar([f"Q{q}" for q in range(1, N_QUANTILES + 1)], ann_q_rets, color=bar_colors7) ax7.axhline(0, color='black', linewidth=0.8) ax7.set_title("各分位组年化收益\nAnnualized Return per Quintile", fontsize=10) ax7.set_ylabel("年化收益 / Ann. Return") ax7.yaxis.set_major_formatter(pct_fmt) ax7.tick_params(axis='x', labelsize=9) for bar, val in zip(bars7, ann_q_rets): ax7.text(bar.get_x() + bar.get_width() / 2, val, f"{val:.1%}", ha='center', va='bottom' if val >= 0 else 'top', fontsize=8) # ── Panel 8: 多空组合净值曲线 / Long-Short Equity Curve ─────────────────────── ax8 = fig.add_subplot(gs[2, 2]) ax8.plot(ls_equity['long_short'].index, ls_equity['long_short'].values, color='purple', linewidth=2, label='多空 L-S') ax8.plot(ls_equity['long'].index, ls_equity['long'].values, color='#2ca02c', linewidth=1.5, linestyle='--', label='多头 Long', alpha=0.8) ax8.plot(ls_equity['short'].index, ls_equity['short'].values, color='#d62728', linewidth=1.5, linestyle='--', label='空头 Short', alpha=0.8) ax8.axhline(1, color='gray', linewidth=0.6, linestyle=':') ax8.set_title("多空组合净值曲线\nLong-Short Portfolio NAV", fontsize=10) ax8.set_ylabel("净值 / NAV") ax8.legend(fontsize=8) ax8.yaxis.set_major_formatter(FuncFormatter(lambda x, _: f"{x:.1f}x")) ax8.tick_params(axis='x', rotation=25, labelsize=8) ax8.xaxis.set_major_locator(plt.MaxNLocator(4)) # ── Panel 9: 因子衰减曲线 / Factor Decay Curve ──────────────────────────────── ax9 = fig.add_subplot(gs[3, :]) decay_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'] horizon_labels = [f"{h}D" for h in DECAY_HORIZONS] for (fname, decay_ics), color in zip(decay_results.items(), decay_colors): ax9.plot(range(len(DECAY_HORIZONS)), decay_ics, 'o-', label=fname, color=color, linewidth=2, markersize=7) for i, (h, v) in enumerate(zip(DECAY_HORIZONS, decay_ics)): ax9.annotate(f"{v:+.3f}", (i, v), textcoords="offset points", xytext=(0, 8), ha='center', fontsize=8, color=color) ax9.axhline(0, color='black', linewidth=0.8, linestyle='--') ax9.axhline(0.05, color='green', linewidth=0.8, linestyle=':', alpha=0.7, label='IC=0.05 参考线') ax9.set_title( "因子衰减曲线 Factor Decay Curve\n" "IC 均值随持有期延长的衰减 | How Mean IC Decays with Longer Holding Period", fontsize=10 ) ax9.set_xlabel("持有期 / Holding Period") ax9.set_ylabel("IC 均值 / Mean IC") ax9.set_xticks(range(len(DECAY_HORIZONS))) ax9.set_xticklabels(horizon_labels) ax9.legend(fontsize=9, ncol=4) ax9.grid(True, alpha=0.3) ax9.set_xlim(-0.3, len(DECAY_HORIZONS) - 0.7) plt.savefig("alpha_factor_demo.png", dpi=150, bbox_inches='tight') print(f" 图表已保存: alpha_factor_demo.png / Chart saved: alpha_factor_demo.png") print("\n" + "=" * 70) print(" 演示完成! Demo Complete!") print(" 输出: alpha_factor_demo.png") print("=" * 70)