Source code for finds.backtesting.backtesting

"""Utilities for backtesting

Copyright 2022, Terence Lim

MIT License
"""
from typing import List, Dict, Tuple, Any, Iterable
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from finds.structured.stocks import Stocks
from finds.structured.signals import Signals
from finds.recipes.filters import fractile_split
from .backtest import compound_ret

# construct spread portfolio weights
def univariate_sorts(stocks: Stocks,
                     label: str,
                     signals: Signals,
                     rebalbeg: int,
                     rebalend: int,
                     window: int = 0,
                     months: List[int] = [],
                     pct: Tuple[float, float] = (20., 80.),
                     leverage: float = 1.,
                     minobs: int = 100,
                     minprc: int = 0,
                     mincap: int = 0,
                     maxdecile: int = 10) -> Dict[int, Series]:
    """Monthly series of cap-weighted holdings of univariate spread portfolios

    Args:
      stocks: Stocks object for accessing stock returns and price data
      label: Name of signal to retrieve
      signals: Call to extract cross section of values for the signal
      rebalbeg: First rebalance date (YYYYMMDD)
      rebalend: Last holding date (YYYYMMDD)
      window: No. of months to look back for signal values; 0 is exact month
      months: Months (e.g. 6=June) to retrieve univ; empty for all months.
        The default list is only read, never mutated.
      pct: Percentile breakpoints to sort high, medium and low buckets
      leverage: Multiplier for leverage or shorting
      minobs: Minimum required sample size with non-missing signal values
      minprc: Minimum absolute price (exclusive)
      mincap: Minimum market cap (exclusive)
      maxdecile: Include largest stocks decile from 1 through maxdecile

    Returns:
      Dict, keyed by month-end date, of Series of stock weights. Each Series
      concatenates the 'H' leg (weights sum to +leverage) and the 'L' leg
      (weights sum to -leverage).

    Notes:
      - A rebalance month with fewer than minobs eligible stocks is skipped
        entirely: no holdings are recorded for that date.
      - In non-rebalance months the previous weights are drifted by the
        month's retx and each leg is rescaled to its target size.
    """
    rebaldates = stocks.bd.date_range(rebalbeg, rebalend, 'endmo')
    holdings = dict()
    for rebaldate in rebaldates:
        month = (rebaldate // 100) % 100

        # rebalance if scheduled, or if no holdings have been formed yet
        if not months or month in months or not holdings:

            # this month's universe
            df = stocks.get_universe(rebaldate)

            # earliest date from which signal values may be taken
            if window:
                start = stocks.bd.endmo(rebaldate, months=-abs(window))
            else:   # no window, so signal value as of exact rebaldate
                start = stocks.bd.offset(rebaldate, offsets=-1)
            signal = signals(label=label, date=rebaldate, start=start)
            df[label] = signal[label].reindex(df.index)

            # apply price, size and decile screens, then drop missing values
            eligible = (df['prc'].abs().gt(minprc)
                        & df['cap'].gt(mincap)
                        & df['decile'].le(maxdecile))
            df = df[eligible].dropna()
            if len(df) < minobs:   # skip if insufficient observations
                continue

            # breakpoints are computed from rows flagged by the 'nyse' column
            df['fractile'] = fractile_split(df[label],
                                            pct=pct,
                                            keys=df[label][df['nyse']])

            # long fractile 1 ('H') and short fractile 3 ('L'), cap-weighted
            weights = dict()
            for subname, fractile, weight in [('H', 1, leverage),
                                              ('L', 3, -leverage)]:
                cap = df.loc[df['fractile'] == fractile, 'cap']
                weights[subname] = weight * cap / cap.sum()

        else:
            # not a rebalance month: drift previous weights by realized retx
            retx = stocks.get_ret(stocks.bd.begmo(rebaldate),
                                  rebaldate,
                                  field='retx') + 1
            for port, old in weights.items():
                # stocks without a return this month keep their old weight
                new = old * retx.reindex(old.index, fill_value=1)
                # Rescale each leg to abs(leverage) * 2 / (number of legs);
                # without the abs(leverage) factor the leverage applied at
                # rebalance would be silently reset to 1 in these months.
                weights[port] = (abs(leverage) * new
                                 / (abs(new.sum()) * len(weights) / 2))

        holdings[rebaldate] = pd.concat(list(weights.values()), axis=0)
    return holdings
def bivariate_sorts(stocks: Stocks,
                    label: str,
                    signals: Signals,
                    rebalbeg: int,
                    rebalend: int,
                    window: int = 0,
                    pct: Tuple[float, float] = (30., 70.),
                    leverage: float = 1.,
                    months: List[int] = [],
                    minobs: int = 100,
                    minprc: float = 0.,
                    mincap: float = 0.,
                    maxdecile: int = 10) -> Tuple[Dict, Dict]:
    """Generate monthly time series of holdings by two-way sort procedure

    Args:
      stocks: Stocks object for accessing stock returns and price data
      label: Name of signal to retrieve
      signals: Call to extract cross section of values for the signal
      rebalbeg: First rebalance date (YYYYMMDD)
      rebalend: Last holding date (YYYYMMDD)
      window: No. of months to look back for signal values; 0 is exact month
      pct: Percentile breakpoints to sort high, medium and low buckets
      leverage: Multiplier for leverage or shorting
      months: Months (e.g. 6=June) to retrieve univ; empty for all months.
        The default list is only read, never mutated.
      minobs: Minimum required sample size with non-missing signal values
      minprc: Minimum absolute price (exclusive)
      mincap: Minimum market cap (exclusive)
      maxdecile: Include largest stocks decile from 1 through maxdecile

    Returns:
      2-tuple of (spread holdings, smb holdings); each is a dict, keyed by
      month-end date, of Series of stock weights

    Notes:
      - Independent sort on size (decile <= 5 forms the 'B' subportfolios,
        decile > 5 the 'S' subportfolios) and on the signal, with signal
        breakpoints computed from rows flagged by the 'nyse' column
      - Subportfolios of the intersections are value-weighted
      - Spread portfolios are equal-weighted of subportfolios
      - Portfolios are resorted in the months listed in `months` (every
        month when empty); other months' holdings are adjusted by monthly
        realized retx (i.e. dividends not reinvested)
      - A rebalance month with fewer than minobs eligible stocks is skipped
        entirely: no holdings are recorded for that date
    """
    # signed subportfolio weights at rebalance, before leverage
    spread_legs = {'HB': 0.5, 'HS': 0.5, 'LB': -0.5, 'LS': -0.5}
    # NOTE(review): the six 'smb' legs start at +/-0.5 here, but the drift
    # step below rescales each leg to 2/6 = 1/3. Left unchanged to preserve
    # existing results -- confirm which scale is intended.
    smb_legs = {'HB': -0.5, 'HS': 0.5, 'MB': -0.5,
                'MS': 0.5, 'LB': -0.5, 'LS': 0.5}

    rebaldates = stocks.bd.date_range(rebalbeg, rebalend, 'endmo')
    holdings = {label: dict(), 'smb': dict()}   # to return two sets of holdings
    for rebaldate in rebaldates:
        month = (rebaldate // 100) % 100

        # rebalance if scheduled, or if no holdings have been formed yet
        if not months or month in months or not holdings[label]:

            # this month's universe of stocks
            df = stocks.get_universe(rebaldate)

            # earliest date from which signal values may be taken
            if window:
                start = stocks.bd.endmo(rebaldate, months=-abs(window))
            else:   # no window, so signal value as of exact rebaldate
                start = stocks.bd.offset(rebaldate, offsets=-1)
            signal = signals(label=label, date=rebaldate, start=start)
            df[label] = signal[label].reindex(df.index)

            # apply price, size and decile screens, then drop missing values
            eligible = (df['prc'].abs().gt(minprc)
                        & df['cap'].gt(mincap)
                        & df['decile'].le(maxdecile))
            df = df[eligible].dropna()
            if len(df) < minobs:   # skip if insufficient observations
                continue

            # split signal into desired fractiles
            df['fractile'] = fractile_split(df[label],
                                            pct=pct,
                                            keys=df[label][df['nyse']],
                                            ascending=False)

            # intersect signal fractile with size group
            big = df['decile'] <= 5
            small = df['decile'] > 5
            subs = {'HB': (df['fractile'] == 1) & big,
                    'MB': (df['fractile'] == 2) & big,
                    'LB': (df['fractile'] == 3) & big,
                    'HS': (df['fractile'] == 1) & small,
                    'MS': (df['fractile'] == 2) & small,
                    'LS': (df['fractile'] == 3) & small}

            # value-weight stocks within each subportfolio
            weights = {label: dict(), 'smb': dict()}
            for port, legs in [(label, spread_legs), ('smb', smb_legs)]:
                for subname, weight in legs.items():
                    cap = df.loc[subs[subname], 'cap']
                    weights[port][subname] = (leverage * weight * cap
                                              / cap.sum())

        else:
            # not a rebalance month: drift previous weights by realized retx
            retx = 1 + stocks.get_ret(stocks.bd.begmo(rebaldate),
                                      rebaldate,
                                      field='retx')
            for port, subports in weights.items():
                for subport, old in subports.items():
                    # stocks without a return this month keep old weight
                    new = old * retx.reindex(old.index, fill_value=1)
                    # Rescale each leg to abs(leverage) * 2 / (number of
                    # legs); without the abs(leverage) factor the leverage
                    # applied at rebalance would be reset to 1 here.
                    weights[port][subport] = (
                        abs(leverage) * new
                        / (abs(np.sum(new)) * len(subports) / 2)
                    )

        # combine this month's subportfolios
        for h in holdings:
            holdings[h][rebaldate] = pd.concat(list(weights[h].values()))
    return holdings[label], holdings['smb']