# Source code for finds.backtesting.backtesting

"""Utilities for backtesting

Copyright 2022, Terence Lim

MIT License
"""
from typing import List, Dict, Tuple, Any, Iterable
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from finds.structured.stocks import Stocks
from finds.structured.signals import Signals
from finds.recipes.filters import fractile_split
from .backtest import compound_ret

# construct spread portfolio weights
def univariate_sorts(stocks: Stocks,
                     label: str,
                     signals: Signals,
                     rebalbeg: int,
                     rebalend: int,
                     window: int = 0,
                     months: List[int] = [],
                     pct: Tuple[float, float] = (20., 80.),
                     leverage: float = 1.,
                     minobs: int = 100,
                     minprc: int = 0,
                     mincap: int = 0,
                     maxdecile: int = 10) -> Dict[int, Series]:
    """Monthly series of cap-weighted holdings of univariate spread portfolios

    Args:
        stocks: Stocks object for accessing stock returns and price data
        label: Name of signal to retrieve
        signals: Call to extract cross section of values for the signal
        rebalbeg: First rebalance date (YYYYMMDD)
        rebalend: Last holding date (YYYYMMDD)
        window: No. of months to look back for signal values; 0 is exact month
        months: Months (e.g. 6=June) to retrieve univ; empty for all months.
            The default list is only read, never mutated.
        pct: Percentile breakpoints to sort high, medium and low buckets
        leverage: Multiplier for leverage or shorting
        minobs: Minimum required sample size with non-missing signal values
        minprc: Minimum absolute price (exclusive)
        mincap: Minimum market cap (exclusive)
        maxdecile: Include largest stocks decile from 1 through maxdecile

    Returns:
        Dict keyed by month-end date, whose values are the concatenated
        weights of the 'H' leg (summing to `leverage`) and the 'L' leg
        (summing to `-leverage`).  A rebalance date with fewer than `minobs`
        usable stocks is skipped and gets no entry.

    Notes:

    - Fractile breakpoints are computed from the signal values of the rows
      flagged in the universe's 'nyse' column
    - Fractile 1 is labelled 'H' and fractile 3 is labelled 'L'.
      NOTE(review): unlike bivariate_sorts, `ascending` is not passed to
      fractile_split here -- confirm its default ordering is the intended one
    - In non-rebalance months the previous weights are drifted by that
      month's realized retx and each leg is rescaled to its target exposure
    """
    rebaldates = stocks.bd.date_range(rebalbeg, rebalend, 'endmo')
    holdings = dict()
    for rebaldate in rebaldates:

        # rebalance if every month qualifies, if this calendar month is
        # listed, or if no holdings have been formed yet
        if not months or (rebaldate//100)%100 in months or not holdings:

            # rebalance: get this month's universe
            df = stocks.get_universe(rebaldate)

            # get signal values within lagged window
            if window:  # lookback window to get signal values
                start = stocks.bd.endmo(rebaldate, months=-abs(window))
            else:       # no window, so signal value as of exact rebaldate
                start = stocks.bd.offset(rebaldate, offsets=-1)
            signal = signals(label=label, date=rebaldate, start=start)
            df[label] = signal[label].reindex(df.index)

            # apply price/cap/size-decile screens; dropna() then removes any
            # row with a missing value in any column, including the signal
            df = df[df['prc'].abs().gt(minprc)
                    & df['cap'].gt(mincap)
                    & df['decile'].le(maxdecile)].dropna()
            if len(df) < minobs:  # skip if insufficient observations
                continue

            # split signal into desired fractiles
            df['fractile'] = fractile_split(df[label],
                                            pct=pct,
                                            keys=df[label][df['nyse']])
            subs = {'H': (df['fractile'] == 1),
                    'L': (df['fractile'] == 3)}

            # cap-weight each leg: long 'H' and short 'L', scaled by leverage
            weights = dict()
            for subname, weight in zip(['H', 'L'],
                                       [leverage, -leverage]):
                cap = df.loc[subs[subname], 'cap']
                weights[subname] = weight * cap / cap.sum()
        else:   # if not rebalance, then adjust previous stock weights by retx
            retx = stocks.get_ret(stocks.bd.begmo(rebaldate),
                                  rebaldate,
                                  field='retx') + 1
            for port, old in weights.items():
                # stocks without a return this month keep their weight
                new = old * retx.reindex(old.index, fill_value=1)
                # rescale the leg back to its target gross exposure; the
                # abs(leverage) factor keeps drift months on the same scale
                # as rebalance months (it is a no-op when leverage is 1)
                weights[port] = (abs(leverage) * new
                                 / (abs(new.sum()) * len(weights) / 2))
        holdings[rebaldate] = pd.concat(list(weights.values()), axis=0)
    return holdings


    
def bivariate_sorts(stocks: Stocks,
                    label: str,
                    signals: Signals,
                    rebalbeg: int,
                    rebalend: int,
                    window: int = 0,
                    pct: Tuple[float, float] = (30., 70.),
                    leverage: float = 1.,
                    months: List[int] = [],
                    minobs: int = 100,
                    minprc: float = 0.,
                    mincap: float = 0.,
                    maxdecile: int = 10) -> Tuple[Dict, Dict]:
    """Generate monthly time series of holdings by two-way sort procedure

    Args:
        stocks: Stocks object for accessing stock returns and price data
        label: Name of signal to retrieve
        signals: Call to extract cross section of values for the signal
        rebalbeg: First rebalance date (YYYYMMDD)
        rebalend: Last holding date (YYYYMMDD)
        window: No. of months to look back for signal values; 0 is exact month
        pct: Percentile breakpoints to sort high, medium and low buckets
        leverage: Multiplier for leverage or shorting
        months: Months (e.g. 6=June) to retrieve univ; empty for all months.
            The default list is only read, never mutated.
        minobs: Minimum required sample size with non-missing signal values
        minprc: Minimum absolute price (exclusive)
        mincap: Minimum market cap (exclusive)
        maxdecile: Include largest stocks decile from 1 through maxdecile

    Returns:
        2-tuple of spread holdings and smb holdings; each is a dict keyed by
        month-end date whose values are the concatenated subportfolio
        weights.  A rebalance date with fewer than `minobs` usable stocks is
        skipped and gets no entry.

    Notes:

    - Signal fractiles use breakpoints from the rows flagged in the
      universe's 'nyse' column; size is split at decile 5 (deciles 1-5 are
      the 'B' subportfolios, deciles above 5 the 'S' subportfolios)
    - Subportfolios of the intersections are value-weighted
    - Spread portfolios are equal-weighted of subportfolios: the signal
      spread is 1/2 (HB + HS) - 1/2 (LB + LS), and smb is
      1/3 (HS + MS + LS) - 1/3 (HB + MB + LB), each times `leverage`
    - Portfolios are resorted in the months listed in `months` (e.g. [6] for
      every June); other months' holdings are adjusted by monthly realized
      retx (i.e. dividends not reinvested)
    """
    rebaldates = stocks.bd.date_range(rebalbeg, rebalend, 'endmo')
    holdings = {label: dict(), 'smb': dict()}  # to return two sets of holdings

    # sign of each subportfolio within each spread portfolio; every
    # subportfolio gets an equal share 1/(n/2) of its side of the spread
    legs = {
        label: {'HB': 1, 'HS': 1, 'LB': -1, 'LS': -1},
        'smb': {'HB': -1, 'HS': 1, 'MB': -1, 'MS': 1, 'LB': -1, 'LS': 1},
    }

    for rebaldate in rebaldates:

        # rebalance if every month qualifies, if this calendar month is
        # listed, or if no holdings have been formed yet
        if not months or (rebaldate//100)%100 in months or not holdings[label]:

            # rebalance: get this month's universe of stocks with valid data
            df = stocks.get_universe(rebaldate)

            # get signal values within lagged window
            if window:
                start = stocks.bd.endmo(rebaldate, months=-abs(window))
            else:
                start = stocks.bd.offset(rebaldate, offsets=-1)
            signal = signals(label=label,
                             date=rebaldate,
                             start=start)
            df[label] = signal[label].reindex(df.index)

            # apply price/cap/size-decile screens; dropna() then removes any
            # row with a missing value in any column, including the signal
            df = df[df['prc'].abs().gt(minprc)
                    & df['cap'].gt(mincap)
                    & df['decile'].le(maxdecile)].dropna()
            if len(df) < minobs:  # skip if insufficient observations
                continue

            # split signal into desired fractiles, and assign to subportfolios
            df['fractile'] = fractile_split(df[label],
                                            pct=pct,
                                            keys=df[label][df['nyse']],
                                            ascending=False)
            subs = {'HB' : (df['fractile'] == 1) & (df['decile'] <= 5),
                    'MB' : (df['fractile'] == 2) & (df['decile'] <= 5),
                    'LB' : (df['fractile'] == 3) & (df['decile'] <= 5),
                    'HS' : (df['fractile'] == 1) & (df['decile'] > 5),
                    'MS' : (df['fractile'] == 2) & (df['decile'] > 5),
                    'LS' : (df['fractile'] == 3) & (df['decile'] > 5)}

            # value-weight within each subportfolio, equal-weight across the
            # subportfolios on each side.  Dividing by len(signs)/2 gives
            # 1/2 for the four-way signal spread and 1/3 for six-way smb,
            # the same scale the drift branch below renormalizes to.
            weights = dict()
            for port, signs in legs.items():
                weights[port] = dict()
                for subname, sign in signs.items():
                    cap = df.loc[subs[subname], 'cap']
                    weights[port][subname] = (
                        leverage * sign * cap / (cap.sum() * len(signs) / 2)
                    )

        else:  # else not a rebalance month, so adjust holdings by retx
            retx = 1 + stocks.get_ret(stocks.bd.begmo(rebaldate),
                                      rebaldate,
                                      field='retx')
            for port, subports in weights.items():
                for subport, old in subports.items():
                    # stocks without a return this month keep their weight
                    new = old * retx.reindex(old.index, fill_value=1)
                    # rescale back to the subportfolio's target exposure;
                    # abs(leverage) keeps drift months on the same scale as
                    # rebalance months (a no-op when leverage is 1)
                    weights[port][subport] = (
                        abs(leverage) * new
                        / (abs(new.sum()) * len(subports) / 2)
                    )

        # combine this month's subportfolios
        for h in holdings:
            holdings[h][rebaldate] = pd.concat(list(weights[h].values()))
    return holdings[label], holdings['smb']