Source code for finds.backtesting.riskpremium

"""Evaluate risk premiums from cross-sectional regressions

Copyright 2022, Terence Lim

MIT License
"""
import numpy as np
import scipy
from matplotlib import dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
from pandas import DataFrame, Series
from pandas.api import types
from sqlalchemy import Integer, String, Float, Column, Index
from typing import Dict, Any, Tuple, List
from finds.structured.stocks import Stocks
from finds.structured.benchmarks import Benchmarks
from finds.database.sql import SQL
from finds.utils.plots import plot_date
from finds.recipes.econs import least_squares
from .backtesting import compound_ret

[docs]class RiskPremium: """Compute and test of factor loading risk premiums Args: sql: Connection to user database to store results bench: Benchmark dataset of market and indexreturns rf: Name of riskfree rate from bench database end: Last date of price and returns data """ def __init__(self, sql: SQL, bench: Benchmarks, rf: str, end: int): """Initialize for testing factor loading premiums""" self.sql = sql self.bench = bench self.rf = bench.get_series([rf], 'ret', end=end)[rf] rf = bench.get_series([rf + "(mo)"], 'ret', end=end) # monthly riskfree self.monthly_ = {(bench.bd.begmo(d), bench.bd.endmo(d)): float(rf.loc[d]) for d in rf.index} self.end_ = end
[docs] def __call__(self, stocks: Stocks, loadings: Dict[int, DataFrame], weights: str = "", standardize: List[str] = []) -> Series: """Estimate factor risk premiums with cross-sectional FM regressions Args: stocks: Stocks' returns data loadings: dict keyed by rebalance date of loadings DataFrames standardize: List of columns to demean and rescale (eql-wtd std = 1) weights: List of weights for weighted least squares and demean Returns: Series of means and stderrs of FM cross-sectional regression """ pordates = sorted(list(loadings.keys())) self.holdrets = stocks.bd.date_tuples(pordates) out = [] for pordate, holdrets in zip(pordates[:-1], self.holdrets): if holdrets in self.monthly_: rf = self.monthly_[holdrets] else: rf = compound_ret(self.rf, holdrets) df = loadings[pordate] if not weights: w = np.ones(len(df)) else: w = df[weights].to_numpy() df = df.drop(columns=[weights]) x = df.columns for col in standardize: # weighted mean <- 0, equal wtd stdev <- 1 df[col] -= np.average(df[col], weights=w) df[col] /= np.std(df[col]) df = df.join(stocks.get_ret(*holdrets) - rf, how='left') p = least_squares(df.dropna(), x=x, y=['ret'], add_constant=False) p.name = holdrets[1] out.append(p) self.perf = pd.concat(out, axis=1).T self.results = {'mean': self.perf.mean(), 'stderr': self.perf.sem(), 'std': self.perf.std(), 'count': len(self.perf)} return DataFrame(self.results)
[docs] def fit(self, benchnames: List[str] = []) -> List[DataFrame]: """Compute risk premiums and benchmark correlations""" out = [] corr = self.perf.corr() out.append(DataFrame(self.results)\ .rename_axis('Factor Returns', axis=1)) out.append(corr.loc[self.perf.columns, self.perf.columns]\ .rename_axis('Correlation of Factor Returns:', axis=1)) if benchnames: df = self.bench.get_series(benchnames, 'ret') b = DataFrame({k: compound_ret(df[k], self.holdrets) for k in benchnames}, index=self.perf.index) out.append(DataFrame({'mean': b.mean(), 'stderr': b.sem(), 'std': b.std(), 'count': len(b)})\ .rename_axis('Benchmarks', axis=1)) corr = b.join(self.perf).corr() out.append(corr.loc[benchnames, benchnames]\ .rename_axis('Correlation of Benchmarks', axis=1)) out.append(corr.loc[self.perf.columns, benchnames]\ .rename_axis('Correlation of Factor Returns' ' and Benchmarks', axis=1)) return out
[docs] def plot(self, factors: List[str] = [], num: int = 1, fontsize: int = 8, figsize: Tuple[float, float] = (10, 5)): """Plot computed time series of factor returns""" if not factors: factors = list(self.perf.columns) if isinstance(factors, str): factors = [factors] b = dict() if isinstance(factors, dict): df = self.bench.get_series(factors.values(), 'ret') for k,v in factors.items(): b[k] = Series(compound_ret(df[v], self.holdrets), index=self.perf.index, name=v) factors = list(factors.keys()) nrow = int(np.ceil(np.sqrt(len(factors)))) ncol = int(np.ceil(len(factors) / nrow)) fig, axes = plt.subplots(nrow, ncol, clear=True, num=num, squeeze=False, figsize=figsize) for i, (ax, col) in enumerate(zip(np.ravel(axes), factors)): if len(b): plot_date(y1=self.perf[col].cumsum(), legend1=[col], y2=b[col].cumsum(), legend2=[b[col].name], cn=i*2, loc1='upper left', loc2='lower right', fontsize=fontsize, ax=ax) else: plot_date(y1=self.perf[col].cumsum(), ax=ax, cn=i, fontsize=fontsize, legend1=[col]) plt.tight_layout(pad=3)
if __name__=="__main__": from env.conf import credentials