Source code for finds.structured.benchmarks

"""Benchmarks dataset for index returns

Copyright 2022, Terence Lim

MIT License
"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from sqlalchemy import Table, Column, Index
from sqlalchemy import Integer, String, Float, SmallInteger, Boolean, BigInteger
from finds.database.sql import SQL
from finds.database.redisdb import RedisDB
from finds.structured.busday import BusDay
from finds.structured.stocks import Stocks
_VERBOSE = 0

[docs]class Benchmarks(Stocks): """Provide Structured Stocks interface to benchmark and index returns""" def __init__(self, sql: SQL, bd: BusDay, verbose: int = _VERBOSE): """Initialize connection to a benchmark index returns dataset""" tables = { 'daily': sql.Table('benchmarks', Column('permno', String(32), primary_key=True), Column('date', Integer, primary_key=True), Column('ret', Float)), 'ident': sql.Table('benchident', Column('permno', String(32), primary_key=True), Column('name', String(64)), Column('item', String(8)))} tables['monthly'] = tables['daily'] super().__init__(sql, bd, tables, identifier='permno', name='benchmarks', verbose=verbose)
[docs] def load_series(self, df: DataFrame, name: str, item: str = '', monthly: bool = False) -> DataFrame: """Loads a Series containing benchmark returns to sql Args: df : DataFrame with time-series in each column to load to sql name: Primary description to insert into ident table item: Secondary description to insert into ident table monthly: if True: convert index to business calendar endmo dates Returns: DataFrame of identifiers metadata for series successfully loaded Notes: - Each column of input data frame is loaded to sql table 'daily', with its series name as 'permno' field, values as 'ret' field, and series index as 'date' field. - 'idents' table in sql is updated with identifier and descriptive info """ self.sql.create_all() #self['daily'].create(checkfirst=True) permno = df.name df = df.rename('ret').to_frame() df['permno'] = permno self.sql.run(self['daily'].delete().where(self['daily'].c['permno'] == permno)) self.sql.load_dataframe(self['daily'].key, df=df, index_label='date') #self['ident'].create(checkfirst=True) self.sql.run(self['ident'].delete().where(self['ident'].c['permno'] == permno)) ident = DataFrame.from_dict({0: {'permno': permno, 'name': name, 'item':item}}, orient='index') self.sql.load_dataframe(self['ident'].key, df=ident) return ident
if __name__ == "__main__": from secret import credentials, paths from finds.readers import FFReader VERBOSE = 1 sql = SQL(**credentials['sql'], verbose=VERBOSE) user = SQL(**credentials['user'], verbose=VERBOSE) rdb = RedisDB(**credentials['redis']) bd = BusDay(sql) bench = Benchmarks(sql, bd) downloads = paths['data'] / 'CRSP' df = pd.read_csv(downloads / 'treasuries.txt.gz', header=0, sep='\t').set_index('caldt') for col in df.columns: print(bench.load_series(df[col].rename(col + '(mo)'), name=col, item='monthly')) df = pd.read_csv(downloads / 'sp500.txt.gz', header=0, sep='\t').set_index('caldt') for col in df.columns: print(bench.load_series(df[col], name=col, item='daily')) ''' # load benchmarks (mostly FamaFrench) for datasets, date_formatter in zip([FFReader.monthly, FFReader.daily], [bd.endmo, bd.offset]): for name, item, suffix in datasets: df = FFReader.fetch(name=name, item=item, suffix=suffix, date_formatter=date_formatter) for col in df.columns: print(bench.load_series(df[col], name=name, item=str(item))) print(DataFrame(**sql.run('select * from ' + bench['ident'].key))) print(bench.get_series('CMA', 'ret')) print(bench.get_series(['CMA', 'HML'], 'ret')) '''