Source code for finds.utils.finder

"""Finder class to lookup reference info across multiple datasets

Copyright 2022, Terence Lim

MIT License
"""
import pandas as pd
from pandas import DataFrame, Series
from finds.database.sql import SQL
_VERBOSE = 1

[docs]class Finder: """Looks-up reference info across multiple datasets by various identifiers""" def __init__(self, sql: SQL, identifier: str = '', table: str = ''): """Initialize lookup method with optional identifier type and table Args: sql: SQL connection instance identifier: Type of input identifier for this Finder instance table: Physical name of table to query Examples: >>> find = Find(sql, identifier='comnam', table='names') """ self.sql = sql self.identifier = identifier self.table = table
[docs] def __call__(self, label: str = '', identifier: str = '', table: str = '', **kwargs) -> DataFrame: """Lookup an identifier Args: label: Input label to lookup identifier: Identifier type of input label table: Physical name of table to query kwargs: Alternate method to specify identifier=label Notes: Guesses identifier type and table if not specified or initialized Examples: >>> find('ALPHABET', 'comnam') >>> find('ALPHABET', 'conm') >>> find('ALPHABET', 'cname') >>> find(18144) >>> find(328795, 'gvkey') >>> find('0011', 'ticker', 'idsum') >>> find('aapl') >>> find('03783310') >>> find('03783310','cusip', 'links') >>> find('03783310','cusip', 'idsum') >>> find('45483', 'permco', 'names') """ if len(kwargs): for k, v in kwargs.items(): identifier = k label = v label = str(label).upper() assert label if not identifier: # guess identifier if not specified if 5 <= len(label) <= 6 and label.isnumeric(): identifier = 'permno' label = int(label) elif label.isnumeric(): identifier = 'gvkey' label = int(label) elif len(label) in [8, 9] and any(c.isdigit() for c in label): identifier = 'ncusip' label = label[:8] elif len(label) < 6: identifier = 'tsymbol' else: identifier = 'comnam' if not table: # guess table if not specified if identifier in ['permno', 'ncusip', 'tsymbol', 'comnam', 'permco']: table = 'names' elif identifier in ['gvkey', 'conm', 'cik']: table = 'links' else: table = 'idsum' like = '=' if identifier in ['comnam', 'conm', 'cname']: label = '%' + label.upper() + '%' like = 'LIKE' # for identifiers of str type, match with wildcard elif identifier in ['permno', 'gvkey', 'cik', 'permco']: label = int(label) elif identifier in ['ncusip', 'cusip']: label = label[:8] q = "SELECT * FROM {table} WHERE {identifier} {like} '{label}'".format( table=table, identifier=identifier, like=like, label=label) result = self.sql.run(q) return DataFrame(**result) if result is not None else None
if __name__ == "__main__": from secret import credentials VERBOSE = 1 sql = SQL(**credentials['sql'], verbose=VERBOSE) find = Finder(sql) print(find('GOOG')) print(find('META', identifier='tsymbol')) print(find(comnam='FACEBOOK'))