"""Retrieves FOMC meeting minutes
MIT License
Copyright 2022-2023 Terence Lim
"""
import re
import time
from typing import Dict
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pandas import DataFrame, Series
from tqdm import tqdm
class FOMCReader:
    """Class to retrieve FOMC minutes

    On construction, the Federal Reserve website is scanned for links to
    meeting minutes. The instance then behaves like a read-only mapping:
    iterating yields meeting dates, and indexing by a date downloads and
    returns the text of that meeting's minutes.

    Attributes:
        dates: dict mapping each meeting date (int parsed from the last
            eight digits of the link, i.e. YYYYMMDD) to the link of its
            minutes page
    """

    _url = 'https://www.federalreserve.gov/'  # root url
    _timeout = 30.0  # default seconds to wait for each HTTP response

    # hrefs of minutes pages; a raw string so that \S and \. reach the regex
    # engine intact, and the dot before "htm" is matched literally
    _minutes_href = re.compile(r'\S+minutes\S+\.htm$', re.I)

    def __init__(self, url: str = _url, delay: float = 0.1,
                 timeout: float = _timeout):
        """Initializer retrieves dates available from website

        Args:
            url: root url of Federal Reserve website
            delay: sleep between requests
            timeout: seconds to wait for each HTTP response before giving up

        Raises:
            ValueError: if no links to minutes are found on the main page
        """
        self._timeout = timeout

        # latest five years' minutes can be found from a main page
        new_url = urljoin(url, 'monetarypolicy/fomccalendars.htm')
        links = self._minutes_links(url, new_url)
        if not links:
            # without any link the first year to back-fill is unknown
            raise ValueError(f"no links to FOMC minutes found at {new_url}")

        # earlier years' minutes are linked from annual pages, one per year,
        # from 1993 up to (not including) the earliest year already found
        first_year = min(self._date_of(link) for link in links) // 10000
        for year in tqdm(range(1993, first_year)):
            old_url = urljoin(url, f'monetarypolicy/fomchistorical{year}.htm')
            links += self._minutes_links(url, old_url)
            time.sleep(delay)

        # a later link replaces an earlier one with the same date
        self.dates = {self._date_of(link): link for link in links}

    @staticmethod
    def _date_of(s: str) -> int:
        """parse date from link string, as int of its last eight digits"""
        return int(re.sub(r'\D', '', s)[-8:])

    def _soup(self, link: str):
        """fetch a page and return its parsed html"""
        response = requests.get(link, timeout=self._timeout)
        return BeautifulSoup(markup=response.content, features='html.parser')

    def _minutes_links(self, url: str, page: str) -> list:
        """return absolute links to all minutes pages referenced in a page"""
        hrefs = self._soup(page).find_all(name='a', href=self._minutes_href)
        # urljoin resolves both site-relative and already-absolute hrefs
        return [urljoin(url, m.attrs['href']) for m in hrefs]

    def __len__(self):
        """Number of meeting dates for which minutes are available"""
        return len(self.dates)

    def __iter__(self):
        """Iterate over available meeting dates"""
        return iter(self.dates)

    def __getitem__(self, date: int) -> str:
        """Retrieve FOMC minutes text from Fed website

        Args:
            date: meeting date

        Returns:
            text of minutes for meeting date

        Raises:
            KeyError: if date is not among the available meeting dates
        """
        url = self.dates[date]
        raw = self._soup(url)
        minutes = "\n\n".join(p.get_text().strip()
                              for p in raw.find_all('p'))
        # blank out carriage returns and tabs, then collapse runs of newlines
        return re.sub('\n+', '\n', re.sub('[\r\t]', ' ', minutes))
if __name__ == "__main__":
    # Script entry: fetch minutes not yet stored, hand-edit each one in a
    # text editor, then save the edited texts to a file and to the database.
    import os
    from pprint import pprint

    from finds.database import MongoDB
    from finds.unstructured import Unstructured
    from finds.utils import Store
    from secret import credentials, paths

    VERBOSE = 1

    mongodb = MongoDB(**credentials['mongodb'], verbose=VERBOSE)
    print('uptime:', mongodb.client.admin.command("serverStatus")['uptime'])
    fomc = Unstructured(mongodb, 'FOMC')

    # retrieve keys (dates) of minutes previously retrieved and stored locally
    dates = set(fomc['minutes'].distinct('date'))

    # fetch new minutes from FOMC site
    minutes = FOMCReader()
    docs = {d: minutes[d] for d in minutes if d not in dates}
    print("New minutes:")
    pprint([f"{k}: {len(v)} chars" for k, v in docs.items()])

    def edit(text: str) -> str:
        """helper to spawn editor and write/edit/read to tempfile

        Args:
            text: initial text to load into the editor

        Returns:
            contents of the temp file after the editor exits
        """
        import subprocess
        import tempfile

        with tempfile.NamedTemporaryFile(suffix=".tmp") as f:  # save temp file
            f.write(text.encode("utf-8"))
            f.flush()
            # NOTE(review): "-nw" is an emacs flag (run in the terminal);
            # confirm it is accepted if $EDITOR names a different editor
            subprocess.call([os.environ.get('EDITOR', 'emacs'), "-nw", f.name])
            # re-open by name rather than re-reading the original handle,
            # since an editor may save by replacing the file on disk
            with open(f.name, "rb") as edited:
                return edited.read().decode("utf-8")  # keep edited text

    if docs:
        # to edit out head and tail of each document
        results = [{'date': date, 'text': edit(initial_message)}
                   for date, initial_message in docs.items()]
        results = sorted(results, key=lambda x: x['date'])  # sort by date

        # save edited docs, in a file named after the latest new date
        Store(paths['scratch'] / 'fomc', ext='gz').dump(results,
                                                        f"{max(docs)}.json")
        for doc in results:  # store docs for new dates
            fomc.insert('minutes', doc, keys=['date'])