# Development
from pprint import pprint

import csv
import sys
import calendar
from datetime import datetime

from pytz import timezone
import pytz


class StripStats:
    """Base class that collects rows of data for a URL and appends them to a CSV file.

    Subclasses override ``strip_data`` to produce the rows; the constructor
    calls it once and stores the result in ``self.rows``.
    """

    # Maps abbreviated month names (as given by ``calendar.month_abbr``,
    # e.g. 'Jan') to their 1-based month number. Index 0 of
    # ``calendar.month_abbr`` is an empty string and is skipped.
    month_abbr_to_int = {name: num for num, name in enumerate(calendar.month_abbr) if num}

    def __init__(self, url, output_file='out.csv', fields=None):
        """Collect the rows for ``url`` and decide which CSV columns to write.

        Args:
            url: Passed straight through to ``strip_data``.
            output_file: Path of the CSV file that ``write_data`` appends to.
            fields: Column names to write. When omitted (or empty), the keys
                of the first row are used; if there are no rows either, the
                field list is left empty.
        """
        self.output_file = output_file
        self.rows = self.strip_data(url)
        if fields:
            self.fields = fields
        elif self.rows:
            # ``keys`` must be called; storing the bound method would make
            # csv.DictWriter fail when it iterates over the field names.
            self.fields = list(self.rows[0].keys())
        else:
            # No rows to infer columns from; write_data() will refuse to run.
            self.fields = []

    #
    # Static methods
    #
    @staticmethod
    def date_object_utc_from_string(date_string, timezone_string):
        """Parse a local date/time string and return it as an aware UTC datetime.

        Args:
            date_string: Text in the format ``'%b %d %Y %I:%M%p'``,
                e.g. ``'Jan 05 2015 07:30PM'``.
            timezone_string: A pytz timezone name that ``date_string`` is
                expressed in, e.g. ``'US/Eastern'``.

        Returns:
            A timezone-aware ``datetime`` converted to UTC.
        """
        datetime_obj_naive = datetime.strptime(date_string, '%b %d %Y %I:%M%p')
        # localize() (rather than replace(tzinfo=...)) lets pytz pick the
        # correct UTC offset for that particular date.
        datetime_obj_local = timezone(timezone_string).localize(datetime_obj_naive)
        return datetime_obj_local.astimezone(pytz.UTC)

    @staticmethod
    def month_int_from_string(month_string):
        """Return the month number (1-12) for an abbreviated month name.

        Args:
            month_string: An abbreviation exactly as it appears in
                ``calendar.month_abbr`` (e.g. ``'Jan'``).

        Raises:
            KeyError: If ``month_string`` is not a known abbreviation.
        """
        return StripStats.month_abbr_to_int[month_string]

    #
    # Instance methods
    #

    # Actions
    def write_data(self):
        """Append ``self.rows`` to the output CSV file.

        A header row is written only when the file is missing or contains
        nothing but whitespace; in that case the file is truncated first so
        the header starts on the first line.

        Raises:
            ValueError: If ``self.rows`` is empty or ``None``.
        """
        if not self.rows:
            raise ValueError("'self.rows' is empty")

        is_empty = self.is_output_file_empty()
        file_handle = self._open_output(truncate=is_empty)
        try:
            writer = csv.DictWriter(
                file_handle,
                self.fields,
                extrasaction='raise',
                dialect='excel')
            if is_empty:
                writer.writeheader()
            for row in self.rows:
                writer.writerow(row)
        finally:
            file_handle.close()

    # Utility
    def _open_output(self, truncate):
        """Open the output file for CSV writing, truncating it if requested."""
        mode = 'w' if truncate else 'a'
        if sys.version_info[0] >= 3:
            # Python 3's csv module needs a text-mode file opened with
            # newline='' so that it controls line endings itself.
            return open(self.output_file, mode, newline='')
        # Python 2's csv module expects a binary-mode file.
        return open(self.output_file, mode + 'b')

    def is_output_file_empty(self):
        """Return True if the output file is missing, unreadable or blank.

        "Blank" means every line consists solely of whitespace.
        """
        try:
            # Binary mode avoids decode errors; only whitespace matters here.
            with open(self.output_file, 'rb') as file_handle:
                return not any(line.strip() for line in file_handle)
        except (IOError, OSError):
            # A file that cannot be opened is treated as empty.
            return True

    # Abstract methods
    def strip_data(self, url):
        """Override this abstract method in a derived class to collect the data.

        Returns an array of dictionaries where each dictionary's keys
        correspond to a particular matchup's data. The base implementation
        returns None.
        """
        return None