#!/usr/local/bin/python
# Usage sketch for a StripStats wrapper (StripStats is not defined in this
# file; 'url' below is a placeholder for the page to scrape):
#
#   import StripStats
#   strip_stats = StripStats(
#       'out.csv',
#       url,
#       (
#           'matchup_id',
#           'game_id',
#           'datetime',
#           'network',
#           'away_team',
#           'away_team_points',
#           'home_team',
#           'home_team_points',
#           'spread',
#           'confidence',
#       )
#   )
# Development
#import sys
#from pprint import pprint
import csv
import re
from datetime import datetime
from pytz import timezone
import httplib2
try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    from bs4 import BeautifulSoup
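# Note: this script appears to target Python 2 (the csv output file is opened
# in 'wb' mode below) and requires httplib2, pytz, and BeautifulSoup 3 or bs4.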
GAMES_PER_SEASON = 17
YEAR = 2016
FIRST_GAME_ID = 69
URL_TEMPLATE = 'http://games.espn.com/nfl-pigskin-pickem/2016/en/entry?entryID=171981&period={}'
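# Assumption: the hardcoded entryID and season in URL_TEMPLATE identify a
# single 2016 Pigskin Pick'em entry, and the 'period' query parameter selects
# the weekly scoring period, starting at FIRST_GAME_ID for week 1.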
FIELDS = (
    'matchup_id',
    'game_id',
    'datetime',
    'network',
    'away_team',
    'away_team_points',
    'home_team',
    'home_team_points',
    'spread',
    'confidence',
)
# Path of the output csv file (opened for writing below)
CSV_FILE = 'espn_pickem.csv'
def create_or_truncate_file(path):
    # Create the file if it does not exist, otherwise truncate it to zero length
    open(path, 'w').close()
def is_file_empty(path):
    # Return True if the file is missing or contains no non-blank lines
    try:
        with open(path, 'r') as f:
            for line in f:
                if line.strip():
                    return False
    except IOError:
        pass
    return True
def dict_writer(csvfile):
    return csv.DictWriter(csvfile, FIELDS, extrasaction='raise', dialect='excel')
create_or_truncate_file(CSV_FILE)
with open(CSV_FILE, 'wb') as csvfile:
    mywriter = dict_writer(csvfile)
    if is_file_empty(CSV_FILE):
        mywriter.writeheader()
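    # Note: because create_or_truncate_file() empties CSV_FILE just before this
    # block, is_file_empty() is effectively always True here, so the header row
    # is written on every run.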
    for i in range(0, GAMES_PER_SEASON):
        # Load html from url into variable
        url = URL_TEMPLATE.format(FIRST_GAME_ID + i)
        http = httplib2.Http()
        headers, body = http.request(url)
        # Load HTML into an object
        soup = BeautifulSoup(body, 'html.parser')
        # Pick'Em table
        pickem_table = soup.find('table', class_='pickemTable')
        # List of matchup rows
        matchup_rows = pickem_table.find_all('tr', class_='matchupRow')
        # Loop through rows
        for matchup_row in matchup_rows:
            # Dictionary for data
            data = {}
            # Matchup id
            data['matchup_id'] = matchup_row['data-matchupid']
            # Game id
            game_href = matchup_row.find('a', class_='matchupLink')['href']
            p = re.compile(r'http://.+\?gameId=([\d]+)', re.IGNORECASE)
            m = p.match(game_href)
            data['game_id'] = m.group(1)
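            # Note: the regex above assumes the matchup link looks like
            # 'http://...?gameId=<digits>'; if ESPN changes that link format,
            # m will be None and the line above will raise AttributeError.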
            # Date
            date = matchup_row.find('div', class_='pickem-date').string.strip()
            date_parts = date.split(',')
            if len(date_parts) > 1:
                month_and_day = date_parts[1].strip()
            else:
                month_and_day = date_parts[0].strip()
            month_and_day_parts = month_and_day.split(' ')
            month = month_and_day_parts[0].strip()
            day = month_and_day_parts[1].strip()
            if len(day) == 1:
                day = '0' + day  # pad with zero if necessary
            # Time
            time = matchup_row.find('div', class_='pickem-time').string.strip()
            if time[1] == ':':
                time = '0' + time  # pad with zero if necessary
            # Parse as a 12-hour clock: %I honors the trailing AM/PM marker,
            # which %H would silently ignore
            datetime_obj_naive = datetime.strptime(
                month + ' ' + day + ' ' + str(YEAR) + ' ' + time,
                '%b %d %Y %I:%M%p')
            datetime_obj_eastern = timezone('US/Eastern').localize(datetime_obj_naive)
            data['datetime'] = datetime_obj_eastern.strftime('%Y-%m-%d %H:%M:%S %Z%z')
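            # Illustrative example of the stored value (actual values depend on
            # the scraped page): '2016-09-11 13:00:00 EDT-0400'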
            # Media network
            data['network'] = matchup_row.find('div', class_='tvNetwork').string.strip()
            # Load divs for each team into array
            td_teams = matchup_row.find('td', class_='teams')
            div_teams = td_teams.find_all('div', class_='pickem-teams')
            # Away team points
            data['away_team_points'] = div_teams[0].find('div', class_='away').string.strip()
            # Home team points
            data['home_team_points'] = div_teams[1].find('div', class_='home').string.strip()
            # Away team 3-letter code
            data['away_team'] = div_teams[1].find('button')['data-f'].strip()
            # Home team 3-letter code
            data['home_team'] = div_teams[1].find('button')['data-u'].strip()
            # Spread
            data['spread'] = div_teams[1].find('button')['data-s'].strip()
            # Home team confidence %
            td_picked = matchup_row.find('td', class_='picked')
            divs = td_picked.find_all('div', class_='wpwOutsideWrapper')
            data['confidence'] = divs[1].find('span').string.strip()
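            # Assumption: the second wpwOutsideWrapper div holds the home-team
            # pick percentage (matching the 'Home team confidence %' note
            # above); the first div is assumed to be the away-team value and
            # is not recorded.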
            # Insert row of data into csv file
            mywriter.writerow(data)
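
# The usage sketch at the top of this file suggests wrapping the scraper in a
# StripStats class constructed from an output path, a url, and a field tuple.
# The class below is only a minimal, hypothetical illustration of that
# interface; the constructor arguments mirror the sketch, and the scraping
# logic itself is not reimplemented here.
class StripStats(object):
    def __init__(self, csv_path, url, fields):
        # Store the output csv path, the page to scrape, and the csv columns
        self.csv_path = csv_path
        self.url = url
        self.fields = fields

    def writer(self, csvfile):
        # Build a DictWriter over the configured columns, as dict_writer() does above
        return csv.DictWriter(csvfile, self.fields, extrasaction='raise', dialect='excel')

    def scrape(self):
        # Placeholder: a real implementation would reproduce the parsing loop above
        raise NotImplementedError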