#!/usr/local/bin/python
# Usage sketch for a StripStats wrapper (StripStats is not defined in this
# file; 'url' below is a placeholder for the page to scrape):
#
#   import StripStats
#   strip_stats = StripStats(
#       'out.csv',
#       url,
#       (
#           'matchup_id',
#           'game_id',
#           'datetime',
#           'network',
#           'away_team',
#           'away_team_points',
#           'home_team',
#           'home_team_points',
#           'spread',
#           'confidence',
#       )
#   )
# Development
#import sys
#from pprint import pprint
import csv
import re
from datetime import datetime
from pytz import timezone
import httplib2
try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    from bs4 import BeautifulSoup
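# Note: this script appears to target Python 2 (the csv output file is opened
# in 'wb' mode below) and requires httplib2, pytz, and BeautifulSoup 3 or bs4.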
GAMES_PER_SEASON = 17
YEAR = 2016
FIRST_GAME_ID = 69
URL_TEMPLATE = 'http://games.espn.com/nfl-pigskin-pickem/2016/en/entry?entryID=171981&period={}'
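# Assumption: the hardcoded entryID and season in URL_TEMPLATE identify a
# single 2016 Pigskin Pick'em entry, and the 'period' query parameter selects
# the weekly scoring period, starting at FIRST_GAME_ID for week 1.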
FIELDS = (
    'matchup_id',
    'game_id',
    'datetime',
    'network',
    'away_team',
    'away_team_points',
    'home_team',
    'home_team_points',
    'spread',
    'confidence',
)
# Path of the output csv file (opened for writing below)
CSV_FILE = 'espn_pickem.csv'
def create_or_truncate_file(path):
    # Create the file if it does not exist, otherwise truncate it to zero length
    open(path, 'w').close()
def is_file_empty(path):
    # Return True if the file is missing or contains no non-blank lines
    try:
        with open(path, 'r') as f:
            for line in f:
                if line.strip():
                    return False
    except IOError:
        pass
    return True
def dict_writer(csvfile):
    return csv.DictWriter(csvfile, FIELDS, extrasaction='raise', dialect='excel')
create_or_truncate_file(CSV_FILE)
with open(CSV_FILE, 'wb') as csvfile:
    mywriter = dict_writer(csvfile)
    if is_file_empty(CSV_FILE):
        mywriter.writeheader()
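    # Note: because create_or_truncate_file() empties CSV_FILE just before this
    # block, is_file_empty() is effectively always True here, so the header row
    # is written on every run.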
    for i in range(0, GAMES_PER_SEASON):
        # Load html from url into variable
        url = URL_TEMPLATE.format(FIRST_GAME_ID + i)
        http = httplib2.Http()
        headers, body = http.request(url)
        # Load HTML into an object
        soup = BeautifulSoup(body, 'html.parser')
        # Pick'Em table
        pickem_table = soup.find('table', class_='pickemTable')
        # List of matchup rows
        matchup_rows = pickem_table.find_all('tr', class_='matchupRow')
        # Loop through rows
        for matchup_row in matchup_rows:
            # Dictionary for data
            data = {}
            # Matchup id
            data['matchup_id'] = matchup_row['data-matchupid']
            # Game id
            game_href = matchup_row.find('a', class_='matchupLink')['href']
            p = re.compile(r'http://.+\?gameId=([\d]+)', re.IGNORECASE)
            m = p.match(game_href)
            data['game_id'] = m.group(1)
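            # Note: the regex above assumes the matchup link looks like
            # 'http://...?gameId=<digits>'; if ESPN changes that link format,
            # m will be None and the line above will raise AttributeError.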
            # Date
            date = matchup_row.find('div', class_='pickem-date').string.strip()
            date_parts = date.split(',')
            if len(date_parts) > 1:
                month_and_day = date_parts[1].strip()
            else:
                month_and_day = date_parts[0].strip()
            month_and_day_parts = month_and_day.split(' ')
            month = month_and_day_parts[0].strip()
            day = month_and_day_parts[1].strip()
            if len(day) == 1:
                day = '0' + day  # pad with zero if necessary
            # Time
            time = matchup_row.find('div', class_='pickem-time').string.strip()
            if time[1] == ':':
                time = '0' + time  # pad with zero if necessary
            # Parse as a 12-hour clock: %I honors the trailing AM/PM marker,
            # which %H would silently ignore
            datetime_obj_naive = datetime.strptime(
                month + ' ' + day + ' ' + str(YEAR) + ' ' + time,
                '%b %d %Y %I:%M%p')
            datetime_obj_eastern = timezone('US/Eastern').localize(datetime_obj_naive)
            data['datetime'] = datetime_obj_eastern.strftime('%Y-%m-%d %H:%M:%S %Z%z')
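            # Illustrative example of the stored value (actual values depend on
            # the scraped page): '2016-09-11 13:00:00 EDT-0400'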
            # Media network
            data['network'] = matchup_row.find('div', class_='tvNetwork').string.strip()
            # Load divs for each team into array
            td_teams = matchup_row.find('td', class_='teams')
            div_teams = td_teams.find_all('div', class_='pickem-teams')
            # Away team points
            data['away_team_points'] = div_teams[0].find('div', class_='away').string.strip()
            # Home team points
            data['home_team_points'] = div_teams[1].find('div', class_='home').string.strip()
            # Away team 3-letter code
            data['away_team'] = div_teams[1].find('button')['data-f'].strip()
            # Home team 3-letter code
            data['home_team'] = div_teams[1].find('button')['data-u'].strip()
            # Spread
            data['spread'] = div_teams[1].find('button')['data-s'].strip()
            # Home team confidence %
            td_picked = matchup_row.find('td', class_='picked')
            divs = td_picked.find_all('div', class_='wpwOutsideWrapper')
            data['confidence'] = divs[1].find('span').string.strip()
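            # Assumption: the second wpwOutsideWrapper div holds the home-team
            # pick percentage (matching the 'Home team confidence %' note
            # above); the first div is assumed to be the away-team value and
            # is not recorded.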
            # Insert row of data into csv file
            mywriter.writerow(data)
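
# The usage sketch at the top of this file suggests wrapping the scraper in a
# StripStats class constructed from an output path, a url, and a field tuple.
# The class below is only a minimal, hypothetical illustration of that
# interface; the constructor arguments mirror the sketch, and the scraping
# logic itself is not reimplemented here.
class StripStats(object):
    def __init__(self, csv_path, url, fields):
        # Store the output csv path, the page to scrape, and the csv columns
        self.csv_path = csv_path
        self.url = url
        self.fields = fields

    def writer(self, csvfile):
        # Build a DictWriter over the configured columns, as dict_writer() does above
        return csv.DictWriter(csvfile, self.fields, extrasaction='raise', dialect='excel')

    def scrape(self):
        # Placeholder: a real implementation would reproduce the parsing loop above
        raise NotImplementedError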