167 lines
4.9 KiB
Python
import StripStats

# NOTE(review): this fragment appears misplaced — it sits above the shebang
# line and references `url`, which is not defined until inside the scraping
# loop much further down, so it would raise NameError if executed here.
# It also calls the imported *module* `StripStats` as if it were a class;
# presumably the intent was `from StripStats import StripStats` — confirm
# against the StripStats module's API.
strip_stats = StripStats(
    'out.csv',  # output CSV path — assumed; verify StripStats' signature
    url,        # page URL to scrape — undefined at this point in the file
    (
        # Field names, mirroring the FIELDS tuple defined later in the file.
        'matchup_id',
        'game_id',
        'datetime',
        'network',
        'away_team',
        'away_team_points',
        'home_team',
        'home_team_points',
        'spread',
        'confidence',
    )
)
#!/usr/local/bin/python
|
|
|
|
# Development
|
|
#import sys
|
|
#from pprint import pprint
|
|
|
|
import csv
|
|
import re
|
|
from datetime import datetime
|
|
from pytz import timezone
|
|
import httplib2
|
|
try:
|
|
from BeautifulSoup import BeautifulSoup
|
|
except ImportError:
|
|
from bs4 import BeautifulSoup
|
|
|
|
# --- Scrape configuration --------------------------------------------------

# Number of weekly entry pages to scrape (one per NFL regular-season week).
GAMES_PER_SEASON = 17

# Season year; used both to build URL_TEMPLATE and to stamp game datetimes.
YEAR = 2016

# ESPN "period" id of the first week's entry page; week i is
# FIRST_GAME_ID + i.
FIRST_GAME_ID = 69

# Entry-page URL with a single '{}' placeholder left for the period id.
# Derived from YEAR (previously the year was hard-coded here as well, so
# the two constants could silently drift apart).
URL_TEMPLATE = (
    'http://games.espn.com/nfl-pigskin-pickem/{}/en/entry'
    '?entryID=171981&period={{}}'
).format(YEAR)

# Output column order; also the DictWriter field names.
FIELDS = (
    'matchup_id',
    'game_id',
    'datetime',
    'network',
    'away_team',
    'away_team_points',
    'home_team',
    'home_team_points',
    'spread',
    'confidence',
)

# Path of the csv file opened for writing by the main script below.
CSV_FILE = 'espn_pickem.csv'
def create_or_truncate_file(path):
    """Create *path* if missing, or truncate it to zero bytes if present.

    Opening in 'w' mode does both; the context manager guarantees the
    handle is closed even if opening raises (the old open(...).close()
    one-liner leaked the handle on any error between open and close).
    """
    with open(path, 'w'):
        pass
def is_file_empty(path):
    """Return True if the file at *path* has no content.

    A missing or unreadable file also counts as empty, so the caller
    (re)writes the CSV header in that case.

    Bug fixed: the original returned True for any file whose first line had
    non-blank content (it broke out of the loop and fell through to
    ``return True``) and returned False only when the first line was blank —
    the inverse of its name. It also used a bare ``except:`` that swallowed
    everything, including KeyboardInterrupt.
    """
    try:
        with open(path, 'r') as f:
            # One character is enough to decide emptiness; avoids reading
            # the whole file.
            return not f.read(1)
    except (IOError, OSError):
        # Missing/unreadable: treat as empty so a header gets written.
        return True
def dict_writer(f=None):
    """Build the csv.DictWriter used to emit scraped rows.

    f: file object to write to. Defaults to the module-level ``csvfile``
       opened by the main script below, preserving the original zero-argument
       call; passing an explicit file makes the helper usable (and testable)
       without that global.

    extrasaction='raise' makes an unexpected key in a row dict an error
    rather than silently dropping it.
    """
    target = csvfile if f is None else f
    return csv.DictWriter(target, FIELDS, extrasaction='raise', dialect='excel')
# --- Main script: scrape every weekly entry page and dump rows to CSV ------

# Start each run from an empty output file.
create_or_truncate_file(CSV_FILE)

# NOTE(review): 'wb' is the Python 2 csv idiom; under Python 3 the csv
# module needs a text-mode handle (newline='') — confirm target interpreter.
with open(CSV_FILE, 'wb') as csvfile:
    #mywriter = csv.DictWriter(csvfile, FIELDS, extrasaction='raise', dialect='excel')
    mywriter = dict_writer()
    # The file was truncated just above, so this check always succeeds and
    # the header is always written.
    if is_file_empty(CSV_FILE):
        mywriter.writeheader()

    # One entry page per week of the season.
    for i in range(0, GAMES_PER_SEASON):
        # Load html from url into variable
        url = URL_TEMPLATE.format(FIRST_GAME_ID + i)
        http = httplib2.Http()
        headers, body = http.request(url)

        # Load HTML into an object
        #soup = BeautifulSoup(html)
        # NOTE(review): the 'html.parser' argument (and class_= keywords
        # below) are bs4-style; the BeautifulSoup-3 fallback imported at the
        # top would not accept them — confirm which library is installed.
        soup = BeautifulSoup(body, 'html.parser')

        # Pick'Em table
        pickem_table = soup.find('table', class_='pickemTable')

        # List of matchup rows
        matchup_rows = pickem_table.find_all('tr', class_='matchupRow')

        # Loop through rows
        for j in range(0, len(matchup_rows)):
            matchup_row = matchup_rows[j]

            # Dictionary for data: one CSV row, keyed by FIELDS.
            data = {}

            # Matchup id
            data['matchup_id'] = matchup_row['data-matchupid']

            # Game id: the numeric gameId query parameter of the boxscore link.
            game_href = matchup_row.find('a', class_='matchupLink')['href']
            p = re.compile(r'http://.+\?gameId=([\d]+)', re.IGNORECASE)
            m = p.match(game_href)
            data['game_id'] = m.group(1)

            # Date: page shows either "Weekday, Mon D" or just "Mon D";
            # take the part after the comma when one is present.
            date = matchup_row.find('div', class_='pickem-date').string.strip()
            date_parts = date.split(',')
            month_and_day = ''
            if len(date_parts) > 1:
                month_and_day = date_parts[1].strip()
            else:
                month_and_day = date_parts[0].strip()
            month_and_day_parts = month_and_day.split(' ')
            month = month_and_day_parts[0].strip()
            day = month_and_day_parts[1].strip()
            if len(day) == 1:
                day = '0' + day #pad with zero if necessary

            # Time
            time = matchup_row.find('div', class_='pickem-time').string.strip()
            if time[1] == ':':
                time = '0' + time #pad with zero if necessary

            # NOTE(review): '%H:%M%p' mixes 24-hour %H with AM/PM %p; a time
            # like "1:00PM" would need %I — confirm the raw page format.
            datetime_obj_naive = datetime.strptime(
                month + ' ' + day + ' ' + str(YEAR) + ' ' + time,
                '%b %d %Y %H:%M%p')
            # Kickoff times are shown in US/Eastern; localize the naive value
            # and store it with an explicit tz name and offset.
            datetime_obj_eastern = timezone('US/Eastern').localize(datetime_obj_naive)
            data['datetime'] = datetime_obj_eastern.strftime('%Y-%m-%d %H:%M:%S %Z%z')

            # Media network
            data['network'] = matchup_row.find('div', class_='tvNetwork').string.strip()

            # Load divs for each team into array
            td_teams = matchup_row.find('td', class_='teams')
            div_teams = td_teams.find_all('div', class_='pickem-teams')

            # Away team points
            data['away_team_points'] = div_teams[0].find('div', class_='away').string.strip()

            # Home team points
            data['home_team_points'] = div_teams[1].find('div', class_='home').string.strip()

            # Away team 3-letter code
            # NOTE(review): the away code is read from div_teams[1] (the home
            # div) via its button's data-f attribute — presumably that button
            # carries both teams' codes (data-f/data-u); verify against the
            # live markup rather than assuming a copy/paste slip.
            data['away_team'] = div_teams[1].find('button')['data-f'].strip()

            # Home team 3-letter code
            data['home_team'] = div_teams[1].find('button')['data-u'].strip()

            # Spread
            data['spread'] = div_teams[1].find('button')['data-s'].strip()

            # Home team confidence %
            td_picked = matchup_row.find('td', class_='picked')
            divs = td_picked.find_all('div', class_='wpwOutsideWrapper')
            data['confidence'] = divs[1].find('span').string.strip()

            # Insert row of data into csv file
            mywriter.writerow(data)