165 lines
4.6 KiB
Ruby
165 lines
4.6 KiB
Ruby
require 'Nokogiri'
|
|
require 'open-uri'
|
|
require 'tzinfo'
|
|
require 'uri'
|
|
|
|
require './StripStats.rb'
|
|
|
|
# Extracts matchup rows from an ESPN Pigskin Pick'Em entry page.
#
# Overrides +strip_data+ from StripStats.
class EspnPickemStripStats < StripStats
  # Months numerically below this value are assigned to the calendar year
  # after the season's starting year (see #pickem_kickoff_utc).
  @@season_start_month = 8

  # TZInfo identifier of the zone in which the page's dates and times are
  # interpreted before being converted to UTC.
  @@timezone_string = 'US/Eastern'

  # Anchored so that raw HTML which merely contains a link is not mistaken
  # for a URL to download.
  REMOTE_SOURCE_PATTERN = %r{\A(?:https?|ftp)://\S+\z}i

  # Captures the season's starting year from the page's canonical URL.
  CANONICAL_YEAR_PATTERN = %r{https?://games\.espn\.com/nfl-pigskin-pickem/(\d+)}

  # Hour, minute and whatever follows (expected to carry the AM/PM marker).
  KICKOFF_TIME_PATTERN = /(\d+):(\d+)(.*)/

  # Builds one hash per matchup row found in the pick'em table.
  #
  # @param html_source [String] path of an existing file, an http(s)/ftp URL,
  #   or a raw HTML string
  # @return [Array<Hash>] one hash per row with the keys :matchup_id,
  #   :game_id, :datetime, :network, :away_team_points, :home_team_points,
  #   :away_team, :home_team, :spread and :confidence; empty when the source
  #   could not be loaded or contains no matchup rows
  # @raise [RuntimeError] when rows exist but the season year or a kickoff
  #   time cannot be parsed
  def strip_data(html_source)
    html_doc = load_pickem_document(html_source)
    return [] if html_doc.nil?

    matchup_rows = html_doc.css('table.pickemTable tr.matchupRow')
    return [] if matchup_rows.empty?

    # The canonical URL is the same for every row, so resolve the year once.
    start_year = pickem_start_year(html_doc)
    matchup_rows.map { |matchup_row| pickem_matchup(matchup_row, start_year) }
  end

  private

  # Parses html_source into a Nokogiri document, closing any handle it opens.
  # Returns nil (after reporting the error on stderr) when loading fails.
  def load_pickem_document(html_source)
    if File.exist?(html_source)
      File.open(html_source) { |io| Nokogiri::HTML(io) }
    elsif html_source =~ REMOTE_SOURCE_PATTERN
      # Kernel#open no longer fetches URLs on current Rubies; URI.open does.
      URI.open(html_source) { |io| Nokogiri::HTML(io) }
    else
      Nokogiri::HTML(html_source)
    end
  rescue StandardError => e
    warn e.message
    nil
  end

  # Reads the season's starting year out of the <link rel="canonical"> href.
  def pickem_start_year(html_doc)
    canonical_url = html_doc.xpath("//head/link[@rel='canonical']/@href").to_s
    year = canonical_url[CANONICAL_YEAR_PATTERN, 1]
    raise "No season year in canonical URL #{canonical_url.inspect}" if year.nil?

    year.to_i
  end

  # Converts a single tr.matchupRow node into its data hash.
  def pickem_matchup(matchup_row, start_year)
    div_teams = matchup_row.css('td.teams').first.css('div.pickem-teams')
    confidence_divs = matchup_row.css('td.picked').first.css('div.wpwOutsideWrapper')

    row = {}
    row[:matchup_id] = matchup_row.attr('data-matchupid')
    row[:game_id] = pickem_game_id(matchup_row)
    row[:datetime] = pickem_kickoff_utc(matchup_row, start_year)
    row[:network] = matchup_row.css('div.tvNetwork').first.inner_html.strip
    row[:away_team_points] = div_teams.first.css('div.away').first.inner_html.strip
    row[:home_team_points] = div_teams.last.css('div.home').first.inner_html.strip

    # NOTE(review): both team codes and the spread come from the first button
    # of the LAST team div, via data-f / data-u / data-s. Confirm that data-f
    # and data-u really mean away and home.
    pick_button = div_teams.last.css('button').first
    row[:away_team] = pick_button.attr('data-f').to_s.strip
    row[:home_team] = pick_button.attr('data-u').to_s.strip
    row[:spread] = pick_button.attr('data-s').to_s.strip

    row[:confidence] = confidence_divs.last.css('span').first.inner_html
    row
  end

  # Returns the digits after "?gameId=" in the matchup link, or '' when the
  # link or the parameter is absent.
  def pickem_game_id(matchup_row)
    link = matchup_row.at_css('a.matchupLink')
    href = link ? link['href'].to_s : ''
    href[/\?gameId=(\d+)/, 1] || ''
  end

  # Combines the row's date and time cells into a UTC timestamp string
  # formatted as '%Y-%m-%d %H:%M:%S %z'.
  def pickem_kickoff_utc(matchup_row, start_year)
    # Only the text after the last comma is used; its first word is taken as
    # the month and its last word as the day.
    date_text = matchup_row.css('div.pickem-date').first.inner_html.strip
    month_and_day = date_text.split(',').last.strip.split(' ')
    month = month_and_day.first
    day = month_and_day.last

    year = start_year
    year += 1 if StripStats.month_int_from_string(month) < @@season_start_month

    time_text = matchup_row.css('div.pickem-time').first.inner_html.strip
    parts = time_text.match(KICKOFF_TIME_PATTERN)
    raise "Unrecognised kickoff time #{time_text.inspect}" if parts.nil?

    hour = parts[1].to_i
    minute = parts[2].to_i
    # Strip before comparing so "9:30 AM" is not mistaken for a PM time.
    if parts[3].strip.upcase.start_with?('AM')
      hour = 0 if hour == 12
    elsif hour < 12
      # Anything not marked AM is treated as PM, as before.
      hour += 12
    end

    tz = TZInfo::Timezone.get(@@timezone_string)
    utc = tz.local_to_utc(Time.utc(year, month, day, hour, minute))
    utc.strftime('%Y-%m-%d %H:%M:%S %z')
  end
end
|
|
|
|
# Main program execution

# Entry page URL; the placeholders are filled with year, entry id and period.
URL_TEMPLATE = 'http://games.espn.com/nfl-pigskin-pickem/%d/en/entry?entryID=%d&period=%d'
OUTPUT_FILE = 'out.ruby.csv'
YEAR_ZERO = 2012 # year when first week corresponds to period=1
WEEKS_PER_SEASON = 17

# Field list handed to EspnPickemStripStats.new.
FIELDS = %i[
  matchup_id
  game_id
  datetime
  network
  away_team
  home_team
  away_team_points
  home_team_points
  spread
  confidence
]

# Start every run from an empty output file.
File.write(OUTPUT_FILE, '')

# Alternative configuration covering 2015 and 2016:
#START_YEAR = 2015
#ENTRY_IDS = [276612, 171981]

# 2016 only
START_YEAR = 2016
ENTRY_IDS = [171981]

# The n-th entry id is paired with the season START_YEAR + n.
ENTRY_IDS.each.with_index(START_YEAR) do |entry_id, year|
  first_period = (year - YEAR_ZERO) * WEEKS_PER_SEASON + 1
  last_period = first_period + WEEKS_PER_SEASON - 1
  puts format("Stripping ESPN Pick'Em data for %d...", year)

  (first_period..last_period).each do |period|
    # Get html from URL
    html_source = format(URL_TEMPLATE, year, entry_id, period)

    # Get html from file instead:
    #html_source = "period_#{period}.html"
    #next unless File.exist?(html_source)

    stripper = EspnPickemStripStats.new(html_source, OUTPUT_FILE, FIELDS)
    stripper.write_data
    puts "Parsing #{html_source}"
  end
end
|