strip-stats/StripStats.rb

75 lines
1.6 KiB
Ruby

require 'csv'
# Base class for scrapers that turn an HTML source into CSV rows.
#
# A subclass overrides #strip_data to return an array of hashes (one hash per
# record). The constructor flattens those hashes into @rows, ordered by
# @fields, and #write_data appends the rows to @output_file, emitting a header
# line first when the file is missing, empty, or contains only whitespace.
class StripStats
  attr_accessor :output_file, :rows, :fields

  # @param html_source  passed through unchanged to #strip_data
  # @param output_file [String] path of the CSV file #write_data appends to
  # @param fields [Array, nil] hash keys to export, in column order; when nil
  #   the keys of the first record are used (in that hash's own order)
  def initialize(html_source, output_file = 'out.csv', fields = nil)
    @output_file = output_file
    # The base #strip_data returns nil; treat that the same as "no records".
    data_hashes = strip_data(html_source) || []
    # With no records and no explicit field list there is nothing to derive
    # the columns from, so fall back to an empty list instead of failing.
    @fields = fields || data_hashes.first&.keys || []
    @rows = data_hashes.map do |data_hash|
      @fields.map { |field| data_hash[field] }
    end
  end

  #
  # Class methods
  #

  # Converts an abbreviated month name ("Jan".."Dec") to its number (1..12).
  # Returns nil when the name is not an exact match, and also for nil input
  # (index 0 of Date::ABBR_MONTHNAMES is nil, so an unguarded lookup of nil
  # would answer 0).
  # NOTE(review): relies on Date already being loaded; this file only requires
  # 'csv' - consider an explicit require 'date'.
  def self.month_int_from_string(month_string)
    return nil if month_string.nil?

    Date::ABBR_MONTHNAMES.index(month_string)
  end

  # Abstract method - override this in a derived class. The override should
  # return an array of hashes. The argument is optional here so that the call
  # made by #initialize (which passes the HTML source) works on the base class
  # as well as the original zero-argument call style.
  def strip_data(_html_source = nil)
    nil
  end

  # Actions

  # Appends @rows to @output_file as CSV with "\r\n" row separators. When the
  # file is missing, empty, or whitespace-only it is truncated first and
  # @fields is written as the header line.
  #
  # @raise [StandardError] when @rows is nil
  def write_data
    # TODO: raise better exception when no data
    raise StandardError, 'no rows to write' if @rows.nil?

    write_headers = output_file_contains_only_whitespace?
    # Remove any whitespace in an otherwise empty file before appending.
    File.open(@output_file, 'w') {} if write_headers
    headers = write_headers ? @fields : nil

    CSV.open(@output_file, 'ab',
             write_headers: write_headers,
             headers: headers,
             row_sep: "\r\n") do |csv|
      @rows.each { |row| csv << row }
    end
  end

  # Utility

  # True when @output_file does not exist, is zero bytes long, or holds only
  # whitespace. If the file cannot be inspected, the error message is printed
  # and true is returned (best effort, as before).
  def output_file_contains_only_whitespace?
    # A file that does not exist yet is the normal first-run case, not an error.
    return true unless File.exist?(@output_file)
    return true if File.zero?(@output_file)

    File.foreach(@output_file) do |line|
      return false unless line.strip.empty?
    end
    true
  rescue StandardError => e
    # Only StandardError is rescued, so signals and exit requests propagate.
    puts e.message
    true
  end
end