require 'csv'

# Base class for scrapers that turn an HTML source into CSV rows.
#
# Subclasses override #strip_data to return an array of hashes (one hash per
# record). The constructor flattens those hashes into #rows, ordered by
# #fields, and #write_data appends the rows to #output_file.
class StripStats
  attr_accessor :output_file, :rows, :fields

  # @param html_source [Object] passed through unchanged to #strip_data
  # @param output_file [String] path of the CSV file that #write_data appends to
  # @param fields [Array, nil] hash keys to export, in column order; when nil
  #   the keys of the first record are used, in that hash's own key order
  def initialize(html_source, output_file='out.csv', fields=nil)
    @output_file = output_file

    # A hook that returns nil (e.g. the base implementation) means "no records".
    data_hashes = strip_data(html_source) || []

    # Without explicit fields and without records there is nothing to derive
    # the columns from, so fall back to an empty field list instead of
    # calling #keys on nil.
    first_record = data_hashes.first
    @fields = fields || (first_record ? first_record.keys : [])

    @rows = data_hashes.map do |data_hash|
      @fields.map { |field| data_hash[field] }
    end
  end

  # Abstract method - override this in a derived class.
  #
  # The parameter is optional so that both the one-argument call made by
  # #initialize and any existing zero-argument call are accepted.
  #
  # @param _html_source [Object] the source handed to the constructor
  # @return [Array<Hash>, nil] one hash per record; this base implementation
  #   returns nil
  def strip_data(_html_source = nil)
    nil
  end

  # Actions

  # Appends #rows to #output_file using CRLF row separators. When the file is
  # missing, empty, or contains only whitespace it is truncated first and a
  # header row built from #fields is written before the data.
  #
  # @raise [StandardError] if #rows is nil
  # @return [Array] the rows that were written
  def write_data
    raise StandardError, 'no rows to write' if @rows.nil?

    start_fresh = output_file_contains_only_whitespace?
    # Truncate so stray whitespace does not end up above the header row.
    File.open(@output_file, 'w') {} if start_fresh

    # Only ask CSV for a header row when there are field names to put in it.
    headers = start_fresh && @fields && !@fields.empty? ? @fields : nil

    CSV.open(@output_file, 'ab',
             write_headers: !headers.nil?,
             headers: headers,
             row_sep: "\r\n") do |csv|
      @rows.each { |row| csv << row }
    end
  end

  # Utility

  # Reports whether #output_file has no meaningful content yet.
  #
  # @return [Boolean] true when the file does not exist, is zero bytes long,
  #   or every line is blank after stripping; also true (best effort, with
  #   the error message sent to stderr) when the file cannot be inspected
  def output_file_contains_only_whitespace?
    # A file that has not been created yet is the normal first-run case,
    # not an error worth reporting.
    return true unless File.exist?(@output_file)
    return true if File.zero?(@output_file)

    File.foreach(@output_file).all? { |line| line.strip.empty? }
  rescue StandardError => e
    # StandardError rather than Exception, so Interrupt/SystemExit still propagate.
    warn e.message
    true
  end
end