require 'rubygems'
require 'nokogiri'
require 'sqlite3'
# Column definitions for the output table, in column order, as
# [column_name, sqlite_type] pairs. An optional third element, when present,
# is used further down as the name of an index created on that column.
# Frozen (outer and inner arrays) so the schema cannot be mutated by accident.
FIELD_NAMES = [
  %w[selectcity VARCHAR],
  %w[match VARCHAR],
  %w[phone_no NUMERIC],
  %w[name VARCHAR],
  %w[address VARCHAR]
].each(&:freeze).freeze

# CSS selector prefix used to find the rows of the results table in each page.
TABLE_DIV_ID = '#dgrSearch'.freeze
# Tab-separated text dump of the scraped rows. The first line written is a
# header made of the column names taken from FIELD_NAMES.
OFILE = File.new('data-hold/tel-directory.txt', 'w')
header_line = FIELD_NAMES.map(&:first).join("\t")
OFILE.puts(header_line)
# Path of the SQLite database file that receives the scraped rows.
DBNAME = 'data-hold/tel-directory.sqlite'

# Start from an empty database on every run. File.exist? is used because the
# deprecated File.exists? alias was removed in Ruby 3.2.
File.delete(DBNAME) if File.exist?(DBNAME)
DB = SQLite3::Database.new(DBNAME)

TABLE_NAME = 'telephone_records'

# Parameterised INSERT with one "?" placeholder per entry in FIELD_NAMES.
DB_INSERT_STATEMENT =
  "INSERT into #{TABLE_NAME} values (#{FIELD_NAMES.map { '?' }.join(',')})"

# Create the table with one column per FIELD_NAMES entry. Column names are
# backtick-quoted so names that are also SQL keywords (such as `match`) are
# taken as identifiers.
column_defs = FIELD_NAMES.map { |name, type| "`#{name}` #{type}" }.join(', ')
DB.execute "CREATE TABLE #{TABLE_NAME}(#{column_defs});"

# Create an index for every field that declares an index name as its third
# element; fields without one are skipped. The column is quoted the same way
# as in CREATE TABLE.
FIELD_NAMES.each do |name, _type, index_name|
  next if index_name.nil?

  DB.execute "CREATE INDEX #{index_name} ON #{TABLE_NAME}(`#{name}`)"
end
# Walk every saved result page (skipping files whose name matches "All match"),
# pull the data rows out of the results table, and write each row to both the
# tab-separated text file and the SQLite table.
begin
  Dir.glob("data-hold/pages/*.html").reject { |f| f =~ /All match/ }.each do |fname|
    # The file name (minus ".html") is split on "--"; the resulting parts are
    # prepended to every row scraped from this page.
    meta_info = File.basename(fname, '.html').split('--')

    # File.open (rather than Kernel#open) never interprets a leading "|" as a
    # command, and the block form closes the handle as soon as parsing is done.
    page = File.open(fname) { |html| Nokogiri::HTML(html) }

    # Skip the first and last <tr> (presumably header and footer rows — confirm
    # against the saved pages). The slice is nil when the table is missing or
    # has no rows, so fall back to an empty list instead of crashing.
    rows = page.css("#{TABLE_DIV_ID} tr")[1..-2] || []

    rows.each do |tr|
      data_tds = tr.css('td').map do |td|
        # Drop "$" and "," that directly precede a digit, turn non-breaking
        # spaces and any whitespace into plain spaces, then trim the ends.
        td.text.gsub(/[$,](?=\d)/, '').gsub(/\302\240|\s/, ' ').strip
      end

      data_row = meta_info + data_tds
      OFILE.puts(data_row.join("\t"))
      DB.execute(DB_INSERT_STATEMENT, data_row)
    end
  end
ensure
  # Close the text output even if a page fails to parse or an insert raises.
  OFILE.close
end
--
Posted via http://www.ruby-forum.com/.
--
You received this message because you are subscribed to the Google Groups "Ruby on Rails: Talk" group.
To post to this group, send email to rubyonrails-talk@googlegroups.com.
To unsubscribe from this group, send email to rubyonrails-talk+unsubscribe@googlegroups.com.
For more options, visit this group at http://groups.google.com/group/rubyonrails-talk?hl=en.
No comments:
Post a Comment