# 2009 12 31 john blue # ruby code to extract question elements for twitter discussions # The questions/responses must have a string that has a 'q' and a number require 'rubygems' require 'hpricot' require 'open-uri' # Pull command line items: intro text, question number, web page url containing twitter discussion # For ref on command line info see http://ruby.about.com/od/rubyfeatures/a/argv.htm introtext = ARGV[0] # example "Agchaxt for 2009-12-29" questionnumber = ARGV[1] # example 1 weburl = ARGV[2] # example "http://localhost:8888/files/Agchat20081229Frame.html" # Get the Twitter discussion input web page info doc = Hpricot(open(weburl)) #----------- # get the head part of page, set absolute link, and update all relative links to absolute links thehead=doc.search("/html/head") thebody=doc.search("/html/body") theresultsul=doc.search("/html/body/div/div/div/ul") #----------- # set up output # beginning html file codes htmlstart = '' htmlhead='
' # setup the file pre-html puts htmlhead ########### Set the ul pre html ulprehtml='