# 2009 12 31 john blue
# command line ruby code to extract question elements for twitter discussions
# The questions/responses must have a string that has a 'q' and a number

require 'rubygems'
require 'hpricot'
require 'open-uri'

# Pull command line items: question number, intro text, web page url containing twitter discussion, name of file to append
# For ref on command line info see http://ruby.about.com/od/rubyfeatures/a/argv.htm 
# Positional arguments, in order:
#   1. question number   e.g. 1
#   2. intro text        e.g. "Agchat for 2009-12-29"
#   3. web page url      e.g. "http://localhost:8888/files/Agchat20081229Frame.html"
#   4. append file name  file whose contents are emitted just before </body>
# Any argument that is not supplied is left as nil.
questionnumber, introtext, weburl, appendfilename = ARGV

#mydebug = File.new("debug%d.html" % questionnumber, 'a') 

#mydebug.puts("/n questionnumber debugdebugdebugdebugdebugdebugdebug") #
#mydebug.puts(questionnumber) #
#mydebug.puts("/n debugdebugdebugdebugdebugdebugdebug") #

#mydebug.puts("/n introtext debugdebugdebugdebugdebugdebugdebug") #
#mydebug.puts(introtext) #
#mydebug.puts("/n debugdebugdebugdebugdebugdebugdebug") #

#mydebug.puts("/n weburl debugdebugdebugdebugdebugdebugdebug") #
#mydebug.puts(weburl) #
#mydebug.puts("/n debugdebugdebugdebugdebugdebugdebug") #

#mydebug.puts("/n appendfilename debugdebugdebugdebugdebugdebugdebug") #
#mydebug.puts(appendfilename) #
#mydebug.puts("/n debugdebugdebugdebugdebugdebugdebug") #

# Get the Twitter discussion input web page info
# Kernel#open stopped accepting URLs in Ruby 3.0; open-uri exposes URI.open
# instead. Prefer URI.open when it exists and fall back to the patched
# Kernel#open on older interpreters. The block form closes the handle as soon
# as the page has been parsed.
opener = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
doc = opener.call(weburl) { |page| Hpricot(page) }

#-----------
# Locate the <ul> elements at the fixed path where the saved search page keeps
# its results; their <li> children are what the rest of the script filters.
theresultsul = doc.search("/html/body/div/div/div/ul")

#-----------
# set up output

# beginning html file codes
# Document preamble: doctype plus the opening <html> tag.
htmlstart = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><html>'

# Static <head> section referencing the Twitter Search stylesheets and script.
htmlhead = '<head>
		<meta content="text/html; charset=UTF-8" http-equiv="content-type" />
		<meta name="description" content="" />
		<meta name="keywords" content="" />
		<link href="http://search.twitter.com/opensearch.xml" title="Twitter Search" rel="search" type="application/opensearchdescription+xml" />
		<link href="http://search.twitter.com/stylesheets/search/twitter.css?1258673560" rel="stylesheet" type="text/css" media="screen" />
        <script type="text/javascript" src="http://search.twitter.com/javascripts/search/c_all.js?1258673560"></script>
		  <meta name="robots" content="noindex" />
	<link href="http://search.twitter.com/stylesheets/search/search_search.css?1258673560" rel="stylesheet" type="text/css" media="screen" />
    

	</head>'

########### Set the ul pre html
# 2010 04 06 jlb changed <div id="timer"> to <div>
# Opening of <body> up to the heading. introtext is inserted verbatim (no HTML
# escaping), so any markup it contains is passed straight through.
ulprehtml = '<body>
  <div id="main">
    <div id="mainContent" style="width:460px;padding:0px;">
      <div id="results" style="width:440px;margin:5px 5px 5px 5px;padding: 5px 5px 5px 5px; ">
        <div >
          <h2><b>' + introtext + '</b><br>Note: The posts are read from top to bottom.</h2>
         
        </div>'

# Markup that closes the containers opened in ulprehtml (printed after the list).
ulposthtml = '<p class="clearer"></p>
      </div>
    </div>
  </div>'

htmlclose = '</body></html>'

# Build the question matcher: a "q", optional separators (space, colon, pipe),
# the question number, optional separators, and then one mandatory space.
# see http://icfun.blogspot.com/2008/04/ruby-regular-expression-handling.html
# and use to test reg exp http://rubular.com/
regexpression = format('[qQ][ :|]*%d[ :|]* ', questionnumber) # Note space at end is required in regular expression
regexforif = Regexp.new(regexpression, Regexp::IGNORECASE) # for details see Regexp.new at http://ruby-doc.org/core/classes/Regexp.html

# Emit the page opening in document order: doctype/<html>, then <head>, then
# the start of <body>. (Printing <head> ahead of the doctype yields invalid HTML.)
puts htmlstart
puts htmlhead
puts ulprehtml
puts '<ul>'

########### Set the questions to search
# Walk every <li> under the results list and print the ones whose '//div[2]'
# content (presumably the message text) matches the question pattern.
theresultsul.search('//li').each do |post|
  message_html = post.search('//div[2]').inner_html
  puts post if message_html =~ regexforif
end

# Close the list and the containers opened before it.
puts '</ul>'
puts ulposthtml

#take contents of append file and insert before body close tag
# Copy the append file to stdout line by line. File.foreach closes the file
# once iteration finishes, whereas File.open(...).each leaves the handle open.
File.foreach(appendfilename) { |line| puts line }

puts htmlclose
