# 2009 12 31 john blue
# Command-line Ruby script that pulls Twitter search results using the Twitter Search API URL.
# for more info see http://apiwiki.twitter.com/Twitter-Search-API-Method:-search

# Note that no error handling is done: timeouts, edge conditions, etc. are neither handled nor tested.
# The output HTML should be proofread to confirm that the expected info was returned.

require 'rubygems'
require 'hpricot'
require 'open-uri'



#-----------
# function to get the results sections section of the page (the unordered list in results) 
# and reverse the li items so they read top to bottom

def reverse_li_elements(pagelink, i)

# thelink = "http://search.twitter.com/" not needed I think 
 lilist= Array.new # array for li result items
 adoc = Hpricot(open(pagelink))
 
 # NOTE: Check here to see if Twitter timed out or request went bad.

# theresultsul=adoc.search("/html/body/div[2]/div/div[2]/ul") #2010 04 06 jlb old version
  theresultsul=adoc.search("/html/body/div[2]/div/div[4]/ul") # 2010 04 06 jlb new version

 
 # set the absolute link for substitution
 athelink = "http://search.twitter.com/"

 liitems = theresultsul.search('//li')
 liitems.reverse.map do |liitem|
  # sub relative links with absolute links.
  # initial idea from http://garrickvanburen.com/archive/ruby-on-rails-snippet-for-changing-relative-paths-to-absolute
  liitemnew=liitem.to_s.gsub(/=('|")\//, '=\1*/').gsub(/\*\//, athelink.match(/(http|https):\/\/[\w.]+\//)[0])
  lilist = lilist << liitemnew
 end

#mydebug = File.new("debug%d.html" % i, 'a') # the output file "%d" % i

#mydebug.puts("/n pagelink debugdebugdebugdebugdebugdebugdebug") #
#mydebug.puts(pagelink) #
#mydebug.puts("/n debugdebugdebugdebugdebugdebugdebug") #

#mydebug.puts("/n adoc debugdebugdebugdebugdebugdebugdebug") #
#mydebug.puts(adoc) #
#mydebug.puts("/n debugdebugdebugdebugdebugdebugdebug") #

#mydebug.puts("/n theresultsul debugdebugdebugdebugdebugdebugdebug") #
#mydebug.puts(theresultsul) #
#mydebug.puts("/n debugdebugdebugdebugdebugdebugdebug") #                          
 
#mydebug.puts("/n lilist debugdebugdebugdebugdebugdebugdebug") #
#mydebug.puts(lilist) #
#mydebug.puts("/n debugdebugdebugdebugdebugdebugdebug") #  

return lilist

end

pagearray= Array.new # array for pages to scan

# Pull command line items: start date, end date, and text. 
# For ref on command line info see http://ruby.about.com/od/rubyfeatures/a/argv.htm 
startdate = ARGV[0] # example "2009-12-30" NOTE: this parameter currently not used.
enddate = ARGV[1] # example "2009-12-31"
searchterm = ARGV[2] # example "agchat"
searchterm = "%23"+searchterm # note to assemble hash character as %23
introtext = ARGV[3] # example "Agchaxt for 2009-12-29"
titletext = introtext
appendfilename = ARGV[4] # name of file to append before </body>



# We are going to do calls to twitter to bring back MAXPAGE sets of PAGESIZE entries (1500 is max number of Twitter entries returnable so 1500/maxpage=pagesize).
# See http://apiwiki.twitter.com/Twitter-Search-API-Method:-search

pagesize=25 # 10, 20, 25, 30, or 50 based on search.twitter.com advance settings NOTE: set maxpage below so that 1500/maxpage=pagesize is valid

# setup the twitter search url  
prequery="http://search.twitter.com/search?q=+"+searchterm+"+until%3A"+enddate+"&rpp="+pagesize.to_s+"&page="

# loop to tack on page portion of query.
maxpage = 60
i = maxpage
while i > 0 
 pagearray[i]=prequery+"%d" % i # see str % arg => new_str at http://ruby-doc.org/core/classes/String.html
 i -= 1
end


#-----------

# get intial page info for head info
doc = Hpricot(open(pagearray[1]))


#-----------
# get the head part of page, set absolute link, and update all relative links to absolute links
thehead=doc.search("/html/head")

thebody=doc.search("/html/body")

# set the absolute link
thelink = "http://search.twitter.com/"

# initial idea from http://garrickvanburen.com/archive/ruby-on-rails-snippet-for-changing-relative-paths-to-absolute
theheadnew=thehead.to_s.gsub(/=('|")\//, '=\1*/').gsub(/\*\//, thelink.match(/(http|https):\/\/[\w.]+\//)[0])


#-----------
# set up output

# beginning html file codes
htmlstart = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"><html>'

# setup the file pre-html 
# 2010 04 06 jlb changed <div id="timer"> to <div>
ulprehtml='<body>
  <div id="main">
    <div id="mainContent" style="width:460px;padding:0px;">
      <div id="results" style="width:440px;margin:5px 5px 5px 5px;padding: 5px 5px 5px 5px; ">
        <div >
          <h2><b>'+ titletext + '</b><br>
          <b>Note</b>: The posts are read from top to bottom.</h2>
        </div>'

# setup the file post-html 
ulposthtml='<p class="clearer"></p>
      </div>
    </div>
  </div>'

htmlclose= '</body></html>'

#-----------
# generate web html output
puts htmlstart
puts theheadnew
puts ulprehtml
puts '<ul>' 

#-----------
# process all pages
# get the results sections section of the page (the unordered list) and reverse the items so they read top to bottom
i = maxpage
while i > 0 
 puts reverse_li_elements(pagearray[i], i)
# puts 'test line' i
 i -= 1
end


puts '</ul>'
puts ulposthtml

#take contents of append file and insert before body close tag
File.open(appendfilename).each { |line|
    puts line 
}


puts htmlclose
