kopongo.com

home

Accessing Trac with Ruby's HTTP Library

08 Aug 2008

I needed to access a Trac repository which was password-protected with basic authentication. Using Ruby's standard HTTP library, my first try did not work.

This is what does not work:

# Counter-example: logs in with HTTP basic authentication and then requests
# the page, but does NOT forward any cookie from the login reply to the
# second request. Kept as an illustration of the approach that fails.
#
# Returns nil when the login reply is exactly Net::HTTPUnauthorized,
# otherwise the body of the second response.
def trac_scraper_that_not_work
  connection = Net::HTTP.new('some_trac_hoster.com', 443)
  connection.use_ssl = true

  connection.start do |session|
    # Step 1: hit the login URL with basic-auth credentials.
    login_request = Net::HTTP::Get.new('/trac/<something>/login')
    login_request.basic_auth('username', 'secret')
    login_reply = session.request(login_request)
    return nil if login_reply.instance_of?(Net::HTTPUnauthorized)

    # Step 2: ask for the real page. Nothing from the login reply is
    # attached here, so the request carries no cookie.
    page_request = Net::HTTP::Get.new('reports/or/whatever...')
    session.request(page_request).body
  end
end

It turns out that Trac uses cookies, so you need to send back the cookie you are given. First you provide the credentials using the basic_auth method, then you copy the cookie from the login response and provide it in the next request. Like this:

# Fetches a page from a Trac site protected by HTTP basic authentication.
#
# The login request is sent with basic-auth credentials; the cookies set by
# the login response are then sent back on the request for the actual page.
#
# Returns nil when the login is rejected with 401 Unauthorized, otherwise the
# body of the page response.
def trac_scraper_that_works
  http = Net::HTTP.new('some_trac_hoster.com', 443)
  http.use_ssl = true

  http.start do |session|
    login_request = Net::HTTP::Get.new('/trac/<something>/login')
    login_request.basic_auth 'username', 'secret'
    login_response = session.request(login_request)

    return nil if login_response.is_a?(Net::HTTPUnauthorized)

    # Every Set-Cookie header looks like "name=value; Path=...; Secure".
    # Only the leading name=value pair belongs in a Cookie request header,
    # and multiple cookies are separated by "; " (not the ", " that
    # response['set-cookie'] would produce when joining several headers).
    cookie_pairs = Array(login_response.get_fields('set-cookie')).map do |raw|
      raw.split(';', 2).first.to_s.strip
    end
    cookie_pairs = cookie_pairs.reject { |pair| pair.empty? }

    # Leave the header out entirely when the server set no cookie, rather
    # than sending a nil-valued Cookie header.
    headers = cookie_pairs.empty? ? {} : { 'Cookie' => cookie_pairs.join('; ') }

    page_request = Net::HTTP::Get.new('reports/or/whatever...', headers)
    session.request(page_request).body
  end
end