Accessing Trac with Ruby's HTTP Library
I needed to access a Trac repository which was password-protected with basic authentication. Using Ruby's standard HTTP libraries, my first try did not work.
This is what does not work:
# Demonstrates the approach that does NOT work: it authenticates against the
# login page with basic auth, then requests the actual page without passing
# along any cookie from the login response.
#
# Returns nil when the login response is 401 Unauthorized; otherwise returns
# the body of the second response.
def trac_scraper_that_not_work
  client = Net::HTTP.new('some_trac_hoster.com', 443)
  client.use_ssl = true
  client.start do |session|
    login_request = Net::HTTP::Get.new('/trac/<something>/login')
    login_request.basic_auth('username', 'secret')
    login_reply = session.request(login_request)
    return nil if login_reply.instance_of?(Net::HTTPUnauthorized)

    # Nothing from login_reply (in particular no cookie) is forwarded here.
    page_request = Net::HTTP::Get.new('reports/or/whatever...')
    page_reply = session.request(page_request)
    page_reply.body
  end
end
Turns out that Trac uses cookies, so you need to provide the cookie you are given.
First you provide the authentication using the basic_auth method, then you copy the cookie
you are given and provide it in the next request. Like this:
# Fetches a page from a password-protected Trac instance over HTTPS.
#
# Trac tracks the authenticated session with a cookie, so this first logs in
# with HTTP basic authentication and then replays the cookie(s) handed out by
# the login response on the follow-up request.
#
# Returns the body of the fetched page, or nil when the login request is
# answered with 401 Unauthorized.
def trac_scraper_that_works
  http = Net::HTTP.new('some_trac_hoster.com', 443)
  http.use_ssl = true
  http.start do |connection|
    login_request = Net::HTTP::Get.new('/trac/<something>/login')
    login_request.basic_auth 'username', 'secret'
    login_response = connection.request(login_request)
    return nil if login_response.is_a?(Net::HTTPUnauthorized)

    # A Set-Cookie header looks like "name=value; Path=/; Secure", and a
    # response may carry several of them. Only the leading name=value pair of
    # each belongs in the Cookie request header, joined with "; ".
    set_cookie_fields = login_response.get_fields('set-cookie') || []
    cookie_pairs = set_cookie_fields
                   .map { |field| field.split(';', 2).first.to_s.strip }
                   .reject(&:empty?)

    # Send no Cookie header at all when the server did not set one.
    headers = cookie_pairs.empty? ? {} : { 'Cookie' => cookie_pairs.join('; ') }

    page_request = Net::HTTP::Get.new('reports/or/whatever...', headers)
    connection.request(page_request).body
  end
end
