# Script to spider a web application at random.
#
# Starting from a seed path, it fetches each page, collects the href of every
# <a> element on it, queues those hrefs for later visits, and then requests
# the current page a random number of extra times.
#
# You'll need rfuzz and hpricot to run this script:
#   gem install rfuzz
#   gem install hpricot

require 'rubygems'
require 'set'
require 'hpricot'
require 'rfuzz/client'

cl = RFuzz::HttpClient.new('localhost', 4000)

# Paths still to visit, seeded with the first path. Used as a stack (pop),
# so the most recently discovered link is visited next.
paths = ['/people']

# Paths already visited. A Set keeps the membership check O(1).
visited_paths = Set.new

until paths.empty?
  # Get the next path to visit
  path = paths.pop

  # Ignore it if already visited
  if visited_paths.include?(path)
    puts "Already visited #{path} - ignoring"
    next
  end

  visited_paths.add(path)
  puts "Getting links from page at #{path}"
  body = cl.get(path).http_body
  doc = Hpricot(body)

  # Get all links out of the page
  doc.search("//a").each do |l|
    # An <a> without an href (e.g. a named anchor) yields nil here; to_s turns
    # that into "" so it is skipped below instead of raising NoMethodError.
    found_path = l.attributes['href'].to_s

    # Ignore mailto and empty links
    next if found_path.empty? || found_path =~ /\Amailto:/

    # NOTE(review): hrefs are queued exactly as written in the page; relative
    # paths and absolute URLs to other hosts are not normalized or filtered.
    puts "Found link #{found_path}"
    paths.push(found_path)
  end

  # Visit current path 10-100 times with get requests.
  # rand(10) yields 0..9, so num_gets is one of 10, 20, ..., 100.
  num_gets = 10 + (rand(10) * 10)
  puts "Visiting #{path} #{num_gets} times"
  num_gets.times { cl.get(path) }
end