#!/usr/bin/env python
import urllib2
from urllib import urlencode
from urlparse import urlparse

from BeautifulSoup import BeautifulSoup


def doGoogleSearch(query, limit=10):
    def _googleSearch(query, start, limit):
        urlParams = {'q': query}
        if start > 0:
            urlParams['start'] = start
        url = "http://www.google.com.au/search?hl=en&" + urlencode(urlParams)
        request = urllib2.Request(url)
        # Google blocks queries based on the User-Agent header,
        # so pretend we are IE 7.
        request.add_header('User-agent',
                           'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT)')
        opener = urllib2.build_opener()
        htmlResults = opener.open(request).read()
        soup = BeautifulSoup(htmlResults)

        # Collect result links (Google marks them with class="l"),
        # stopping once we have gathered `limit` links in total.
        results = []
        howManyRemaining = limit - start
        for link in soup.findAll('a', {'class': 'l'}):
            if len(results) == howManyRemaining:
                break
            results.append(link['href'])

        # The div with id="nn" is the "Next" navigation element; if it is
        # present there are more result pages, so recurse to fetch the next
        # page (Google serves 10 results per page).
        if soup.find('div', {'id': 'nn'}):
            start = start + 10
            if start < limit:
                results.extend(_googleSearch(query, start, limit))
        return results

    return _googleSearch(query, 0, limit)


def main():
    results = doGoogleSearch('a_search_term', 20)
    for rank, link in enumerate(results):
        host = urlparse(link)[1]  # element 1 of the parse tuple is the host
        if host.endswith('mydomain.com'):
            # Search ranks are conventionally 1-based, hence rank + 1.
            print str(rank + 1) + ':' + link


if __name__ == '__main__':
    main()