python

Google Search Screen Scraper

#!/usr/bin/env python
import urllib2
from urllib import urlencode
from urlparse import urlparse
 
from BeautifulSoup import BeautifulSoup
 
def doGoogleSearch(query, limit=10):
    def _googleSearch(query, start, limit):
        urlParams = {'q' : query}
        if start > 0:
            urlParams['start'] = start
        url = "http://www.google.com.au/search?hl=en&" + urlencode(urlParams)
        request = urllib2.Request(url)
        # Google blocks queries based on User Agent.

HTML Tidy

#!/usr/bin/env python
import sys
import getopt
 
def isHexDigit(c):
    """Return True if c is a single ASCII hexadecimal digit (0-9, a-f, A-F).

    Anything else yields False: the empty string, strings longer than one
    character, and non-ASCII characters that str.isdigit() would accept
    (for example superscript or Arabic-Indic digits).
    """
    # Explicit ASCII membership is used instead of c.isdigit() because
    # isdigit() is also True for non-ASCII digit characters, which are not
    # hexadecimal digits.  The length check is required because `in` on a
    # string is a substring test, so '' and 'ab' would otherwise match.
    return len(c) == 1 and c in '0123456789abcdefABCDEF'
 
def isEntityStartCharacter(c):
    """Return True if c is an underscore, a colon, or alphabetic.

    "Alphabetic" is whatever c.isalpha() reports.  Judging by the name, this
    is the test for the first character of an entity name.
    """
    return c in ('_', ':') or c.isalpha()
 
def isEntityCharacter(c):
    """Return True if c passes isEntityStartCharacter, is a digit, or is '.' or '-'.

    "Digit" is whatever c.isdigit() reports.
    """
    # Anything accepted as a start character is also accepted here.
    if isEntityStartCharacter(c):
        return True
    if c.isdigit():
        return True
    return c in ('.', '-')
 
class ParseError(Exception):
    """Exception type carrying a single, mandatory message."""

    def __init__(self, msg):
        # The message is forwarded unchanged to Exception, so it becomes
        # both args[0] and the str() of the error.
        super(ParseError, self).__init__(msg)
 
class TidyHtml:
    def init(self, html):
        self.input = html
        self.pos = 0
        self.tagStack = []
        self.beginText = False # Indic