Hi folks,
Has anyone seen 'Googlewhack Adventure'?
http://www.davegorman.com/googlewhack.htm
I wrote a script to generate Googlewhacks - thought I'd share it with you. I'd better stop running it, as I fear Google may ban my IP for making 20 searches a second.
Oops, wrong script..
Will
import random
import threading
import urllib
import urllib2
WHACKER_THREADS = 20 random.seed() wordlist = [ line.rstrip() for line in file("word.lst") ] whacks = file( "whacks.txt", "a" ) class WhackerThread( threading.Thread ): excluded = "/dict .lst word.lst .txt words".split() def run(self): def check_word( word ): url = """http://dictionary.reference.com/search?q=%s""" % word dict_page = urllib2.urlopen( url ).read() return "No entry found" not in dict_page def is_excluded(page): for word in WhackerThread.excluded: if word in page: return True return False while( True ): word_a = random.choice( wordlist ) #word_a = "haggis" word_b = random.choice( wordlist ) words = word_a + " " + word_b google_url = """http://www.google.com/search?hl=en&q=%s+%s&btnG=Google+Search""" % ( word_a, word_b ) opener = urllib2.build_opener() opener.addheaders = [('User-agent', 'Mozilla/5.0')] google_page = opener.open(google_url).read() if is_excluded( google_page ): print words + " (probably a word list)" continue if "Results <b>1</b> - <b>1</b> of <b>1</b>" in google_page: if not check_word( word_a ): print "%s (%s is not in dicionary.com)" % (words, word_a) elif not check_word( word_b ): print "%s (%s is not in dicionary.com)" % (words, word_b) else: print words + " WHACK!" print >> whacks, words whacks.flush() else: print words + "(no whack)" Threads= [ WhackerThread() for _ in xrange(WHACKER_THREADS) ] for whacker_thread in Threads: whacker_thread.start()
-- http://mail.python.org/mailman/listinfo/python-list