Hello, I'm trying to come up with a real-world example of performant twisted code to show as the conclusion of the europython training I'll be giving this Monday.
After much help in the #twisted channel, I've written a simple "website monitoring service". I'm trying to keep everything as small and self-contained as possible. There are 3 files, one with a very simple http GET implementation, one with a 1000 sites (some might be NSFW-this is taken from the Alexa site rank) and a simple web service that uses a couple of DeferredQueues and cooperate to do the actual work. However, I find that still there are issues - sometimes the output will block completely, signifying a blocked reactor (?). I don't know what else to try and fix, apart from setting up a local DNS server to cache the DNS queries. I'd appreciate either a) someone looking at the code and pointing out possible bugs or ways it could be improved or b) someone pointing out a proven, scaling real-world project that shows examples of doing things like this Thanks, Orestis
from twisted.internet import reactor, protocol, defer, error class HTTPGETProtocol(protocol.Protocol): def connectionMade(self): self.buffer = [] self.timeoutCall = None self.timedOut = False self.transport.write('GET %s HTTP/1.1\r\n' % self.factory.path) self.transport.write('User-Agent: europython/2011\r\n') self.transport.write('Host: %s\r\n' % self.factory.host) self.transport.write('Connection: close\r\n') self.transport.write('\r\n') self.timeoutCall = reactor.callLater(10, self.timeout) def dataReceived(self, data): self.buffer.append(data) def timeout(self): self.timedOut = True self.factory.deferred.errback(error.TimeoutError()) self.transport.loseConnection() def connectionLost(self, reason): if not self.timedOut: if self.timeoutCall and self.timeoutCall.active(): self.timeoutCall.cancel() self.factory.deferred.callback(''.join(self.buffer)) def get(address, host, path): f = protocol.ClientFactory() f.protocol = HTTPGETProtocol f.path = path f.host = host f.deferred = defer.Deferred() reactor.connectTCP(address, 80, f) return f.deferred if __name__ == '__main__': import sys host = sys.argv[1] path = sys.argv[2] d = get(host, path) def gotResult(data): print data d.addCallback(gotResult) d.addCallback(lambda _: reactor.stop()) reactor.run()
from twisted.names.client import createResolver from twisted.internet import reactor reactor.installResolver(createResolver(servers=[('8.8.8.8', 53), ('8.8.4.4', 53)])) from twisted.internet import defer, task from twisted.python import log from twisted.web.resource import Resource from twisted.web.server import Site, NOT_DONE_YET from twisted.names.client import getHostByName import sys log.startLogging(sys.stdout) from httpget2 import get from sites import SITES class Index(Resource): def __init__(self, *a, **kw): Resource.__init__(self, *a, **kw) self.dns_queue = defer.DeferredQueue() self.get_queue = defer.DeferredQueue() def got_site(self, data, site, agg): agg['OK'] += 1 #return 'GOT %s (%d)\r\n' % (site, len(data)) return 'Y' def got_get_error(self, failure, site, agg): agg['GET_ERROR'] += 1 return 'GET ERROR %s (%s)\r\n' % (site, failure.getErrorMessage()) def got_dns_error(self, failure, site, agg): agg['DNS_ERROR'] += 1 return 'DNS ERROR %s (%s)\r\n' % (site, failure.getErrorMessage()) def got_other_error(self, failure, site, agg): agg['OTHER_ERROR'] += 1 return 'UNKNOWN ERROR %s (%s)\r\n' % (site, failure.getErrorMessage()) def _ping(self, request, agg): request.write('---') request.write(str(agg)) request.write('\r\n') request.write('GET %d left\r\n' % len(self.get_queue.pending)) request.write('DNS %d left\r\n' % len(self.dns_queue.pending)) reactor.callLater(1, self._ping, request, agg) def _getSite(self, (address, site), request, agg): print 'getting', site, address if address is None: return defer.fail(Exception("SITE %s GOT none address" % site)) d = get(address, site, '/') d.addCallbacks(self.got_site, self.got_get_error, callbackArgs=(site, agg), errbackArgs=(site, agg)) d.addCallback(request.write) return d def _lookupName(self, site, request, agg): print 'looking up', site d = getHostByName(site, timeout=(1, 3, 7)) def put(address, site, agg): if address is None: request.write(self.got_dns_error(Exception("NONE ADDRESS"), site, agg)) else: agg['DNS'] += 1 self.get_queue.put((address, site)) d.addCallbacks(put, self.got_dns_error, callbackArgs=(site, agg), errbackArgs=(site, agg)) return d def _sites_to_lookup(self, request, agg): while True: yield self.dns_queue.get().addCallback(self._lookupName, request, agg) def _sites_to_get(self, request, agg): while True: yield self.get_queue.get().addCallback(self._getSite, request, agg) def render_GET(self, request): dl = [] AGG = { 'OK': 0, 'DNS': 0, 'DNS_ERROR': 0, 'GET_ERROR': 0, 'OTHER_ERROR': 0 } self._ping(request, AGG) for site in SITES: self.dns_queue.put(site) for i in range(12): task.cooperate(self._sites_to_lookup(request, AGG)) for i in range(45): c = task.cooperate(self._sites_to_get(request, AGG)) dl.append(c.whenDone()) dl = defer.DeferredList(dl) def finished(results): request.finish() dl.addCallback(finished) return NOT_DONE_YET root = Resource() root.putChild('', Index()) factory = Site(root) reactor.listenTCP(8000, factory) reactor.run()
SITES = ["google.com", "facebook.com", "youtube.com", "yahoo.com", "blogspot.com", "baidu.com", "wikipedia.org", "live.com", "twitter.com", "qq.com", "msn.com", "yahoo.co.jp", "taobao.com", "sina.com.cn", "google.co.in", "linkedin.com", "amazon.com", "wordpress.com", "google.com.hk", "google.de", "bing.com", "google.co.uk", "ebay.com", "yandex.ru", "163.com", "google.fr", "google.co.jp", "microsoft.com", "paypal.com", "google.com.br", "flickr.com", "googleusercontent.com", "fc2.com", "mail.ru", "google.it", "craigslist.org", "apple.com", "bbc.co.uk", "google.es", "google.ru", "imdb.com", "sohu.com", "vkontakte.ru", "ask.com", "go.com", "cnn.com", "tumblr.com", "xvideos.com", "bp.blogspot.com", "livejasmin.com", "youku.com", "soso.com", "google.ca", "aol.com", "tudou.com", "xhamster.com", "megaupload.com", "weibo.com", "ifeng.com", "zedo.com", "mediafire.com", "pornhub.com", "adobe.com", "ameblo.jp", "espn.go.com", "google.co.id", "godaddy.com", "yieldmanager.com", "rakuten.co.jp", "about.com", "4shared.com", "ebay.de", "alibaba.com", "google.com.tr", "livejournal.com", "google.com.mx", "wordpress.org", "livedoor.com", "renren.com", "google.com.au", "uol.com.br", "youporn.com", "cnet.com", "nytimes.com", "myspace.com", "google.pl", "huffingtonpost.com", "ebay.co.uk", "chinaz.com", "twitpic.com", "thepiratebay.org", "cnzz.com", "hao123.com", "conduit.com", "weather.com", "orkut.com.br", "netflix.com", "amazon.de", "orkut.com", "dailymotion.com", "alipay.com", "google.com.sa", "babylon.com", "doubleclick.com", "fileserve.com", "odnoklassniki.ru", "amazon.co.jp", "imgur.com", "tube8.com", "google.nl", "stumbleupon.com", "globo.com", "goo.ne.jp", "tmall.com", "secureserver.net", "tianya.cn", "imageshack.us", "ehow.com", "badoo.com", "xnxx.com", "photobucket.com", "megavideo.com", "deviantart.com", "dailymail.co.uk", "filestube.com", "douban.com", "spiegel.de", "taringa.net", "addthis.com", "sogou.com", "stackoverflow.com", "rapidshare.com", "redtube.com", "vimeo.com", "reddit.com", "hotfile.com", "amazon.co.uk", "aweber.com", "fbcdn.net", "mixi.jp", "optmd.com", "digg.com", "google.cn", "pengyou.com", "indiatimes.com", "answers.com", "360buy.com", "bit.ly", "google.com.eg", "filesonic.com", "sourceforge.net", "google.com.pk", "yfrog.com", "google.co.th", "bankofamerica.com", "58.com", "foxnews.com", "sparkstudios.com", "rediff.com", "mozilla.com", "nicovideo.jp", "maktoob.com", "56.com", "clicksor.com", "google.co.za", "amazonaws.com", "warriorforum.com", "xtendmedia.com", "download.com", "skype.com", "torrentz.eu", "statcounter.com", "liveinternet.ru", "reference.com", "google.com.ar", "guardian.co.uk", "ku6.com", "ucoz.ru", "xinhuanet.com", "youjizz.com", "kaixin001.com", "rambler.ru", "naver.com", "domaintools.com", "files.wordpress.com", "adultfriendfinder.com", "mashable.com", "bild.de", "megaclick.com", "scribd.com", "chase.com", "onet.pl", "wikimedia.org", "yelp.com", "pconline.com.cn", "free.fr", "ezinearticles.com", "ameba.jp", "espncricinfo.com", "pgmediaserve.com", "avg.com", "blogfa.com", "reuters.com", "etsy.com", "digitalpoint.com", "youdao.com", "hulu.com", "typepad.com", "salesforce.com", "terra.com.br", "adf.ly", "wikia.com", "wsj.com", "archive.org", "google.com.my", "w3schools.com", "clickbank.com", "telegraph.co.uk", "allegro.pl", "ganji.com", "zol.com.cn", "narod.ru", "comcast.net", "google.be", "in.com", "orange.fr", "squidoo.com", "orkut.co.in", "nba.com", "google.gr", "repubblica.it", "techcrunch.com", "soufun.com", "kaskus.us", "hatena.ne.jp", "google.com.vn", "angege.com", "gmx.net", "icio.us", "xunlei.com", "qiyi.com", "libero.it", "web.de", "rutracker.org", "hostgator.com", "hootsuite.com", "seesaa.net", "51.la", "php.net", "xing.com", "twimg.com", "mywebsearch.com", "slideshare.net", "126.com", "wretch.cc", "2ch.net", "soku.com", "google.com.tw", "csdn.net", "constantcontact.com", "google.se", "nifty.com", "people.com.cn", "hp.com", "leboncoin.fr", "dell.com", "partypoker.com", "google.com.ua", "ning.com", "google.at", "booking.com", "joomla.org", "tripadvisor.com", "daum.net", "hudong.com", "depositfiles.com", "google.ch", "paipai.com", "walmart.com", "usps.com", "wp.pl", "ups.com", "marca.com", "google.ro", "isohunt.com", "tagged.com", "china.com", "cj.com", "wordreference.com", "linkwithin.com", "homeway.com.cn", "abcnews.go.com", "thefreedictionary.com", "groupon.com", "wellsfargo.com", "metacafe.com", "themeforest.net", "mozilla.org", "cam4.com", "plentyoffish.com", "search-results.com", "tribalfusion.com", "freelancer.com", "arpg2.com", "opendns.com", "love21cn.com", "facemoods.com", "360.cn", "outbrain.com", "latimes.com", "netlog.com", "10086.cn", "elpais.com", "google.com.ph", "istockphoto.com", "match.com", "biglobe.ne.jp", "google.pt", "hardsextube.com", "dropbox.com", "zimbio.com", "spankwire.com", "hubpages.com", "yesky.com", "imagevenue.com", "ign.com", "ig.com.br", "uimserv.net", "engadget.com", "kooora.com", "washingtonpost.com", "kat.ph", "google.co.ve", "corriere.it", "ebay.it", "eastmoney.com", "xe.com", "time.com", "camzap.com", "amazon.cn", "seznam.cz", "google.com.ng", "51job.com", "pandora.com", "webs.com", "mlb.com", "w3.org", "lockerz.com", "sakura.ne.jp", "fiverr.com", "dianping.com", "goal.com", "tmz.com", "elance.com", "t-online.de", "indeed.com", "tradedoubler.com", "elmundo.es", "softonic.com", "neobux.com", "expedia.com", "duckload.com", "over-blog.com", "google.com.sg", "keezmovies.com", "google.com.co", "kakaku.com", "vnexpress.net", "ikea.com", "histats.com", "snapdeal.com", "zynga.com", "bestbuy.com", "weebly.com", "nih.gov", "alimama.com", "vancl.com", "ynet.com", "leo.org", "target.com", "aljazeera.net", "gamespot.com", "multiply.com", "drudgereport.com", "letitbit.net", "google.cl", "att.com", "google.ae", "soundcloud.com", "virgilio.it", "vk.com", "getfirebug.com", "amung.us", "tinypic.com", "basecamphq.com", "businessinsider.com", "softpedia.com", "google.ie", "dmm.co.jp", "sitesell.com", "drupal.org", "fedex.com", "mercadolivre.com.br", "mybrowserbar.com", "gotomeeting.com", "google.co.hu", "google.co.kr", "mailchimp.com", "rr.com", "bluehost.com", "btjunkie.org", "drtuber.com", "feedburner.com", "hi5.com", "justin.tv", "inetglobal.com", "vmn.net", "people.com", "shutterstock.com", "huanqiu.com", "hurriyet.com.tr", "ustream.tv", "usatoday.com", "samsung.com", "admin5.com", "odesk.com", "bloomberg.com", "commentcamarche.net", "skyrock.com", "lenta.ru", "geocities.jp", "cntv.cn", "slutload.com", "myegy.com", "livedoor.biz", "tweetmeme.com", "mihanblog.com", "discuz.net", "pchome.net", "pokerstrategy.com", "americanexpress.com", "google.no", "autohome.com.cn", "comcast.com", "ero-advertising.com", "mynet.com", "youm7.com", "exblog.jp", "peyvandha.ir", "forbes.com", "4399.com", "google.dk", "google.com.pe", "milliyet.com.tr", "gazeta.pl", "lashou.com", "ebay.com.au", "pornhublive.com", "mpnrs.com", "brothersoft.com", "cbsnews.com", "dangdang.com", "cocolog-nifty.com", "bearshare.com", "rbc.ru", "mop.com", "zing.vn", "lzjl.com", "verycd.com", "surveymonkey.com", "onbux.com", "news.com.au", "jquery.com", "blackhatworld.com", "it168.com", "chinanews.com", "posterous.com", "softlayer.com", "ya.ru", "baixing.com", "lemonde.fr", "ocn.ne.jp", "tom.com", "pcpop.com", "google.fi", "ibm.com", "meetup.com", "instagr.am", "exoplanetwar.com", "imesh.com", "qidian.com", "newegg.com", "ebay.fr", "imagebam.com", "gutefrage.net", "naukri.com", "cz.cc", "gc.ca", "gougou.com", "nk.pl", "mgid.com", "sape.ru", "playstation.com", "android.com", "formspring.me", "zhaopin.com", "oneindia.in", "clixsense.com", "hc360.com", "google.co.il", "altervista.org", "qip.ru", "ziddu.com", "51.com", "shareasale.com", "tnaflix.com", "irctc.co.in", "careerbuilder.com", "meituan.com", "blackberry.com", "wikihow.com", "monster.com", "fastclick.com", "mapquest.com", "verizonwireless.com", "thesun.co.uk", "115.com", "way2sms.com", "pogo.com", "v1.cn", "last.fm", "google.cz", "yomiuri.co.jp", "plimus.com", "dmoz.org", "z5x.net", "hypergames.net", "so-net.ne.jp", "dtiblog.com", "as.com", "smashingmagazine.com", "wunderground.com", "excite.co.jp", "livingsocial.com", "oracle.com", "wired.com", "theplanet.com", "com-net.info", "sitemeter.com", "megaporn.com", "miniclip.com", "tabelog.com", "gsmarena.com", "fotolia.com", "chip.de", "hdfcbank.com", "jugem.jp", "kijiji.ca", "mobile.de", "seomoz.org", "ovh.net", "admagnet.net", "seobook.com", "habrahabr.ru", "immobilienscout24.de", "37see.com", "bigpoint.com", "multiupload.com", "yandex.ua", "bitauto.com", "ypmate.com", "cnbc.com", "foxsports.com", "appspot.com", "duowan.com", "nu.nl", "extratorrent.com", "lequipe.fr", "demonoid.me", "weather.com.cn", "beemp3.com", "fatakat.com", "ndtv.com", "www.net.cn", "macrumors.com", "speedtest.net", "kinopoisk.ru", "enterfactory.com", "github.com", "grooveshark.com", "zillow.com", "anonym.to", "linksynergy.com", "adultadworld.com", "qq937.com", "articlesbase.com", "pornhost.com", "mercadolibre.com.mx", "webmasterworld.com", "ibibo.com", "xcar.com.cn", "verizon.com", "livescore.com", "infusionsoft.com", "abril.com.br", "detik.com", "warez-bb.org", "templatemonster.com", "infolinks.com", "boston.com", "streamate.com", "oron.com", "who.is", "infoseek.co.jp", "partycasino.com", "aliexpress.com", "uploadstation.com", "zanox-affiliate.de", "lifehacker.com", "lefigaro.fr", "beeg.com", "friendfeed.com", "sfgate.com", "searchqu.com", "okcupid.com", "xtube.com", "businessweek.com", "nydailynews.com", "backpage.com", "urbandictionary.com", "nate.com", "nextag.com", "force.com", "sulekha.com", "tutsplus.com", "google.co.ma", "asg.to", "alertpay.com", "viadeo.com", "hyves.nl", "smowtion.com", "sdo.com", "timeanddate.com", "sapo.pt", "39.net", "3992929.com", "mercadolibre.com.ar", "networkedblogs.com", "gizmodo.com", "clickbank.net", "hubspot.com", "wn.com", "youjizzlive.com", "linkbucks.com", "jimdo.com", "battle.net", "seriesyonkis.com", "4tube.com", "videobb.com", "break.com", "tripod.com", "amazon.fr", "blogimg.jp", "adult-empire.com", "icontact.com", "accuweather.com", "webmd.com", "okwave.jp", "glispa.com", "allrecipes.com", "pixiv.net", "cncmax.cn", "sunporno.com", "webmoney.ru", "discoverbing.com", "xvideoslive.com", "priceline.com", "groupon.cn", "examiner.com", "speakasiaonline.com", "traidnt.net", "foursquare.com", "homedepot.com", "me.com", "zoho.com", "scriptmafia.org", "mysql.com", "club-asteria.com", "dantri.com.vn", "cracked.com", "am10.ru", "bleacherreport.com", "google.sk", "heise.de", "capitalone.com", "issuu.com", "iciba.com", "exbii.com", "cashtrafic.com", "cnblogs.com", "tuenti.com", "radikal.ru", "buzzfeed.com", "pptv.com", "icicibank.com", "2345.com", "ahram.org.eg", "sahibinden.com", "19lou.com", "slickdeals.net", "smh.com.au", "icbc.com.cn", "dreamstime.com", "disney.go.com", "gazzetta.it", "custhelp.com", "msn.ca", "moneycontrol.com", "yellowpages.com", "interia.pl", "mediaset.it", "persianblog.ir", "marketwatch.com", "4chan.org", "pch.com", "onlinedown.net", "atwiki.jp", "bodybuilding.com", "cnbeta.com", "searchengines.ru", "google.co.nz", "dyndns.org", "joy.cn", "docin.com", "sanook.com", "hotels.com", "kompas.com", "eluniversal.com.mx", "gaopeng.com", "swagbucks.com", "masrawy.com", "coupons.com", "getresponse.com", "gamefaqs.com", "filehippo.com", "freeones.com", "sinaimg.cn", "mtv.com", "wix.com", "whitepages.com", "brazzers.com", "paper.li", "nhl.com", "npr.org", "retailmenot.com", "trafficholder.com", "eyny.com", "wiktionary.org", "itau.com.br", "google.kz", "manta.com", "rakuten.ne.jp", "xda-developers.com", "nypost.com", "tinyurl.com", "gismeteo.ru", "hoopchina.com", "17173.com", "searchresultsdirect.com", "focus.cn", "88210212.com", "uploading.com", "jeuxvideo.com", "twiends.com", "hsbc.co.uk", "enet.com.cn", "rottentomatoes.com", "metrolyrics.com", "1und1.de", "rian.ru", "airtelforum.com", "pornerbros.com", "telegraaf.nl", "wetter.com", "mtime.com", "usbank.com", "6.cn", "vente-privee.com", "flippa.com", "nhk.or.jp", "nasa.gov", "aftonbladet.se", "ekolay.net", "popeater.com", "welt.de", "opera.com", "icq.com", "indianrail.gov.in", "overstock.com", "southwest.com", "songs.pk", "zappos.com", "sfr.fr", "sueddeutsche.de", "yoka.com", "realtor.com", "pcauto.com.cn", "mcssl.com", "skycn.com", "asahi.com", "ca.gov", "123rf.com", "yousendit.com", "exoclick.com", "citibank.com", "pcworld.com", "goo.gl", "ninemsn.com.au", "cmbchina.com", "virtapay.com", "macys.com", "sidereel.com", "kayak.com", "marketgid.com", "vivanews.com", "earthlink.net", "ct10000.com", "ctrip.com", "ebay.in", "barnesandnoble.com", "logmein.com", "allocine.fr", "elegantthemes.com", "rtl.de", "webhostingtalk.com", "dhgate.com", "nipic.com", "howstuffworks.com", "pixnet.net", "iminent.com", "varzesh3.com", "stern.de", "cookpad.com", "pagesjaunes.fr", "google.com.kw", "pomoho.com", "nikkansports.com", "sky.com", "ucoz.com", "traforet.ru", "orbitz.com", "norton.com", "tabnak.ir", "babycenter.com", "nationalgeographic.com", "compete.com", "clarin.com", "linternaute.com", "intuit.com", "liveperson.net", "namecheap.com", "marktplaats.nl", "freakshare.com", "m-w.com", "sweetim.com", "aruba.it", "gap.com", "nikkei.com", "europa.eu", "dict.cc", "perezhilton.com", "alphaporno.com", "td.com", "orf.at", "google.lk", "incredimail.com", "allabout.co.jp", "alice.it", "bahn.de", "sears.com", "technorati.com", "ikariam.com", "tubegalore.com", "cloudapp.net", "myfreecams.com", "eventbrite.com", "empflix.com", "taleo.net", "haberturk.com", "mangafox.com", "trulia.com", "independent.co.uk", "quikr.com", "topix.com", "politico.com", "mainichi.jp", "hostmonster.com", "cbssports.com", "inbox.com", "google.bg", "zhubajie.com", "2leep.com", "kino.to", "mangastream.com", "letv.com", "gumtree.com", "modelmayhem.com", "woot.com", "sponichi.co.jp", "google.com.qa", "24h.com.vn", "dealextreme.com", "magentocommerce.com", "mail.com", "quora.com", "made-in-china.com", "nokia.com", "informer.com", "178.com", "giveawayoftheday.com", "picnik.com", "nikkeibp.co.jp", "sxc.hu", "zazzle.com", "worldstarhiphop.com", "askmen.com", "lowes.com", "qunar.com", "adscale.de", "tuan800.com", "java.com", "ip138.com", "letmewatchthis.ch", "gawker.com", "musica.com", "zendesk.com", "idnes.cz", "xyxy.net", "auto.ru", "mainadv.com", "failblog.org", "cloob.com", "makemytrip.com", "sify.com", "pho.to", "7k7k.com", "ultimate-guitar.com", "naver.jp", "17kuxun.com", "ticketmaster.com", "blackhatteam.com", "5d6d.com", "makepolo.com", "justdial.com", "sitepoint.com", "hawaaworld.com", "alarabiya.net", "dafont.com", "yam.com", "pantip.com", "rutube.ru", "cyworld.com", "ads8.com", "novinky.cz", "tiexue.net", "travian.ae", "cbslocal.com", "120ask.com", "intel.com", "ubuntu.com", "niux88.com", "postbank.de", "weather.gov", "networksolutions.com", "laredoute.fr", "veoh.com", "friendster.com", "lacaixa.es", "docstoc.com", "kongregate.com", "google.az", "pcmag.com", "fishki.net", "bharatstudent.com", "sanspo.com", "delta.com", "blogsky.com", "teacup.com", "grepolis.com", "gazeta.ru", "zdnet.com", "merchantcircle.com", "ft.com", "kicker.de", "timesjobs.com", "hidemyass.com", "dl4all.com", "skysports.com", "uploaded.to", "icicibank.co.in", ]
_______________________________________________ Twisted-Python mailing list Twisted-Python@twistedmatrix.com http://twistedmatrix.com/cgi-bin/mailman/listinfo/twisted-python