KeyboardInterrupt close failed in file object destructor: sys.excepthook is missing lost sys.stderr
Code # #!/usr/bin/env python import sys, re def find_position(line): pun = "" if re.search(r"[.?!]+", line): pun = re.search(r"[.?!]+", line).group() pos = line.find(pun) pos = pos+len(pun)-1 return pos def sentence_splitter(filename): f = open(filename, "r") for line in f: line = line.strip() print line + "\n" while line: pos = find_position(line) line2 = line[ : pos+1].split(" ") length = len(line2) last_word = line2[length -1] try: if re.search(r"[A-Z]+.*", last_word) or line[pos+1] != " " or line[pos+2].islower() : print line[:pos+1], line = line[pos+1:] else: print line[ : pos+1] line = line[pos+1 :] except : print " error here!!" f.close() return " bye bye" if __name__=="__main__": print sentence_splitter(sys.argv[1]) ##3 exicution python sentence_splitter6.py README | more ### README Mr. Smith bought example.cheapsite.com for 1.5 million dollars, i.e. he paid a lot for it. Did he mind? Adam Jones Jr. thinks he didn't. In any case, this isn't true... Well, with a probability of .9 it isn't. The result should be: ~ output Mr. Smith bought example.cheapsite.com for 1.5 million dollars, i.e. he paid a lot for it. Did he mind? Adam Jones Jr. thinks he didn't. In any case, this isn't true... Well, with a probability of .9 it isn't. The result should be: Mr. Smith bought example. cheapsite. com for 1. 5 million dollars, i. e. he paid a lot for it. Did he mind? Adam Jones Jr. thinks he didn't. In any case, this isn't true... Well, with a probability of . 9 it isn't. ##3 error KeyboardInterrupt close failed in file object destructor: sys.excepthook is missing lost sys.stderr ## please help what is this i have try lot but unable to remove it -- https://mail.python.org/mailman/listinfo/python-list
ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'S SIZE 11.5 NEW IN BOX', '$49.99')' at
from BeautifulSoup import BeautifulSoup import re, urllib2, MySQLdb #MySQLdb.escape_string(" ") import sys import unicodedata if __name__=="__main__": #link = raw_input("Enter the url link: ") db = MySQLdb.connect("localhost","root","", "ebay") cursor=db.cursor() link = "http://www.ebay.com/sch/Mens-Shoes-/93427/i.html?_pppn=r1&_dcat=93427&LH_ItemCondition=1000%7C1500"; link1 =db.escape_string(link) page = urllib2.urlopen(link) soup = BeautifulSoup(page) Contents = soup.findAll(itemprop = "name") lst_content = [] for x in Contents: x = x.string lst_content.append(x) Price = soup.findAll(itemprop = "price") lst_price =[] for y in Price: y = y.string lst_price.append(y) for x , y in zip(lst_content, lst_price): sql = """insert into `category` (url, catagory,price) VAlUES ('%s', '%s', '%s')"""%(link1,x,y) #print sql sql = unicodedata.normalize('NFKD', sql).encode('ascii','ignore') #sys.exit(0) cursor.execute(sql) db.commit() db.close() +++ error occur ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'S SIZE 11.5 NEW IN BOX', '$49.99')' at line 1") >>> -- https://mail.python.org/mailman/listinfo/python-list
please guide to make proxy type function in python
Please guide me on how to make a proxy-type function in Python. -- https://mail.python.org/mailman/listinfo/python-list
want to run proxy in python
Hey, will you guide me on how to run proxies from Python? I have tested lots of code, but my IP always shows as the same when I check it online. Please help. -- https://mail.python.org/mailman/listinfo/python-list
TypeError: not all arguments converted during string formatting
my code : #!/usr/bin/env python from bs4 import BeautifulSoup import re,urllib2,urlparse, MySQLdb def get_domain(url): return urlparse.urlparse(url).netloc def men_tshirts2(main_link, cat_link,db,cursor): #print main_link for cat,link in cat_link.iteritems(): cat = str(cat) #print cat, link page = urllib2.urlopen(link) soup = BeautifulSoup(page) page.close() item = soup.find_all("div",attrs={"class":"itemTitle"}) price = soup.find_all("div", attrs={"class":"itemPrice"}) item_list =[] price_list =[] seller_list =[] for x,y in zip(item,price): item_content=str(x.a.string) price = str(y.p.string) link = str(x.a.get("href")) page =urllib2.urlopen(link) soup = BeautifulSoup(page) page.close() data = soup.find_all("span", attrs={"class":"mbg-nw"}) seller = str(data[0].string) #print cat,item_content,price,seller gender = "men" sql = """insert into fashion(GENDER,links,category,item_content,price,seller) VAlUES('%s','%s','%s','%s','%s','s')""" cursor.execute(sql,(gender,main_link,cat,item_content,price,seller)) db.commit() #except: #db.rollback() #print len(gender),len(main_link),len(cat),len(item_content),len(price),len(seller) def men_tshirts(db,cursor): main_link = "http://fashion.ebay.in/index.html#men_tshirts"; domane = get_domain(main_link) main_page = urllib2.urlopen(main_link) main_soup=BeautifulSoup(main_page) main_page.close() data = main_soup.find_all("div",attrs= {"class":"itmTitle"}) price = main_soup.find_all("span",attrs={"class":"catlblTitle"}) cat_link = {} for x, y in zip(data, price): #cat= str(x.a.string)+":"+str(y.string) cat= str(x.a.string) link= "http://"+domane+"/"+str(x.a.get("href")) #print cat, link cat_link[cat] = link men_tshirts2(main_link, cat_link,db,cursor) if __name__=="__main__": db = MySQLdb.connect("localhost","root","india123","ebay_db" ) cursor = db.cursor() men_tshirts(db,cursor) db.close() ++ sql structure :- mysql> describe fashion; +--+--+--+-+---++ | Field| Type | Null | Key | Default | Extra | +--+--+--+-+---++ | id | int(11) 
| NO | PRI | NULL | auto_increment | | GENDER | varchar(6) | YES | | NULL | | | links| varchar(255) | YES | | NULL | | | category | varchar(255) | YES | | NULL | | | item_content | varchar(255) | YES | | NULL | | | price| varchar(10) | YES | | NULL | | | seller | varchar(20) | YES | | NULL | | | created_on | timestamp| NO | | CURRENT_TIMESTAMP | | +--+--+--+-+---++ 8 rows in set (0.00 sec) +++ error: query = query % db.literal(args) TypeError: not all arguments converted during string formatting -- https://mail.python.org/mailman/listinfo/python-list
GUI:-please answer want to learn GUI programming in python , how should i proceed.
GUI: I want to learn GUI programming in Python; how should I proceed? There are lots of books out there, so I am confused about which book I should refer to so that I don't waste time. Please answer. -- https://mail.python.org/mailman/listinfo/python-list
Re: GUI:-please answer want to learn GUI programming in python , how should i proceed.
thank you sir -- https://mail.python.org/mailman/listinfo/python-list
Want guidance to set proxy please help
Hey, I am working on a parsing-like project, so I need some steps to set a proxy so that my IP is not blocked by them. I am using this method: proxy_support = urllib2.ProxyHandler({"http":"http://61.147.82.87:8000"}) opener = urllib2.build_opener(proxy_support) urllib2.install_opener(opener) Is it OK? If yes, how can I verify that I have set the proxy? If no, please give some guidance; also, if some modification is needed, please help. -- https://mail.python.org/mailman/listinfo/python-list
CAPTCHA HANDLING IN MECHANIZE
HEY, I AM NEW TO PYTHON AND HAVE STARTED WORKING ON MECHANIZE. I WANT HELP ON HOW TO AUTOMATICALLY FILL A FORM HAVING A CAPTCHA FIELD. PLEASE ANSWER. -- https://mail.python.org/mailman/listinfo/python-list
HOW TO HANDLE CAPTCHA WHILE PARSING A WEB SITE
RIGHT NOW I AM WORKING WITH THE MECHANIZE MODULE, BUT I AM UNABLE TO SUBMIT THE CAPTCHA AUTOMATICALLY. IF YOU HAVE ANY IDEA, PLEASE SHARE IT WITH ME; ANY IDEA WILL BE APPRECIATED, ON ANY MODULE. -- https://mail.python.org/mailman/listinfo/python-list
how to handle captcha through machanize module or any module
Please do reply: how can I handle a captcha through the mechanize module? -- https://mail.python.org/mailman/listinfo/python-list
Re: how to handle captcha through machanize module or any module
#/usr/bin/env python import mechanize, re import cookielib import cgi import urllib2 from random import choice def get_domain(url): return urlparse.urlparse(url).netloc if __name__=="__main__": br = mechanize.Browser() cj = cookielib.LWPCookieJar() br.set_proxies({"http": "217.174.155.73:8080"}) br.set_cookiejar(cj) br.addheaders = [('User-agent', 'Firefox')] br.set_handle_equiv(True) br.set_handle_gzip(True) br.set_handle_redirect(True) br.set_handle_referer(True) br.set_handle_robots(False) br.set_handle_refresh(False) # can sometimes hang without this url = "http://goarticles.com/"; response = br.open(url) print response.info() link_list = [] for link in br.links(): match = re.search(r"register", str(link.url)) if match: link2 = (link.base_url[:-1]+link.url) if link2 not in link_list: link_list.append(link2) print link2 response2 = br.open(link2) for form in br.forms(): form.set_all_readonly(False) br.form = list(br.forms())[1] # use when form is unnamed for control in br.form.controls: if str(control.type) == "text" and str(control.name) =="email": br.form[str(control.name)]="jaiprak...@wisepromo.com" elif str(control.type)=="text": br.form[str(control.name)]="mybot"+control.name elif str(control.type) =="select": br.form[str(control.name)]=[str(control.items[1])] elif str(control.type) =="password": br.form[str(control.name)] = "mybotpassword213" elif str(control.type)=="checkbox": br.find_control(str(control.name)).items[0].selected=True elif re.search(r"captcha",str(control.name)): br.form[str(control.name)] = "mybotcaptcha" else: pass #elif str(control.type)=="submit": responce = br.submit() print responce.info() #print responce.read() sir , i am very new to python i have just started to learn it , my aim is to practice 1)simple automatic form registration, 2)simple automatic vitrification 3)simple automatic loging this site http://goarticles.com/register.html sir i m referring the mechanize module now i am stuck on two thing 1) how to automatically 
handle the captcha on this form 2) how to verify the filled form. Please help; I have already spent 2 days on this and just want some clue from your side. -- https://mail.python.org/mailman/listinfo/python-list
Re: HOW TO HANDLE CAPTCHA WHILE PARSING A WEB SITE
sorry sir, it will not happen again in future. -- https://mail.python.org/mailman/listinfo/python-list
Re: HOW TO HANDLE CAPTCHA WHILE PARSING A WEB SITE
sorry sir , it will not happen in future -- https://mail.python.org/mailman/listinfo/python-list
Re: CAPTCHA HANDLING IN MECHANIZE
it will not happen in future -- https://mail.python.org/mailman/listinfo/python-list
Python query on Firebug extension
Hello, I am working with the Selenium module for Python. I know how to load the Firebug extension with Selenium, but I want to know how to use the Firebug extension with the requests module / mechanize. I searched a lot but was unable to find it; please help. I want a technique similar to: from selenium import webdriver fp = webdriver.FirefoxProfile() fp.add_extension(extension='firebug-1.8.4.xpi') fp.set_preference("extensions.firebug.currentVersion", "1.8.4") #Avoid startup screen browser = webdriver.Firefox(firefox_profile=fp) -- https://mail.python.org/mailman/listinfo/python-list
Re: How to install googleapp engine on ubuntu 12.04 already downloads google_appengine_1.8.2.zip
Got the answer: I downloaded google_appengine_1.8.2.zip, unzipped it, and ran the command path-to-/dev_appserver.py path-to/application-directory. That's it. -- http://mail.python.org/mailman/listinfo/python-list
Requesting you all to please guide me: which tutorial is best for learning the Redis database?
Hello all, I want to learn the Redis database and its use via Python. Please guide me on which tutorials I should study so that I can learn it in a good way. I searched for this on Google, but I am a little confused, so please help me. Thank you, Jai -- https://mail.python.org/mailman/listinfo/python-list
Problem with PEXPECT in Python
import sys,os import pexpect source_file= 'sourcefile.txt' user='username' ip='00.00.00.00' desti_path='/home/jai/………' password='bond007' cmd ='scp'+' '+source_file+' '+user+'@'+ip+':'+desti_path try: foo = pexpect.spawn(cmd) foo.expect('.ssword:*') foo.sendline(passwd) foo.sendline('Passwd_to_server') foo.interact() except Exception,e: pass Its simple, this should work, it works for me -- http://mail.python.org/mailman/listinfo/python-list
Python query on Firebug extension
Hello, I am working with the Selenium module for Python. I know how to load the Firebug extension with Selenium, but I want to know how to use the Firebug extension with the requests module / mechanize. I searched a lot but was unable to find it; please help. I want a technique similar to: from selenium import webdriver fp = webdriver.FirefoxProfile() fp.add_extension(extension='firebug-.8.4.xpi') fp.set_preference("extensions.firebug.currentVersion", "1.8.4") browser = webdriver.Firefox(firefox_profile=fp) but in the requests module or the mechanize module. -- https://mail.python.org/mailman/listinfo/python-list