I travel to 'item-name', how do i quickly travel to c-price and then print both values of text.
I tried: for anchor in element.xpath('//a[@class="item-name"]'): #Travel to item-name but when i getparent and then call xpath I get a whole bunch of span elements as a list - why? Shouldn't xpath start it's traversal from the current element (parent to 'a') and go downwards. Also why aren't contents of span text? print shows them to be Elements so.. cprice.text should work.. elements text <class 'lxml.html.HtmlElement'> [<Element span at 0x7fd53b261470>, <Element span at 0x7fd53b2614c8>, <Element span at 0x7fd53b261520>, <Element span at 0x7fd53b261578>, <Element span at 0x7fd53b2615d0>, <Element span at 0x7fd53b261628>, <Element span at 0x7fd53b261680>, <Element span at 0x7fd53b2616d8>, <Element span at 0x7fd53b261730>, <Element span at 0x7fd53b261788>, <Element span at 0x7fd53b2617e0>, <Element span at 0x7fd53b261838>, <Element span at 0x7fd53b261890>, <Element span at 0x7fd53b2618e8>, <Element span at 0x7fd53b261940>, <Element span at 0x7fd53b261998>, <Element span at 0x7fd53b2619f0>, <Element span at 0x7fd53b261a48>, <Element span at 0x7fd53b261aa0>, <Element span at 0x7fd53b261af8>, <Element span at 0x7fd53b261b50>, <Element span at 0x7fd53b261ba8>, <Element span at 0x7fd53b261c00>, <Element span at 0x7fd53b261c58>] Traceback (most recent call last): File "fooxxx.py", line 47, in <module> text = anchor.text + " " + cprice.text + "\n" AttributeError: 'list' object has no attribute 'text' deathstar> <dl class="item" data-id="39280481144" =""> <dt class="photo"> <dd class="detail"> <a class="item-name" target="_blank" href="//item.taobao.com/item.htm?id=39280481144">\u675c\u90a6\u7ebf/\u5f69\u6392\u7ebf/40P-40p/\u53cc\u5934/\u516c\u5bf9\u516c/1P-1P/\u957f10CM/10\u5398\u7c73 \u6279\u53d1</a> <div class="attribute"> <div class="cprice-area"> <span class="symbol">¥</span> <span class="c-price">2.35 </span> </div> import sys, re import codecs import requests from lxml import html from lxml import etree url = 'http://xdguo.taobao.com/i/asynSearch.htm?_ksTS=1435804975003_695&callback=jsonp696&mid=w-6778075404-0&wid=6778075404&path=/&pageNo=' user_agent = ('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.16) ' 'Gecko/20111108 Iceweasel/3.5.16 (like Firefox/3.5.16)') def get_page(s, url): print(url) r = s.get(url, headers = { 'User-Agent' : user_agent, 'Keep-Alive' : '3600', 'Connection' : 'keep-alive', }) s.encoding='gbk' text = r.text return text # Open output file fh=codecs.open('/tmp/out', 'wb') # Download s = requests.Session() contents = get_page(s, url + str(1)) # Extract frag frag = re.findall('jsonp[0-9]+\("(.+?)"\)', contents, re.S)[0] element = html.fromstring(frag) # Clean frag frag = re.sub(r'\\', '', frag) print frag # Get a element and parse frag element = html.fromstring(frag) # Extract text from frag for anchor in element.xpath('//a[@class="item-name"]'): #Travel to a tag parent = anchor.getparent() # go one up print(type(parent)) cprice = parent.xpath('//span[@class="c-price"]') # This acts weird! print(cprice) text = anchor.text + " " + cprice.text + "\n" fh.write(text.encode('gbk')) # Close output and exit fh.close() sys.exit() -- https://mail.python.org/mailman/listinfo/python-list