# -*- coding: utf-8 -*-
import traceback

import scrapy

from elabSpider.items import *
from elabSpider.email_util import send_email


class ExampleSpider(scrapy.Spider):
    """Spider that walks 58.com listing pages and parses each linked detail page.

    Crawling starts at ``start_urls``. ``parse`` follows every listing link
    found in the ``house-list-wrap`` list and then the pager's "next" link;
    ``parse_item`` turns a detail page into an item via
    ``ResoldApartmentItem.handle_response`` (defined in ``elabSpider.items``).

    Failures are reported by e-mail through ``send_email`` and echoed to
    stdout; they never abort the crawl.
    """

    name = '58ershoufang'
    allowed_domains = ['58.com']
    start_urls = ['http://nb.58.com/haishu/ershoufang/']

    def _report_error(self, subject, response, console_message):
        """E-mail the current traceback for *response* and print a short marker.

        Must be called from inside an ``except`` block so that
        ``traceback.format_exc()`` sees the active exception.
        """
        send_email(subject, response.url + '\n' + traceback.format_exc())
        print(console_message)

    def parse(self, response):
        """Yield a request per listing on the page, then one for the next page.

        :param response: the listing-page response handed in by Scrapy.
        :return: generator of ``scrapy.Request`` objects.
        """
        # ``except Exception`` (not a bare ``except``) so that GeneratorExit,
        # raised at a ``yield`` when Scrapy closes the generator, is not
        # mistaken for a parsing failure.
        try:
            listing_links = response.xpath(
                r'//ul[@class="house-list-wrap"]/li/div[@class="list-info"]'
                r'/h2[@class="title"]/a/@href'
            )
            for href in listing_links:
                # urljoin leaves absolute URLs untouched and resolves
                # relative / scheme-less ones against the page URL.
                url = response.urljoin(href.extract())
                yield scrapy.Request(url, callback=self.parse_item)
        except Exception:
            self._report_error(
                '58ershoufang lv 1 url parse error', response, 'error'
            )

        try:
            next_page = response.xpath(
                r'//div[@class="pager"]/a[@class="next"]/@href'
            ).extract_first()
            # extract_first() returns None on the last page: stop paginating.
            if next_page:
                yield scrapy.Request(
                    response.urljoin(next_page), callback=self.parse
                )
        except Exception:
            self._report_error(
                '58ershoufang get next url error', response, 'error next page'
            )

    def parse_item(self, response):
        """Yield the item built from a single detail page.

        :param response: the detail-page response handed in by Scrapy.
        :return: generator yielding whatever
            ``ResoldApartmentItem.handle_response`` returns.
        """
        try:
            item = ResoldApartmentItem.handle_response(response)
            yield item
        except Exception:
            # The original read ``response.string``, an attribute Scrapy
            # responses do not have, so the handler itself crashed; print the
            # URL of the failing page instead.
            self._report_error(
                '58ershoufang get item parse error',
                response,
                'error' + response.url,
            )