1234567891011121314151617181920212223242526272829303132333435363738 |
- # -*- coding: utf-8 -*-
- import scrapy
- from elabSpider.items import *
- import traceback
- from elabSpider.email_util import send_email
class ExampleSpider(scrapy.Spider):
    """Crawl the listing pages under ``start_urls`` and yield one item per detail page.

    ``parse`` handles a listing page: it schedules a request for every detail
    link it finds and then follows the "next" pager link.  ``parse_item``
    handles a detail page by delegating to
    ``ResoldApartmentItem.handle_response``.

    Failures in either callback are reported through ``send_email`` and a
    short console message instead of being propagated, so one bad page does
    not stop the crawl.
    """

    # Spider identifier used by Scrapy to select this spider.
    name = '58ershoufang'
    # Requests to hosts outside this domain are filtered by Scrapy.
    allowed_domains = ['58.com']
    # First listing page to fetch.
    start_urls = ['http://nb.58.com/haishu/ershoufang/']

    def parse(self, response):
        """Yield requests for every detail link on a listing page, then the next page.

        Args:
            response: The Scrapy response for a listing page.

        Yields:
            ``scrapy.Request`` objects: one per detail link (handled by
            ``parse_item``) and, when a "next" pager link exists, one for the
            following listing page (handled by ``parse``).
        """
        # NOTE: ``except Exception`` (not a bare ``except``) is required here
        # because the ``try`` body contains ``yield``.  A bare ``except`` would
        # also catch ``GeneratorExit`` when Scrapy closes the generator, send a
        # bogus error e-mail, and then hit the next ``yield`` below, raising
        # ``RuntimeError: generator ignored GeneratorExit``.
        try:
            detail_links = response.xpath(
                r'//ul[@class="house-list-wrap"]/li/div[@class="list-info"]'
                r'/h2[@class="title"]/a/@href'
            )
            for href in detail_links:
                # urljoin leaves absolute URLs untouched and resolves relative
                # ones, which scrapy.Request would otherwise reject.
                url = response.urljoin(href.extract())
                yield scrapy.Request(url, callback=self.parse_item)
        except Exception:
            send_email('58ershoufang lv 1 url parse error',
                       response.url + '\n' + traceback.format_exc())
            print('error')

        try:
            next_page = response.xpath(
                r'//div[@class="pager"]/a[@class="next"]/@href'
            ).extract_first()
            if next_page:
                yield scrapy.Request(response.urljoin(next_page),
                                     callback=self.parse)
        except Exception:
            send_email('58ershoufang get next url error',
                       response.url + '\n' + traceback.format_exc())
            print('error next page')

    def parse_item(self, response):
        """Yield the item built from a detail page.

        Args:
            response: The Scrapy response for a detail page.

        Yields:
            Whatever ``ResoldApartmentItem.handle_response(response)`` returns
            (presumably a populated ``ResoldApartmentItem`` - confirm against
            ``elabSpider.items``).
        """
        # See the note in ``parse``: a bare ``except`` around ``yield`` would
        # swallow ``GeneratorExit``.
        try:
            item = ResoldApartmentItem.handle_response(response)
            yield item
        except Exception:
            send_email('58ershoufang get item parse error',
                       response.url + '\n' + traceback.format_exc())
            # The original read ``response.string``, which Scrapy responses do
            # not define, so the handler itself crashed.  ``text`` is the
            # decoded body; getattr guards against non-text responses, where
            # accessing ``text`` raises AttributeError.
            print('error' + getattr(response, 'text', ''))
|