resoldapartment.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. # -*- coding: utf-8 -*-
  2. import scrapy
  3. from elabSpider.items import *
  4. import traceback
  5. from elabSpider.email_util import send_email
  class ExampleSpider(scrapy.Spider):
      """Spider that walks the 58.com listing pages given in ``start_urls``.

      ``parse`` schedules a request to ``parse_item`` for every title link
      found in the ``house-list-wrap`` list, then follows the pager's
      "next" link back into ``parse``.  ``parse_item`` delegates item
      construction to ``ResoldApartmentItem.handle_response``.

      Exceptions raised while parsing are reported through ``send_email``
      and the spider's logger instead of being propagated.
      """

      name = '58ershoufang'
      allowed_domains = ['58.com']
      start_urls = ['http://nb.58.com/haishu/ershoufang/']

      def _report_error(self, subject, response):
          """E-mail and log the exception that is currently being handled.

          Must be called from inside an ``except`` block so that
          ``traceback.format_exc()`` has an active exception to format.

          :param subject: subject line of the notification e-mail.
          :param response: the response that was being processed; only its
              public ``url`` attribute is read.
          """
          send_email(subject, response.url + '\n' + traceback.format_exc())
          self.logger.exception('%s: %s', subject, response.url)

      def parse(self, response):
          """Yield one request per listed item, then one for the next page.

          :param response: response for a listing page.
          :return: generator of ``scrapy.Request`` objects.
          """
          # ``except Exception`` (not a bare ``except``) so that the
          # ``GeneratorExit`` raised at a ``yield`` when the generator is
          # closed is not swallowed and reported as a parse error.
          try:
              hrefs = response.xpath(
                  r'//ul[@class="house-list-wrap"]/li'
                  r'/div[@class="list-info"]/h2[@class="title"]/a/@href'
              ).extract()
              for href in hrefs:
                  # urljoin leaves absolute URLs untouched and resolves
                  # relative / scheme-less ones, which Request would reject.
                  yield scrapy.Request(response.urljoin(href),
                                       callback=self.parse_item)
          except Exception:
              self._report_error('58ershoufang lv 1 url parse error', response)

          try:
              next_page = response.xpath(
                  r'//div[@class="pager"]/a[@class="next"]/@href'
              ).extract_first()
              if next_page:
                  yield scrapy.Request(response.urljoin(next_page),
                                       callback=self.parse)
          except Exception:
              self._report_error('58ershoufang get next url error', response)

      def parse_item(self, response):
          """Yield the item built from a single detail page.

          :param response: response for an item detail page.
          :return: generator yielding whatever
              ``ResoldApartmentItem.handle_response`` returns.
          """
          try:
              yield ResoldApartmentItem.handle_response(response)
          except Exception:
              self._report_error('58ershoufang get item parse error', response)