lfs_rental_house.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. # -*- coding:utf-8 -*-
  2. # @Time : 2018/4/27 10:50 AM
  3. # @Author : Swing
  4. import scrapy
  5. from elabSpider.items import RentalHouseItem
  6. import logging
  7. from elabSpider.email_util import send_email
  8. import traceback
  9. class RentalHouseSpider(scrapy.Spider):
  10. name = 'lfsrentalHouse'
  11. allowed_domains = [
  12. 'nb.zu.anjuke.com',
  13. 'nb.anjuke.com'
  14. ]
  15. start_urls = [
  16. 'https://nb.anjuke.com/community/props/rent/1003094'
  17. # 'https://nb.zu.anjuke.com/rent/F717483045'
  18. ]
  19. def parse(self, response):
  20. try:
  21. community_list = response.xpath('//ul[@class="m-house-list"]/li/a/@href').extract()
  22. if community_list:
  23. for community_url in community_list:
  24. yield scrapy.Request(community_url, callback=self.parse_item)
  25. except Exception as err:
  26. send_email('lfsrentalHouse get detail url error', response._url + '\n' + traceback.format_exc())
  27. logging.error('get detail url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
  28. try:
  29. next_page = response.xpath(r'//div[@class="m-page"]/div[@class="multi-page"]/a[@class="aNxt"]/@href.....').extract_first()
  30. if next_page:
  31. yield scrapy.Request(next_page, callback=self.parse)
  32. except Exception as err:
  33. send_email('lfsrentalHouse get next page error', response._url + '\n' + traceback.format_exc())
  34. logging.error('get next page url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
  35. def parse_item(self, response):
  36. try:
  37. item = RentalHouseItem.handle_response(response)
  38. yield item
  39. except Exception as err:
  40. send_email('lfsrentalHouse parse response error', response._url + '\n' + traceback.format_exc())
  41. logging.error('parse response error ! url: ' + response._url + " reason: " + '-'.join(err.args))