# -*- coding:utf-8 -*-
# @Time : 2018/4/27 10:50 AM
# @Author : Swing

import logging
import traceback

import scrapy

from elabSpider.email_util import send_email
from elabSpider.items import RentalHouseItem


class RentalHouseSpider(scrapy.Spider):
    """Crawl the anjuke community rent listing pages given in ``start_urls``.

    ``parse`` walks each listing page: every link found under
    ``ul.m-house-list`` is requested and handed to ``parse_item``, and the
    "next page" link (``a.aNxt``) is followed back into ``parse``.
    Failures are reported by email and logged instead of being raised, so a
    single bad page does not abort the crawl.
    """

    name = 'sjkrentalHouse'

    allowed_domains = [
        'nb.zu.anjuke.com',
        'nb.anjuke.com',
    ]

    start_urls = [
        'https://nb.anjuke.com/community/props/rent/275642/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/275642/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/1003094/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/1003094/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/275869/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/275869/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/973807/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/973807/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/973808/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/973808/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/275517/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/275517/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/1000067/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/1000067/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/406899/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/406899/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/1016525/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/1016525/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/275936/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/275936/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/1017728/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/1017728/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/275274/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/275274/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/275658/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/275658/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/275386/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/275386/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/1006982/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/1006982/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/275764/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/275764/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/792725/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/792725/lx8-x1/',
        'https://nb.anjuke.com/community/props/rent/1022250/lx1-x1/',
        'https://nb.anjuke.com/community/props/rent/1022250/lx8-x1/',
    ]

    def _report_error(self, subject, log_prefix, response, err):
        """Email and log a failure that occurred while handling *response*.

        Must be called from inside an ``except`` block, because the email
        body is built from ``traceback.format_exc()``.

        Args:
            subject: Subject line passed to ``send_email``.
            log_prefix: Leading text of the log record.
            response: The response being processed when the error occurred.
            err: The caught exception.
        """
        send_email(subject, response.url + '\n' + traceback.format_exc())
        # Exception args are not guaranteed to be strings (e.g. KeyError(3),
        # OSError errno); convert each one so reporting itself cannot raise.
        reason = '-'.join(str(arg) for arg in err.args)
        logging.error('%s ! url: %s reason: %s', log_prefix, response.url, reason)

    def parse(self, response):
        """Yield detail-page requests and the next listing-page request.

        Args:
            response: A listing page response.

        Yields:
            ``scrapy.Request`` objects: one per detail link (handled by
            ``parse_item``) and, when present, one for the next page
            (handled by ``parse``).
        """
        try:
            detail_links = response.xpath(
                '//ul[@class="m-house-list"]/li/a/@href'
            ).extract()
            for detail_link in detail_links:
                # urljoin leaves absolute URLs untouched and resolves
                # relative ones, which scrapy.Request would otherwise reject.
                yield scrapy.Request(
                    response.urljoin(detail_link), callback=self.parse_item
                )
        except Exception as err:
            self._report_error(
                'sjkrentalHouse get detail url error',
                'get detail url error',
                response,
                err,
            )

        # Pagination is handled separately so a failure while collecting
        # detail links does not stop the crawl from advancing.
        try:
            next_page = response.xpath(
                r'//div[@class="m-page"]/div[@class="multi-page"]'
                r'/a[@class="aNxt"]/@href'
            ).extract_first()
            if next_page:
                yield scrapy.Request(
                    response.urljoin(next_page), callback=self.parse
                )
        except Exception as err:
            self._report_error(
                'sjkrentalHouse get next page url parse error',
                'get next page url error',
                response,
                err,
            )

    def parse_item(self, response):
        """Yield the item built from a detail page.

        Args:
            response: A detail page response.

        Yields:
            The object returned by ``RentalHouseItem.handle_response``.
        """
        try:
            yield RentalHouseItem.handle_response(response)
        except Exception as err:
            self._report_error(
                'sjkrentalHouse parse response error',
                'parse response error',
                response,
                err,
            )