departmentprice.py

# -*- coding:utf-8 -*-
# @Time : 2018/4/27 10:50 AM
# @Author : Swing
import scrapy
from elabSpider.items import CommunityItem
import traceback
from elabSpider.email_util import send_email


class DepartmentPriceSpider(scrapy.Spider):
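    """Collect community listing cards and the currently selected
    housing-type filter from Anjuke district pages in Shanghai, Hangzhou,
    Suzhou and Ningbo, yielding one CommunityItem per card and following
    pagination links.
    """
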
    name = 'departmentPrice'
    # Must cover every city in start_urls, otherwise Scrapy's offsite
    # middleware drops the follow-up page requests for that city.
    allowed_domains = [
        'shanghai.anjuke.com',
        'hangzhou.anjuke.com',
        'suzhou.anjuke.com',
        'nb.anjuke.com'
    ]
    start_urls = [
        'https://shanghai.anjuke.com/community/xujiahui/',
        'https://shanghai.anjuke.com/community/nanjingxilu/',
        'https://shanghai.anjuke.com/community/jingansi/',
        'https://shanghai.anjuke.com/community/lujiazui/',
        'https://shanghai.anjuke.com/community/nanjingdonglu/',
        'https://shanghai.anjuke.com/community/renminguangchang/',
        'https://shanghai.anjuke.com/community/xintiandia/',
        'https://hangzhou.anjuke.com/community/gulouy/t30/',
        'https://hangzhou.anjuke.com/community/hubin/t30/',
        'https://hangzhou.anjuke.com/community/wushana/t30/',
        'https://hangzhou.anjuke.com/community/wulin/t30/',
        'https://hangzhou.anjuke.com/community/xihuwenhuaguangchang/t30/',
        'https://hangzhou.anjuke.com/community/qianjiangxincheng/t30/',
        'https://suzhou.anjuke.com/community/guanqianjie/t34/',
        'https://suzhou.anjuke.com/community/pingjianglua/t34/',
        'https://suzhou.anjuke.com/community/shilus/t34/',
        'https://suzhou.anjuke.com/community/shishanw/t34/',
        'https://suzhou.anjuke.com/community/hudongs/t34/',
        'https://suzhou.anjuke.com/community/huxi/t34/',
        'https://nb.anjuke.com/community/tianyiguangchang/t107/',
        'https://nb.anjuke.com/community/gulouh/t107/',
        'https://nb.anjuke.com/community/dongbuxinchengw/t107/',
        'https://nb.anjuke.com/community/baizhangt/t107/',
        'https://nb.anjuke.com/community/zhongma/t107/',
        'https://hangzhou.anjuke.com/community/gulouy/t29/',
        'https://hangzhou.anjuke.com/community/hubin/t29/',
        'https://hangzhou.anjuke.com/community/wushana/t29/',
        'https://hangzhou.anjuke.com/community/wulin/t29/',
        'https://hangzhou.anjuke.com/community/xihuwenhuaguangchang/t29/',
        'https://hangzhou.anjuke.com/community/qianjiangxincheng/t29/',
        'https://suzhou.anjuke.com/community/guanqianjie/t33/',
        'https://suzhou.anjuke.com/community/pingjianglua/t33/',
        'https://suzhou.anjuke.com/community/shilus/t33/',
        'https://suzhou.anjuke.com/community/shishanw/t33/',
        'https://suzhou.anjuke.com/community/hudongs/t33/',
        'https://suzhou.anjuke.com/community/huxi/t33/',
        'https://nb.anjuke.com/community/tianyiguangchang/t105/',
        'https://nb.anjuke.com/community/gulouh/t105/',
        'https://nb.anjuke.com/community/dongbuxinchengw/t105/',
        'https://nb.anjuke.com/community/baizhangt/t105/',
        'https://nb.anjuke.com/community/zhongma/t105/'
    ]
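
    # Note: the trailing tNN path segments (t30/t29, t34/t33, t107/t105) are
    # Anjuke list filters with city-specific codes; judging by the house_type
    # extraction in parse() below, each pair presumably selects two housing
    # types, so every district is crawled once per filter. This reading is an
    # inference from the URLs, not confirmed by any Anjuke documentation.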
    def parse(self, response):
        try:
            # Each community card on an Anjuke listing page is a div carrying
            # the tracking attribute _soj="xqlb".
            community_list = response.xpath('//div[@class="maincontent"]/div[@class="list-content"]/div[@_soj="xqlb"]').extract()
            # The selected housing-type filter sits in slightly different
            # markup depending on the page variant, hence the two fallbacks.
            house_type = response.xpath('//div[@class="items"][3]/span[@class="elems-l pp-mod"]/a[@class="selected-item"]/text()').extract_first()
            if not house_type:
                house_type = response.xpath('//div[@class="items no-border-bottom"]/span[@class="elems-l "]/a[@class="selected-item"]/text()').extract_first()
            if not house_type:
                house_type = response.xpath('//div[@class="items"][3]/span[@class="elems-l "]/a[@class="selected-item"]/text()').extract_first()
            if community_list:
                for community in community_list:
                    item = CommunityItem.handle_response(community, house_type)
                    yield item
        except Exception:
            # Report the failing URL along with the stack trace.
            send_email('departmentPrice lv 1 web parse error', response.url + '\n' + traceback.format_exc())
            print('error info:', response.url)
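
        # Pagination: the "aNxt" anchor on Anjuke list pages holds the URL of
        # the next results page; follow it with this same callback until no
        # such link remains.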
        try:
            next_page = response.xpath('//div[@class="page-content"]/div[@class="multi-page"]/a[@class="aNxt"]/@href').extract_first()
            if next_page:
                yield scrapy.Request(next_page, callback=self.parse)
        except Exception:
            send_email('departmentPrice get next page url error', response.url + '\n' + traceback.format_exc())
            print('error info:', response.url)
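
# Usage (standard Scrapy CLI, run from the elabSpider project root):
#   scrapy crawl departmentPrice -o communities.json
# This assumes CommunityItem.handle_response() and email_util.send_email()
# are defined elsewhere in the elabSpider project, as the imports above
# suggest; neither is shown in this file.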