# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

from scrapy.selector import Selector
import scrapy
import re
import time
import logging


class ResoldApartmentItem(scrapy.Item):
    """Resold-apartment listing parsed from a single detail page."""

    _id = scrapy.Field()
    title = scrapy.Field()
    unit_price = scrapy.Field()
    total_price = scrapy.Field()
    orientation = scrapy.Field()
    area = scrapy.Field()
    built_year = scrapy.Field()
    property = scrapy.Field()
    decoration = scrapy.Field()
    model = scrapy.Field()
    floor = scrapy.Field()
    image = scrapy.Field()
    house_type = scrapy.Field()
    trading_ownership = scrapy.Field()
    tag = scrapy.Field()
    location = scrapy.Field()
    longitude = scrapy.Field()
    latitude = scrapy.Field()
    page_url = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a listing detail page.

        Args:
            response: page response; must provide ``xpath()`` and ``url``.

        Returns:
            The populated item. A value whose node or pattern is missing is
            stored as ``None``; ``built_year``, ``location`` and ``tag`` are
            left unset instead when nothing was found.
        """
        item = cls()

        def first_text(query):
            """Return the first string selected by *query*, or None."""
            return response.xpath(query).extract_first()

        def first_match(pattern, text, group=0):
            """Return the requested regex group from *text*, or None."""
            if text is None:
                return None
            found = re.search(pattern, text)
            return found.group(group) if found else None

        def cell(section, side, row):
            """Build the XPath of one value cell in a general-info table."""
            return (
                f'//div[@id="{section}"]/div[@class="general-item-wrap"]'
                f'/ul[@class="general-item-{side}"]/li[{row}]/span[2]/text()'
            )

        title = first_text(
            '//ul[@class="house-basic-item3"]/li[1]/span[@class="c_000 mr_10"]/a[1]/text()')
        item['title'] = title.strip() if title is not None else None

        item['unit_price'] = first_match(
            r'[1-9][\d]*', first_text('//span[@class="unit"]/text()'))

        # Total price: the figure is followed by a unit character in a <b>
        # child; scale the figure when the unit is ten-thousand or thousand.
        total_price = first_match(
            r'[0-9]+(\.)?[0-9]*', first_text('//span[@class="price"]/text()'))
        price_unit = first_text('//span[@class="price"]/b/text()')
        multiplier = {'万': 10000, '千': 1000}.get(
            price_unit.strip() if price_unit else None)
        if total_price is not None and multiplier:
            # The figure may carry decimals, so go through float and round
            # back to a whole number before turning it into a string again.
            total_price = str(round(float(total_price) * multiplier))
        item['total_price'] = total_price

        item['orientation'] = first_text('//p[@class="toward"]/span[@class="main"]/text()')

        # Built year: keep only the first run of digits.
        built_year = first_match(
            r'[\d]+', first_text('//p[@class="toward"]/span[@class="sub"]/text()'))
        if built_year is not None:
            item['built_year'] = built_year

        item['area'] = first_match(
            r'[\d]+', first_text(cell('generalSituation', 'left', 3)))

        property_digits = response.xpath(cell('generalSituation', 'right', 3)).re(r'[\d]+')
        item['property'] = property_digits[0] if property_digits else None
        item['decoration'] = first_text(cell('generalSituation', 'right', 2))
        item['model'] = first_text(cell('generalSituation', 'left', 2))
        item['floor'] = first_text(cell('generalSituation', 'right', 1))
        item['house_type'] = first_text(cell('generalExpense', 'left', 2))
        item['trading_ownership'] = first_text(cell('generalExpense', 'left', 3))

        # Image
        item['image'] = first_text(
            '//div[@class="basic-pic-list pr"]/ul[@id="leftImg"]/li[1]/img/@data-value')

        # Location: link texts joined with '-'.
        location_list = response.xpath(
            '//ul[@class="house-basic-item3"]/li[2]/span[2]/a/text()').extract()
        if location_list:
            item['location'] = '-'.join(location_list)

        # Tag: only the first one is kept.
        tag_list = response.xpath(
            '//p[@class="house-update-info"]/span[@class="ts"]/text()').extract()
        if tag_list:
            item['tag'] = tag_list[0]

        # Latitude / longitude are read from the first inline script element.
        script_string = first_text('//script[@type="text/javascript"]')
        item['latitude'] = first_match(
            r'"lat":([1-9][\d]\.[\d]*),"', script_string, group=1)
        item['longitude'] = first_match(
            r'"lon":([1-9][\d][\d]\.[\d]*),"', script_string, group=1)

        item['page_url'] = response.url
        item['house_id'] = '109'

        return item


class CommunityItem(scrapy.Item):
    """Community (housing estate) entry parsed from a piece of markup."""

    _id = scrapy.Field()
    title = scrapy.Field()
    unit_price = scrapy.Field()
    floating_rate = scrapy.Field()
    built_year = scrapy.Field()
    location = scrapy.Field()
    page_url = scrapy.Field()
    type = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response, type):
        """Build an item from the markup of one community entry.

        Args:
            response: markup text; it is handed to ``Selector(text=...)``,
                so it is a string rather than a response object.
            type: value stored unchanged in the ``type`` field.

        Returns:
            The populated item; a value whose node is missing is ``None``.
        """
        item = cls()
        selector = Selector(text=response)

        def first_text(query):
            """Return the first string selected by *query*, or None."""
            return selector.xpath(query).extract_first()

        def stripped(query):
            """Like first_text, with surrounding whitespace removed."""
            value = first_text(query)
            return value.strip() if value is not None else None

        item['title'] = first_text(r'//h3/a/@title')
        item['unit_price'] = first_text(r'//div[@class="li-side"]/p/strong/text()')

        # The rate paragraph carries an extra "price-down" class in some
        # entries; fall back to that variant when the plain one has no text.
        floating_rate = first_text(r'//div[@class="li-side"]/p[@class="price-txt"]/text()')
        if not floating_rate:
            floating_rate = first_text(
                r'//div[@class="li-side"]/p[@class="price-txt price-down"]/text()')
        item['floating_rate'] = floating_rate

        item['location'] = stripped(r'//div[@class="li-info"]/address/text()')
        item['page_url'] = stripped(r'//div[@_soj="xqlb"]/@link')
        item['built_year'] = stripped(r'//p[@class="date"]/text()')
        item['type'] = type
        item['house_id'] = '109'
        return item


class FTXCommunityItem(scrapy.Item):
    """Community (housing estate) profile parsed from a detail page."""

    _id = scrapy.Field()
    title = scrapy.Field()
    unit_price = scrapy.Field()
    floating_rate = scrapy.Field()
    year_floating_tare = scrapy.Field()
    built_year = scrapy.Field()
    property = scrapy.Field()
    property_type = scrapy.Field()
    building_type = scrapy.Field()
    greening_rate = scrapy.Field()
    plot_ratio = scrapy.Field()
    total_area = scrapy.Field()
    building_area = scrapy.Field()
    construction = scrapy.Field()
    location = scrapy.Field()
    region = scrapy.Field()
    page_url = scrapy.Field()
    img_url = scrapy.Field()
    predict_type = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a community detail page.

        Args:
            response: page response; must provide ``xpath()`` and ``url``.

        Returns:
            The populated item. ``title`` is left unset when the heading is
            missing; every other scraped value is ``None`` when its node is
            absent.
        """
        item = cls()

        def first_text(query):
            """Return the first string selected by *query*, or None."""
            return response.xpath(query).extract_first()

        def summary(position):
            """Read the figure in the n-th <dl> of the price summary box."""
            return first_text(
                f'//div[@class="box detaiLtop mt20 clearfix"]/dl[{position}]/dd/span/text()')

        def detail(label, tail='/text()'):
            """Read the <dd> whose <strong> caption equals *label*."""
            return first_text(
                '//div[@class="box"][1]/div[@class="inforwrap clearfix"]'
                f'/dl/dd[strong="{label}"]{tail}')

        title_string = first_text(
            r'//div[@class="logoBox_sq"]/div[@class="ceninfo_sq"]/h1/a[@class="tt"]/text()')
        if title_string:
            # Drop the site suffix that follows the community name.
            item['title'] = title_string.replace('小区网', '')

        item['unit_price'] = summary(1)
        item['floating_rate'] = summary(2)
        item['year_floating_tare'] = summary(3)

        item['location'] = detail('小区地址：')
        item['region'] = detail('所属区域：')

        # Property: the first one- or two-digit number in the description.
        item['property'] = None
        property_string = detail('产权描述：')
        if property_string:
            digits = re.search(r'[\d]{1,2}', property_string)
            if digits:
                item['property'] = digits.group(0)

        item['property_type'] = detail('物业类别：')

        # Classify by the property number: none -> other, '70' -> residence,
        # anything else -> apartment.
        if not item['property']:
            item['predict_type'] = '其他'
        elif item['property'] == '70':
            item['predict_type'] = '住宅'
        else:
            item['predict_type'] = '公寓'

        # The construction value sits in a nested <span>, unlike the others.
        item['construction'] = detail('建筑结构：', tail='/span/text()')
        item['built_year'] = detail('建筑年代：')
        item['building_type'] = detail('建筑类型：')
        item['greening_rate'] = detail('绿 化 率：')
        item['plot_ratio'] = detail('容 积 率：')
        item['total_area'] = detail('占地面积：')
        item['building_area'] = detail('建筑面积：')
        item['img_url'] = first_text(
            r'//div[@class="logoBox_sq"]/div[@class="logopic_sq"]/a/img/@src')
        item['page_url'] = response.url
        item['house_id'] = '109'

        return item


class RentalHouseItem(scrapy.Item):
    """Rental listing parsed from a single detail page."""

    _id = scrapy.Field()
    title = scrapy.Field()
    location = scrapy.Field()
    price = scrapy.Field()
    house_type = scrapy.Field()
    area = scrapy.Field()
    orientation = scrapy.Field()
    floor = scrapy.Field()
    decoration = scrapy.Field()
    property_type = scrapy.Field()
    house_code = scrapy.Field()
    publish_date = scrapy.Field()
    longitude = scrapy.Field()
    latitude = scrapy.Field()
    img_url = scrapy.Field()
    page_url = scrapy.Field()
    date = scrapy.Field()
    coordinate = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a rental detail page.

        Args:
            response: page response; must provide ``xpath()`` and ``url``.

        Returns:
            The populated item. ``title``, ``price``, ``house_code``,
            ``publish_date``, ``longitude``, ``latitude`` and ``coordinate``
            are only set when the corresponding data was found.
        """
        item = cls()

        def first_text(query):
            """Return the first string selected by *query*, or None."""
            return response.xpath(query).extract_first()

        def info(label):
            """Read the value of the info-list row captioned *label*."""
            return first_text(
                f'//ul[@class="house-info-zufang cf"]/li[span="{label}"]/span[@class="info"]/text()')

        # The community row holds up to three links: the first is used as the
        # title, the next two are joined into the location.
        name_list = response.xpath(
            r'//ul[@class="house-info-zufang cf"]/li[span="小区："]/a/text()').extract()
        if name_list:
            item['title'] = name_list[0]
        # Joining avoids a dangling '-' when only one location part exists.
        item['location'] = '-'.join(name_list[1:3])

        price_list = response.xpath(
            r'//li[@class="full-line cf"]/span[@class="price"]//text()').extract()
        if price_list:
            item['price'] = "".join(price_list)

        item['house_type'] = info('户型：')
        item['area'] = info('面积：')
        item['orientation'] = info('朝向：')
        item['floor'] = info('楼层：')
        item['decoration'] = info('装修：')
        item['property_type'] = info('类型：')

        # One text node carries both the listing code and the publish date.
        house_info_string = first_text(
            r'//div[@class="mod-title bottomed"][h3="房屋信息"]/div/text()')
        if house_info_string:
            code_match = re.search(r'[\d]{6,}', house_info_string)
            if code_match:
                item['house_code'] = code_match.group()

            date_match = re.search(r'[\d]{0,4}年[\d]{0,2}月[\d]{0,2}日', house_info_string)
            if date_match:
                item['publish_date'] = date_match.group()

        # Coordinates are searched in the raw page text; build the selector
        # once and reuse it for both patterns.
        page = Selector(response)
        longitude_match = page.re(r'lng:([\d]{0,3}[\.][\d]*),')
        if longitude_match:
            item['longitude'] = longitude_match[0]

        latitude_match = page.re(r'lat:([\d]{0,2}[\.][\d]*),')
        if latitude_match:
            item['latitude'] = latitude_match[0]

        if longitude_match and latitude_match:
            try:
                item['coordinate'] = [float(item['longitude']), float(item['latitude'])]
            except (TypeError, ValueError) as err:
                # The patterns also accept a lone '.', which float() rejects.
                logging.error('type conversion error ! reason: ' + '-'.join(map(str, err.args)))

        item['img_url'] = first_text(
            r'//div[@class="switch_list"][1]/div[@class="img_wrap"][1]/img[1]/@data-src')
        item['page_url'] = response.url
        item['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
        item['house_id'] = '109'

        return item


class FTXRentalHouseItem(scrapy.Item):
    """Rental listing parsed from a single detail page.

    No property type, longitude or latitude is collected by this item.
    """

    _id = scrapy.Field()
    title = scrapy.Field()
    location = scrapy.Field()
    price = scrapy.Field()
    house_type = scrapy.Field()
    area = scrapy.Field()
    orientation = scrapy.Field()
    floor = scrapy.Field()
    decoration = scrapy.Field()
    house_code = scrapy.Field()
    update_date = scrapy.Field()
    img_url = scrapy.Field()
    page_url = scrapy.Field()
    date = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a rental detail page.

        Args:
            response: page response; must provide ``xpath()`` and ``url``.

        Returns:
            The populated item. ``title``, ``location``, ``price``,
            ``floor``, ``house_code`` and ``update_date`` are only set when
            the corresponding data was found.
        """
        item = cls()

        def first_text(query):
            """Return the first string selected by *query*, or None."""
            return response.xpath(query).extract_first()

        def labelled(width_class, label):
            """Read the value of the attribute box captioned *label*."""
            return first_text(
                f'//div[@class="trl-item1 {width_class}"][div[@class="font14"]="{label}"]'
                '/div[@class="tt"]/text()')

        # Title and location come from the inline `houseInfo` script object.
        house_info_match = Selector(response).re(r'var houseInfo = {[\s\S]*};')
        if house_info_match:
            info_str = house_info_match[0]

            def js_value(key):
                """Return the single-quoted value of `key: '...'`, or None."""
                found = re.search(key + r": '([\s\S]*?)',", info_str)
                return found.group(1) if found else None

            title_str = js_value('projname')
            if title_str is not None:
                item['title'] = title_str

            # Join only the parts that exist so a missing one does not leave
            # a dangling '-'.
            item['location'] = '-'.join(
                part for part in (js_value('district'), js_value('comarea')) if part)

        price_list = response.xpath(
            r'//div[@class ="tab-cont-right"]/div[@class ="tr-line clearfix zf_new_title"]/div[contains(@class, "trl-item sty1")]//text()').extract()
        if price_list:
            item['price'] = "".join(price_list).strip()

        item['house_type'] = labelled('w182', '户型')
        item['area'] = labelled('w132', '建筑面积')
        item['orientation'] = labelled('w146', '朝向')

        floor_list = response.xpath(
            r'//div[@class="trl-item1 w182"][div[@class="font14"][contains(text(), "楼层")]]/div//text()').extract()
        if floor_list:
            item['floor'] = '-'.join(floor_list)

        item['decoration'] = labelled('w132', '装修')

        house_code_string = first_text(r'//span[contains(text(), "房源编号")]/text()')
        if house_code_string:
            code_match = re.search(r'[\d]{6,}', house_code_string)
            if code_match:
                item['house_code'] = code_match.group()

        # Guard on the date text itself so a page with a code but no date
        # does not pass None to re.search, and a page with a date but no
        # code still gets its date.
        house_date_string = first_text(r'//span[contains(text(), "更新时间")]/text()')
        if house_date_string:
            date_match = re.search(r'[\d]{0,4}-[\d]{0,2}-[\d]{0,2}', house_date_string)
            if date_match:
                item['update_date'] = date_match.group()

        item['img_url'] = first_text(r'//div[@class="bigImg"]/img[1]/@src')
        item['page_url'] = response.url
        item['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
        item['house_id'] = '109'

        return item


class ResoldHouseItem(scrapy.Item):
    """Resold-house listing parsed from a single detail page."""

    _id = scrapy.Field()
    title = scrapy.Field()
    location = scrapy.Field()
    price = scrapy.Field()
    house_type = scrapy.Field()
    area = scrapy.Field()
    orientation = scrapy.Field()
    floor = scrapy.Field()
    decoration = scrapy.Field()
    property_type = scrapy.Field()
    total_price = scrapy.Field()
    down_payment = scrapy.Field()
    monthly_payment = scrapy.Field()
    house_code = scrapy.Field()
    publish_date = scrapy.Field()
    house_price_info = scrapy.Field()
    community_price_info = scrapy.Field()
    area_price_info = scrapy.Field()
    longitude = scrapy.Field()
    latitude = scrapy.Field()
    img_url = scrapy.Field()
    page_url = scrapy.Field()
    date = scrapy.Field()
    coordinate = scrapy.Field()
    house_id = scrapy.Field()
    build_year = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a resold-house detail page.

        Args:
            response: page response; must provide ``xpath()`` and ``url``.

        Returns:
            The populated item. ``location``, ``house_type``,
            ``house_code``, ``publish_date``, ``longitude``, ``latitude``
            and ``coordinate`` are only set when the corresponding data was
            found. ``house_price_info``, ``community_price_info`` and
            ``area_price_info`` are never filled here.
        """
        item = cls()

        def first_text(query):
            """Return the first string selected by *query*, or None."""
            return response.xpath(query).extract_first()

        def row(label, tail='/div[2]/text()'):
            """Read the raw value of the row captioned *label*."""
            return first_text(f'//li[div[text()="{label}"]]{tail}')

        def row_normalized(label, tail='/div[2]/text()'):
            """Like row(), with whitespace collapsed by XPath normalize-space."""
            return first_text(f'normalize-space(//li[div[text()="{label}"]]{tail})')

        item['title'] = row('所属小区：', tail='/div/a/text()')

        location_list = response.xpath(
            r'//li[div[text()="所在位置："]]/div/p//text()').extract()
        if location_list:
            location = ''.join(location_list).strip()
            item['location'] = location.replace('\n', '').replace('\t', '')

        item['price'] = row('房屋单价：')
        house_type_string = row_normalized('房屋户型：')
        if house_type_string:
            item['house_type'] = house_type_string.replace(' ', '')

        item['area'] = row('建筑面积：')
        item['orientation'] = row('房屋朝向：')
        item['floor'] = row_normalized('所在楼层：')
        item['decoration'] = row_normalized('装修程度：')
        item['property_type'] = row_normalized('房屋类型：')
        item['total_price'] = first_text(
            r'//div[@class="wrapper"]/div[@class="wrapper-lf clearfix"]/div[@class="basic-info clearfix"]/span[1]/em/text()')
        item['down_payment'] = row_normalized('参考首付：')
        item['monthly_payment'] = row_normalized('参考月供：', tail='/div/span/text()')
        item['build_year'] = row_normalized('建造年代：')

        house_code_string = first_text(r'//span[contains(text(), "房屋编码")]/text()')
        if house_code_string:
            code_match = re.search(r'[\d]{6,}', house_code_string)
            if code_match:
                item['house_code'] = code_match.group()

        # Guard on the date text itself so a page with a code but no date
        # does not pass None to re.search, and a page with a date but no
        # code still gets its date.
        house_date_string = first_text(r'//span[contains(text(), "发布时间")]/text()')
        if house_date_string:
            date_match = re.search(r'[\d]{0,4}年[\d]{0,2}月[\d]{0,2}日', house_date_string)
            if date_match:
                item['publish_date'] = date_match.group()

        # Coordinates are searched in the raw page text; build the selector
        # once and reuse it for both patterns.
        page = Selector(response)
        longitude_match = page.re(r'lng : "([\d]{0,3}[\.][\d]*)"')
        if longitude_match:
            item['longitude'] = longitude_match[0]

        latitude_match = page.re(r'lat : "([\d]{0,2}[\.][\d]*)"')
        if latitude_match:
            item['latitude'] = latitude_match[0]

        if longitude_match and latitude_match:
            try:
                item['coordinate'] = [float(item['longitude']), float(item['latitude'])]
            except (TypeError, ValueError) as err:
                # The patterns also accept a lone '.', which float() rejects.
                logging.error('type conversion error ! reason: ' + '-'.join(map(str, err.args)))

        item['img_url'] = first_text(
            r'//div[@class="switch_list"][1]/div[@class="img_wrap"][1]/img/@data-src')

        item['page_url'] = response.url
        item['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
        item['house_id'] = '109'

        return item


class LfsAveragePriceItem(scrapy.Item):
    """Average (mid) price of a community parsed from its page."""

    _id = scrapy.Field()
    title = scrapy.Field()
    price = scrapy.Field()
    arrow = scrapy.Field()
    rate = scrapy.Field()
    page_url = scrapy.Field()
    date = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a community page.

        Args:
            response: page response; must provide ``xpath()`` and ``url``.

        Returns:
            The populated item. ``price`` is only set when the
            ``comm_midprice`` value is present in the page text; ``arrow``
            and ``rate`` are declared but not filled here.
        """
        item = cls()

        item['title'] = response.xpath(r'//div[@class="comm-title"]/a/@title').extract_first()

        # The price is embedded in inline data, between the "comm_midprice"
        # key and the following "area_midprice" key.
        price_hits = Selector(response).re(
            r'(?<="comm_midprice":")([0-9]*(?=","area_midprice))')
        if price_hits:
            item['price'] = price_hits[0]

        item['page_url'] = response.url
        item['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
        item['house_id'] = '109'

        return item