123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496 |
- # -*- coding: utf-8 -*-
- # Define here the models for your scraped items
- #
- # See documentation in:
- # https://doc.scrapy.org/en/latest/topics/items.html
- from scrapy.selector import Selector
- import scrapy
- import re
- import time
- import logging
class ResoldApartmentItem(scrapy.Item):
    """Resold (second-hand) apartment listing parsed from a detail page.

    All fields are plain ``scrapy.Field`` declarations. ``_id`` is declared
    but is not assigned by ``handle_response``.
    """

    _id = scrapy.Field()
    title = scrapy.Field()
    unit_price = scrapy.Field()
    total_price = scrapy.Field()
    orientation = scrapy.Field()
    area = scrapy.Field()
    built_year = scrapy.Field()
    property = scrapy.Field()
    decoration = scrapy.Field()
    model = scrapy.Field()
    floor = scrapy.Field()
    image = scrapy.Field()
    house_type = scrapy.Field()
    trading_ownership = scrapy.Field()
    tag = scrapy.Field()
    location = scrapy.Field()
    longitude = scrapy.Field()
    latitude = scrapy.Field()
    page_url = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a listing detail page.

        Args:
            response: Response object exposing ``xpath()`` and ``url``.

        Returns:
            A populated ``ResoldApartmentItem``. ``built_year``, ``location``
            and ``tag`` are only set when the page provides them.

        Raises:
            AttributeError, TypeError, IndexError: When one of the mandatory
                nodes (title, prices, area, property term, coordinates) is
                missing from the page. These are deliberately not swallowed,
                so a layout change surfaces as a spider error.
        """
        def first(query):
            # First matching node as text, or None when nothing matches.
            return response.xpath(query).extract_first()

        def general_cell(section, side, index):
            # XPath of the value cell in one of the "general" info tables.
            return (
                '//div[@id="%s"]/div[@class="general-item-wrap"]'
                '/ul[@class="general-item-%s"]/li[%d]/span[2]/text()'
                % (section, side, index)
            )

        item = cls()
        item['title'] = first(
            '//ul[@class="house-basic-item3"]/li[1]'
            '/span[@class="c_000 mr_10"]/a[1]/text()'
        ).strip()

        unit_price_string = first('//span[@class="unit"]/text()')
        item['unit_price'] = re.search(r'[1-9][\d]*', unit_price_string).group()

        # Total price: the page shows a (possibly decimal) number followed by
        # a unit suffix in a <b> element; normalise it to a plain integer
        # amount when the unit is recognised.
        total_price_string = first('//span[@class="price"]/text()')
        total_price = re.search(r'[0-9]+(\.)?[0-9]*', total_price_string).group()
        # The unit must be extracted as text; comparing the raw SelectorList
        # against a string can never be true.
        price_unit = (first('//span[@class="price"]/b/text()') or '').strip()
        multiplier = {'万': 10000, '千': 1000}.get(price_unit)
        if multiplier is not None:
            # float() accepts the decimal values the regex above allows;
            # round() removes binary floating point noise from the product.
            total_price = str(round(float(total_price) * multiplier))
        item['total_price'] = total_price

        item['orientation'] = first('//p[@class="toward"]/span[@class="main"]/text()')

        # Build year: take the first run of digits; leave the field unset
        # rather than storing an empty string when there are none.
        built_year_str = first('//p[@class="toward"]/span[@class="sub"]/text()')
        if built_year_str:
            year_match = re.search(r'\d+', built_year_str)
            if year_match:
                item['built_year'] = year_match.group()

        area_string = first(general_cell('generalSituation', 'left', 3))
        item['area'] = re.search(r'[\d]+', area_string).group()
        item['property'] = response.xpath(
            general_cell('generalSituation', 'right', 3)
        ).re(r'[\d]+')[0]
        item['decoration'] = first(general_cell('generalSituation', 'right', 2))
        item['model'] = first(general_cell('generalSituation', 'left', 2))
        item['floor'] = first(general_cell('generalSituation', 'right', 1))
        item['house_type'] = first(general_cell('generalExpense', 'left', 2))
        item['trading_ownership'] = first(general_cell('generalExpense', 'left', 3))

        # Image: first picture of the gallery.
        item['image'] = first(
            '//div[@class="basic-pic-list pr"]/ul[@id="leftImg"]/li[1]/img/@data-value'
        )

        # Location: breadcrumb links joined with '-'.
        location_list = response.xpath(
            '//ul[@class="house-basic-item3"]/li[2]/span[2]/a/text()'
        ).extract()
        if location_list:
            item['location'] = '-'.join(location_list)

        # Tag: only the first one is kept.
        tag_list = response.xpath(
            '//p[@class="house-update-info"]/span[@class="ts"]/text()'
        ).extract()
        if tag_list:
            item['tag'] = tag_list[0]

        # Coordinates are embedded as JSON-like text in the first
        # text/javascript <script> element of the page.
        script_string = first('//script[@type="text/javascript"]')
        latitude = re.search(r'"lat":([1-9]\d\.\d*),"', script_string).group(1)
        longitude = re.search(r'"lon":([1-9]\d\d\.\d*),"', script_string).group(1)
        item['latitude'] = latitude
        item['longitude'] = longitude

        # Use the public accessor instead of the private ``_url`` attribute.
        item['page_url'] = response.url
        item['house_id'] = '109'
        return item
class CommunityItem(scrapy.Item):
    """Community summary parsed from a single list-entry HTML fragment.

    ``_id`` is declared but is not assigned by ``handle_response``.
    """

    _id = scrapy.Field()
    title = scrapy.Field()
    unit_price = scrapy.Field()
    floating_rate = scrapy.Field()
    built_year = scrapy.Field()
    location = scrapy.Field()
    page_url = scrapy.Field()
    type = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response, type):
        """Build an item from an HTML fragment.

        Args:
            response: HTML text; it is handed to ``Selector(text=...)``.
            type: Value stored unchanged in the ``type`` field.

        Returns:
            A populated ``CommunityItem``.

        Raises:
            AttributeError: When the address, link or date node is missing,
                because their text is stripped unconditionally.
        """
        entry = cls()
        fragment = Selector(text=response)

        def first(query):
            # First matching node as text, or None when nothing matches.
            return fragment.xpath(query).extract_first()

        entry['title'] = first(r'//h3/a/@title')
        entry['unit_price'] = first(r'//div[@class="li-side"]/p/strong/text()')
        # Fall back to the "price-down" variant when the plain price text
        # node is absent or empty.
        entry['floating_rate'] = (
            first(r'//div[@class="li-side"]/p[@class="price-txt"]/text()')
            or first(r'//div[@class="li-side"]/p[@class="price-txt price-down"]/text()')
        )
        address = first(r'//div[@class="li-info"]/address/text()')
        entry['location'] = address.strip()
        link = first(r'//div[@_soj="xqlb"]/@link')
        entry['page_url'] = link.strip()
        built = first(r'//p[@class="date"]/text()')
        entry['built_year'] = built.strip()
        entry['type'] = type
        entry['house_id'] = '109'
        return entry
class FTXCommunityItem(scrapy.Item):
    """Community profile parsed from an FTX community detail page.

    ``_id`` is declared but is not assigned by ``handle_response``.
    """

    _id = scrapy.Field()
    title = scrapy.Field()
    unit_price = scrapy.Field()
    floating_rate = scrapy.Field()
    year_floating_tare = scrapy.Field()
    built_year = scrapy.Field()
    property = scrapy.Field()
    property_type = scrapy.Field()
    building_type = scrapy.Field()
    greening_rate = scrapy.Field()
    plot_ratio = scrapy.Field()
    total_area = scrapy.Field()
    building_area = scrapy.Field()
    construction = scrapy.Field()
    location = scrapy.Field()
    region = scrapy.Field()
    page_url = scrapy.Field()
    img_url = scrapy.Field()
    predict_type = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a community detail page.

        Args:
            response: Response object exposing ``xpath()`` and ``_url``.

        Returns:
            A populated ``FTXCommunityItem``. ``title`` is only set when the
            heading is present; every other field is always assigned and is
            ``None`` when its node is missing.
        """
        def first(query):
            # First matching node as text, or None when nothing matches.
            return response.xpath(query).extract_first()

        def headline(position):
            # Figure shown in the n-th <dl> of the price banner.
            return first(
                r'//div[@class="box detaiLtop mt20 clearfix"]/dl[%d]/dd/span/text()'
                % position
            )

        def detail(label, tail='/text()'):
            # Value of the <dd> whose <strong> caption equals ``label``.
            return first(
                r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]'
                r'/dl/dd[strong="%s"]%s' % (label, tail)
            )

        record = cls()

        heading = first(
            r'//div[@class="logoBox_sq"]/div[@class="ceninfo_sq"]/h1/a[@class="tt"]/text()'
        )
        if heading:
            record['title'] = heading.replace('小区网', '')

        record['unit_price'] = headline(1)
        record['floating_rate'] = headline(2)
        record['year_floating_tare'] = headline(3)
        record['location'] = detail('小区地址:')
        record['region'] = detail('所属区域:')

        # Property term: the first one- or two-digit number in the
        # description, or None when there is no such number.
        term_text = detail('产权描述:')
        term_match = re.search(r'[\d]{1,2}', term_text) if term_text else None
        term = term_match.group(0) if term_match else None
        record['property'] = term
        record['property_type'] = detail('物业类别:')

        # The predicted type is derived only from the property term.
        if term == '70':
            predicted = '住宅'
        elif term:
            predicted = '公寓'
        else:
            predicted = '其他'
        record['predict_type'] = predicted

        record['construction'] = detail('建筑结构:', '/span/text()')
        record['built_year'] = detail('建筑年代:')
        record['building_type'] = detail('建筑类型:')
        record['greening_rate'] = detail('绿 化 率:')
        record['plot_ratio'] = detail('容 积 率:')
        record['total_area'] = detail('占地面积:')
        record['building_area'] = detail('建筑面积:')
        record['img_url'] = first(
            r'//div[@class="logoBox_sq"]/div[@class="logopic_sq"]/a/img/@src'
        )
        record['page_url'] = response._url
        record['house_id'] = '109'
        return record
class RentalHouseItem(scrapy.Item):
    """Rental listing parsed from a detail page.

    ``_id`` is declared but is not assigned by ``handle_response``.
    """

    _id = scrapy.Field()
    title = scrapy.Field()
    location = scrapy.Field()
    price = scrapy.Field()
    house_type = scrapy.Field()
    area = scrapy.Field()
    orientation = scrapy.Field()
    floor = scrapy.Field()
    decoration = scrapy.Field()
    property_type = scrapy.Field()
    house_code = scrapy.Field()
    publish_date = scrapy.Field()
    longitude = scrapy.Field()
    latitude = scrapy.Field()
    img_url = scrapy.Field()
    page_url = scrapy.Field()
    date = scrapy.Field()
    coordinate = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a rental detail page.

        Args:
            response: Response object exposing ``xpath()`` and ``url`` that
                can also be wrapped in a ``Selector``.

        Returns:
            A populated ``RentalHouseItem``. ``title``, ``price``,
            ``house_code``, ``publish_date``, ``longitude``, ``latitude`` and
            ``coordinate`` are only set when the page provides them.
        """
        def first(query):
            # First matching node as text, or None when nothing matches.
            return response.xpath(query).extract_first()

        def info(label):
            # Value of the info-list row whose caption <span> equals ``label``.
            return first(
                '//ul[@class="house-info-zufang cf"]/li[span="%s"]'
                '/span[@class="info"]/text()' % label
            )

        item = cls()

        # The community row holds up to three links: community name, then
        # two location parts that are combined as "<part1>-<part2>".
        name_list = response.xpath(
            '//ul[@class="house-info-zufang cf"]/li[span="小区:"]/a/text()'
        ).extract()
        if name_list:
            item['title'] = name_list[0]
        location_string = ''
        if len(name_list) > 1:
            location_string = name_list[1] + '-'
        if len(name_list) > 2:
            location_string += name_list[2]
        item['location'] = location_string

        price_list = response.xpath(
            '//li[@class="full-line cf"]/span[@class="price"]//text()'
        ).extract()
        if price_list:
            item['price'] = ''.join(price_list)

        item['house_type'] = info('户型:')
        item['area'] = info('面积:')
        item['orientation'] = info('朝向:')
        item['floor'] = info('楼层:')
        item['decoration'] = info('装修:')
        item['property_type'] = info('类型:')

        # The "house information" header carries both the listing code and
        # the publish date in one text node.
        house_info_string = first(
            '//div[@class="mod-title bottomed"][h3="房屋信息"]/div/text()'
        )
        if house_info_string:
            code_match = re.search(r'[\d]{6,}', house_info_string)
            if code_match:
                item['house_code'] = code_match.group()
            date_match = re.search(
                r'[\d]{0,4}年[\d]{0,2}月[\d]{0,2}日', house_info_string
            )
            if date_match:
                item['publish_date'] = date_match.group()

        # Coordinates appear as "lng:<num>," / "lat:<num>," in the page
        # source; a capture group yields the number in a single pass.
        page = Selector(response)
        longitude_match = page.re(r'lng:([\d]{0,3}[\.][\d]*),')
        if longitude_match:
            item['longitude'] = longitude_match[0]
        latitude_match = page.re(r'lat:([\d]{0,2}[\.][\d]*),')
        if latitude_match:
            item['latitude'] = latitude_match[0]
        if longitude_match and latitude_match:
            try:
                item['coordinate'] = [
                    float(item['longitude']),
                    float(item['latitude']),
                ]
            except ValueError as err:
                # The pattern admits a lone "." which float() rejects; log
                # and continue without a coordinate pair.
                logging.error('type conversion error ! reason: %s', err)

        item['img_url'] = first(
            '//div[@class="switch_list"][1]/div[@class="img_wrap"][1]/img[1]/@data-src'
        )
        # Use the public accessor instead of the private ``_url`` attribute.
        item['page_url'] = response.url
        item['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
        item['house_id'] = '109'
        return item
class FTXRentalHouseItem(scrapy.Item):
    """Rental listing parsed from an FTX detail page.

    ``_id`` is declared but is not assigned by ``handle_response``.
    """

    _id = scrapy.Field()
    title = scrapy.Field()
    location = scrapy.Field()
    price = scrapy.Field()
    house_type = scrapy.Field()
    area = scrapy.Field()
    orientation = scrapy.Field()
    floor = scrapy.Field()
    decoration = scrapy.Field()
    # NOTE: property_type, longitude and latitude are not collected for
    # this source.
    house_code = scrapy.Field()
    update_date = scrapy.Field()
    img_url = scrapy.Field()
    page_url = scrapy.Field()
    date = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a rental detail page.

        Args:
            response: Response object exposing ``xpath()`` and ``url`` that
                can also be wrapped in a ``Selector``.

        Returns:
            A populated ``FTXRentalHouseItem``. ``title``, ``location``,
            ``price``, ``floor``, ``house_code`` and ``update_date`` are only
            set when the page provides them.
        """
        def first(query):
            # First matching node as text, or None when nothing matches.
            return response.xpath(query).extract_first()

        def cell(width_class, caption):
            # Value of the summary cell with the given CSS width class and
            # caption text.
            return first(
                '//div[@class="trl-item1 %s"][div[@class="font14"]="%s"]'
                '/div[@class="tt"]/text()' % (width_class, caption)
            )

        item = cls()

        # Title and location come from the inline ``houseInfo`` JavaScript
        # object literal rather than from the rendered markup.
        house_info_match = Selector(response).re(r'var houseInfo = {[\s\S]*};')
        if house_info_match:
            info_str = house_info_match[0]

            def js_field(key):
                # Value of a ``key: '...',`` entry, or None when absent.
                found = re.search(r"%s: '([\s\S]*?)'," % key, info_str)
                return found.group(1) if found else None

            title_str = js_field('projname')
            if title_str is not None:
                item['title'] = title_str

            location_string = ''
            district_str = js_field('district')
            if district_str is not None:
                location_string += district_str + '-'
            comarea_str = js_field('comarea')
            if comarea_str is not None:
                location_string += comarea_str
            item['location'] = location_string

        price_list = response.xpath(
            '//div[@class ="tab-cont-right"]'
            '/div[@class ="tr-line clearfix zf_new_title"]'
            '/div[contains(@class, "trl-item sty1")]//text()'
        ).extract()
        if price_list:
            item['price'] = ''.join(price_list).strip()

        item['house_type'] = cell('w182', '户型')
        item['area'] = cell('w132', '建筑面积')
        item['orientation'] = cell('w146', '朝向')

        floor_list = response.xpath(
            '//div[@class="trl-item1 w182"]'
            '[div[@class="font14"][contains(text(), "楼层")]]/div//text()'
        ).extract()
        if floor_list:
            item['floor'] = '-'.join(floor_list)

        item['decoration'] = cell('w132', '装修')

        house_code_string = first('//span[contains(text(), "房源编号")]/text()')
        if house_code_string:
            code_match = re.search(r'[\d]{6,}', house_code_string)
            if code_match:
                item['house_code'] = code_match.group()

        house_date_string = first('//span[contains(text(), "更新时间")]/text()')
        # Guard on the date text itself. Checking the listing-code text here
        # would pass None to re.search when only the date is missing, and
        # would drop the date when only the code is missing.
        if house_date_string:
            date_match = re.search(
                r'[\d]{0,4}-[\d]{0,2}-[\d]{0,2}', house_date_string
            )
            if date_match:
                item['update_date'] = date_match.group()

        item['img_url'] = first('//div[@class="bigImg"]/img[1]/@src')
        # Use the public accessor instead of the private ``_url`` attribute.
        item['page_url'] = response.url
        item['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
        item['house_id'] = '109'
        return item
class ResoldHouseItem(scrapy.Item):
    """Resold (second-hand) house listing parsed from a detail page.

    ``_id``, ``house_price_info``, ``community_price_info`` and
    ``area_price_info`` are declared but are not assigned by
    ``handle_response``.
    """

    _id = scrapy.Field()
    title = scrapy.Field()
    location = scrapy.Field()
    price = scrapy.Field()
    house_type = scrapy.Field()
    area = scrapy.Field()
    orientation = scrapy.Field()
    floor = scrapy.Field()
    decoration = scrapy.Field()
    property_type = scrapy.Field()
    total_price = scrapy.Field()
    down_payment = scrapy.Field()
    monthly_payment = scrapy.Field()
    house_code = scrapy.Field()
    publish_date = scrapy.Field()
    house_price_info = scrapy.Field()
    community_price_info = scrapy.Field()
    area_price_info = scrapy.Field()
    longitude = scrapy.Field()
    latitude = scrapy.Field()
    img_url = scrapy.Field()
    page_url = scrapy.Field()
    date = scrapy.Field()
    coordinate = scrapy.Field()
    house_id = scrapy.Field()
    build_year = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a resold-house detail page.

        Args:
            response: Response object exposing ``xpath()`` and ``url`` that
                can also be wrapped in a ``Selector``.

        Returns:
            A populated ``ResoldHouseItem``. ``location``, ``house_type``,
            ``house_code``, ``publish_date``, ``longitude``, ``latitude`` and
            ``coordinate`` are only set when the page provides them.
        """
        def first(query):
            # First result of the query as text, or None when there is none.
            return response.xpath(query).extract_first()

        def value(caption):
            # Raw text of the second <div> in the row with this caption.
            return first('//li[div[text()="%s"]]/div[2]/text()' % caption)

        def normalized(caption):
            # Same cell, with whitespace collapsed by XPath normalize-space().
            return first(
                'normalize-space(//li[div[text()="%s"]]/div[2]/text())' % caption
            )

        item = cls()
        item['title'] = first('//li[div[text()="所属小区:"]]/div/a/text()')

        location_list = response.xpath(
            '//li[div[text()="所在位置:"]]/div/p//text()'
        ).extract()
        if location_list:
            location = ''.join(location_list).strip()
            item['location'] = location.replace('\n', '').replace('\t', '')

        item['price'] = value('房屋单价:')
        house_type_string = normalized('房屋户型:')
        if house_type_string:
            item['house_type'] = house_type_string.replace(' ', '')
        item['area'] = value('建筑面积:')
        item['orientation'] = value('房屋朝向:')
        item['floor'] = normalized('所在楼层:')
        item['decoration'] = normalized('装修程度:')
        item['property_type'] = normalized('房屋类型:')
        item['total_price'] = first(
            '//div[@class="wrapper"]/div[@class="wrapper-lf clearfix"]'
            '/div[@class="basic-info clearfix"]/span[1]/em/text()'
        )
        item['down_payment'] = normalized('参考首付:')
        item['monthly_payment'] = first(
            'normalize-space(//li[div[text()="参考月供:"]]/div/span/text())'
        )
        item['build_year'] = normalized('建造年代:')

        house_code_string = first('//span[contains(text(), "房屋编码")]/text()')
        if house_code_string:
            code_match = re.search(r'[\d]{6,}', house_code_string)
            if code_match:
                item['house_code'] = code_match.group()

        house_date_string = first('//span[contains(text(), "发布时间")]/text()')
        # Guard on the date text itself. Checking the house-code text here
        # would pass None to re.search when only the date is missing, and
        # would drop the date when only the code is missing.
        if house_date_string:
            date_match = re.search(
                r'[\d]{0,4}年[\d]{0,2}月[\d]{0,2}日', house_date_string
            )
            if date_match:
                item['publish_date'] = date_match.group()

        # Coordinates appear as 'lng : "<num>"' / 'lat : "<num>"' in the page
        # source; a capture group yields the number in a single pass.
        page = Selector(response)
        longitude_match = page.re(r'lng : "([\d]{0,3}[\.][\d]*)"')
        if longitude_match:
            item['longitude'] = longitude_match[0]
        latitude_match = page.re(r'lat : "([\d]{0,2}[\.][\d]*)"')
        if latitude_match:
            item['latitude'] = latitude_match[0]
        if longitude_match and latitude_match:
            try:
                item['coordinate'] = [
                    float(item['longitude']),
                    float(item['latitude']),
                ]
            except ValueError as err:
                # The pattern admits a lone "." which float() rejects; log
                # and continue without a coordinate pair.
                logging.error('type conversion error ! reason: %s', err)

        item['img_url'] = first(
            '//div[@class="switch_list"][1]/div[@class="img_wrap"][1]/img/@data-src'
        )
        # Use the public accessor instead of the private ``_url`` attribute.
        item['page_url'] = response.url
        item['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
        item['house_id'] = '109'
        return item
class LfsAveragePriceItem(scrapy.Item):
    """Average-price snapshot for one community page.

    ``_id``, ``arrow`` and ``rate`` are declared but are not assigned by
    ``handle_response``.
    """

    _id = scrapy.Field()
    title = scrapy.Field()
    price = scrapy.Field()
    arrow = scrapy.Field()
    rate = scrapy.Field()
    page_url = scrapy.Field()
    date = scrapy.Field()
    house_id = scrapy.Field()

    @classmethod
    def handle_response(cls, response):
        """Build an item from a community page.

        Args:
            response: Response object exposing ``xpath()`` and ``_url`` that
                can also be wrapped in a ``Selector``.

        Returns:
            A populated ``LfsAveragePriceItem``; ``price`` is only set when
            the ``comm_midprice`` value is found in the page source.
        """
        snapshot = cls()
        title_nodes = response.xpath(r'//div[@class="comm-title"]/a/@title')
        snapshot['title'] = title_nodes.extract_first()

        # The price is read from embedded JSON text: the digits between
        # "comm_midprice":" and ","area_midprice.
        mid_prices = Selector(response).re(
            r'(?<="comm_midprice":")([0-9]*(?=","area_midprice))'
        )
        if mid_prices:
            snapshot['price'] = mid_prices[0]

        snapshot['page_url'] = response._url
        snapshot['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
        snapshot['house_id'] = '109'
        return snapshot
|