12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
- # -*- coding: utf-8 -*-
- # Define your item pipelines here
- #
- # Don't forget to add your pipeline to the ITEM_PIPELINES setting
- # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
- import pymongo
- from scrapy.conf import settings
- from elabSpider.items import *
- import urllib.parse
class ElabspiderPipeline(object):
    """Scrapy item pipeline that writes every scraped item to MongoDB.

    The connection target is read from the Scrapy settings: ``MONGO_HOST``
    (server), ``MONGO_DB`` (database) and ``MONGO_COLL`` (default
    collection). The destination collection of each item is derived from
    the item's class and, for rental / resold house items, from the name
    of the spider that produced it.
    """

    # Spider-specific collections for RentalHouseItem; any other spider
    # writes to 'rental_house'.
    _RENTAL_COLLECTIONS = {
        'lfsrentalHouse': 'lfs_rental_house',
        'sjkrentalHouse': 'sjk_rental_house',
    }

    # Spider-specific collections for ResoldHouseItem; any other spider
    # writes to 'nb_resold_house'.
    _RESOLD_COLLECTIONS = {
        'sjkresoldHouse': 'sjk_resold_house',
        'lfsresoldHouse': 'lfs_resold_house',
    }

    def __init__(self):
        """Open the MongoDB connection and bind the default collection."""
        self.client = pymongo.MongoClient(settings['MONGO_HOST'], authSource='logdb')
        self.db = self.client[settings['MONGO_DB']]
        # Default collection; kept as an attribute for backward
        # compatibility. It is no longer reassigned per item.
        self.coll = self.db[settings['MONGO_COLL']]

    def close_spider(self, spider):
        """Release the MongoDB connection when the spider finishes.

        Scrapy invokes this hook automatically on pipelines that define it.
        """
        self.client.close()

    def _collection_name(self, item, spider):
        """Return the name of the collection that *item* belongs in.

        The checks run in the same order as the original if/elif chain so
        that item classes related by inheritance resolve the same way.
        Items of an unrecognised class fall back to the configured default
        collection instead of whichever collection the previous item used.
        """
        if isinstance(item, CommunityItem):
            # NOTE(review): spelling kept as-is because it is the name used
            # at runtime; presumably existing data lives there - confirm
            # before renaming.
            return 'departmengprice'
        if isinstance(item, FTXCommunityItem):
            return 'ftxcommunity'
        if isinstance(item, ResoldApartmentItem):
            return settings['MONGO_COLL']
        if isinstance(item, RentalHouseItem):
            return self._RENTAL_COLLECTIONS.get(spider.name, 'rental_house')
        if isinstance(item, FTXRentalHouseItem):
            return 'ftx_rental_house'
        if isinstance(item, ResoldHouseItem):
            return self._RESOLD_COLLECTIONS.get(spider.name, 'nb_resold_house')
        if isinstance(item, LfsAveragePriceItem):
            return 'lfs_average_price'
        return settings['MONGO_COLL']

    def process_item(self, item, spider):
        """Insert *item* into its collection and pass it on unchanged.

        Args:
            item: the scraped item to persist.
            spider: the spider that produced the item; its ``name`` selects
                the collection for rental and resold house items.

        Returns:
            The same *item*, so later pipelines can keep processing it.
        """
        # The target collection is resolved per call (not stored on self),
        # so one item's routing can never leak into the next item's insert.
        target = self.db[self._collection_name(item, spider)]
        # NOTE(review): pymongo's insert_one adds an '_id' key to the
        # document it receives when one is missing; the item is passed
        # as-is to keep the existing behaviour - confirm the item classes
        # accept that key.
        target.insert_one(item)
        return item
|