pipelines.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
  6. import pymongo
  7. from scrapy.conf import settings
  8. from elabSpider.items import *
  9. import urllib.parse
  10. class ElabspiderPipeline(object):
  11. def __init__(self):
  12. # self.client = pymongo.MongoClient(host='139.196.5.59', port=27017)
  13. # self.client.admin.authenticate(name='dbuser', password='elab@123', mechanism='SCRAM-SHA-1')
  14. # username = urllib.parse.quote_plus('dbuser')
  15. # password = urllib.parse.quote_plus('elab@123')
  16. # uri = "mongodb://dbuser:elab@123@example.com/?authSource=the_database&authMechanism=SCRAM-SHA-1"
  17. # self.client = pymongo.MongoClient('101.132.106.154', authSource='logdb')
  18. self.client = pymongo.MongoClient(settings['MONGO_HOST'], authSource='logdb')
  19. self.db = self.client[settings['MONGO_DB']]
  20. self.coll = self.db[settings['MONGO_COLL']]
  21. def process_item(self, item, spider):
  22. if isinstance(item, CommunityItem):
  23. self.coll = self.db['departmengprice']
  24. elif isinstance(item, FTXCommunityItem):
  25. self.coll = self.db['ftxcommunity']
  26. elif isinstance(item, ResoldApartmentItem):
  27. self.coll = self.db[settings['MONGO_COLL']]
  28. elif isinstance(item, RentalHouseItem):
  29. if spider.name == 'lfsrentalHouse':
  30. self.coll = self.db['lfs_rental_house']
  31. elif spider.name == 'sjkrentalHouse':
  32. self.coll = self.db['sjk_rental_house']
  33. else:
  34. self.coll = self.db['rental_house']
  35. elif isinstance(item, FTXRentalHouseItem):
  36. self.coll = self.db['ftx_rental_house']
  37. elif isinstance(item, ResoldHouseItem):
  38. if spider.name == 'sjkresoldHouse':
  39. self.coll = self.db['sjk_resold_house']
  40. elif spider.name == 'lfsresoldHouse':
  41. self.coll = self.db['lfs_resold_house']
  42. else:
  43. self.coll = self.db['nb_resold_house']
  44. elif isinstance(item, LfsAveragePriceItem):
  45. self.coll = self.db['lfs_average_price']
  46. self.coll.insert_one(item)
  47. return item