zhaojh, 6 years ago
commit
e7a8d3f789

+ 339 - 0
LICENSE

@@ -0,0 +1,339 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc., [http://fsf.org/]
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    {description}
+    Copyright (C) 2018  剑鸣
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  {signature of Ty Coon}, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.

+ 39 - 0
README.md

@@ -0,0 +1,39 @@
+# elabSpider
+
+#### Project introduction
+{**The text below is the Gitee platform boilerplate; replace it with your own project description**
+Gitee is a Git-based code hosting platform launched by OSChina (SVN is also supported), providing developers with a stable, efficient and secure cloud platform for collaborative software development.
+Individuals, teams and enterprises can all use Gitee for code hosting, project management and collaborative development. For enterprise projects see [https://gitee.com/enterprises](https://gitee.com/enterprises)}
+
+#### Software architecture
+Software architecture description
+
+
+#### Installation
+
+1. xxxx
+2. xxxx
+3. xxxx
+
+#### Usage
+
+1. xxxx
+2. xxxx
+3. xxxx
+
+#### Contributing
+
+1. Fork this repository
+2. Create a Feat_xxx branch
+3. Commit your code
+4. Open a Pull Request
+
+
+#### Gitee features
+
+1. Use Readme\_XXX.md to support different languages, e.g. Readme\_en.md, Readme\_zh.md
+2. Official Gitee blog: [blog.gitee.com](https://blog.gitee.com)
+3. Visit [https://gitee.com/explore](https://gitee.com/explore) to discover outstanding open-source projects on Gitee
+4. [GVP](https://gitee.com/gvp) stands for Gitee's Most Valuable open-source Projects, selected through Gitee's overall evaluation
+5. The official Gitee user manual: [http://git.mydoc.io/](http://git.mydoc.io/)
+6. "Gitee cover stars" is a column showcasing Gitee members: [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/)

+ 0 - 0
elabSpider/__init__.py


+ 38 - 0
elabSpider/email_util.py

@@ -0,0 +1,38 @@
+# -*- coding:utf-8 -*-
+
+# @Time    : 2018/5/23 11:49 AM
+
+# @Author  : Swing
+
+
+import smtplib
+from email.mime.text import MIMEText
+from email.header import Header
+import traceback
+
+mail_host = 'smtp.exmail.qq.com'
+mail_user = 'zhaojh@elab-plus.com'
+mail_pass = 'Elab@123'
+
+sender = 'zhaojh@elab-plus.com'
+receivers = ['zhaojh@elab-plus.com']
+
+
+def send_email(title, content):
+    message = MIMEText(content, 'plain', 'utf-8')
+    message['From'] = Header("Tornado service", 'utf-8')
+    message['To'] = Header("Admin", 'utf-8')
+    message['Subject'] = Header(title, 'utf-8')
+
+    try:
+        smtp_obj = smtplib.SMTP()
+        smtp_obj.connect(mail_host, 25)
+        smtp_obj.login(mail_user, mail_pass)
+        smtp_obj.sendmail(sender, receivers, message.as_string())
+        smtp_obj.quit()
+        print('Mail sent successfully')
+    except smtplib.SMTPException:
+        print('Error: mail send failed: ' + traceback.format_exc())
+
+
+# Test code
+# send_email("test subject", "test body")

File diff suppressed because it is too large
+ 1 - 0
elabSpider/fake_useragent.json


+ 496 - 0
elabSpider/items.py

@@ -0,0 +1,496 @@
+# -*- coding: utf-8 -*-
+
+# Define here the models for your scraped items
+#
+# See documentation in:
+# https://doc.scrapy.org/en/latest/topics/items.html
+
+from scrapy.selector import Selector
+import scrapy
+import re
+import time
+import logging
+
+
+class ResoldApartmentItem(scrapy.Item):
+    _id = scrapy.Field()
+    title = scrapy.Field()
+    unit_price = scrapy.Field()
+    total_price = scrapy.Field()
+    orientation = scrapy.Field()
+    area = scrapy.Field()
+    built_year = scrapy.Field()
+    property = scrapy.Field()
+    decoration = scrapy.Field()
+    model = scrapy.Field()
+    floor = scrapy.Field()
+    image = scrapy.Field()
+    house_type = scrapy.Field()
+    trading_ownership = scrapy.Field()
+    tag = scrapy.Field()
+    location = scrapy.Field()
+    longitude = scrapy.Field()
+    latitude = scrapy.Field()
+    page_url = scrapy.Field()
+    house_id = scrapy.Field()
+
+    @classmethod
+    def handle_response(cls, response):
+        item = cls()
+        item['title'] = response.xpath(
+            '//ul[@class="house-basic-item3"]/li[1]/span[@class="c_000 mr_10"]/a[1]/text()').extract_first().strip()
+
+        unit_price_string = response.xpath('//span[@class="unit"]/text()').extract_first()
+        item['unit_price'] = re.search(r'[1-9][\d]*', unit_price_string).group()
+
+        # Total price: normalize "万" (10,000) and "千" (1,000) units to a plain number
+        total_price_string = response.xpath('//span[@class="price"]/text()').extract_first()
+        total_price = re.search(r'[0-9]+(\.)?[0-9]*', total_price_string).group()
+        price_unit = response.xpath(r'//span[@class="price"]/b/text()').extract_first()
+        if price_unit == '万':
+            total_price = str(int(float(total_price) * 10000))
+        elif price_unit == '千':
+            total_price = str(int(float(total_price) * 1000))
+        item['total_price'] = total_price
+
+        item['orientation'] = response.xpath('//p[@class="toward"]/span[@class="main"]/text()').extract_first()
+
+        # Year built
+        built_year_str = response.xpath(r'//p[@class="toward"]/span[@class="sub"]/text()').extract_first()
+        if built_year_str:
+            year_match = re.search(r'[\d]+', built_year_str)
+            if year_match:
+                item['built_year'] = year_match.group()
+
+        area_string = response.xpath(
+            '//div[@id="generalSituation"]/div[@class="general-item-wrap"]/ul[@class="general-item-left"]/li[3]/span[2]/text()').extract_first()
+        item['area'] = re.search(r'[\d]+', area_string).group()
+
+        item['property'] = response.xpath(
+            '//div[@id="generalSituation"]/div[@class="general-item-wrap"]/ul[@class="general-item-right"]/li[3]/span[2]/text()').re(r'[\d]+')[0]
+        item['decoration'] = response.xpath(
+            '//div[@id="generalSituation"]/div[@class="general-item-wrap"]/ul[@class="general-item-right"]/li[2]/span[2]/text()').extract_first()
+        item['model'] = response.xpath(
+            '//div[@id="generalSituation"]/div[@class="general-item-wrap"]/ul[@class="general-item-left"]/li[2]/span[2]/text()').extract_first()
+        item['floor'] = response.xpath(
+            '//div[@id="generalSituation"]/div[@class="general-item-wrap"]/ul[@class="general-item-right"]/li[1]/span[2]/text()').extract_first()
+        item['house_type'] = response.xpath('//div[@id="generalExpense"]/div[@class="general-item-wrap"]/ul[@class="general-item-left"]/li[2]/span[2]/text()').extract_first()
+        item['trading_ownership'] = response.xpath('//div[@id="generalExpense"]/div[@class="general-item-wrap"]/ul[@class="general-item-left"]/li[3]/span[2]/text()').extract_first()
+        # Image
+        item['image'] = response.xpath(
+            r'//div[@class="basic-pic-list pr"]/ul[@id="leftImg"]/li[1]/img/@data-value').extract_first()
+
+        # Location
+        location_list = response.xpath(r'//ul[@class="house-basic-item3"]/li[2]/span[2]/a/text()').extract()
+        if location_list:
+            location_str = '-'.join(location_list)
+            item['location'] = location_str
+
+        # Tag
+        tag_list = response.xpath(r'//p[@class="house-update-info"]/span[@class="ts"]/text()').extract()
+        if tag_list:
+            item['tag'] = tag_list[0]
+
+        # Longitude and latitude, embedded in an inline <script> block
+        script_string = response.xpath(r'//script[@type="text/javascript"]').extract_first()
+        latitude_match = re.search(r'"lat":([1-9])[\d](\.)[\d]*,"', script_string)
+        longitude_match = re.search(r'"lon":[1-9][\d][\d](\.)[\d]*,"', script_string)
+        if latitude_match:
+            item['latitude'] = re.search(r'([1-9])[\d](\.)[\d]*', latitude_match.group()).group()
+        if longitude_match:
+            item['longitude'] = re.search(r'[1-9][\d][\d](\.)[\d]*', longitude_match.group()).group()
+
+        item['page_url'] = response.url
+
+        item['house_id'] = '109'
+
+        return item
+
+
+class CommunityItem(scrapy.Item):
+    _id = scrapy.Field()
+    title = scrapy.Field()
+    unit_price = scrapy.Field()
+    floating_rate = scrapy.Field()
+    built_year = scrapy.Field()
+    location = scrapy.Field()
+    page_url = scrapy.Field()
+    type = scrapy.Field()
+    house_id = scrapy.Field()
+
+    @classmethod
+    def handle_response(cls, response, type):
+        item = cls()
+        selector = Selector(text=response)
+        item['title'] = selector.xpath(r'//h3/a/@title').extract_first()
+        item['unit_price'] = selector.xpath(r'//div[@class="li-side"]/p/strong/text()').extract_first()
+        floating_rate = selector.xpath(r'//div[@class="li-side"]/p[@class="price-txt"]/text()').extract_first()
+        if not floating_rate:
+            floating_rate = selector.xpath(r'//div[@class="li-side"]/p[@class="price-txt price-down"]/text()').extract_first()
+        item['floating_rate'] = floating_rate
+        item['location'] = selector.xpath(r'//div[@class="li-info"]/address/text()').extract_first().strip()
+        item['page_url'] = selector.xpath(r'//div[@_soj="xqlb"]/@link').extract_first().strip()
+        item['built_year'] = selector.xpath(r'//p[@class="date"]/text()').extract_first().strip()
+        item['type'] = type
+        item['house_id'] = '109'
+        return item
+
+
+class FTXCommunityItem(scrapy.Item):
+    _id = scrapy.Field()
+    title = scrapy.Field()
+    unit_price = scrapy.Field()
+    floating_rate = scrapy.Field()
+    year_floating_rate = scrapy.Field()
+    built_year = scrapy.Field()
+    property = scrapy.Field()
+    property_type = scrapy.Field()
+    building_type = scrapy.Field()
+    greening_rate = scrapy.Field()
+    plot_ratio = scrapy.Field()
+    total_area = scrapy.Field()
+    building_area = scrapy.Field()
+    construction = scrapy.Field()
+    location = scrapy.Field()
+    region = scrapy.Field()
+    page_url = scrapy.Field()
+    img_url = scrapy.Field()
+    predict_type = scrapy.Field()
+    house_id = scrapy.Field()
+
+    @classmethod
+    def handle_response(cls, response):
+        item = cls()
+        title_string: str = response.xpath(r'//div[@class="logoBox_sq"]/div[@class="ceninfo_sq"]/h1/a[@class="tt"]/text()').extract_first()
+        if title_string:
+            item['title'] = title_string.replace('小区网', '')
+        item['unit_price'] = response.xpath(r'//div[@class="box detaiLtop mt20 clearfix"]/dl[1]/dd/span/text()').extract_first()
+        item['floating_rate'] = response.xpath(r'//div[@class="box detaiLtop mt20 clearfix"]/dl[2]/dd/span/text()').extract_first()
+        item['year_floating_rate'] = response.xpath(r'//div[@class="box detaiLtop mt20 clearfix"]/dl[3]/dd/span/text()').extract_first()
+
+        item['location'] = response.xpath(r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]/dl/dd[strong="小区地址:"]/text()').extract_first()
+        item['region'] = response.xpath(r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]/dl/dd[strong="所属区域:"]/text()').extract_first()
+        property_string = response.xpath(r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]/dl/dd[strong="产权描述:"]/text()').extract_first()
+        item['property'] = None
+        if property_string:
+            re_list = re.search(r'[\d]{1,2}', property_string)
+            if re_list:
+                item['property'] = re_list.group(0)
+
+        item['property_type'] = response.xpath(r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]/dl/dd[strong="物业类别:"]/text()').extract_first()
+        if not item['property']:
+            item['predict_type'] = '其他'
+        elif item['property'] == '70':
+            item['predict_type'] = '住宅'
+        else:
+            item['predict_type'] = '公寓'
+
+        item['construction'] = response.xpath(r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]/dl/dd[strong="建筑结构:"]/span/text()').extract_first()
+        item['built_year'] = response.xpath(r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]/dl/dd[strong="建筑年代:"]/text()').extract_first()
+        item['building_type'] = response.xpath(r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]/dl/dd[strong="建筑类型:"]/text()').extract_first()
+        item['greening_rate'] = response.xpath(r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]/dl/dd[strong="绿 化 率:"]/text()').extract_first()
+        item['plot_ratio'] = response.xpath(r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]/dl/dd[strong="容 积 率:"]/text()').extract_first()
+        item['total_area'] = response.xpath(r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]/dl/dd[strong="占地面积:"]/text()').extract_first()
+        item['building_area'] = response.xpath(r'//div[@class="box"][1]/div[@class="inforwrap clearfix"]/dl/dd[strong="建筑面积:"]/text()').extract_first()
+        item['img_url'] = response.xpath(r'//div[@class="logoBox_sq"]/div[@class="logopic_sq"]/a/img/@src').extract_first()
+        item['page_url'] = response.url
+        item['house_id'] = '109'
+
+        return item
+
+
+class RentalHouseItem(scrapy.Item):
+    _id = scrapy.Field()
+    title = scrapy.Field()
+    location = scrapy.Field()
+    price = scrapy.Field()
+    house_type = scrapy.Field()
+    area = scrapy.Field()
+    orientation = scrapy.Field()
+    floor = scrapy.Field()
+    decoration = scrapy.Field()
+    property_type = scrapy.Field()
+    house_code = scrapy.Field()
+    publish_date = scrapy.Field()
+    longitude = scrapy.Field()
+    latitude = scrapy.Field()
+    img_url = scrapy.Field()
+    page_url = scrapy.Field()
+    date = scrapy.Field()
+    coordinate = scrapy.Field()
+    house_id = scrapy.Field()
+
+    @classmethod
+    def handle_response(cls, response):
+        item = cls()
+        name_list: list = response.xpath(r'//ul[@class="house-info-zufang cf"]/li[span="小区:"]/a/text()').extract()
+        location_string = ''
+        if name_list:
+            if len(name_list) > 0:
+                item['title'] = name_list[0]
+            if len(name_list) > 1:
+                location_string += name_list[1]
+                location_string += "-"
+            if len(name_list) > 2:
+                location_string += name_list[2]
+        item['location'] = location_string
+        price_list = response.xpath(r'//li[@class="full-line cf"]/span[@class="price"]//text()').extract()
+        if price_list:
+            item['price'] = "".join(price_list)
+        item['house_type'] = response.xpath(r'//ul[@class="house-info-zufang cf"]/li[span="户型:"]/span[@class="info"]/text()').extract_first()
+        item['area'] = response.xpath(r'//ul[@class="house-info-zufang cf"]/li[span="面积:"]/span[@class="info"]/text()').extract_first()
+        item['orientation'] = response.xpath(r'//ul[@class="house-info-zufang cf"]/li[span="朝向:"]/span[@class="info"]/text()').extract_first()
+        item['floor'] = response.xpath(r'//ul[@class="house-info-zufang cf"]/li[span="楼层:"]/span[@class="info"]/text()').extract_first()
+        item['decoration'] = response.xpath(r'//ul[@class="house-info-zufang cf"]/li[span="装修:"]/span[@class="info"]/text()').extract_first()
+        item['property_type'] = response.xpath(r'//ul[@class="house-info-zufang cf"]/li[span="类型:"]/span[@class="info"]/text()').extract_first()
+
+        house_info_string = response.xpath(r'//div[@class="mod-title bottomed"][h3="房屋信息"]/div/text()').extract_first()
+        if house_info_string:
+            code_match = re.search(r'[\d]{6,}', house_info_string)
+            if code_match:
+                item['house_code'] = code_match.group()
+
+            date_match = re.search(r'[\d]{0,4}年[\d]{0,2}月[\d]{0,2}日', house_info_string)
+            if date_match:
+                item['publish_date'] = date_match.group()
+
+        longitude_match = Selector(response).re(r'lng:[\d]{0,3}[\.][\d]*,')
+        if longitude_match:
+            item['longitude'] = re.search(r'[\d]{0,3}[\.][\d]*', longitude_match[0]).group()
+
+        latitude_match = Selector(response).re(r'lat:[\d]{0,2}[\.][\d]*,')
+        if latitude_match:
+            item['latitude'] = re.search(r'[\d]{0,2}[\.][\d]*', latitude_match[0]).group()
+
+        if longitude_match and latitude_match:
+            try:
+                item['coordinate'] = [float(item['longitude']), float(item['latitude'])]
+            except Exception as err:
+                logging.error('type conversion error! reason: %s', err)
+
+        item['img_url'] = response.xpath(r'//div[@class="switch_list"][1]/div[@class="img_wrap"][1]/img[1]/@data-src').extract_first()
+        item['page_url'] = response.url
+        item['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
+        item['house_id'] = '109'
+
+        return item
+
+
+class FTXRentalHouseItem(scrapy.Item):
+    _id = scrapy.Field()
+    title = scrapy.Field()
+    location = scrapy.Field()
+    price = scrapy.Field()
+    house_type = scrapy.Field()
+    area = scrapy.Field()
+    orientation = scrapy.Field()
+    floor = scrapy.Field()
+    decoration = scrapy.Field()
+    # property_type = scrapy.Field()
+    house_code = scrapy.Field()
+    update_date = scrapy.Field()
+    # longitude = scrapy.Field()
+    # latitude = scrapy.Field()
+    img_url = scrapy.Field()
+    page_url = scrapy.Field()
+    date = scrapy.Field()
+    house_id = scrapy.Field()
+
+    @classmethod
+    def handle_response(cls, response):
+        item = cls()
+
+        house_info_match = Selector(response).re(r'var houseInfo = {[\s\S]*};')
+        if house_info_match:
+            info_str = house_info_match[0]
+            title_match = re.search(r"projname: '[\s\S]*?',", info_str)
+            if title_match:
+                title_str = title_match[0]
+                title_str = title_str.replace("projname: '", '')
+                title_str = title_str.replace("',", '')
+                item['title'] = title_str
+
+            district_math = re.search(r"district: '[\s\S]*?',", info_str)
+            location_string = ''
+            if district_math:
+                district_str = district_math[0]
+                district_str = district_str.replace("district: '", '')
+                district_str = district_str.replace("',", '')
+                location_string += district_str
+                location_string += '-'
+
+            comarea_math = re.search(r"comarea: '[\s\S]*?',", info_str)
+            if comarea_math:
+                comarea_str = comarea_math[0]
+                comarea_str = comarea_str.replace("comarea: '", '')
+                comarea_str = comarea_str.replace("',", '')
+                location_string += comarea_str
+
+            item['location'] = location_string
+
+
+        # name_list: list = response.xpath(r'//div[div[@class="lab"][text()="小      区"]]/div[contains(@class, "rcont")]/a/text()').extract()
+        # location_string = ''
+        # if name_list:
+        #     if len(name_list) > 0:
+        #         item['title'] = name_list[0]
+        #     if len(name_list) > 1:
+        #         location_string += name_list[1]
+        #         location_string += "-"
+        #     if len(name_list) > 2:
+        #         location_string += name_list[2]
+        # item['location'] = location_string
+        price_list = response.xpath(r'//div[@class ="tab-cont-right"]/div[@class ="tr-line clearfix zf_new_title"]/div[contains(@class, "trl-item sty1")]//text()').extract()
+        if price_list:
+            item['price'] = "".join(price_list).strip()
+        item['house_type'] = response.xpath(r'//div[@class="trl-item1 w182"][div[@class="font14"]="户型"]/div[@class="tt"]/text()').extract_first()
+        item['area'] = response.xpath(r'//div[@class="trl-item1 w132"][div[@class="font14"]="建筑面积"]/div[@class="tt"]/text()').extract_first()
+        item['orientation'] = response.xpath(r'//div[@class="trl-item1 w146"][div[@class="font14"]="朝向"]/div[@class="tt"]/text()').extract_first()
+
+        floor_list = response.xpath(r'//div[@class="trl-item1 w182"][div[@class="font14"][contains(text(), "楼层")]]/div//text()').extract()
+        if floor_list:
+            floor_str = '-'.join(floor_list)
+            item['floor'] = floor_str
+
+        item['decoration'] = response.xpath(r'//div[@class="trl-item1 w132"][div[@class="font14"]="装修"]/div[@class="tt"]/text()').extract_first()
+        # item['property_type'] = response.xpath(r'//ul[@class="house-info-zufang cf"]/li[span="类型:"]/span[@class="info"]/text()').extract_first()
+
+        # house_info_string = response.xpath(r'//div[@class="mod-title bottomed"][h3="房屋信息"]/div/text()').extract_first()
+        house_code_string = response.xpath(r'//span[contains(text(), "房源编号")]/text()').extract_first()
+        if house_code_string:
+            code_match = re.search(r'[\d]{6,}', house_code_string)
+            if code_match:
+                item['house_code'] = code_match.group()
+        house_date_string = response.xpath(r'//span[contains(text(), "更新时间")]/text()').extract_first()
+        if house_date_string:
+            date_match = re.search(r'[\d]{0,4}-[\d]{0,2}-[\d]{0,2}', house_date_string)
+            if date_match:
+                item['update_date'] = date_match.group()
+
+        # longitude_match = Selector(response).re(r'lng:[\d]{0,3}[\.][\d]*,')
+        # if longitude_match:
+        #     item['longitude'] = re.search(r'[\d]{0,3}[\.][\d]*', longitude_match[0]).group()
+        #
+        # latitude_match = Selector(response).re(r'[\d]{0,2}[\.][\d]*,')
+        # if latitude_match:
+        #     item['latitude'] = re.search(r'[\d]{0,2}[\.][\d]*', latitude_match[0]).group()
+
+        item['img_url'] = response.xpath(r'//div[@class="bigImg"]/img[1]/@src').extract_first()
+        item['page_url'] = response.url
+        item['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
+        item['house_id'] = '109'
+
+        return item
+
+
+class ResoldHouseItem(scrapy.Item):
+    _id = scrapy.Field()
+    title = scrapy.Field()
+    location = scrapy.Field()
+    price = scrapy.Field()
+    house_type = scrapy.Field()
+    area = scrapy.Field()
+    orientation = scrapy.Field()
+    floor = scrapy.Field()
+    decoration = scrapy.Field()
+    property_type = scrapy.Field()
+    total_price = scrapy.Field()
+    down_payment = scrapy.Field()
+    monthly_payment = scrapy.Field()
+    house_code = scrapy.Field()
+    publish_date = scrapy.Field()
+    house_price_info = scrapy.Field()
+    community_price_info = scrapy.Field()
+    area_price_info = scrapy.Field()
+    longitude = scrapy.Field()
+    latitude = scrapy.Field()
+    img_url = scrapy.Field()
+    page_url = scrapy.Field()
+    date = scrapy.Field()
+    coordinate = scrapy.Field()
+    house_id = scrapy.Field()
+    build_year = scrapy.Field()
+
+    @classmethod
+    def handle_response(cls, response):
+        item = cls()
+
+        item['title'] = response.xpath(r'//li[div[text()="所属小区:"]]/div/a/text()').extract_first()
+        location_list = response.xpath(r'//li[div[text()="所在位置:"]]/div/p//text()').extract()
+        if location_list:
+            location = ''.join(location_list).strip()
+            location = location.replace('\n', '')
+            location = location.replace('\t', '')
+            item['location'] = location
+
+        item['price'] = response.xpath(r'//li[div[text()="房屋单价:"]]/div[2]/text()').extract_first()
+        house_type_string: str = response.xpath(r'normalize-space(//li[div[text()="房屋户型:"]]/div[2]/text())').extract_first()
+        if house_type_string:
+            item['house_type'] = house_type_string.replace(' ', '')
+
+        item['area'] = response.xpath(r'//li[div[text()="建筑面积:"]]/div[2]/text()').extract_first()
+        item['orientation'] = response.xpath(r'//li[div[text()="房屋朝向:"]]/div[2]/text()').extract_first()
+        item['floor'] = response.xpath(r'normalize-space(//li[div[text()="所在楼层:"]]/div[2]/text())').extract_first()
+        item['decoration'] = response.xpath(r'normalize-space(//li[div[text()="装修程度:"]]/div[2]/text())').extract_first()
+        item['property_type'] = response.xpath(r'normalize-space(//li[div[text()="房屋类型:"]]/div[2]/text())').extract_first()
+        item['total_price'] = response.xpath(r'//div[@class="wrapper"]/div[@class="wrapper-lf clearfix"]/div[@class="basic-info clearfix"]/span[1]/em/text()').extract_first()
+        item['down_payment'] = response.xpath(r'normalize-space(//li[div[text()="参考首付:"]]/div[2]/text())').extract_first()
+        item['monthly_payment'] = response.xpath(r'normalize-space(//li[div[text()="参考月供:"]]/div/span/text())').extract_first()
+        item['build_year'] = response.xpath(r'normalize-space(//li[div[text()="建造年代:"]]/div[2]/text())').extract_first()
+
+        house_code_string = response.xpath(r'//span[contains(text(), "房屋编码")]/text()').extract_first()
+        if house_code_string:
+            code_match = re.search(r'[\d]{6,}', house_code_string)
+            if code_match:
+                item['house_code'] = code_match.group()
+        house_date_string = response.xpath(r'//span[contains(text(), "发布时间")]/text()').extract_first()
+        if house_date_string:
+            date_match = re.search(r'[\d]{0,4}年[\d]{0,2}月[\d]{0,2}日', house_date_string)
+            if date_match:
+                item['publish_date'] = date_match.group()
+
+        longitude_match = Selector(response).re(r'lng : "[\d]{0,3}[\.][\d]*"')
+        if longitude_match:
+            item['longitude'] = re.search(r'[\d]{0,3}[\.][\d]*', longitude_match[0]).group()
+
+        latitude_match = Selector(response).re(r'lat : "[\d]{0,2}[\.][\d]*"')
+        if latitude_match:
+            item['latitude'] = re.search(r'[\d]{0,2}[\.][\d]*', latitude_match[0]).group()
+
+        if longitude_match and latitude_match:
+            try:
+                item['coordinate'] = [float(item['longitude']), float(item['latitude'])]
+            except Exception as err:
+                logging.error('type conversion error! reason: %s', err)
+
+        item['img_url'] = response.xpath(r'//div[@class="switch_list"][1]/div[@class="img_wrap"][1]/img/@data-src').extract_first()
+
+        item['page_url'] = response.url
+        item['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
+        item['house_id'] = '109'
+
+        return item
+
+
+class LfsAveragePriceItem(scrapy.Item):
+    _id = scrapy.Field()
+    title = scrapy.Field()
+    price = scrapy.Field()
+    arrow = scrapy.Field()
+    rate = scrapy.Field()
+    page_url = scrapy.Field()
+    date = scrapy.Field()
+    house_id = scrapy.Field()
+
+    @classmethod
+    def handle_response(cls, response):
+        item = cls()
+
+        item['title'] = response.xpath(r'//div[@class="comm-title"]/a/@title').extract_first()
+        price_math = Selector(response).re(r'(?<="comm_midprice":")([0-9]*(?=","area_midprice))')
+        if price_math:
+            item['price'] = price_math[0]
+        # item['arrow'] = response.xpath(r'//i[@class="arrow"]/text()').extract_first()
+        # item['rate'] = response.xpath(r'normalize-space(//span[@class="status level"]/text())').extract_first()
+        item['page_url'] = response.url
+        item['date'] = time.strftime('%Y-%m-%d %H:%M:%S')
+        item['house_id'] = '109'
+
+        return item
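Each item class above exposes a `handle_response` classmethod that turns a detail-page response into a populated item. The spiders themselves are not part of this diff, so the callback below is only an illustrative sketch of how such a class is meant to be used (the spider name and URL are placeholders):

```python
# Illustrative sketch only; the real spiders are not included in this commit.
import scrapy

from elabSpider.items import ResoldHouseItem


class ExampleResoldHouseSpider(scrapy.Spider):
    name = 'exampleResoldHouse'                         # hypothetical spider name
    start_urls = ['https://example.com/house/1.html']   # placeholder URL

    def parse(self, response):
        # The item class does all of the field extraction from the page.
        yield ResoldHouseItem.handle_response(response)
```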

+ 172 - 0
elabSpider/middlewares.py

@@ -0,0 +1,172 @@
+# -*- coding: utf-8 -*-
+
+# Define here the models for your spider middleware
+#
+# See documentation in:
+# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
+
+from scrapy.downloadermiddlewares.retry import RetryMiddleware
+from scrapy.utils.response import response_status_message
+from scrapy.downloadermiddlewares.useragent import UserAgentMiddleware
+from scrapy import signals
+from scrapy.conf import settings
+import logging
+import time
+import fake_useragent
+
+
+class ElabspiderSpiderMiddleware(object):
+    # Not all methods need to be defined. If a method is not defined,
+    # scrapy acts as if the spider middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # This method is used by Scrapy to create your spiders.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_spider_input(self, response, spider):
+        # Called for each response that goes through the spider
+        # middleware and into the spider.
+
+        # Should return None or raise an exception.
+        return None
+
+    def process_spider_output(self, response, result, spider):
+        # Called with the results returned from the Spider, after
+        # it has processed the response.
+
+        # Must return an iterable of Request, dict or Item objects.
+        for i in result:
+            yield i
+
+    def process_spider_exception(self, response, exception, spider):
+        # Called when a spider or process_spider_input() method
+        # (from other spider middleware) raises an exception.
+
+        # Should return either None or an iterable of Response, dict
+        # or Item objects.
+        pass
+
+    def process_start_requests(self, start_requests, spider):
+        # Called with the start requests of the spider, and works
+        # similarly to the process_spider_output() method, except
+        # that it doesn’t have a response associated.
+
+        # Must return only requests (not items).
+        for r in start_requests:
+            yield r
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)
+
+
+class ElabspiderDownloaderMiddleware(object):
+    # Not all methods need to be defined. If a method is not defined,
+    # scrapy acts as if the downloader middleware does not modify the
+    # passed objects.
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # This method is used by Scrapy to create your spiders.
+        s = cls()
+        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+        return s
+
+    def process_request(self, request, spider):
+        # Called for each request that goes through the downloader
+        # middleware.
+
+        # Must either:
+        # - return None: continue processing this request
+        # - or return a Response object
+        # - or return a Request object
+        # - or raise IgnoreRequest: process_exception() methods of
+        #   installed downloader middleware will be called
+
+        # proxy_user_pass = settings['PROXY_IDENTIFY'] + ':' + settings['PROXY_SECRETKEY']
+        # encoded_proxy_pass = base64.encodebytes(bytes(proxy_user_pass.encode(encoding='utf-8')))
+
+        # TODO: toggle the proxy on/off here
+        # logging.info(msg='process request url: ' + request._url)
+        # request.meta['proxy'] = settings['PROXY_HOST'] + ':' + settings['PROXY_PORT']
+        # request.headers['Proxy-Authorization'] = 'Basic ' + 'SDVQMDI5OU44MzBBQzlDRDo1MTZGOTVEMDNFQjFGMDI2'
+        return None
+
+    def process_response(self, request, response, spider):
+        # Called with the response returned from the downloader.
+
+        # Must either;
+        # - return a Response object
+        # - return a Request object
+        # - or raise IgnoreRequest
+        logging.info(msg='received response url: ' + response.url + ' status: ' + str(response.status))
+        # if response.status != 200:
+        #     logging.debug('retry url: ' + response._url)
+        #     # proxy = self.get_random_proxy()
+        #     # request.meta['proxy'] = proxy
+        #     return request
+        return response
+
+    def process_exception(self, request, exception, spider):
+        # Called when a download handler or a process_request()
+        # (from other downloader middleware) raises an exception.
+
+        # Must either:
+        # - return None: continue processing this exception
+        # - return a Response object: stops process_exception() chain
+        # - return a Request object: stops process_exception() chain
+        pass
+
+    def spider_opened(self, spider):
+        spider.logger.info('Spider opened: %s' % spider.name)
+
+    # def get_random_proxy(self):
+    #     while 1:
+    #         with open('proxies.txt', 'r') as f:
+    #             proxies = f.readlines()
+    #         if proxies:
+    #             break
+    #         else:
+    #             time.sleep(1)
+    #     proxy = random.choice(proxies).strip()
+    #     return proxy
+
+
+class TooManyRequestsRetryMiddleware(RetryMiddleware):
+
+    def __init__(self, crawler):
+        super(TooManyRequestsRetryMiddleware, self).__init__(crawler.settings)
+        self.crawler = crawler
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        return cls(crawler)
+
+    def process_response(self, request, response, spider):
+        if request.meta.get('dont_retry', False):
+            return response
+        elif response.status == 429:
+            self.crawler.engine.pause()
+            time.sleep(10)  # If the rate limit is renewed in a minute, put 60 seconds, and so on.
+            self.crawler.engine.unpause()
+            reason = response_status_message(response.status)
+            return self._retry(request, reason, spider) or response
+        elif response.status in self.retry_http_codes:
+            reason = response_status_message(response.status)
+            return self._retry(request, reason, spider) or response
+        return response
+
+
+class UserAgent(UserAgentMiddleware):
+
+    def __init__(self, user_agent=''):
+        self.user_agent = user_agent
+
+    def process_request(self, request, spider):
+        # agent = random.choice(self.user_agent_list)
+        agent = fake_useragent.UserAgent(path=settings['USER_AGENT_PATH']).random
+        if agent:
+            # print("********Current UserAgent:%s************" % agent)
+            # log(level=logging.DEBUG, msg='Current UserAgent: ' + agent)
+            request.headers.setdefault('User-Agent', agent)
+
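TooManyRequestsRetryMiddleware pauses the whole engine when an HTTP 429 response arrives and then re-queues the request, while UserAgent rotates a random fake_useragent header per request. In the settings below the retry middleware entry ships commented out; a hedged sketch of what enabling it would look like (the priorities are illustrative, mirroring the commented entries in settings.py):

```python
# Hypothetical settings snippet; in this commit the retry entry is commented out.
DOWNLOADER_MIDDLEWARES = {
    'elabSpider.middlewares.UserAgent': 1,
    'elabSpider.middlewares.TooManyRequestsRetryMiddleware': 500,
    'elabSpider.middlewares.ElabspiderDownloaderMiddleware': 543,
    # Disable the stock retry middleware so only the subclass handles retries.
    'scrapy.downloadermiddlewares.retry.RetryMiddleware': None,
}
```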

+ 55 - 0
elabSpider/pipelines.py

@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
+
+import pymongo
+from scrapy.conf import settings
+from elabSpider.items import *
+import urllib.parse
+
+
+class ElabspiderPipeline(object):
+
+    def __init__(self):
+        # self.client = pymongo.MongoClient(host='139.196.5.59', port=27017)
+        # self.client.admin.authenticate(name='dbuser', password='elab@123', mechanism='SCRAM-SHA-1')
+        # username = urllib.parse.quote_plus('dbuser')
+        # password = urllib.parse.quote_plus('elab@123')
+        # uri = "mongodb://dbuser:elab@123@example.com/?authSource=the_database&authMechanism=SCRAM-SHA-1"
+        # self.client = pymongo.MongoClient('101.132.106.154', authSource='logdb')
+
+        self.client = pymongo.MongoClient(settings['MONGO_HOST'], authSource='logdb')
+
+        self.db = self.client[settings['MONGO_DB']]
+        self.coll = self.db[settings['MONGO_COLL']]
+
+    def process_item(self, item, spider):
+        if isinstance(item, CommunityItem):
+            self.coll = self.db['departmengprice']
+        elif isinstance(item, FTXCommunityItem):
+            self.coll = self.db['ftxcommunity']
+        elif isinstance(item, ResoldApartmentItem):
+            self.coll = self.db[settings['MONGO_COLL']]
+        elif isinstance(item, RentalHouseItem):
+            if spider.name == 'lfsrentalHouse':
+                self.coll = self.db['lfs_rental_house']
+            elif spider.name == 'sjkrentalHouse':
+                self.coll = self.db['sjk_rental_house']
+            else:
+                self.coll = self.db['rental_house']
+        elif isinstance(item, FTXRentalHouseItem):
+            self.coll = self.db['ftx_rental_house']
+        elif isinstance(item, ResoldHouseItem):
+            if spider.name == 'sjkresoldHouse':
+                self.coll = self.db['sjk_resold_house']
+            elif spider.name == 'lfsresoldHouse':
+                self.coll = self.db['lfs_resold_house']
+            else:
+                self.coll = self.db['nb_resold_house']
+        elif isinstance(item, LfsAveragePriceItem):
+            self.coll = self.db['lfs_average_price']
+        self.coll.insert_one(item)
+        return item
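As the header comment notes, this pipeline only runs once it is registered under ITEM_PIPELINES. That setting does not appear in the portion of settings.py shown below, so the snippet here is just the usual registration pattern (the priority value is an arbitrary example):

```python
# Hypothetical registration; 300 is an arbitrary example priority.
ITEM_PIPELINES = {
    'elabSpider.pipelines.ElabspiderPipeline': 300,
}
```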

+ 106 - 0
elabSpider/proxies.py

@@ -0,0 +1,106 @@
+# -*- coding:utf-8 -*-
+
+# @Time    : 2018/4/19 3:47 PM
+
+# @Author  : Swing
+
+
+from bs4 import BeautifulSoup
+import lxml
+from multiprocessing import Process, Queue
+import random
+import json
+import time
+import requests
+
+
+class Proxies(object):
+
+    def __init__(self, page=3):
+        self.proxies = []
+        self.verify_pro = []
+        self.page = page
+        self.headers = {
+            'Accept': '*/*',
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
+            'Accept-Encoding': 'gzip, deflate, sdch',
+            'Accept-Language': 'zh-CN,zh;q=0.8'
+        }
+
+        self.get_proxies()
+        self.get_proxies_nn()
+
+    def get_proxies(self):
+        page = random.randint(1, 10)
+        page_stop = page + self.page
+        while page < page_stop:
+            url = 'http://www.xicidaili.com/nt/%d' % page
+            html = requests.get(url, headers=self.headers).content
+            soup = BeautifulSoup(html, 'lxml')
+            ip_list = soup.find(id='ip_list')
+            for odd in ip_list.find_all(class_='odd'):
+                protocol = odd.find_all('td')[5].get_text().lower() + '://'
+                self.proxies.append(protocol + ':'.join([x.get_text() for x in odd.find_all('td')[1:3]]))
+            page += 1
+
+    def get_proxies_nn(self):
+        page = random.randint(1, 10)
+        page_stop = page + self.page
+        while page < page_stop:
+            url = 'http://www.xicidaili.com/nn/%d' % page
+            html = requests.get(url, headers=self.headers).content
+            soup = BeautifulSoup(html, 'lxml')
+            ip_list = soup.find(id='ip_list')
+            for odd in ip_list.find_all(class_='odd'):
+                protocol = odd.find_all('td')[5].get_text().lower() + '://'
+                self.proxies.append(protocol + ':'.join([x.get_text() for x in odd.find_all('td')[1:3]]))
+            page += 1
+
+    def verify_proxies(self):
+        # Proxies not yet verified
+        old_queue = Queue()
+        # Verified proxies
+        new_queue = Queue()
+        print('verify proxy......')
+        works = []
+        for i in range(15):
+            works.append(Process(target=self.verify_one_proxy, args=(old_queue, new_queue)))
+        for work in works:
+            work.start()
+        for proxy in self.proxies:
+            old_queue.put(proxy)
+        for work in works:
+            old_queue.put(0)
+        for work in works:
+            work.join()
+        self.proxies = []
+        while 1:
+            try:
+                self.proxies.append(new_queue.get(timeout=1))
+            except:
+                break
+        print('verify_proxies done!')
+
+    def verify_one_proxy(self, old_queue, new_queue):
+        while 1:
+            proxy = old_queue.get()
+            if proxy == 0:
+                break
+            protocol = 'https' if 'https' in proxy else 'http'
+            proxies = {protocol: proxy}
+            try:
+                if requests.get('http://www.baidu.com', proxies=proxies, timeout=2).status_code == 200:
+                    print('success %s' % proxy)
+                    new_queue.put(proxy)
+            except:
+                print('fail %s' % proxy)
+
+
+if __name__ == '__main__':
+    a = Proxies()
+    a.verify_proxies()
+    print(a.proxies)
+    proxies = a.proxies
+    with open('proxies.txt', 'a') as f:
+        for proxy in proxies:
+            f.write(proxy + '\n')
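Note that the Proxies helper above only verifies addresses and appends them to proxies.txt; nothing in this file reads that list back during a crawl. A minimal sketch of how the verified list could be consumed from a Scrapy downloader middleware follows; the class name and the PROXY_FILE setting are assumptions for illustration, not part of this commit:

# Hypothetical middleware, not part of this commit: picks a random proxy from
# proxies.txt for every outgoing request.
import random


class RandomFileProxyMiddleware(object):

    def __init__(self, proxy_file):
        with open(proxy_file) as f:
            # one proxy per line, e.g. "http://1.2.3.4:8080"
            self.proxies = [line.strip() for line in f if line.strip()]

    @classmethod
    def from_crawler(cls, crawler):
        # PROXY_FILE is an assumed setting name; it is not defined in settings.py
        return cls(crawler.settings.get('PROXY_FILE', 'proxies.txt'))

    def process_request(self, request, spider):
        if self.proxies:
            request.meta['proxy'] = random.choice(self.proxies)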

+ 0 - 0
elabSpider/proxies.txt


+ 129 - 0
elabSpider/settings.py

@@ -0,0 +1,129 @@
+# -*- coding: utf-8 -*-
+
+# Scrapy settings for elabSpider project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+#     https://doc.scrapy.org/en/latest/topics/settings.html
+#     https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
+#     https://doc.scrapy.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'elabSpider'
+
+SPIDER_MODULES = ['elabSpider.spiders']
+NEWSPIDER_MODULE = 'elabSpider.spiders'
+
+# MONGO_HOST = '139.196.5.59'  # test environment
+# MONGO_HOST = '139.196.108.59'    # production environment (deprecated)
+MONGO_HOST = 'mongodb://logdb:logdb@dds-uf6da0fedc9881d41450-pub.mongodb.rds.aliyuncs.com:3717,dds-uf6da0fedc9881d42459-pub.mongodb.rds.aliyuncs.com:3717/logdb?replicaSet=mgset-12835903'
+MONGO_PORT = 27017
+MONGO_DB = 'logdb'
+MONGO_COLL = 'ershoufang'
+MONGO_USER = 'dbuser'
+MONGO_PSW = 'elab@123'
+
+PROXY_HOST = 'http://http-dyn.abuyun.com'
+PROXY_PORT = '9020'
+# PROXY_IDENTIFY = 'HY39548V0FZ45UKD'
+# PROXY_SECRETKEY = '07DBA6C5E470150B'
+
+USER_AGENT_PATH = 'fake_useragent.json'
+
+# LOG_FILE = 'spider.log'
+
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+#USER_AGENT = 'elabSpider (+http://www.yourdomain.com)'
+
+# Obey robots.txt rules
+ROBOTSTXT_OBEY = False
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+#CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+#DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+#CONCURRENT_REQUESTS_PER_DOMAIN = 16
+CONCURRENT_REQUESTS_PER_IP = 5
+
+# Disable cookies (enabled by default)
+COOKIES_ENABLED = False
+
+# COOKIES_DEBUG = True
+
+# Disable Telnet Console (enabled by default)
+#TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+#DEFAULT_REQUEST_HEADERS = {
+#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+#   'Accept-Language': 'en',
+#}
+
+# Enable or disable spider middlewares
+# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
+#SPIDER_MIDDLEWARES = {
+#    'elabSpider.middlewares.ElabspiderSpiderMiddleware': 543,
+#}
+
+# Enable or disable downloader middlewares
+# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
+
+DOWNLOADER_MIDDLEWARES = {
+   'elabSpider.middlewares.UserAgent': 1,
+   # 'elabSpider.middlewares.TooManyRequestsRetryMiddleware': 500,
+   'elabSpider.middlewares.ElabspiderDownloaderMiddleware': 543,
+   'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 550,
+   'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
+   'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware': None,
+   'scrapy.spidermiddlewares.offsite.OffsiteMiddleware': None,
+   # 'scrapy.downloadermiddlewares.retry.RetryMiddleware': None
+
+}
+
+# Enable or disable extensions
+# See https://doc.scrapy.org/en/latest/topics/extensions.html
+#EXTENSIONS = {
+#    'scrapy.extensions.telnet.TelnetConsole': None,
+#}
+
+# Configure item pipelines
+# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
+ITEM_PIPELINES = {
+   'elabSpider.pipelines.ElabspiderPipeline': 300,
+}
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
+# AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+# AUTOTHROTTLE_START_DELAY = 0.25
+# The maximum download delay to be set in case of high latencies
+# AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+# AUTOTHROTTLE_TARGET_CONCURRENCY = 5.0
+# Enable showing throttling stats for every response received:
+# AUTOTHROTTLE_DEBUG = True
+
+# Enable and configure HTTP caching (disabled by default)
+# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+#HTTPCACHE_ENABLED = True
+#HTTPCACHE_EXPIRATION_SECS = 0
+#HTTPCACHE_DIR = 'httpcache'
+#HTTPCACHE_IGNORE_HTTP_CODES = []
+#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+REDIRECT_ENABLED = False
+RETRY_ENABLED = True
+RETRY_TIMES = 10
+RETRY_HTTP_CODES = [403, 429, 404, 301, 302, 503]
+HTTPERROR_ALLOWED_CODES = [403, 429, 404, 301, 302, 503]
+
+DOWNLOAD_TIMEOUT = 15
+
+# RANDOMIZE_DOWNLOAD_DELAY = False
+# CONCURRENT_REQUESTS_PER_IP = 40
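PROXY_HOST and PROXY_PORT above point at abuyun's dynamic proxy service, but the matching identify/secret-key pair is commented out, so authentication has to happen wherever the proxy is attached to requests. A minimal sketch of the usual wiring with placeholder credentials; the middleware actually used in this project may differ:

# Sketch only: turning the abuyun PROXY_* settings into a per-request proxy
# with Basic auth. The credentials below are placeholders.
import base64


class AbuyunProxyMiddleware(object):

    proxy_server = 'http://http-dyn.abuyun.com:9020'  # PROXY_HOST + ':' + PROXY_PORT
    proxy_user = 'YOUR_IDENTIFY'                      # cf. the commented-out PROXY_IDENTIFY
    proxy_pass = 'YOUR_SECRETKEY'                     # cf. the commented-out PROXY_SECRETKEY

    def process_request(self, request, spider):
        auth = base64.b64encode(
            ('%s:%s' % (self.proxy_user, self.proxy_pass)).encode()
        ).decode()
        request.meta['proxy'] = self.proxy_server
        request.headers['Proxy-Authorization'] = 'Basic ' + auth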

+ 4 - 0
elabSpider/spiders/__init__.py

@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.

+ 99 - 0
elabSpider/spiders/departmentprice.py

@@ -0,0 +1,99 @@
+# -*- coding:utf-8 -*-
+
+# @Time    : 2018/4/27 10:50 AM
+
+# @Author  : Swing
+
+import scrapy
+from elabSpider.items import CommunityItem
+import traceback
+from elabSpider.email_util import send_email
+
+
+class DepartmentPriceSpider(scrapy.Spider):
+    name = 'departmentPrice'
+    allowed_domains = [
+        'shanghai.anjuke.com',
+        'hangzhou.anjuke.com',
+        'suzhou.anjuke.com',
+        'nb.anjuke.com'
+    ]
+    start_urls = [
+        'https://shanghai.anjuke.com/community/xujiahui/',
+        'https://shanghai.anjuke.com/community/nanjingxilu/',
+        'https://shanghai.anjuke.com/community/jingansi/',
+        'https://shanghai.anjuke.com/community/lujiazui/',
+        'https://shanghai.anjuke.com/community/nanjingdonglu/',
+        'https://shanghai.anjuke.com/community/renminguangchang/',
+        'https://shanghai.anjuke.com/community/xintiandia/',
+
+        'https://hangzhou.anjuke.com/community/gulouy/t30/',
+        'https://hangzhou.anjuke.com/community/hubin/t30/',
+        'https://hangzhou.anjuke.com/community/wushana/t30/',
+        'https://hangzhou.anjuke.com/community/wulin/t30/',
+        'https://hangzhou.anjuke.com/community/xihuwenhuaguangchang/t30/',
+        'https://hangzhou.anjuke.com/community/qianjiangxincheng/t30/',
+
+        'https://suzhou.anjuke.com/community/guanqianjie/t34/',
+        'https://suzhou.anjuke.com/community/pingjianglua/t34/',
+        'https://suzhou.anjuke.com/community/shilus/t34/',
+        'https://suzhou.anjuke.com/community/shishanw/t34/',
+        'https://suzhou.anjuke.com/community/hudongs/t34/',
+        'https://suzhou.anjuke.com/community/huxi/t34/',
+
+        'https://nb.anjuke.com/community/tianyiguangchang/t107/',
+        'https://nb.anjuke.com/community/gulouh/t107/',
+        'https://nb.anjuke.com/community/dongbuxinchengw/t107/',
+        'https://nb.anjuke.com/community/baizhangt/t107/',
+        'https://nb.anjuke.com/community/zhongma/t107/',
+
+
+        'https://hangzhou.anjuke.com/community/gulouy/t29/',
+        'https://hangzhou.anjuke.com/community/hubin/t29/',
+        'https://hangzhou.anjuke.com/community/wushana/t29/',
+        'https://hangzhou.anjuke.com/community/wulin/t29/',
+        'https://hangzhou.anjuke.com/community/xihuwenhuaguangchang/t29/',
+        'https://hangzhou.anjuke.com/community/qianjiangxincheng/t29/',
+
+        'https://suzhou.anjuke.com/community/guanqianjie/t33/',
+        'https://suzhou.anjuke.com/community/pingjianglua/t33/',
+        'https://suzhou.anjuke.com/community/shilus/t33/',
+        'https://suzhou.anjuke.com/community/shishanw/t33/',
+        'https://suzhou.anjuke.com/community/hudongs/t33/',
+        'https://suzhou.anjuke.com/community/huxi/t33/',
+
+        'https://nb.anjuke.com/community/tianyiguangchang/t105/',
+        'https://nb.anjuke.com/community/gulouh/t105/',
+        'https://nb.anjuke.com/community/dongbuxinchengw/t105/',
+        'https://nb.anjuke.com/community/baizhangt/t105/',
+        'https://nb.anjuke.com/community/zhongma/t105/'
+    ]
+
+    def parse(self, response):
+        try:
+            community_list = response.xpath('//div[@class="maincontent"]/div[@class="list-content"]/div[@_soj="xqlb"]').extract()
+            # house_type = ''
+            house_type = response.xpath('//div[@class="items"][3]/span[@class="elems-l pp-mod"]/a[@class="selected-item"]/text()').extract_first()
+            if not house_type:
+                house_type = response.xpath('//div[@class="items no-border-bottom"]/span[@class="elems-l "]/a[@class="selected-item"]/text()').extract_first()
+
+            if not house_type:
+                house_type = response.xpath('//div[@class="items"][3]/span[@class="elems-l "]/a[@class="selected-item"]/text()').extract_first()
+
+            # if not house_type:
+            #     print('error')
+
+            if community_list:
+                for community in community_list:
+                    item = CommunityItem.handle_response(community, house_type)
+                    yield item
+        except:
+            send_email('departmentPrice lv 1 web parse error', response._url + '\n' + traceback.format_exc())
+            print('error info: ', response.url)
+
+        try:
+            next_page = response.xpath(r'//div[@class="page-content"]/div[@class="multi-page"]/a[@class="aNxt"]/@href').extract_first()
+            if next_page:
+                yield scrapy.Request(next_page, callback=self.parse)
+        except:
+            send_email('departmentPrice get next page url error', response._url + '\n' + traceback.format_exc())
+            print('error info: ', response.url)

+ 24 - 0
elabSpider/spiders/example.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+import scrapy
+from elabSpider.items import *
+
+
+class ExampleSpider(scrapy.Spider):
+    name = 'test'
+    allowed_domains = ['nb.58.com']
+    start_urls = ['http://nb.58.com/ershoufang/37267775882391x.shtml']
+
+    def parse(self, response):
+        try:
+            item = ResoldApartmentItem.handle_response(response)
+            yield item
+        except:
+            print('error ' + response.url)
+
+
+    # def parse_item(self, response):
+    #     try:
+    #         ResoldApartmentItem.parse_item(response)
+    #     except:
+    #         print('error' + response.string)
+
+

+ 83 - 0
elabSpider/spiders/fangtianxiacommunity.py

@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+from elabSpider.items import *
+from scrapy.utils.response import get_base_url
+from urllib import parse
+import logging
+import scrapy
+from elabSpider.email_util import send_email
+import traceback
+
+
+class ExampleSpider(scrapy.Spider):
+    name = 'fangtianxia'
+    allowed_domains = ['fang.com']
+    start_urls = [
+        'http://esf.sh.fang.com/housing/25_1633_1_0_0_0_1_0_0_0/',
+        'http://esf.sh.fang.com/housing/19_103_1_0_0_0_1_0_0_0/',
+        'http://esf.sh.fang.com/housing/21_1622_1_0_0_0_1_0_0_0/',
+        'http://esf.sh.fang.com/housing/21_1623_1_0_0_0_1_0_0_0/',
+        'http://esf.sh.fang.com/housing/24_5240_1_0_0_0_1_0_0_0/',
+        'http://esf.sh.fang.com/housing/24_5239_1_0_0_0_1_0_0_0/',
+        'http://esf.sh.fang.com/housing/22_1625_1_0_0_0_1_0_0_0/',
+        'http://esf.hz.fang.com/housing/149__1_0_0_0_1_0_0_0/',
+        'http://esf.hz.fang.com/housing/150__1_0_0_0_1_0_0_0/',
+        'http://esf.hz.fang.com/housing/153__1_0_0_0_1_0_0_0/',
+        'http://esf.suzhou.fang.com/housing/13102__1_0_0_0_1_0_0_0/',
+        'http://esf.suzhou.fang.com/housing/278_4008_1_0_0_0_1_0_0_0/',
+        'http://esf.suzhou.fang.com/housing/277__1_0_0_0_1_0_0_0/',
+        'http://esf.nb.fang.com/housing/162_4220_1_0_0_0_1_0_0_0/',
+        'http://esf.nb.fang.com/housing/162_13968_1_0_0_0_1_0_0_0/',
+        'http://esf.nb.fang.com/housing/1047_13973_1_0_0_0_1_0_0_0/',
+        'http://esf.nb.fang.com/housing/1047_17420_1_0_0_0_1_0_0_0/',
+        'http://esf.nb.fang.com/housing/164__1_0_0_0_1_0_0_0/'
+    ]
+    # start_urls = ['http://huanqiuguangchang2.fang.com/xiangqing/']
+    # rules = (
+    #     Rule(link_extractor=r'http://[.]+(\.)fang.com/xiangqing', callback='parse_item')
+    # )
+
+    def parse(self, response):
+        # item = FTXCommunityItem.handle_response(response)
+        # yield item
+        try:
+            for href in response.xpath(r'//a[@class="plotTit"]/@href'):
+                url = href.extract() # type: str
+                if not url.startswith('http'):
+                    url = parse.urljoin(get_base_url(response), url)
+                yield scrapy.Request(url, callback=self.parse_subweb, dont_filter=True)
+
+        except Exception as err:
+            send_email('fangtianxia lv 1 web parse error', response._url + '\n' + traceback.format_exc())
+            msg = 'lv 1 web parse error url: ' + response._url + '-'.join(err.args)
+            logging.error(msg=msg)
+
+        try:
+            next_page = response.xpath(r'//div[@class="fanye gray6"]/a[@id="PageControl1_hlk_next"]/@href').extract_first()
+            if next_page:
+                base_url = get_base_url(response)
+                full_url = parse.urljoin(base_url, next_page)
+                yield scrapy.Request(full_url, callback=self.parse, dont_filter=True)
+        except Exception as err:
+            send_email('fangtianxia next page url parse error', response._url + '\n' + traceback.format_exc())
+            msg = 'next page url parse error url: ' + response._url + '-'.join(err.args)
+            logging.error(msg=msg)
+
+    def parse_subweb(self, response):
+        try:
+            url = response.xpath(r'//li[@data="xqxq"]/a/@href').extract_first()
+            yield scrapy.Request(url, callback=self.parse_item, dont_filter=True)
+        except Exception as err:
+            send_email('fangtianxia get detail url error', response._url + '\n' + traceback.format_exc())
+            msg = 'get detail url error url: ' + response._url + '-'.join(err.args)
+            logging.error(msg=msg)
+
+    def parse_item(self, response):
+        try:
+            item = FTXCommunityItem.handle_response(response)
+            yield item
+        except Exception as err:
+            send_email('fangtianxia lv 2 web parse error', response._url + '\n' + traceback.format_exc())
+            msg = 'lv 2 web parse error url: ' + response._url + '-'.join(err.args)
+            logging.error(msg=msg)
+
+

+ 50 - 0
elabSpider/spiders/lfs_rental_house.py

@@ -0,0 +1,50 @@
+# -*- coding:utf-8 -*-
+
+# @Time    : 2018/4/27 10:50 AM
+
+# @Author  : Swing
+
+import scrapy
+from elabSpider.items import RentalHouseItem
+import logging
+from elabSpider.email_util import send_email
+import traceback
+
+
+class RentalHouseSpider(scrapy.Spider):
+    name = 'lfsrentalHouse'
+    allowed_domains = [
+        'nb.zu.anjuke.com',
+        'nb.anjuke.com'
+    ]
+    start_urls = [
+        'https://nb.anjuke.com/community/props/rent/1003094'
+        # 'https://nb.zu.anjuke.com/rent/F717483045'
+    ]
+
+    def parse(self, response):
+        try:
+            community_list = response.xpath('//ul[@class="m-house-list"]/li/a/@href').extract()
+
+            if community_list:
+                for community_url in community_list:
+                    yield scrapy.Request(community_url, callback=self.parse_item)
+        except Exception as err:
+            send_email('lfsrentalHouse get detail url error', response._url + '\n' + traceback.format_exc())
+            logging.error('get detail url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+        try:
+            next_page = response.xpath(r'//div[@class="m-page"]/div[@class="multi-page"]/a[@class="aNxt"]/@href.....').extract_first()
+            if next_page:
+                yield scrapy.Request(next_page, callback=self.parse)
+        except Exception as err:
+            send_email('lfsrentalHouse get next page error', response._url + '\n' + traceback.format_exc())
+            logging.error('get next page url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+    def parse_item(self, response):
+        try:
+            item = RentalHouseItem.handle_response(response)
+            yield item
+        except Exception as err:
+            send_email('lfsrentalHouse parse response error', response._url + '\n' + traceback.format_exc())
+            logging.error('parse response error ! url: ' + response._url + " reason: " + '-'.join(err.args))

+ 48 - 0
elabSpider/spiders/lfs_resold.py

@@ -0,0 +1,48 @@
+# -*- coding:utf-8 -*-
+
+# @Time    : 2018/4/27 10:50 AM
+
+# @Author  : Swing
+
+import scrapy
+from elabSpider.items import ResoldHouseItem
+import logging
+import traceback
+from elabSpider.email_util import send_email
+
+
+class ResoldHouseSpider(scrapy.Spider):
+    name = 'lfsresoldHouse'
+    allowed_domains = [
+        'nb.anjuke.com'
+    ]
+    start_urls = [
+        'https://nb.anjuke.com/community/props/sale/1003094/'
+    ]
+
+    def parse(self, response):
+        try:
+            community_list = response.xpath('//ul[@class="m-house-list"]/li/a/@href').extract()
+
+            if community_list:
+                for community_url in community_list:
+                    yield scrapy.Request(community_url, callback=self.parse_item)
+        except Exception as err:
+            send_email('lfsresoldHouse get detail url error', response._url + '\n' + traceback.format_exc())
+            logging.error('get detail url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+        try:
+            next_page = response.xpath(r'//div[@class="m-page"]/div[@class="multi-page"]/a[@class="aNxt"]/@href').extract_first()
+            if next_page:
+                yield scrapy.Request(next_page, callback=self.parse)
+        except Exception as err:
+            send_email('lfsresoldHouse get next page url error', response._url + '\n' + traceback.format_exc())
+            logging.error('get next page url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+    def parse_item(self, response):
+        try:
+            item = ResoldHouseItem.handle_response(response)
+            yield item
+        except Exception as err:
+            send_email('lfsresoldHouse parse response error', response._url + '\n' + traceback.format_exc())
+            logging.error('parse response error ! url: ' + response._url + " reason: " + '-'.join(err.args))

+ 24 - 0
elabSpider/spiders/lfs_sold_average.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+import scrapy
+import logging
+from elabSpider.items import LfsAveragePriceItem
+import traceback
+from elabSpider.email_util import send_email
+
+
+class LfsSpider(scrapy.Spider):
+    name = 'lfsSoldAverage'
+    allowed_domains = ['nb.anjuke.com']
+    start_urls = ['https://nb.anjuke.com/community/view/1003094']
+    # start_urls = ['https://nb.anjuke.com/community/view/275965?from=Filter_1&hfilter=filterlist']
+
+    def parse(self, response):
+        try:
+            item = LfsAveragePriceItem.handle_response(response)
+            yield item
+        except Exception as err:
+            send_email('lfsSoldAverage parse response error', response._url + '\n' + traceback.format_exc())
+            logging.error('parse response error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+
+

+ 51 - 0
elabSpider/spiders/nb_ftx_rental_house.py

@@ -0,0 +1,51 @@
+# -*- coding:utf-8 -*-
+
+# @Time    : 2018/4/27 10:50 AM
+
+# @Author  : Swing
+
+import scrapy
+from scrapy.utils.response import get_base_url
+from urllib import parse
+from elabSpider.items import FTXRentalHouseItem
+import logging
+import traceback
+from elabSpider.email_util import send_email
+
+
+class RentalHouseSpider(scrapy.Spider):
+    name = 'ftxrentalHouse'
+    allowed_domains = [
+        'zu.nb.fang.com'
+    ]
+    start_urls = [
+        'http://zu.nb.fang.com/house/h316-n31/'
+    ]
+
+    def parse(self, response):
+        try:
+            community_list = response.xpath('//div[@class="houseList"]/dl/dd[@class="info rel"]/p[@class="title"]/a/@href').extract()
+
+            if community_list:
+                for community_url in community_list:
+                    if community_url.startswith('/chuzu/'):
+                        yield scrapy.Request(parse.urljoin(get_base_url(response), community_url), callback=self.parse_item)
+        except Exception as err:
+            send_email('ftxrentalHouse lv 1 web parse error', response._url + '\n' + traceback.format_exc())
+            logging.error(' error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+        try:
+            next_page = response.xpath(r'//div[@class="fanye"]/a[text()="下一页"]/@href').extract_first()
+            if next_page and next_page.startswith('/house/'):
+                yield scrapy.Request(parse.urljoin(get_base_url(response), next_page), callback=self.parse)
+        except Exception as err:
+            send_email('ftxrentalHouse get next page url error', response._url + '\n' + traceback.format_exc())
+            logging.error('get next page url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+    def parse_item(self, response):
+        try:
+            item = FTXRentalHouseItem.handle_response(response)
+            yield item
+        except Exception as err:
+            send_email('ftxrentalHouse parse response error', response._url + '\n' + traceback.format_exc())
+            logging.error('parse response error ! url: ' + response._url + " reason: " + '-'.join(err.args))

+ 49 - 0
elabSpider/spiders/nb_rental_house.py

@@ -0,0 +1,49 @@
+# -*- coding:utf-8 -*-
+
+# @Time    : 2018/4/27 10:50 AM
+
+# @Author  : Swing
+
+import scrapy
+from elabSpider.items import RentalHouseItem
+import logging
+import traceback
+from elabSpider.email_util import send_email
+
+
+class RentalHouseSpider(scrapy.Spider):
+    name = 'rentalHouse'
+    allowed_domains = [
+        'nb.zu.anjuke.com'
+    ]
+    start_urls = [
+        'https://nb.zu.anjuke.com/fangyuan/lx8-px3-x1/',
+        'https://nb.zu.anjuke.com/fangyuan/lx1-px3-x1/'
+    ]
+
+    def parse(self, response):
+        try:
+            community_list = response.xpath('//div[@class="maincontent"]/div[@class="list-content"]/div[contains(@class, "zu-itemmod")]/a/@href').extract()
+
+            if community_list:
+                for community_url in community_list:
+                    yield scrapy.Request(community_url, callback=self.parse_item)
+        except Exception as err:
+            send_email('rentalHouse get detail url error', response._url + '\n' + traceback.format_exc())
+            logging.error('get detail url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+        try:
+            next_page = response.xpath(r'//div[@class="page-content"]/div[@class="multi-page"]/a[@class="aNxt"]/@href').extract_first()
+            if next_page:
+                yield scrapy.Request(next_page, callback=self.parse)
+        except Exception as err:
+            send_email('rentalHouse get next page url error', response._url + '\n' + traceback.format_exc())
+            logging.error('get next page url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+    def parse_item(self, response):
+        try:
+            item = RentalHouseItem.handle_response(response)
+            yield item
+        except Exception as err:
+            send_email('rentalHouse parse response error', response._url + '\n' + traceback.format_exc())
+            logging.error('parse response error ! url: ' + response._url + " reason: " + '-'.join(err.args))

+ 50 - 0
elabSpider/spiders/nb_resold.py

@@ -0,0 +1,50 @@
+# -*- coding:utf-8 -*-
+
+# @Time    : 2018/4/27 10:50 AM
+
+# @Author  : Swing
+
+import scrapy
+from elabSpider.items import ResoldHouseItem
+import logging
+import traceback
+from elabSpider.email_util import send_email
+
+
+class ResoldHouseSpider(scrapy.Spider):
+    name = 'nbresoldHouse'
+    allowed_domains = [
+        'nb.anjuke.com'
+    ]
+    start_urls = [
+        'https://nb.anjuke.com/sale/o5-t105/',
+        'https://nb.anjuke.com/sale/o5-t107/'
+        # 'https://nb.anjuke.com/prop/view/A1237992888?from=filter&spread=filtersearch_p&position=117&kwtype=filter&now_time=1526637680'
+    ]
+
+    def parse(self, response):
+        try:
+            community_list = response.xpath('//ul[@id="houselist-mod-new"]/li/div[@class="house-details"]/div[@class="house-title"]/a/@href').extract()
+
+            if community_list:
+                for community_url in community_list:
+                    yield scrapy.Request(community_url, callback=self.parse_item)
+        except Exception as err:
+            send_email('nbresoldHouse get detail url error', response._url + '\n' + traceback.format_exc())
+            logging.error('get detail url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+        try:
+            next_page = response.xpath(r'//div[@class="multi-page"]/a[@class="aNxt"]/@href').extract_first()
+            if next_page:
+                yield scrapy.Request(next_page, callback=self.parse)
+        except Exception as err:
+            send_email('nbresoldHouse get next page url error', response._url + '\n' + traceback.format_exc())
+            logging.error('get next page url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+    def parse_item(self, response):
+        try:
+            item = ResoldHouseItem.handle_response(response)
+            yield item
+        except Exception as err:
+            send_email('nbresoldHouse parse response error', response._url + '\n' + traceback.format_exc())
+            logging.error('parse response error ! url: ' + response._url + " reason: " + '-'.join(err.args))

+ 38 - 0
elabSpider/spiders/resoldapartment.py

@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+import scrapy
+from elabSpider.items import *
+import traceback
+from elabSpider.email_util import send_email
+
+
+class ExampleSpider(scrapy.Spider):
+    name = '58ershoufang'
+    allowed_domains = ['58.com']
+    start_urls = ['http://nb.58.com/haishu/ershoufang/']
+
+    def parse(self, response):
+        try:
+            for href in response.xpath(r'//ul[@class="house-list-wrap"]/li/div[@class="list-info"]/h2[@class="title"]/a/@href'):
+                url = href.extract()
+                yield scrapy.Request(url, callback=self.parse_item)
+        except:
+            send_email('58ershoufang lv 1 url parse error', response._url + '\n' + traceback.format_exc())
+            print('error')
+
+        try:
+            next_page = response.xpath(r'//div[@class="pager"]/a[@class="next"]/@href').extract_first()
+            if next_page:
+                yield scrapy.Request(next_page, callback=self.parse)
+        except:
+            send_email('58ershoufang get next url error', response._url + '\n' + traceback.format_exc())
+            print('error next page')
+
+    def parse_item(self, response):
+        try:
+            item = ResoldApartmentItem.handle_response(response)
+            yield item
+        except:
+            send_email('58ershoufang get item parse error', response._url + '\n' + traceback.format_exc())
+            print('error ' + response.url)
+
+

+ 102 - 0
elabSpider/spiders/sjk_rental_house.py

@@ -0,0 +1,102 @@
+# -*- coding:utf-8 -*-
+
+# @Time    : 2018/4/27 10:50 AM
+
+# @Author  : Swing
+
+import scrapy
+from elabSpider.items import RentalHouseItem
+import logging
+import traceback
+from elabSpider.email_util import send_email
+
+
+class RentalHouseSpider(scrapy.Spider):
+    name = 'sjkrentalHouse'
+    allowed_domains = [
+        'nb.zu.anjuke.com',
+        'nb.anjuke.com'
+    ]
+    start_urls = [
+        'https://nb.anjuke.com/community/props/rent/275642/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/275642/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/1003094/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/1003094/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/275869/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/275869/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/973807/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/973807/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/973808/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/973808/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/275517/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/275517/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/1000067/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/1000067/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/406899/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/406899/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/1016525/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/1016525/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/275936/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/275936/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/1017728/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/1017728/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/275274/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/275274/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/275658/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/275658/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/275386/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/275386/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/1006982/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/1006982/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/275764/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/275764/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/792725/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/792725/lx8-x1/',
+
+        'https://nb.anjuke.com/community/props/rent/1022250/lx1-x1/',
+        'https://nb.anjuke.com/community/props/rent/1022250/lx8-x1/'
+
+    ]
+
+    def parse(self, response):
+        try:
+            community_list = response.xpath('//ul[@class="m-house-list"]/li/a/@href').extract()
+
+            if community_list:
+                for community_url in community_list:
+                    yield scrapy.Request(community_url, callback=self.parse_item)
+        except Exception as err:
+            send_email('sjkrentalHouse get detail url error', response._url + '\n' + traceback.format_exc())
+            logging.error('get detail url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+        try:
+            next_page = response.xpath(r'//div[@class="m-page"]/div[@class="multi-page"]/a[@class="aNxt"]/@href').extract_first()
+            if next_page:
+                yield scrapy.Request(next_page, callback=self.parse)
+        except Exception as err:
+            send_email('sjkrentalHouse get next page url parse error', response._url + '\n' + traceback.format_exc())
+            logging.error('get next page url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+    def parse_item(self, response):
+        try:
+            item = RentalHouseItem.handle_response(response)
+            yield item
+        except Exception as err:
+            send_email('sjkrentalHouse parse response error', response._url + '\n' + traceback.format_exc())
+            logging.error('parse response error ! url: ' + response._url + " reason: " + '-'.join(err.args))

+ 66 - 0
elabSpider/spiders/sjk_resold.py

@@ -0,0 +1,66 @@
+# -*- coding:utf-8 -*-
+
+# @Time    : 2018/4/27 10:50 AM
+
+# @Author  : Swing
+
+import scrapy
+from elabSpider.items import ResoldHouseItem
+import logging
+import traceback
+from elabSpider.email_util import send_email
+
+
+class ResoldHouseSpider(scrapy.Spider):
+    name = 'sjkresoldHouse'
+    allowed_domains = [
+        'nb.anjuke.com'
+    ]
+    start_urls = [
+        'https://nb.anjuke.com/community/props/sale/275642/',
+        'https://nb.anjuke.com/community/props/sale/1003094/',
+        'https://nb.anjuke.com/community/props/sale/275869/',
+        'https://nb.anjuke.com/community/props/sale/973807/',
+        'https://nb.anjuke.com/community/props/sale/973808/',
+        'https://nb.anjuke.com/community/props/sale/275517/',
+        'https://nb.anjuke.com/community/props/sale/1000067/',
+        'https://nb.anjuke.com/community/props/sale/406899/',
+        'https://nb.anjuke.com/community/props/sale/1016525/',
+        'https://nb.anjuke.com/community/props/sale/275936/',
+        'https://nb.anjuke.com/community/props/sale/1017728/',
+        'https://nb.anjuke.com/community/props/sale/275274/',
+        'https://nb.anjuke.com/community/props/sale/275658/',
+        'https://nb.anjuke.com/community/props/sale/275386/',
+        'https://nb.anjuke.com/community/props/sale/1006982/',
+        'https://nb.anjuke.com/community/props/sale/275764/',
+        'https://nb.anjuke.com/community/props/sale/792725/',
+        'https://nb.anjuke.com/community/props/sale/1022250/'
+        # 'https://nb.anjuke.com/prop/view/A1237992888?from=filter&spread=filtersearch_p&position=117&kwtype=filter&now_time=1526637680'
+    ]
+
+    def parse(self, response):
+        try:
+            community_list = response.xpath('//ul[@class="m-house-list"]/li/a/@href').extract()
+
+            if community_list:
+                for community_url in community_list:
+                    yield scrapy.Request(community_url, callback=self.parse_item)
+        except Exception as err:
+            send_email('sjkresoldHouse get detail url error', response._url + '\n' + traceback.format_exc())
+            logging.error('get detail url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+        try:
+            next_page = response.xpath(r'//div[@class="m-page"]/div[@class="multi-page"]/a[@class="aNxt"]/@href').extract_first()
+            if next_page:
+                yield scrapy.Request(next_page, callback=self.parse)
+        except Exception as err:
+            send_email('sjkresoldHouse get next page url error', response._url + '\n' + traceback.format_exc())
+            logging.error('get next page url error ! url: ' + response._url + " reason: " + '-'.join(err.args))
+
+    def parse_item(self, response):
+        try:
+            item = ResoldHouseItem.handle_response(response)
+            yield item
+        except Exception as err:
+            send_email('sjkresoldHouse parse response error', response._url + '\n' + traceback.format_exc())
+            logging.error('parse response error ! url: ' + response._url + " reason: " + '-'.join(err.args))

+ 12 - 0
elabSpider/spiders/test_db.py

@@ -0,0 +1,12 @@
+import pymongo
+from elabSpider.items import ResoldApartmentItem
+
+uri = "mongodb://logdb:logdb@dds-uf6da0fedc9881d41450-pub.mongodb.rds.aliyuncs.com:3717,dds-uf6da0fedc9881d42459-pub.mongodb.rds.aliyuncs.com:3717/logdb?replicaSet=mgset-12835903"
+client = pymongo.MongoClient(uri, authSource='logdb')
+
+db = client['logdb']
+coll = db['test_coll']
+
+item = ResoldApartmentItem()
+item['title'] = 'this is a test title'
+coll.insert_one(item)

+ 16 - 0
elabSpider/start.py

@@ -0,0 +1,16 @@
+# -*- coding:utf-8 -*-
+
+# @Time    : 2018/4/23 2:58 PM
+
+# @Author  : Swing
+
+
+from scrapy import cmdline
+
+# cmdline.execute("scrapy crawl ftxrentalHouse".split())
+# cmdline.execute("scrapy crawl rentalHouse".split())
+cmdline.execute("scrapy crawl nbresoldHouse".split())
+# cmdline.execute("scrapy crawl lfsrentalHouse".split())
+# cmdline.execute("scrapy crawl lfsSoldAverage".split())
+# cmdline.execute("scrapy crawl test".split())
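cmdline.execute() hands control to the Scrapy CLI and only ever runs the one uncommented spider. If several of the project's spiders need to run from a single script, a CrawlerProcess is the usual alternative; a minimal sketch using spider names registered in this project:

# Sketch only: running more than one of the project's spiders from one script.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())
process.crawl('nbresoldHouse')
process.crawl('lfsSoldAverage')
process.start()  # blocks until both crawls have finished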
+

+ 11 - 0
scrapy.cfg

@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.io/en/latest/deploy.html
+
+[settings]
+default = elabSpider.settings
+
+[deploy]
+url = http://0.0.0.0:8080/
+project = elabSpider
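The [deploy] section assumes a scrapyd instance listening at the URL above. Once the project has been deployed there (for example with scrapyd-deploy), a crawl can be scheduled over HTTP; a minimal sketch, assuming the service is actually reachable at that address:

# Sketch only: scheduling the nbresoldHouse spider on the scrapyd instance
# configured in the [deploy] section above.
import requests

resp = requests.post('http://0.0.0.0:8080/schedule.json',
                     data={'project': 'elabSpider', 'spider': 'nbresoldHouse'})
print(resp.json())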