Browse Source

tongce: 数据清洗程序

Signed-off-by: binren <zhangbr@elab-plus.com>
binren 5 years ago
parent
commit
ec2776e526
4 changed files with 220 additions and 18 deletions
  1. 4 4
      excel_util.py
  2. 1 1
      flask_app.py
  3. 21 0
      test_info.py
  4. 194 13
      tongce.py

+ 4 - 4
excel_util.py

@@ -204,8 +204,8 @@ class ExcelUtil:
         info = {}
         for row in rows:
             key = str(int(row[1].value)) + str(int(row[9].value))
-            # 数据类型,数据项名称,所在tab
-            info[key] = [row[15].value, row[13].value, row[14].value]
+            # tag, title, name, sub_question_id
+            info[key] = [row[15].value, row[13].value, row[14].value, row[6].value]
         return info
 
     def create_excle(self, file_name, header, data):
@@ -243,7 +243,7 @@ class ExcelUtil:
         # uuid,score(sub_option_id),created,sub_question_id
         insert_data = []
         for row in rows:
-            uuid = row[0].value
+            uuid = row[0].value + '1000'
             date = row[1].value
             question_1 = str(row[6].value).split('.')[1]
             id_1 = get_sub_option_id(20, question_1)
@@ -331,7 +331,7 @@ class ExcelUtil:
 
 if __name__ == '__main__':
     import json
-    eu = ExcelUtil('Sheet1', '5wenjuanxing.xlsx')
+    eu = ExcelUtil('Sheet1', '84_1500.xlsx')
     data = eu.wenjuanxin_84()
     print(json.dumps(data, ensure_ascii=False, indent=4))
     print(len(data))

+ 1 - 1
flask_app.py

@@ -170,7 +170,7 @@ def tongce_data():
     response = {}
     try:
         tongce = TongCe()
-        result = tongce.tongce_answer_info()
+        result = tongce.lingdi_data_scores()
         response['code'] = 0
         response['message'] = '成功'
         response['data'] = result

+ 21 - 0
test_info.py

@@ -84,6 +84,27 @@ class TestInfo(object):
             testcase_id = %s
     '''
 
+    # Answer distribution for testcase 86 / sub-question 377 in
+    # f_t_daren_score_2: the inner query yields one row per uuid and the
+    # outer query counts those rows per score.
+    # NOTE(review): the inner SELECT returns a non-aggregated `score` while
+    # grouping by uuid, and aliases the count as `uuid` -- confirm both are
+    # intended.
+    sql_6 = '''
+            SELECT
+                score,
+                COUNT(uuid)
+            FROM
+                (
+                    SELECT
+                        score,
+                        COUNT(DISTINCT uuid) AS uuid
+                    FROM
+                        f_t_daren_score_2
+                    WHERE
+                        testcase_id = 86
+                    AND sub_question_id = 377
+                    GROUP BY
+                        uuid
+                ) a
+            GROUP BY
+                a.score
+    '''
+
     def __init__(self):
         # self.shangju_db = MysqlDB('shangju')
         self.bi_report_db = MysqlDB('bi_report')

+ 194 - 13
tongce.py

@@ -3,7 +3,6 @@ from mysql_db import MysqlDB
 from itertools import groupby
 
 
-
 class TongCe:
     """
         同策测试数据清洗
@@ -142,14 +141,17 @@ class TongCe:
 
     sql_6 = '''
         insert INTO mvp_page_display_data (
+            crowd_info_id,
             match_id,
+            page_display_rule_id,
+            name,
             value,
             STATUS,
             creator,
             created
         )
         VALUES
-            (%s, %s, 1, 'binren', now())
+            (%s, %s, %s, %s, %s, 1, 'binren', now())
     '''
 
     sql_7 = '''
@@ -189,12 +191,13 @@ class TongCe:
     '''
 
     sql_9 = '''
-            SELECT                                                                                     
+           SELECT                                                                                     
             x.city
 			,x.uuid
 			,x.sex
 			,x.nld
 			,x.zhifuli
+			,x.juzhujiegou
 			,m.father_content
 			,m.father_id
 			,m.sub_question_id
@@ -210,7 +213,8 @@ class TongCe:
                         e.sex,
                         f.nld,
                         c.zhifuli,
-                        d.city
+                        d.city,
+												w.juzhujiegou
                     FROM
                         (
                             SELECT
@@ -222,7 +226,7 @@ class TongCe:
                             LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
                             WHERE
                                 a.testcase_id IN (84, 85, 86, 87)
-                            AND b.father_id IN (47)
+                            AND b.father_id = 47
                             AND a.sub_question_id = b.sub_question_id
                             AND a.testcase_id = b.testcase_id
                             GROUP BY
@@ -238,7 +242,7 @@ class TongCe:
                         LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
                         WHERE
                             a.testcase_id IN (84, 85, 86, 87)
-                        AND b.father_id IN (48)
+                        AND b.father_id = 48
                         AND a.sub_question_id = b.sub_question_id
                         AND a.testcase_id = b.testcase_id
                         GROUP BY
@@ -254,7 +258,7 @@ class TongCe:
                         LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
                         WHERE
                             a.testcase_id IN (84, 85, 86, 87)
-                        AND b.father_id IN (234)
+                        AND b.father_id = 234
                         AND a.sub_question_id = b.sub_question_id
                         AND a.testcase_id = b.testcase_id
                         GROUP BY
@@ -270,13 +274,29 @@ class TongCe:
                         LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
                         WHERE
                             a.testcase_id IN (84, 85, 86, 87)
-                        AND b.father_id IN (254)
+                        AND b.father_id = 254
                         AND a.sub_question_id = b.sub_question_id
                         AND a.testcase_id = b.testcase_id
                         GROUP BY
                             a.testcase_id,
                             a.uuid
                     ) d ON c.uuid = d.uuid
+                    left join (
+                        SELECT
+                            a.uuid,
+                            b.sub_option_content AS juzhujiegou
+                        FROM
+                            f_t_daren_score_2 a
+                        LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
+                        WHERE
+                            a.testcase_id IN (84, 85, 86, 87)
+                        AND b.father_id = 211
+                        AND a.sub_question_id = b.sub_question_id
+                        AND a.testcase_id = b.testcase_id
+                        GROUP BY
+                            a.testcase_id,
+                            a.uuid
+                    ) w on d.uuid = w.uuid
                 ) x
             LEFT JOIN (
                 SELECT
@@ -310,11 +330,64 @@ class TongCe:
     
     '''
 
+    # Read id, title_type, title_in_page and sub_question_id of every active
+    # (status = 1) row of mvp_page_display_rule; used by get_rule_data_info.
+    sql_11 = '''
+            select id, title_type, title_in_page, sub_question_id from mvp_page_display_rule where status = 1
+    '''
+
+    # Insert one mvp_page_display_rule row.  house_id (67), function_id (1),
+    # status and creator are hard-coded; the three placeholders are
+    # title_type, title_in_page and sub_question_id, in that order.
+    sql_12 = '''
+            INSERT INTO mvp_page_display_rule (
+                house_id,
+                function_id,
+                title_type,
+                title_in_page,
+                sub_question_id,
+                STATUS,
+                creator,
+                created
+            )
+            VALUE
+                (
+                    67,
+                    1,
+                    %s,
+                    %s,
+                    %s,
+                    1,
+                    'binren',
+                    now()
+                )
+    
+    '''
+
+    # Insert one mvp_crowd_info row.  house_id (67), status and creator are
+    # hard-coded; the four placeholders are pay_ability, age_area, city_name
+    # and life_cycle, in that order.
+    sql_13 = '''
+            INSERT INTO mvp_crowd_info (
+                house_id,
+                pay_ability,
+                age_area,
+                city_name,
+                life_cycle,
+                STATUS,
+                creator,
+                created
+            )
+            VALUES
+                (
+                    67,
+                    %s,
+                    %s,
+                    %s,
+                    %s,
+                    1,
+                    'binren',
+                    now()
+                )
+    '''
+
     def __init__(self):
-        # self.shangju_db = MysqlDB('shangju')
-        # self.marketing_db = MysqlDB('bi_report')
+        self.shangju_db = MysqlDB('shangju')
+        self.marketing_db = MysqlDB('bi_report')
         self.linshi_db = MysqlDB('linshi', db_type=1)
-        # self.options_info = ExcelUtil('工作表6', 'tongce.xlsx').read_options_info()
+        self.options_info = ExcelUtil('工作表6', 'tongce.xlsx').read_options_info()
 
     def get_question_info_from_db(self):
         result = self.shangju_db.select(self.sql_2, [67])
@@ -332,6 +405,114 @@ class TongCe:
         result = self.linshi_db.select(self.sql_4)
         return result
 
+    # Option contents of one sub-question.  Sub-question ids used with this
+    # query: pay ability 376, age 29, city 377, living structure 395.
+    sql_14 = '''
+          select content from bq_sub_option where sub_question_id = %s
+    '''
+
+    def insert_into_mvp_crowd_info(self):
+        zhifuli = self.shangju_db.select(self.sql_14, [376])
+        age = self.shangju_db.select(self.sql_14, [29])
+        city = self.shangju_db.select(self.sql_14, [377])
+        juzhujiegou = self.shangju_db.select(self.sql_14, [395])
+        insert_data = []
+        for zfl in zhifuli:
+            for a in age:
+                for cy in city:
+                    for jzjg in juzhujiegou:
+                       insert_data.append([zfl, a, cy, jzjg])
+        self.shangju_db.add_some(self.sql_13, insert_data)
+        pass
+
+    # Read id, pay_ability, age_area, city_name and life_cycle of every
+    # active (status = 1) row of mvp_crowd_info; used by get_crowd_info.
+    sql_15 = '''
+          select id, pay_ability, age_area, city_name, life_cycle from mvp_crowd_info where status = 1
+    '''
+
+    def get_crowd_info(self):
+        data = self.shangju_db.select(self.sql_15)
+        return data
+
+    def insert_into_rule(self):
+        option_info = self.options_info
+        insert_data = []
+        for key in option_info.keys():
+            data = option_info[key]
+            insert_data.append([data[0], data[1], data[3]])
+        self.shangju_db.add_some(self.sql_12, insert_data)
+
+    def get_rule_data_info(self):
+        data = self.shangju_db.select(self.sql_11)
+        return data
+
+    # Insert one mvp_page_display_data row from crowd_info_id, match_id and
+    # value; status and creator are hard-coded.
+    # NOTE(review): no code visible in this change references sql_16
+    # (lingdi_data_scores writes through sql_6) -- confirm it is still needed.
+    sql_16 = '''
+        insert INTO mvp_page_display_data (
+            crowd_info_id,
+            match_id,
+            value,
+            STATUS,
+            creator,
+            created
+        )
+        VALUES
+            (%s, %s, %s, 1, 'binren', now())
+    '''
+
+    def lingdi_data_scores(self):
+        # 1: 写入mvp_crowd_info
+        self.insert_into_mvp_crowd_info()
+        crowd_info = self.get_crowd_info()
+        # 2: 写入rule
+        self.insert_into_rule()
+        rule = self.get_rule_data_info()
+        return
+        # 3: 读入答题数据
+        self.answers = self.marketing_db.select(self.sql_9)
+        self.match_data_info = self.get_option_match_info()
+
+        # 筛选写入data的数据
+        insert_data = []
+        for ci in crowd_info:
+            crowd_info_id = ci[0]
+            zhifuli = ci[1]
+            age = ci[2]
+            city = ci[3]
+            juzhujiegou = ci[4]
+            data = self.filter_people(city, age, zhifuli, juzhujiegou)
+            data.sort(key=lambda obj: obj[0])
+            for key, questions_data in groupby(data, key=lambda obj: obj[0]):
+                question_data_list = []
+                for qd in questions_data:
+                    question_data_list.append([x for x in qd])
+                rule_id = self.get_rule_id(key, rule)
+                if rule_id:
+                    question_people = len(question_data_list)
+                    if question_people > 0:
+                        for option_name, option_data_1 in groupby(question_data_list, key=lambda obj: obj[3]):
+                            option_data_list = []
+                            for od in option_data_1:
+                                option_data_list.append([x for x in od])
+                            if len(option_data_list) >= 0:
+                                option_id = option_data_list[0][2]
+                                for md in self.match_data_info:
+                                    if md[1] == key and md[2] == option_id:
+                                        match_id = md[0]
+                                        insert_data.append([crowd_info_id, match_id, rule_id, option_name, len(option_data_list) / question_people])
+            self.shangju_db.add_some(self.sql_6, insert_data)
+
+    def get_rule_id(self, sub_question_id, rule):
+        for re in rule:
+            if str(re[3]) == str(sub_question_id):
+                return re[0]
+        return None
+
+    def filter_people(self, city, age, zhifuli, juzhujiegou):
+        result = []
+        for answer in self.answers:
+            if answer[0] == city and answer[3] == age and answer[4] == zhifuli and answer[5] == juzhujiegou:
+                # 子题id, 子题题目,子选项id,子选项题目
+                result.append([answer[9], answer[10], answer[11], answer[12]])
+        return result
+
     def get_testcase_ids_by_house_name(self, house_name):
         testcase_ids = self.shangju_db.select(self.sql_5, [house_name])
         return testcase_ids
@@ -380,7 +561,7 @@ class TongCe:
         return {'插入数据条数': len(dispaly_data), 'scores': dispaly_data}
 
     def wenjuanxin_84(self):
-        excel = ExcelUtil('Sheet1', 'wenjuanxing.xlsx')
+        excel = ExcelUtil('Sheet1', '84_1500.xlsx')
         insert_data = excel.wenjuanxin_84()
         self.linshi_db.add_some(self.sql_10, insert_data)
         print()
@@ -388,4 +569,4 @@ class TongCe:
 
 if __name__ == '__main__':
     tongce = TongCe()
-    tongce.wenjuanxin_84()
+    tongce.lingdi_data_scores()