from excel_util import ExcelUtil from mysql_db import MysqlDB from itertools import groupby class TongCe: """ 同策测试数据清洗 """ # 统计筒体结果 sql_1 = ''' SELECT a.sub_question_id, b.sub_question_content, a.score, b.sub_option_content, count(1) FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a. STATUS = b. STATUS = 1 WHERE a.testcase_id in %s and a.testcase_id = b.testcase_id AND a.sub_question_id = b.sub_question_id AND ( a.score = b.score OR a.score = b.sub_option_id ) GROUP BY b.sub_question_content, a.score, b.sub_option_content ''' # 选项信息 sql_2 = ''' SELECT b.id as question_id, b. NAME as question_title, a.id as sub_question_id, a. NAME as sub_question_title, d.id as option_id, d.content as option_title, c.id as sub_option_id, c.content as sub_option_title FROM bq_sub_question a LEFT JOIN bq_question b ON a.father_id = b.id LEFT JOIN bq_sub_option c ON a.id = c.sub_question_id LEFT JOIN bq_option d ON c.father_id = d.id WHERE FIND_IN_SET( a.id, ( SELECT GROUP_CONCAT(question_ids) FROM bq_testcase WHERE house_ids = %s GROUP BY house_ids ) ) AND a. STATUS = b. STATUS = c. STATUS = 1 ORDER BY a.id ''' # 表 sql_3 = ''' INSERT INTO mvp_page_display_match ( house_id, question_id, question_title, sub_question_id, sub_question_title, option_id, option_content, sub_option_id, sub_option_content, data_item_tab, data_item_title, data_item_name, STATUS, creator, created ) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 1, 'binren', now() ) ''' sql_4 = ''' SELECT id, sub_question_id, sub_option_id, data_item_name FROM mvp_page_display_match WHERE STATUS = 1 ''' sql_5 = ''' SELECT id FROM bq_testcase WHERE STATUS = 1 AND FIND_IN_SET( ( SELECT id FROM bq_house WHERE STATUS = 1 AND NAME = %s ), house_ids ) ''' sql_6 = ''' insert INTO mvp_page_display_data ( crowd_info_id, match_id, page_display_rule_id, name, value, STATUS, creator, created ) VALUES (%s, %s, %s, %s, %s, 1, 'binren', now()) ''' sql_7 = ''' SELECT a.testcase_id, a.uuid, GROUP_CONCAT( DISTINCT b.sub_option_content ) FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id WHERE a.testcase_id IN (84, 85, 86, 87) AND b.father_id IN (47, 48, 234, 254) and a.sub_question_id = b.sub_question_id and a.testcase_id = b.testcase_id GROUP BY a.testcase_id, a.uuid ''' sql_8 = ''' SELECT a.uuid, a.title, a.testcase_id, b.father_id, b.father_content, b.sub_option_id, b.sub_option_content FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id WHERE a.testcase_id = b.testcase_id AND a.sub_question_id = b.sub_question_id AND a.testcase_id IN (84, 85, 86, 87) ''' sql_9 = ''' SELECT x.city ,x.uuid ,x.sex ,x.nld ,x.zhifuli ,x.juzhujiegou ,m.father_content ,m.father_id ,m.sub_question_id ,m.sub_question_content ,m.sub_option_id ,m.sub_option_content ,m.testcase_id ,m.title FROM ( SELECT e.uuid, e.sex, f.nld, c.zhifuli, d.city, w.juzhujiegou FROM ( SELECT a.testcase_id, a.uuid, b.sub_option_content AS sex FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON (a.score = b.sub_option_id or a.score = b.score) WHERE a.testcase_id IN (84, 85, 86, 87) AND b.father_id = 47 AND a.sub_question_id = b.sub_question_id AND a.testcase_id = b.testcase_id GROUP BY a.testcase_id, a.uuid ) e LEFT JOIN ( SELECT a.uuid, b.sub_option_content AS nld FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON (a.score = b.sub_option_id or a.score = b.score) WHERE a.testcase_id IN (84, 85, 86, 87) AND b.father_id = 48 AND a.sub_question_id = b.sub_question_id AND a.testcase_id = b.testcase_id GROUP BY a.testcase_id, a.uuid ) f ON e.uuid = f.uuid LEFT JOIN ( SELECT a.uuid, b.sub_option_content AS zhifuli FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON (a.score = b.sub_option_id or a.score = b.score) WHERE a.testcase_id IN (84, 85, 86, 87) AND b.father_id = 234 AND a.sub_question_id = b.sub_question_id AND a.testcase_id = b.testcase_id GROUP BY a.testcase_id, a.uuid ) c ON f.uuid = c.uuid LEFT JOIN ( SELECT a.uuid, b.sub_option_content AS city FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON (a.score = b.sub_option_id or a.score = b.score) WHERE a.testcase_id IN (84, 85, 86, 87) AND b.father_id = 254 AND a.sub_question_id = b.sub_question_id AND a.testcase_id = b.testcase_id GROUP BY a.testcase_id, a.uuid ) d ON c.uuid = d.uuid left join ( SELECT a.uuid, b.sub_option_content AS juzhujiegou FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON (a.score = b.sub_option_id or a.score = b.score) WHERE a.testcase_id IN (84, 85, 86, 87) AND b.father_id = 211 AND a.sub_question_id = b.sub_question_id AND a.testcase_id = b.testcase_id GROUP BY a.testcase_id, a.uuid ) w on d.uuid = w.uuid ) x LEFT JOIN ( SELECT a.uuid, a.title, a.testcase_id, b.father_id, b.father_content, b.sub_question_id, b.sub_question_content, b.sub_option_id, b.sub_option_content FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.sub_question_id = b.sub_question_id WHERE a.testcase_id = b.testcase_id AND (a.score = b.sub_option_id or a.score = b.score) AND a.testcase_id IN (84, 85, 86, 87) and province != '山西省' ) m ON x.uuid = m.uuid ''' sql_10 = ''' INSERT INTO f_t_daren_score_2 ( testcase_id, title, uuid, score, created, sub_question_id ) VALUE (84, '有钱人的生活就是很枯燥的……', %s, %s, %s, %s) ''' sql_11 = ''' select id, title_type, title_in_page, sub_question_id from mvp_page_display_rule where status = 1 ''' sql_12 = ''' INSERT INTO mvp_page_display_rule ( house_id, function_id, title_type, title_in_page, sub_question_id, STATUS, creator, created ) VALUE ( 67, 1, %s, %s, %s, 1, 'binren', now() ) ''' sql_13 = ''' INSERT INTO mvp_crowd_info ( house_id, pay_ability, age_area, city_name, life_cycle, STATUS, creator, created ) VALUES ( 67, %s, %s, %s, %s, 1, 'binren', now() ) ''' def __init__(self): self.shangju_db = MysqlDB('shangju') self.marketing_db = MysqlDB('bi_report') self.linshi_db = MysqlDB('linshi', db_type=1) self.options_info = ExcelUtil('工作表6', 'tongce1.xlsx').read_options_info() self.table_type_info = ExcelUtil('新增项目数据项类型排序与展示图表类型管理表', 'table_type.xlsx').get_table_type_info() def close(self): self.shangju_db.close() self.linshi_db.close() self.marketing_db.close() def get_question_info_from_db(self): result = self.shangju_db.select(self.sql_2, [67]) insert_data = [] for rt in result: rt = list(rt) option_configuration = self.options_info.get('67' + str(rt[6])) if option_configuration and len(option_configuration) == 4: rt.insert(0, 67) rt.extend(option_configuration[0:3]) insert_data.append(rt) return insert_data def get_option_match_info(self): result = self.linshi_db.select(self.sql_4) return result # 支付力:376,年龄:29,城市:377,居住结构:395。 sql_14 = ''' select content from bq_sub_option where sub_question_id = %s ''' def insert_into_mvp_crowd_info(self): zhifuli = self.shangju_db.select(self.sql_14, [376]) age = self.shangju_db.select(self.sql_14, [29]) city = self.shangju_db.select(self.sql_14, [377]) juzhujiegou = self.shangju_db.select(self.sql_14, [395]) insert_data = [] for zfl in zhifuli: for a in age: for cy in city: for jzjg in juzhujiegou: insert_data.append([zfl, a, cy, jzjg]) if len(insert_data) > 0: # self.linshi_db.truncate('mvp_crowd_info') self.linshi_db.add_some(self.sql_13, insert_data) sql_15 = ''' select id, pay_ability, age_area, city_name, life_cycle from mvp_crowd_info where status = 1 and house_id = 67 ''' def get_crowd_info(self): data = self.linshi_db.select(self.sql_15) return data def insert_into_rule(self): option_info = self.options_info insert_data = [] sub_question_ids = [] for key in option_info.keys(): data = option_info[key] if data[3] not in sub_question_ids: insert_data.append([data[0], data[1], data[3]]) sub_question_ids.append(data[3]) if len(insert_data) > 0: self.linshi_db.truncate('mvp_page_display_rule') self.linshi_db.add_some(self.sql_12, insert_data) def get_rule_data_info(self): data = self.linshi_db.select(self.sql_11) return data sql_16 = ''' insert INTO mvp_page_display_data ( crowd_info_id, match_id, value, STATUS, creator, created ) VALUES (%s, %s, %s, 1, 'binren', now()) ''' def lingdi_data_scores(self): # 1: 写入mvp_crowd_info # self.insert_into_mvp_crowd_info() crowd_info = self.get_crowd_info() # 2: 写入rule # self.insert_into_rule() rule = self.get_rule_data_info() # 3: 读入答题数据 self.answers = self.marketing_db.select(self.sql_9) # 4: 写入match信息 match_data = self.get_question_info_from_db() self.linshi_db.truncate('mvp_page_display_match') self.linshi_db.add_some(self.sql_3, match_data) self.match_data_info = self.get_option_match_info() self.linshi_db.truncate('mvp_page_display_data') # 筛选写入data的数据 count = 0 no_data_case = [] try: for ci in crowd_info: insert_data = [] crowd_info_id = ci[0] zhifuli = ci[1] age = ci[2] city = ci[3] juzhujiegou = ci[4] data = self.filter_people(city, age, zhifuli, juzhujiegou) data.sort(key=lambda obj: obj[0]) for key, questions_data in groupby(data, key=lambda obj: obj[0]): question_data_list = [] for qd in questions_data: question_data_list.append([x for x in qd]) rule_id = self.get_rule_id(key, rule) if rule_id is not None: question_people = len(question_data_list) if question_people > 0: question_data_list.sort(key=lambda obj: obj[3]) for option_name, option_data_1 in groupby(question_data_list, key=lambda obj: obj[3]): option_data_list = [] for od in option_data_1: option_data_list.append([x for x in od]) if len(option_data_list) > 0: match_id = 0 option_name_alias = option_name option_id = option_data_list[0][2] for md in self.match_data_info: if str(md[1]) == str(key) and str(md[2]) == str(option_id): match_id = md[0] option_name_alias = md[3] insert_data.append([crowd_info_id, match_id, rule_id, option_name_alias, len(option_data_list)]) else: no_data_case.append([zhifuli, city, age, juzhujiegou, option_name]) else: no_data_case.append([zhifuli, city, age, juzhujiegou, key]) count += len(insert_data) self.linshi_db.add_some(self.sql_6, insert_data) # isnert_data_all = [] # quanliang_scores = self.scores() # for q_s in quanliang_scores: # rule_id = self.get_rule_id(q_s[0], rule) # if rule_id: # for md in self.match_data_info: # if str(md[1]) == str(q_s[0]) and str(md[2]) == str(q_s[1]): # match_id = md[0] # option_name_alias = md[3] # isnert_data_all.append([5405, match_id, rule_id, option_name_alias, q_s[2]]) # self.linshi_db.add_some(self.sql_6, isnert_data_all) # count += len(isnert_data_all) return {'写入库中的数据': count, '无数据': len(no_data_case)} except Exception as e: return str(e) sql_20 = ''' UPDATE mvp_page_display_rule SET display_type = %s, display_size = %s WHERE title_in_page = %s ''' def table_type_insert(self): for data in self.table_type_info: self.linshi_db.update(self.sql_20, data) def get_rule_id(self, sub_question_id, rule): for re in rule: if str(re[3]) == str(sub_question_id): return re[0] def filter_people(self, city, age, zhifuli, juzhujiegou): result = [] for answer in self.answers: if answer[0] == city and answer[3] == age and answer[4] == zhifuli and answer[5] == juzhujiegou: # 子题id, 子题题目,子选项id,子选项题目 if answer[8] is not None and answer[9] is not None and answer[12] is not None and answer[13]: result.append([answer[8], answer[9], answer[10], answer[11]]) return result def get_testcase_ids_by_house_name(self, house_name): testcase_ids = self.shangju_db.select(self.sql_5, [house_name]) return testcase_ids def scores(self): testcase_ids = self.get_testcase_ids_by_house_name('同策 领地') db_data = self.marketing_db.select(self.sql_1, [testcase_ids]) answer = [] for data in db_data: answer.append([data[0], data[2], data[4]]) answer.sort(key=lambda obj: obj[0]) sub_option_score = [] for sub_question_id, others in groupby(answer, key=lambda obj: obj[0]): others_data = [] for ot in others: others_data.append([x for x in ot]) sub_question_count = sum([x[2] for x in others_data]) for td in others_data: sub_option_id = td[1] sub_option_count = td[2] rate = int(sub_option_count) / sub_question_count sub_option_score.append([sub_question_id, sub_option_id, sub_option_count]) return sub_option_score def tongce(self): """ tongce测试数据清洗 :return: """ match_data = self.get_question_info_from_db() # self.linshi_db.truncate('mvp_page_display_match') self.linshi_db.add_some(self.sql_3, match_data) scores = self.scores() match_data_info = self.get_option_match_info() dispaly_data = [] for score in scores: sub_question_id = score[0] sub_option_id = score[1] value = score[2] for mi in match_data_info: if str(mi[1]) == str(sub_question_id) and str(mi[2]) == str(sub_option_id): dispaly_data.append([mi[0], value]) if len(dispaly_data) > 0: self.linshi_db.truncate('mvp_page_display_data') self.linshi_db.add_some(self.sql_6, dispaly_data) return {'插入数据条数': len(dispaly_data), 'scores': dispaly_data} def wenjuanxin_84(self): excel = ExcelUtil('Sheet1', '84_1500.xlsx') insert_data = excel.wenjuanxin_84() self.linshi_db.add_some(self.sql_10, insert_data) print() sql_17 = ''' SELECT id, uuid, created, `status`, sub_question_id, testcase_id, title, score, province, city, district FROM f_t_daren_score_2 WHERE testcase_id IN (84, 85, 86, 87) AND sub_question_id = 377 AND score = 2917 AND ( city IN ( '昆明市', '西安市', '咸阳市', '郑州市', '洛阳市', '武汉市', '襄阳市', '重庆市', '璧山' ) OR province IN ( '昆明市', '西安市', '咸阳市', '郑州市', '洛阳市', '武汉市', '襄阳市', '重庆市', '璧山' ) OR district IN ( '昆明市', '西安市', '咸阳市', '郑州市', '洛阳市', '武汉市', '襄阳市', '重庆市', '璧山区' ) ) ''' sql_18 = ''' update f_t_daren_score_2 set score = %s where id = %s ''' city_info = { '昆明市': 2918, '西安市': 2919, '咸阳市': 2920, '郑州市': 2921, '洛阳市': 2922, '武汉市': 2923, '襄阳市': 2924, '重庆市': 2925, '璧山市': 2926 } def other_city_clean(self): update_data = [] need_update_data = self.marketing_db.select(self.sql_17) for nd in need_update_data: id = nd[0] province = nd[8] city = nd[9] district = nd[10] bishan = self.city_info.get(district) if bishan: update_data.append([bishan, id]) else: city_id = self.city_info.get(city) if city_id: update_data.append([city_id, id]) else: province_id = self.city_info.get(province) if province_id: update_data.append([province_id, id]) self.marketing_db.add_some(self.sql_18, update_data) return len(update_data) sql_19 = ''' select GROUP_CONCAT(id)from f_t_daren_score_2 where testcase_id in (84, 85, 86, 87) and score = 2925 and district = '璧山区' update f_t_daren_score_2 set score = 2926 where id in (979728,979890,981251,984783,985250,985564,990999) ''' def chongqin_to_bishan(self): pass if __name__ == '__main__': tongce = TongCe() match_data = tongce.get_question_info_from_db() tongce.linshi_db.truncate('mvp_page_display_match') tongce.linshi_db.add_some(tongce.sql_3, match_data)