123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735 |
- from excel_util import ExcelUtil
- from mysql_db import MysqlDB
- from itertools import groupby
- class TongCe:
- """
- 同策测试数据清洗
- """
- # 统计筒体结果
- sql_1 = '''
- SELECT
- a.sub_question_id,
- b.sub_question_content,
- a.score,
- b.sub_option_content,
- count(1)
- FROM
- f_t_daren_score_2 a
- LEFT JOIN d_shangju_tiku_02 b ON a. STATUS = b. STATUS = 1
- WHERE
- a.testcase_id in %s and
- a.testcase_id = b.testcase_id
- AND a.sub_question_id = b.sub_question_id
- AND (
- a.score = b.score
- OR a.score = b.sub_option_id
- )
- GROUP BY
- b.sub_question_content,
- a.score,
- b.sub_option_content
- '''
- # 选项信息
- sql_2 = '''
- SELECT
- b.id as question_id,
- b. NAME as question_title,
- a.id as sub_question_id,
- a. NAME as sub_question_title,
- d.id as option_id,
- d.content as option_title,
- c.id as sub_option_id,
- c.content as sub_option_title
- FROM
- bq_sub_question a
- LEFT JOIN bq_question b ON a.father_id = b.id
-
- LEFT JOIN bq_sub_option c ON a.id = c.sub_question_id
- LEFT JOIN bq_option d ON c.father_id = d.id
- WHERE
- FIND_IN_SET(
- a.id,
- (
- SELECT
- GROUP_CONCAT(question_ids)
- FROM
- bq_testcase
- WHERE
- house_ids = %s
- GROUP BY
- house_ids
- )
- )
- AND a. STATUS = b. STATUS = c. STATUS = 1
- ORDER BY
- a.id
- '''
- # 表
- sql_3 = '''
- INSERT INTO mvp_page_display_match (
- house_id,
- question_id,
- question_title,
- sub_question_id,
- sub_question_title,
- option_id,
- option_content,
- sub_option_id,
- sub_option_content,
- data_item_tab,
- data_item_title,
- data_item_name,
- STATUS,
- creator,
- created
- )
- VALUES
- (
- %s,
- %s,
- %s,
- %s,
- %s,
- %s,
- %s,
- %s,
- %s,
- %s,
- %s,
- %s,
- 1,
- 'binren',
- now()
- )
- '''
- sql_4 = '''
- SELECT
- id,
- sub_question_id,
- sub_option_id,
- data_item_name
- FROM
- mvp_page_display_match
- WHERE
- STATUS = 1
- '''
- sql_5 = '''
- SELECT
- id
- FROM
- bq_testcase
- WHERE
- STATUS = 1
- AND FIND_IN_SET(
- (
- SELECT
- id
- FROM
- bq_house
- WHERE
- STATUS = 1
- AND NAME = %s
- ),
- house_ids
- )
- '''
- sql_6 = '''
- insert INTO mvp_page_display_data (
- crowd_info_id,
- match_id,
- page_display_rule_id,
- name,
- value,
- STATUS,
- creator,
- created
- )
- VALUES
- (%s, %s, %s, %s, %s, 1, 'binren', now())
- '''
- sql_7 = '''
- SELECT
- a.testcase_id,
- a.uuid,
- GROUP_CONCAT(
- DISTINCT b.sub_option_content
- )
- FROM
- f_t_daren_score_2 a
- LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
- WHERE
- a.testcase_id IN (84, 85, 86, 87)
- AND b.father_id IN (47, 48, 234, 254)
- and a.sub_question_id = b.sub_question_id and a.testcase_id = b.testcase_id
- GROUP BY
- a.testcase_id,
- a.uuid
- '''
- sql_8 = '''
- SELECT
- a.uuid,
- a.title,
- a.testcase_id,
- b.father_id,
- b.father_content,
- b.sub_option_id,
- b.sub_option_content
- FROM
- f_t_daren_score_2 a
- LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
- WHERE
- a.testcase_id = b.testcase_id
- AND a.sub_question_id = b.sub_question_id
- AND a.testcase_id IN (84, 85, 86, 87)
- '''
- sql_9 = '''
- SELECT
- x.city
- ,x.uuid
- ,x.sex
- ,x.nld
- ,x.zhifuli
- ,x.juzhujiegou
- ,m.father_content
- ,m.father_id
- ,m.sub_question_id
- ,m.sub_question_content
- ,m.sub_option_id
- ,m.sub_option_content
- ,m.testcase_id
- ,m.title
- FROM
- (
- SELECT
- e.uuid,
- e.sex,
- f.nld,
- c.zhifuli,
- d.city,
- w.juzhujiegou
- FROM
- (
- SELECT
- a.testcase_id,
- a.uuid,
- b.sub_option_content AS sex
- FROM
- f_t_daren_score_2 a
- LEFT JOIN d_shangju_tiku_02 b ON (a.score = b.sub_option_id or a.score = b.score)
- WHERE
- a.testcase_id IN (84, 85, 86, 87)
- AND b.father_id = 47
- AND a.sub_question_id = b.sub_question_id
- AND a.testcase_id = b.testcase_id
- GROUP BY
- a.testcase_id,
- a.uuid
- ) e
- LEFT JOIN (
- SELECT
- a.uuid,
- b.sub_option_content AS nld
- FROM
- f_t_daren_score_2 a
- LEFT JOIN d_shangju_tiku_02 b ON (a.score = b.sub_option_id or a.score = b.score)
- WHERE
- a.testcase_id IN (84, 85, 86, 87)
- AND b.father_id = 48
- AND a.sub_question_id = b.sub_question_id
- AND a.testcase_id = b.testcase_id
- GROUP BY
- a.testcase_id,
- a.uuid
- ) f ON e.uuid = f.uuid
- LEFT JOIN (
- SELECT
- a.uuid,
- b.sub_option_content AS zhifuli
- FROM
- f_t_daren_score_2 a
- LEFT JOIN d_shangju_tiku_02 b ON (a.score = b.sub_option_id or a.score = b.score)
- WHERE
- a.testcase_id IN (84, 85, 86, 87)
- AND b.father_id = 234
- AND a.sub_question_id = b.sub_question_id
- AND a.testcase_id = b.testcase_id
- GROUP BY
- a.testcase_id,
- a.uuid
- ) c ON f.uuid = c.uuid
- LEFT JOIN (
- SELECT
- a.uuid,
- b.sub_option_content AS city
- FROM
- f_t_daren_score_2 a
- LEFT JOIN d_shangju_tiku_02 b ON (a.score = b.sub_option_id or a.score = b.score)
- WHERE
- a.testcase_id IN (84, 85, 86, 87)
- AND b.father_id = 254
- AND a.sub_question_id = b.sub_question_id
- AND a.testcase_id = b.testcase_id
- GROUP BY
- a.testcase_id,
- a.uuid
- ) d ON c.uuid = d.uuid
- left join (
- SELECT
- a.uuid,
- b.sub_option_content AS juzhujiegou
- FROM
- f_t_daren_score_2 a
- LEFT JOIN d_shangju_tiku_02 b ON (a.score = b.sub_option_id or a.score = b.score)
- WHERE
- a.testcase_id IN (84, 85, 86, 87)
- AND b.father_id = 211
- AND a.sub_question_id = b.sub_question_id
- AND a.testcase_id = b.testcase_id
- GROUP BY
- a.testcase_id,
- a.uuid
- ) w on d.uuid = w.uuid
- ) x
- LEFT JOIN (
- SELECT
- a.uuid,
- a.title,
- a.testcase_id,
- b.father_id,
- b.father_content,
- b.sub_question_id,
- b.sub_question_content,
- b.sub_option_id,
- b.sub_option_content
- FROM
- f_t_daren_score_2 a
- LEFT JOIN d_shangju_tiku_02 b ON a.sub_question_id = b.sub_question_id
- WHERE
- a.testcase_id = b.testcase_id
- AND (a.score = b.sub_option_id or a.score = b.score)
- AND a.testcase_id IN (84, 85, 86, 87) and province != '山西省'
- ) m ON x.uuid = m.uuid
- '''
- sql_10 = '''
- INSERT INTO f_t_daren_score_2 (
- testcase_id,
- title,
- uuid, score, created, sub_question_id
- )
- VALUE
- (84, '有钱人的生活就是很枯燥的……', %s, %s, %s, %s)
-
- '''
- sql_11 = '''
- select id, title_type, title_in_page, sub_question_id from mvp_page_display_rule where status = 1
- '''
- sql_12 = '''
- INSERT INTO mvp_page_display_rule (
- house_id,
- function_id,
- title_type,
- title_in_page,
- sub_question_id,
- STATUS,
- creator,
- created
- )
- VALUE
- (
- 67,
- 1,
- %s,
- %s,
- %s,
- 1,
- 'binren',
- now()
- )
-
- '''
- sql_13 = '''
- INSERT INTO mvp_crowd_info (
- house_id,
- pay_ability,
- age_area,
- city_name,
- life_cycle,
- STATUS,
- creator,
- created
- )
- VALUES
- (
- 67,
- %s,
- %s,
- %s,
- %s,
- 1,
- 'binren',
- now()
- )
- '''
- def __init__(self):
- self.shangju_db = MysqlDB('shangju')
- self.marketing_db = MysqlDB('bi_report')
- self.linshi_db = MysqlDB('linshi', db_type=1)
- self.options_info = ExcelUtil('工作表6', 'tongce1.xlsx').read_options_info()
- self.table_type_info = ExcelUtil('新增项目数据项类型排序与展示图表类型管理表', 'table_type.xlsx').get_table_type_info()
- def close(self):
- self.shangju_db.close()
- self.linshi_db.close()
- self.marketing_db.close()
- def get_question_info_from_db(self):
- result = self.shangju_db.select(self.sql_2, [67])
- insert_data = []
- for rt in result:
- rt = list(rt)
- option_configuration = self.options_info.get('67' + str(rt[6]))
- if option_configuration and len(option_configuration) == 4:
- rt.insert(0, 67)
- rt.extend(option_configuration[0:3])
- insert_data.append(rt)
- return insert_data
- def get_option_match_info(self):
- result = self.linshi_db.select(self.sql_4)
- return result
- # 支付力:376,年龄:29,城市:377,居住结构:395。
- sql_14 = '''
- select content from bq_sub_option where sub_question_id = %s
- '''
- def insert_into_mvp_crowd_info(self):
- zhifuli = self.shangju_db.select(self.sql_14, [376])
- age = self.shangju_db.select(self.sql_14, [29])
- city = self.shangju_db.select(self.sql_14, [377])
- juzhujiegou = self.shangju_db.select(self.sql_14, [395])
- insert_data = []
- for zfl in zhifuli:
- for a in age:
- for cy in city:
- for jzjg in juzhujiegou:
- insert_data.append([zfl, a, cy, jzjg])
- if len(insert_data) > 0:
- # self.linshi_db.truncate('mvp_crowd_info')
- self.linshi_db.add_some(self.sql_13, insert_data)
- sql_15 = '''
- select id, pay_ability, age_area, city_name, life_cycle from mvp_crowd_info where status = 1 and house_id = 67
- '''
- def get_crowd_info(self):
- data = self.linshi_db.select(self.sql_15)
- return data
- def insert_into_rule(self):
- option_info = self.options_info
- insert_data = []
- sub_question_ids = []
- for key in option_info.keys():
- data = option_info[key]
- if data[3] not in sub_question_ids:
- insert_data.append([data[0], data[1], data[3]])
- sub_question_ids.append(data[3])
- if len(insert_data) > 0:
- self.linshi_db.truncate('mvp_page_display_rule')
- self.linshi_db.add_some(self.sql_12, insert_data)
- def get_rule_data_info(self):
- data = self.linshi_db.select(self.sql_11)
- return data
- sql_16 = '''
- insert INTO mvp_page_display_data (
- crowd_info_id,
- match_id,
- value,
- STATUS,
- creator,
- created
- )
- VALUES
- (%s, %s, %s, 1, 'binren', now())
- '''
- def lingdi_data_scores(self):
- # 1: 写入mvp_crowd_info
- # self.insert_into_mvp_crowd_info()
- crowd_info = self.get_crowd_info()
- # 2: 写入rule
- # self.insert_into_rule()
- rule = self.get_rule_data_info()
- # 3: 读入答题数据
- self.answers = self.marketing_db.select(self.sql_9)
- # 4: 写入match信息
- match_data = self.get_question_info_from_db()
- self.linshi_db.truncate('mvp_page_display_match')
- self.linshi_db.add_some(self.sql_3, match_data)
- self.match_data_info = self.get_option_match_info()
- self.linshi_db.truncate('mvp_page_display_data')
- # 筛选写入data的数据
- count = 0
- no_data_case = []
- try:
- for ci in crowd_info:
- insert_data = []
- crowd_info_id = ci[0]
- zhifuli = ci[1]
- age = ci[2]
- city = ci[3]
- juzhujiegou = ci[4]
- data = self.filter_people(city, age, zhifuli, juzhujiegou)
- data.sort(key=lambda obj: obj[0])
- for key, questions_data in groupby(data, key=lambda obj: obj[0]):
- question_data_list = []
- for qd in questions_data:
- question_data_list.append([x for x in qd])
- rule_id = self.get_rule_id(key, rule)
- if rule_id is not None:
- question_people = len(question_data_list)
- if question_people > 0:
- question_data_list.sort(key=lambda obj: obj[3])
- for option_name, option_data_1 in groupby(question_data_list, key=lambda obj: obj[3]):
- option_data_list = []
- for od in option_data_1:
- option_data_list.append([x for x in od])
- if len(option_data_list) > 0:
- match_id = 0
- option_name_alias = option_name
- option_id = option_data_list[0][2]
- for md in self.match_data_info:
- if str(md[1]) == str(key) and str(md[2]) == str(option_id):
- match_id = md[0]
- option_name_alias = md[3]
- insert_data.append([crowd_info_id, match_id, rule_id, option_name_alias, len(option_data_list)])
- else:
- no_data_case.append([zhifuli, city, age, juzhujiegou, option_name])
- else:
- no_data_case.append([zhifuli, city, age, juzhujiegou, key])
- count += len(insert_data)
- self.linshi_db.add_some(self.sql_6, insert_data)
- # isnert_data_all = []
- # quanliang_scores = self.scores()
- # for q_s in quanliang_scores:
- # rule_id = self.get_rule_id(q_s[0], rule)
- # if rule_id:
- # for md in self.match_data_info:
- # if str(md[1]) == str(q_s[0]) and str(md[2]) == str(q_s[1]):
- # match_id = md[0]
- # option_name_alias = md[3]
- # isnert_data_all.append([5405, match_id, rule_id, option_name_alias, q_s[2]])
- # self.linshi_db.add_some(self.sql_6, isnert_data_all)
- # count += len(isnert_data_all)
- return {'写入库中的数据': count, '无数据': len(no_data_case)}
- except Exception as e:
- return str(e)
- sql_20 = '''
- UPDATE mvp_page_display_rule
- SET display_type = %s,
- display_size = %s
- WHERE
- title_in_page = %s
- '''
- def table_type_insert(self):
- for data in self.table_type_info:
- self.linshi_db.update(self.sql_20, data)
- def get_rule_id(self, sub_question_id, rule):
- for re in rule:
- if str(re[3]) == str(sub_question_id):
- return re[0]
- def filter_people(self, city, age, zhifuli, juzhujiegou):
- result = []
- for answer in self.answers:
- if answer[0] == city and answer[3] == age and answer[4] == zhifuli and answer[5] == juzhujiegou:
- # 子题id, 子题题目,子选项id,子选项题目
- if answer[8] is not None and answer[9] is not None and answer[12] is not None and answer[13]:
- result.append([answer[8], answer[9], answer[10], answer[11]])
- return result
- def get_testcase_ids_by_house_name(self, house_name):
- testcase_ids = self.shangju_db.select(self.sql_5, [house_name])
- return testcase_ids
- def scores(self):
- testcase_ids = self.get_testcase_ids_by_house_name('同策 领地')
- db_data = self.marketing_db.select(self.sql_1, [testcase_ids])
- answer = []
- for data in db_data:
- answer.append([data[0], data[2], data[4]])
- answer.sort(key=lambda obj: obj[0])
- sub_option_score = []
- for sub_question_id, others in groupby(answer, key=lambda obj: obj[0]):
- others_data = []
- for ot in others:
- others_data.append([x for x in ot])
- sub_question_count = sum([x[2] for x in others_data])
- for td in others_data:
- sub_option_id = td[1]
- sub_option_count = td[2]
- rate = int(sub_option_count) / sub_question_count
- sub_option_score.append([sub_question_id, sub_option_id, sub_option_count])
- return sub_option_score
- def tongce(self):
- """
- tongce测试数据清洗
- :return:
- """
- match_data = self.get_question_info_from_db()
- # self.linshi_db.truncate('mvp_page_display_match')
- self.linshi_db.add_some(self.sql_3, match_data)
- scores = self.scores()
- match_data_info = self.get_option_match_info()
- dispaly_data = []
- for score in scores:
- sub_question_id = score[0]
- sub_option_id = score[1]
- value = score[2]
- for mi in match_data_info:
- if str(mi[1]) == str(sub_question_id) and str(mi[2]) == str(sub_option_id):
- dispaly_data.append([mi[0], value])
- if len(dispaly_data) > 0:
- self.linshi_db.truncate('mvp_page_display_data')
- self.linshi_db.add_some(self.sql_6, dispaly_data)
- return {'插入数据条数': len(dispaly_data), 'scores': dispaly_data}
- def wenjuanxin_84(self):
- excel = ExcelUtil('Sheet1', '84_1500.xlsx')
- insert_data = excel.wenjuanxin_84()
- self.linshi_db.add_some(self.sql_10, insert_data)
- print()
- sql_17 = '''
- SELECT
- id,
- uuid,
- created,
- `status`,
- sub_question_id,
- testcase_id,
- title,
- score,
- province,
- city,
- district
- FROM
- f_t_daren_score_2
- WHERE
- testcase_id IN (84, 85, 86, 87)
- AND sub_question_id = 377
- AND score = 2917
- AND (
- city IN (
- '昆明市',
- '西安市',
- '咸阳市',
- '郑州市',
- '洛阳市',
- '武汉市',
- '襄阳市',
- '重庆市',
- '璧山'
- )
- OR province IN (
- '昆明市',
- '西安市',
- '咸阳市',
- '郑州市',
- '洛阳市',
- '武汉市',
- '襄阳市',
- '重庆市',
- '璧山'
- )
- OR district IN (
- '昆明市',
- '西安市',
- '咸阳市',
- '郑州市',
- '洛阳市',
- '武汉市',
- '襄阳市',
- '重庆市',
- '璧山区'
- )
- )
- '''
- sql_18 = '''
- update f_t_daren_score_2 set score = %s where id = %s
- '''
- city_info = {
- '昆明市': 2918,
- '西安市': 2919,
- '咸阳市': 2920,
- '郑州市': 2921,
- '洛阳市': 2922,
- '武汉市': 2923,
- '襄阳市': 2924,
- '重庆市': 2925,
- '璧山市': 2926
- }
- def other_city_clean(self):
- update_data = []
- need_update_data = self.marketing_db.select(self.sql_17)
- for nd in need_update_data:
- id = nd[0]
- province = nd[8]
- city = nd[9]
- district = nd[10]
- bishan = self.city_info.get(district)
- if bishan:
- update_data.append([bishan, id])
- else:
- city_id = self.city_info.get(city)
- if city_id:
- update_data.append([city_id, id])
- else:
- province_id = self.city_info.get(province)
- if province_id:
- update_data.append([province_id, id])
- self.marketing_db.add_some(self.sql_18, update_data)
- return len(update_data)
- sql_19 = '''
- select GROUP_CONCAT(id)from f_t_daren_score_2 where testcase_id in (84, 85, 86, 87) and score = 2925 and district = '璧山区'
- update f_t_daren_score_2 set score = 2926 where id in (979728,979890,981251,984783,985250,985564,990999)
- '''
- def chongqin_to_bishan(self):
- pass
- if __name__ == '__main__':
- tongce = TongCe()
- match_data = tongce.get_question_info_from_db()
- tongce.linshi_db.truncate('mvp_page_display_match')
- tongce.linshi_db.add_some(tongce.sql_3, match_data)
|