flask_app.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. from flask import Flask, request, render_template, jsonify
  2. import os
  3. from werkzeug.utils import secure_filename
  4. import time
  5. import json
  6. import pandas as pd
  7. import openpyxl as ox
  8. from itertools import groupby
  9. import os
  10. import pymysql as ps
  11. app = Flask(__name__)
  12. class Mvp:
  13. """
  14. ce mvp 答题数据统计
  15. 城市特例 北京市,上海市, 重庆市,天津市
  16. """
  17. age_dict = {
  18. '00-04年生': '00后',
  19. '05-09年生': '05后',
  20. '50-59年生': '50后',
  21. '60-69年生': '60后',
  22. '70-74年生': '70后',
  23. '75-79年生': '75后',
  24. '80-84年生': '80后',
  25. '85-89年生': '85后',
  26. '90-94年生': '90后',
  27. '95-99年生': '95后'
  28. }
  29. crowd = ['A', 'B', 'C', 'D', 'E', 'F']
  30. # 获取答题记录中城市列表
  31. sql_1 = 'select city from f_t_daren_score_2 group by city'
  32. # 获取父选项和父题id
  33. sql_2 = 'select a.id, a.content, b.id, b.name from bq_option a left join bq_question b on a.question_id = b.id ' \
  34. 'where a.serial_number = %s and b.serial_number = %s and a.status = b.status = 1 '
  35. # 数据插入表mvp_question_classification
  36. sql_3 = 'insert into mvp_question_classification(question_serial_number, question_content, ' \
  37. 'option_serial_number, option_content, tag, corr) values(%s, %s, %s, %s, %s, %s) '
  38. # 获取答题人的年龄段集合
  39. sql_4 = 'select nld from f_t_daren_score_2 group by nld'
  40. # 根据城市,年龄段,人群分类统计答题记录数
  41. sql_5 = 'select group_type, COUNT(uuid) from f_t_daren_score_2 where (city = %s or province = %s) and nld ' \
  42. '= %s and uuid in %s group by group_type '
  43. # 根据父选项获取子选项id列表
  44. sql_6 = 'SELECT c.id, c.sub_question_id, c.content FROM bq_sub_option c WHERE c.father_id in (SELECT a.id FROM ' \
  45. 'bq_option a ' \
  46. 'LEFT JOIN bq_question b ON a.question_id = b.id WHERE a.serial_number = %s AND b.serial_number = %s ' \
  47. 'and a.status = 1 and b.status = 1) and c.status = 1 '
  48. # 根据子题id获取包含子题id的测试
  49. sql_7 = 'select group_type from bq_testcase where status = 1 and FIND_IN_SET(%s, question_ids)'
  50. # 根据子选项id统计答题数
  51. sql_8 = 'SELECT count(uuid) FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.sub_question_id = ' \
  52. 'b.sub_question_id AND a.score = b.score WHERE a.testcase_id = b.testcase_id and b.sub_option_id in %s' \
  53. 'and (a.city = %s or a.province = %s) and a.nld = %s and a.uuid in %s'
  54. # 计算值写入表汇总
  55. sql_9 = 'insert into mvp_standard_score(city, age, tag, crowd_type, score) VALUES(%s, %s, %s, %s, %s)'
  56. # 获取一个uuid下答题的子选项id列表
  57. sql_10 = 'select DISTINCT uuid, GROUP_CONCAT(DISTINCT b.sub_option_id) from f_t_daren_score_2 a left join ' \
  58. 'd_shangju_tiku_02 b on a.sub_question_id = b.sub_question_id and a.score = b.score where a.status = ' \
  59. 'b.status = 1 group by uuid '
  60. def __init__(self, path=None):
  61. self.shangju_db = MysqlDB('shangju')
  62. self.marketing_db = MysqlDB('bi_report')
  63. # self.shangju_db.truncate('mvp_standard_score')
  64. self.tag_data = ExcelUtil(file_name=path).init_mvp_data()
  65. self.crowd_info = ExcelUtil(file_name=path, sheet_name='选项-人群分类对应表').init_crowd_info()
  66. self.citys = self.init_city()
  67. self.age = self.init_age()
  68. self.people_sub_option_ids = self.marketing_db.select(self.sql_10)
  69. self.crowd_contain_sub_option_ids = self.get_crowd_contain_sub_option_ids()
  70. self.module_scores = ExcelUtil(file_name='set-behavior-tag.xlsx', sheet_name='算法关系表').init_module_info()
  71. def init_city(self):
  72. """
  73. 获取答题数据中的城市。
  74. :return:
  75. """
  76. citys = ['北京市', '上海市', '重庆市', '天津市']
  77. citys_info = self.marketing_db.select(self.sql_1)
  78. citys.extend([x[0] for x in citys_info if x[0] is not None])
  79. return citys
  80. def query_behavioral_info(self, city=None, age=None, crowd=None):
  81. """
  82. 查询行为兴趣信息
  83. :return:
  84. """
  85. # datas = []
  86. # for key in self.tag_data.keys():
  87. # values = self.tag_data[key]
  88. # for value in values:
  89. # question = value[0].split('-')[0]
  90. # option = value[0].split('-')[1]
  91. # corr = value[1]
  92. # data = self.shangju_db.select(self.sql_2, [option, question])
  93. # if len(data) > 0:
  94. # print([question, option, data[0][3], data[0][1], key, corr])
  95. # datas.append([question, option, data[0][3], data[0][1], key, corr])
  96. # self.shangju_db.truncate('mvp_question_classification')
  97. # self.shangju_db.add_some(self.sql_3, datas)
  98. scores_behavioral = self.city_age_crowd(city, age, crowd)
  99. scores_module = self.module_score(crowd, city, age, scores_behavioral['score'])
  100. result = {'行为兴趣分值': scores_behavioral, '模块分值': scores_module}
  101. print('update finished!!!')
  102. return result
  103. def write_behavioral_data_into_table(self):
  104. """
  105. 行为兴趣计算数据写入数据库
  106. :return:
  107. """
  108. scores_all = []
  109. for city in self.citys:
  110. for age in self.age:
  111. if city != '上海市' and age != '85-89年生':
  112. for crowd_type in self.crowd:
  113. # print(' {}{}'.format(city, age))
  114. people_uuids = self.get_people_uuid_by_type(crowd_type)
  115. if len(people_uuids) > 0:
  116. print('{}-{}-{}'.format(city, age, crowd_type))
  117. datas = self.behavior_tag_init(city, age, people_uuids)
  118. scores = self.calculation_standard_score(datas, city, age, crowd_type)
  119. scores_all.extend(scores)
  120. def module_score(self, crowd, city, age, scores):
  121. """
  122. 模块分数计算
  123. 城市 年龄 人群分类 模块名称 分数
  124. :return:
  125. """
  126. modules = self.module_scores[crowd]
  127. result = []
  128. for key in modules.keys():
  129. values = modules[key]
  130. module_name = key
  131. score = 0
  132. for value in values:
  133. behavioral_name = value[0]
  134. weight = float(value[2])
  135. standard_score = [x[4] for x in scores if x[2] == behavioral_name]
  136. if len(standard_score) > 0:
  137. score += standard_score[0] * weight
  138. result.append([city, age, crowd, module_name, score])
  139. return result
  140. def insert(self, scores):
  141. """
  142. 计算数据写入数据库中,供接口查看
  143. :param scores:
  144. :return:
  145. """
  146. # todo
  147. pass
  148. def init_age(self):
  149. """
  150. 获取答题数据中的年龄
  151. """
  152. age_info = self.marketing_db.select(self.sql_4)
  153. # print([x[0] for x in age_info])
  154. return [x[0] for x in age_info if x[0] is not None]
  155. def city_age_crowd(self, city=None, age=None, crowd=None):
  156. data_start = []
  157. result = []
  158. if city is not None and age is not None and crowd is not None:
  159. print('获取指定城市,年龄段,人群类型的数据...')
  160. people_uuids = self.get_people_uuid_by_type(crowd)
  161. if len(people_uuids) > 0:
  162. print('{}-{}-{}'.format(city, age, crowd))
  163. datas = self.behavior_tag_init(city, age, people_uuids)
  164. data_start.append(datas)
  165. result.extend(self.calculation_standard_score(datas, city, age, crowd))
  166. pass
  167. else:
  168. print('获取所有case的数据...')
  169. for city in self.citys:
  170. for age in self.age:
  171. if city != '上海市' and age != '85-89年生':
  172. for crowd_type in self.crowd:
  173. # print(' {}{}'.format(city, age))
  174. people_uuids = self.get_people_uuid_by_type(crowd_type)
  175. if len(people_uuids) > 0:
  176. print('{}-{}-{}'.format(city, age, crowd_type))
  177. datas = self.behavior_tag_init(city, age, people_uuids)
  178. data_start.append(datas)
  179. result.extend(self.calculation_standard_score(datas, city, age, crowd_type))
  180. # return result
  181. data_list = []
  182. for e in data_start:
  183. for key in e.keys():
  184. values = e[key]
  185. for sub_e in values:
  186. ele = [key]
  187. ele.extend(sub_e)
  188. data_list.append(ele)
  189. pass
  190. return {'score': result, 'data': data_list}
  191. def behavior_tag_init(self, city, age, people_uuids):
  192. result = {}
  193. self.group_type_count = self.marketing_db.select(self.sql_5, [city, city, age, people_uuids])
  194. for key in self.tag_data:
  195. values = self.tag_data[key]
  196. elements = []
  197. for value in values:
  198. question = value[0].split('-')[0]
  199. option = value[0].split('-')[1]
  200. corr = value[1]
  201. fz, fm = self.molecular_value(question, option, city, age, people_uuids)
  202. if fm == 0:
  203. c = 0
  204. else:
  205. c = fz / fm
  206. elements.append([question, option, corr, fz, fm, c])
  207. result[key] = elements
  208. return self.indicator_calculation_d_e(result)
  209. def molecular_value(self, queston, option, city, age, people_uuids):
  210. # 获取当前父选项包含的子选项id和子题id列表
  211. result = self.shangju_db.select(self.sql_6, [option, queston])
  212. sub_option_ids = []
  213. group_types = []
  214. for rt in result:
  215. sub_option_id, sub_question_id, content = rt[0], rt[1], rt[2]
  216. grouptypes = self.shangju_db.select(self.sql_7, [sub_question_id])
  217. for g_t in grouptypes:
  218. if g_t[0] not in group_types:
  219. group_types.append(g_t[0])
  220. sub_option_ids.append(sub_option_id)
  221. # 计算子选项在答题记录中的点击数
  222. sub_options_count = 0
  223. if len(sub_option_ids) > 0:
  224. result_1 = self.marketing_db.select(self.sql_8, [sub_option_ids, city, city, age, people_uuids])
  225. sub_options_count = result_1[0][0]
  226. # 计算父选项包含的子选项对应的子题所在的测试gt包含的点击数。
  227. denominator_value = 0
  228. for info in self.group_type_count:
  229. if info[0] in group_types:
  230. denominator_value += info[1]
  231. return sub_options_count, denominator_value
  232. def indicator_calculation_d_e(self, data):
  233. result = {}
  234. for key in data.keys():
  235. values = data[key]
  236. c_list = [x[5] for x in values]
  237. fm_list = [x[4] for x in values]
  238. sum_c = sum(fm_list)
  239. min_c = min(c_list)
  240. elements = []
  241. for value in values:
  242. _value = []
  243. c = value[5]
  244. if sum_c == 0:
  245. d = 0
  246. else:
  247. d = c / sum_c
  248. e = c - min_c
  249. _value.extend(value)
  250. _value.append(d)
  251. _value.append(e)
  252. elements.append(_value)
  253. result[key] = elements
  254. return result
  255. def calculation_standard_score(self, datas, city, age, crowd_type):
  256. scores = []
  257. for key in datas.keys():
  258. print(key)
  259. print(' 父题序号 父选项序号 相关系系数 分子值 分母值 百分比 人数权重 偏离值')
  260. values = [x[5] for x in datas[key]]
  261. min_c = min(values)
  262. f = min_c
  263. for value in datas[key]:
  264. print(' {}'.format(value))
  265. if value[2] is not None and value[7] is not None:
  266. f += float(value[2] * value[7])
  267. print(' 标准分:{}'.format(f))
  268. scores.append([city, age, key, crowd_type, f])
  269. # self.shangju_db.add_some(self.sql_9, scores)
  270. return scores
  271. def get_crowd_people(self):
  272. result = {}
  273. for type in self.crowd:
  274. uuids = self.get_people_uuid_by_type(type)
  275. result[type] = len(uuids)
  276. return result
  277. def get_people_uuid_by_type(self, type):
  278. uuids = []
  279. type_sub_option_ids = self.crowd_contain_sub_option_ids[type]
  280. for people in self.people_sub_option_ids:
  281. uuid = people[0]
  282. sub_option_ids = list(map(int, str(people[1]).split(',')))
  283. # list(set(a).intersection(set(b)))
  284. if len(list(set(sub_option_ids).intersection(set(type_sub_option_ids)))) > 0:
  285. uuids.append(uuid)
  286. return uuids
  287. def get_crowd_contain_sub_option_ids(self):
  288. """
  289. 获取ABCDEF人群包含的子选项id
  290. :return:
  291. """
  292. infos = {}
  293. for key in self.crowd_info.keys():
  294. values = self.crowd_info[key]
  295. sub_option_ids = []
  296. for value in values:
  297. if value is not None:
  298. vals = str(value).split('-')
  299. option, question = vals[1], vals[0]
  300. query_result = self.shangju_db.select(self.sql_6, [option, question])
  301. for qr in query_result:
  302. sub_option_id, sub_question_id, content = qr[0], qr[1], qr[2]
  303. sub_option_ids.append(int(sub_option_id))
  304. infos[key] = sub_option_ids
  305. print(infos)
  306. return infos
  307. class ExcelUtil:
  308. # 当前项目路径
  309. dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'/elab_mvp/resources'
  310. """
  311. 解析excel文件
  312. """
  313. def __init__(self, sheet_name=None, file_name=None):
  314. if file_name:
  315. self.path = os.path.join(self.dir_path, file_name)
  316. else:
  317. self.path = os.path.join(self.dir_path, 'mvp.xlsx')
  318. if sheet_name:
  319. self.sheet_name = sheet_name
  320. else:
  321. self.sheet_name = '硬标签+行为'
  322. def read_excel_by_pd(self):
  323. df = pd.read_excel(self.path)
  324. data = df.head()
  325. print('获取到的数据{}'.format(data))
  326. def read_excel_by_ox(self):
  327. work_book = ox.load_workbook(self.path, data_only=True)
  328. work_sheet = work_book.get_sheet_by_name(self.sheet_name)
  329. # print('max_row:{}, max_col:{}'.format(work_sheet.max_row, work_sheet.max_column))
  330. return work_sheet
  331. def init_crowd_info(self):
  332. """
  333. 整理不同人群包含的父选序号
  334. :return:
  335. """
  336. rows = [row for row in self.read_excel_by_ox().rows]
  337. crowd_a = []
  338. crowd_b = []
  339. crowd_c = []
  340. crowd_d = []
  341. crowd_e = []
  342. crowd_f = []
  343. for row in rows[2:]:
  344. option = row[4].value
  345. a = row[6].value
  346. if a is not None and a == 1 and option not in crowd_a:
  347. crowd_a.append(option)
  348. b = row[7].value
  349. if b is not None and b == 1 and option not in crowd_b:
  350. crowd_b.append(option)
  351. c = row[8].value
  352. if c is not None and c == 1 and option not in crowd_d:
  353. crowd_c.append(option)
  354. d = row[9].value
  355. if d is not None and d == 1 and option not in crowd_d:
  356. crowd_d.append(option)
  357. e = row[10].value
  358. if e is not None and e == 1 and option not in crowd_e:
  359. crowd_e.append(option)
  360. f = row[11].value
  361. if f is not None and f == 1 and option not in crowd_f:
  362. crowd_f.append(option)
  363. return {'A': crowd_a, 'B': crowd_b, 'C': crowd_c, 'D': crowd_d, 'E': crowd_e, 'F': crowd_f}
  364. def init_mvp_data(self):
  365. """
  366. 获取每个标签包括的父题父选项编号
  367. :return:
  368. """
  369. rows = [row for row in self.read_excel_by_ox().rows][24:]
  370. tag_name = None
  371. datas = []
  372. for row in rows:
  373. tag = row[1].value
  374. values = row[3].value
  375. corr = row[4].value
  376. if tag:
  377. tag_name = tag
  378. if values is not None:
  379. datas.append([tag_name, values, corr])
  380. result = {}
  381. for name, items in groupby(datas, key=lambda obj: obj[0]):
  382. orders = []
  383. for n in items:
  384. orders.append([n[1], n[2]])
  385. result[name] = orders
  386. return result
  387. def init_module_info(self):
  388. work_sheet = self.read_excel_by_ox()
  389. max_column = work_sheet.max_column
  390. rows = [row for row in work_sheet.rows][3:]
  391. crowd_name = None
  392. datas = []
  393. for row in rows:
  394. crowd = row[1].value
  395. if crowd is not None:
  396. crowd_name = crowd
  397. behavior = row[2].value
  398. score = row[4].value
  399. for index in range(6, max_column - 1, 2):
  400. module_name = row[index].value
  401. if module_name is not None:
  402. weight = row[index + 1].value
  403. datas.append([crowd_name, behavior, score, module_name, weight])
  404. results = {}
  405. for name, items in groupby(datas, key=lambda obj: obj[0]):
  406. sub_results = {}
  407. for name_1, itmes_1 in groupby(items, key=lambda obj: obj[3]):
  408. sub_data = []
  409. for n in itmes_1:
  410. sub_data.append([n[1], n[2], n[4]])
  411. sub_results[name_1] = sub_data
  412. results[name] = sub_results
  413. return results
  414. class MysqlDB:
  415. """
  416. mysql操作
  417. """
  418. con = None
  419. cursor = None
  420. def __init__(self, db_name):
  421. self.db_name = db_name
  422. self.con = ps.connect(host='172.19.189.136', port=3306, user='bi_etl', password='XPtpswuU5lwGo4kx',
  423. db=self.db_name, charset='utf8')
  424. # self.con = ps.connect(host='192.168.0.13', port=3306, user='root', password='elab@123'
  425. # , db=self.db_name, charset='utf8')
  426. self.cursor = self.con.cursor()
  427. def show_tables(self):
  428. self.cursor.execute('show tables')
  429. for talbe in self.cursor.fetchall():
  430. print(talbe)
  431. def select(self, sql, params=None):
  432. if params:
  433. self.cursor.execute(sql, params)
  434. else:
  435. self.cursor.execute(sql)
  436. return self.cursor.fetchall()
  437. def add_some(self, sql, data):
  438. try:
  439. self.cursor.executemany(sql, data)
  440. self.con.commit()
  441. except:
  442. print('数据插入异常...')
  443. self.con.rollback()
  444. def add_one(self, sql, data):
  445. try:
  446. self.cursor.execute(sql, data)
  447. self.con.commit()
  448. except:
  449. self.con.rollback()
  450. def truncate(self, table_name):
  451. sql = 'truncate table {}'.format(table_name)
  452. self.cursor.execute(sql)
  453. self.con.commit()
  454. def close(self):
  455. self.cursor.close()
  456. self.con.close()
  457. @app.route('/behavioral_statistics', methods=['GET', 'POST'])
  458. def behavioral_statistics():
  459. """
  460. 父选项对应的标准化值
  461. :return:
  462. """
  463. city = request.args.get('city', default=None, type=str)
  464. age = request.args.get('age', default=None, type=str)
  465. crowd = request.args.get('crowd', default=None, type=str)
  466. print(city, age, crowd)
  467. mvp = Mvp()
  468. scores = mvp.query_behavioral_info(city, age, crowd)
  469. mvp.shangju_db.close()
  470. mvp.marketing_db.close()
  471. return json.dumps(scores, ensure_ascii=False)
  472. @app.route('/infos', methods=["GET", 'POST'])
  473. def get_city_age_crowd():
  474. """
  475. 测试数据中城市 年龄 人群分类信息
  476. :return:
  477. """
  478. mvp = Mvp()
  479. infos = {'城市': mvp.citys, '年龄段': mvp.age}
  480. mvp.shangju_db.close()
  481. mvp.marketing_db.close()
  482. return json.dumps(infos, ensure_ascii=False)
  483. @app.route('/crowd_people', methods=['GET', 'POST'])
  484. def crowd_people():
  485. """
  486. 人群分类人数统计
  487. :return:
  488. """
  489. mvp = Mvp()
  490. people_count = mvp.get_crowd_people()
  491. mvp.shangju_db.close()
  492. mvp.marketing_db.close()
  493. return json.dumps(people_count, ensure_ascii=False)
  494. @app.route('/set_behavior_tag', methods=['GET', 'POST'])
  495. def set_behavior_tag():
  496. """
  497. 模块标准化值
  498. :return:
  499. """
  500. mvp = Mvp()
  501. return json.dumps(mvp.module_scores, ensure_ascii=False)
  502. if __name__ == '__main__':
  503. app.run(
  504. host='0.0.0.0',
  505. port=5001
  506. )