flask_app.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. from flask import Flask, request, render_template
  2. import os
  3. from werkzeug.utils import secure_filename
  4. import time
  5. import json
  6. import pandas as pd
  7. import openpyxl as ox
  8. from itertools import groupby
  9. import os
  10. import pymysql as ps
  11. app = Flask(__name__)
  12. class Mvp:
  13. """
  14. ce mvp 答题数据统计
  15. 城市特例 北京市,上海市, 重庆市,天津市
  16. """
  17. age_dict = {
  18. '00-04年生': '00后',
  19. '05-09年生': '05后',
  20. '50-59年生': '50后',
  21. '60-69年生': '60后',
  22. '70-74年生': '70后',
  23. '75-79年生': '75后',
  24. '80-84年生': '80后',
  25. '85-89年生': '85后',
  26. '90-94年生': '90后',
  27. '95-99年生': '95后'
  28. }
  29. crowd = ['A', 'B', 'C', 'D', 'E', 'F']
  30. # 获取答题记录中城市列表
  31. sql_1 = 'select city from f_t_daren_score_2 group by city'
  32. # 获取父选项和父题id
  33. sql_2 = 'select a.id, a.content, b.id, b.name from bq_option a left join bq_question b on a.question_id = b.id ' \
  34. 'where a.serial_number = %s and b.serial_number = %s and a.status = b.status = 1 '
  35. # 数据插入表mvp_question_classification
  36. sql_3 = 'insert into mvp_question_classification(question_serial_number, question_content, ' \
  37. 'option_serial_number, option_content, tag, corr) values(%s, %s, %s, %s, %s, %s) '
  38. # 获取答题人的年龄段集合
  39. sql_4 = 'select nld from f_t_daren_score_2 group by nld'
  40. # 根据城市,年龄段,人群分类统计答题记录数
  41. sql_5 = 'select group_type, COUNT(uuid) from f_t_daren_score_2 where (city = %s or province = %s) and nld ' \
  42. '= %s and uuid in %s group by group_type '
  43. # 根据父选项获取子选项id列表
  44. sql_6 = 'SELECT c.id, c.sub_question_id, c.content FROM bq_sub_option c WHERE c.father_id in (SELECT a.id FROM ' \
  45. 'bq_option a ' \
  46. 'LEFT JOIN bq_question b ON a.question_id = b.id WHERE a.serial_number = %s AND b.serial_number = %s ' \
  47. 'and a.status = 1 and b.status = 1) and c.status = 1 '
  48. # 根据子题id获取包含子题id的测试
  49. sql_7 = 'select group_type from bq_testcase where status = 1 and FIND_IN_SET(%s, question_ids)'
  50. # 根据子选项id统计答题数
  51. sql_8 = 'SELECT count(uuid) FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.sub_question_id = ' \
  52. 'b.sub_question_id AND a.score = b.score WHERE a.testcase_id = b.testcase_id and b.sub_option_id in %s' \
  53. 'and (a.city = %s or a.province = %s) and a.nld = %s and a.uuid in %s'
  54. # 计算值写入表汇总
  55. sql_9 = 'insert into mvp_standard_score(city, age, tag, crowd_type, score) VALUES(%s, %s, %s, %s, %s)'
  56. # 获取一个uuid下答题的子选项id列表
  57. sql_10 = 'select DISTINCT uuid, GROUP_CONCAT(DISTINCT b.sub_option_id) from f_t_daren_score_2 a left join ' \
  58. 'd_shangju_tiku_02 b on a.sub_question_id = b.sub_question_id and a.score = b.score where a.status = ' \
  59. 'b.status = 1 group by uuid '
  60. def __init__(self, path=None):
  61. self.shangju_db = MysqlDB('shangju')
  62. self.marketing_db = MysqlDB('bi_report')
  63. # self.shangju_db.truncate('mvp_standard_score')
  64. self.tag_data = ExcelUtil(path=path).init_mvp_data()
  65. self.crowd_info = ExcelUtil(path=path, sheet_name='选项-人群分类对应表').init_crowd_info()
  66. self.citys = self.init_city()
  67. self.age = self.init_age()
  68. self.people_sub_option_ids = self.marketing_db.select(self.sql_10)
  69. self.crowd_contain_sub_option_ids = self.get_crowd_contain_sub_option_ids()
  70. def init_city(self):
  71. """
  72. 获取答题数据中的城市。
  73. :return:
  74. """
  75. citys = ['北京市', '上海市', '重庆市', '天津市']
  76. citys_info = self.marketing_db.select(self.sql_1)
  77. citys.extend([x[0] for x in citys_info if x[0] is not None])
  78. return citys
  79. def query_behavioral_info(self, city=None, age=None, crowd=None):
  80. """
  81. 查询行为兴趣信息
  82. :return:
  83. """
  84. # datas = []
  85. # for key in self.tag_data.keys():
  86. # values = self.tag_data[key]
  87. # for value in values:
  88. # question = value[0].split('-')[0]
  89. # option = value[0].split('-')[1]
  90. # corr = value[1]
  91. # data = self.shangju_db.select(self.sql_2, [option, question])
  92. # if len(data) > 0:
  93. # print([question, option, data[0][3], data[0][1], key, corr])
  94. # datas.append([question, option, data[0][3], data[0][1], key, corr])
  95. # self.shangju_db.truncate('mvp_question_classification')
  96. # self.shangju_db.add_some(self.sql_3, datas)
  97. result = self.city_age_crowd(city, age, crowd)
  98. print('update finished!!!')
  99. return result
  100. def write_behavioral_data_into_table(self):
  101. """
  102. 行为兴趣计算数据写入数据库
  103. :return:
  104. """
  105. scores_all = []
  106. for city in self.citys:
  107. for age in self.age:
  108. if city != '上海市' and age != '85-89年生':
  109. for crowd_type in self.crowd:
  110. # print(' {}{}'.format(city, age))
  111. people_uuids = self.get_people_uuid_by_type(crowd_type)
  112. if len(people_uuids) > 0:
  113. print('{}-{}-{}'.format(city, age, crowd_type))
  114. datas = self.behavior_tag_init(city, age, people_uuids)
  115. scores = self.calculation_standard_score(datas, city, age, crowd_type)
  116. scores_all.extend(scores)
  117. def insert(self, scores):
  118. """
  119. 计算数据写入数据库中,供接口查看
  120. :param scores:
  121. :return:
  122. """
  123. # todo
  124. pass
  125. def init_age(self):
  126. """
  127. 获取答题数据中的年龄
  128. """
  129. age_info = self.marketing_db.select(self.sql_4)
  130. # print([x[0] for x in age_info])
  131. return [x[0] for x in age_info if x[0] is not None]
  132. def city_age_crowd(self, city=None, age=None, crowd=None):
  133. result = []
  134. if city is not None and age is not None and crowd is not None:
  135. print('获取指定城市,年龄段,人群类型的数据...')
  136. people_uuids = self.get_people_uuid_by_type(crowd)
  137. if len(people_uuids) > 0:
  138. print('{}-{}-{}'.format(city, age, crowd))
  139. datas = self.behavior_tag_init(city, age, people_uuids)
  140. result.extend(self.calculation_standard_score(datas, city, age, crowd))
  141. pass
  142. else:
  143. print('获取所有case的数据...')
  144. for city in self.citys:
  145. for age in self.age:
  146. if city != '上海市' and age != '85-89年生':
  147. for crowd_type in self.crowd:
  148. # print(' {}{}'.format(city, age))
  149. people_uuids = self.get_people_uuid_by_type(crowd_type)
  150. if len(people_uuids) > 0:
  151. print('{}-{}-{}'.format(city, age, crowd_type))
  152. datas = self.behavior_tag_init(city, age, people_uuids)
  153. result.extend(self.calculation_standard_score(datas, city, age, crowd_type))
  154. return result
  155. def behavior_tag_init(self, city, age, people_uuids):
  156. result = {}
  157. self.group_type_count = self.marketing_db.select(self.sql_5, [city, city, age, people_uuids])
  158. for key in self.tag_data:
  159. values = self.tag_data[key]
  160. elements = []
  161. for value in values:
  162. question = value[0].split('-')[0]
  163. option = value[0].split('-')[1]
  164. corr = value[1]
  165. fz, fm = self.molecular_value(question, option, city, age, people_uuids)
  166. if fm == 0:
  167. c = 0
  168. else:
  169. c = fz / fm
  170. elements.append([question, option, corr, fz, fm, c])
  171. result[key] = elements
  172. return self.indicator_calculation_d_e(result)
  173. def molecular_value(self, queston, option, city, age, people_uuids):
  174. # 获取当前父选项包含的子选项id和子题id列表
  175. result = self.shangju_db.select(self.sql_6, [option, queston])
  176. sub_option_ids = []
  177. group_types = []
  178. for rt in result:
  179. sub_option_id, sub_question_id, content = rt[0], rt[1], rt[2]
  180. grouptypes = self.shangju_db.select(self.sql_7, [sub_question_id])
  181. for g_t in grouptypes:
  182. if g_t[0] not in group_types:
  183. group_types.append(g_t[0])
  184. sub_option_ids.append(sub_option_id)
  185. # 计算子选项在答题记录中的点击数
  186. sub_options_count = 0
  187. if len(sub_option_ids) > 0:
  188. result_1 = self.marketing_db.select(self.sql_8, [sub_option_ids, city, city, age, people_uuids])
  189. sub_options_count = result_1[0][0]
  190. # 计算父选项包含的子选项对应的子题所在的测试gt包含的点击数。
  191. denominator_value = 0
  192. for info in self.group_type_count:
  193. if info[0] in group_types:
  194. denominator_value += info[1]
  195. return sub_options_count, denominator_value
  196. def indicator_calculation_d_e(self, data):
  197. result = {}
  198. for key in data.keys():
  199. values = data[key]
  200. c_list = [x[5] for x in values]
  201. fm_list = [x[4] for x in values]
  202. sum_c = sum(fm_list)
  203. min_c = min(c_list)
  204. elements = []
  205. for value in values:
  206. _value = []
  207. c = value[5]
  208. if sum_c == 0:
  209. d = 0
  210. else:
  211. d = c / sum_c
  212. e = c - min_c
  213. _value.extend(value)
  214. _value.append(d)
  215. _value.append(e)
  216. elements.append(_value)
  217. result[key] = elements
  218. return result
  219. def calculation_standard_score(self, datas, city, age, crowd_type):
  220. scores = []
  221. for key in datas.keys():
  222. print(key)
  223. print(' 父题序号 父选项序号 相关系系数 分子值 分母值 百分比 人数权重 偏离值')
  224. values = [x[5] for x in datas[key]]
  225. min_c = min(values)
  226. f = min_c
  227. for value in datas[key]:
  228. print(' {}'.format(value))
  229. if value[2] is not None and value[7] is not None:
  230. f += float(value[2] * value[7])
  231. print(' 标准分:{}'.format(f))
  232. scores.append([city, age, key, crowd_type, f])
  233. # self.shangju_db.add_some(self.sql_9, scores)
  234. return scores
  235. def get_crowd_people(self):
  236. result = {}
  237. for type in self.crowd:
  238. uuids = self.get_people_uuid_by_type(type)
  239. result[type] = len(uuids)
  240. return result
  241. def get_people_uuid_by_type(self, type):
  242. uuids = []
  243. type_sub_option_ids = self.crowd_contain_sub_option_ids[type]
  244. for people in self.people_sub_option_ids:
  245. uuid = people[0]
  246. sub_option_ids = list(map(int, str(people[1]).split(',')))
  247. # list(set(a).intersection(set(b)))
  248. if len(list(set(sub_option_ids).intersection(set(type_sub_option_ids)))) > 0:
  249. uuids.append(uuid)
  250. return uuids
  251. def get_crowd_contain_sub_option_ids(self):
  252. """
  253. 获取ABCDEF人群包含的子选项id
  254. :return:
  255. """
  256. infos = {}
  257. for key in self.crowd_info.keys():
  258. values = self.crowd_info[key]
  259. sub_option_ids = []
  260. for value in values:
  261. if value is not None:
  262. vals = str(value).split('-')
  263. option, question = vals[1], vals[0]
  264. query_result = self.shangju_db.select(self.sql_6, [option, question])
  265. for qr in query_result:
  266. sub_option_id, sub_question_id, content = qr[0], qr[1], qr[2]
  267. sub_option_ids.append(int(sub_option_id))
  268. infos[key] = sub_option_ids
  269. print(infos)
  270. return infos
  271. class ExcelUtil:
  272. # 当前项目路径
  273. dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'/elab_mvp/resources'
  274. """
  275. 解析excel文件
  276. """
  277. def __init__(self, sheet_name=None, path=None):
  278. if path:
  279. self.path = path
  280. else:
  281. self.path = os.path.join(self.dir_path, 'mvp.xlsx')
  282. if sheet_name:
  283. self.sheet_name = sheet_name
  284. else:
  285. self.sheet_name = '硬标签+行为'
  286. def read_excel_by_pd(self):
  287. df = pd.read_excel(self.path)
  288. data = df.head()
  289. print('获取到的数据{}'.format(data))
  290. def read_excel_by_ox(self):
  291. work_book = ox.load_workbook(self.path, data_only=True)
  292. work_sheet = work_book.get_sheet_by_name(self.sheet_name)
  293. # print('max_row:{}, max_col:{}'.format(work_sheet.max_row, work_sheet.max_column))
  294. return work_sheet
  295. def init_crowd_info(self):
  296. """
  297. 整理不同人群包含的父选序号
  298. :return:
  299. """
  300. rows = [row for row in self.read_excel_by_ox().rows]
  301. crowd_a = []
  302. crowd_b = []
  303. crowd_c = []
  304. crowd_d = []
  305. crowd_e = []
  306. crowd_f = []
  307. for row in rows[2:]:
  308. option = row[4].value
  309. a = row[6].value
  310. if a is not None and a == 1 and option not in crowd_a:
  311. crowd_a.append(option)
  312. b = row[7].value
  313. if b is not None and b == 1 and option not in crowd_b:
  314. crowd_b.append(option)
  315. c = row[8].value
  316. if c is not None and c == 1 and option not in crowd_d:
  317. crowd_c.append(option)
  318. d = row[9].value
  319. if d is not None and d == 1 and option not in crowd_d:
  320. crowd_d.append(option)
  321. e = row[10].value
  322. if e is not None and e == 1 and option not in crowd_e:
  323. crowd_e.append(option)
  324. f = row[11].value
  325. if f is not None and f == 1 and option not in crowd_f:
  326. crowd_f.append(option)
  327. return {'A': crowd_a, 'B': crowd_b, 'C': crowd_c, 'D': crowd_d, 'E': crowd_e, 'F': crowd_f}
  328. def init_mvp_data(self):
  329. """
  330. 获取每个标签包括的父题父选项编号
  331. :return:
  332. """
  333. rows = [row for row in self.read_excel_by_ox().rows][24:]
  334. tag_name = None
  335. datas = []
  336. for row in rows:
  337. tag = row[1].value
  338. values = row[3].value
  339. corr = row[4].value
  340. if tag:
  341. tag_name = tag
  342. if values is not None:
  343. datas.append([tag_name, values, corr])
  344. result = {}
  345. for name, items in groupby(datas, key=lambda obj: obj[0]):
  346. orders = []
  347. for n in items:
  348. orders.append([n[1], n[2]])
  349. result[name] = orders
  350. return result
  351. class MysqlDB:
  352. """
  353. mysql操作
  354. """
  355. con = None
  356. cursor = None
  357. def __init__(self, db_name):
  358. self.db_name = db_name
  359. self.con = ps.connect(host='172.19.189.136', port=3306, user='bi_etl', password='XPtpswuU5lwGo4kx',
  360. db=self.db_name, charset='utf8')
  361. # self.con = ps.connect(host='192.168.0.13', port=3306, user='root', password='elab@123'
  362. # , db=self.db_name, charset='utf8')
  363. self.cursor = self.con.cursor()
  364. def show_tables(self):
  365. self.cursor.execute('show tables')
  366. for talbe in self.cursor.fetchall():
  367. print(talbe)
  368. def select(self, sql, params=None):
  369. if params:
  370. self.cursor.execute(sql, params)
  371. else:
  372. self.cursor.execute(sql)
  373. return self.cursor.fetchall()
  374. def add_some(self, sql, data):
  375. try:
  376. self.cursor.executemany(sql, data)
  377. self.con.commit()
  378. except:
  379. print('数据插入异常...')
  380. self.con.rollback()
  381. def add_one(self, sql, data):
  382. try:
  383. self.cursor.execute(sql, data)
  384. self.con.commit()
  385. except:
  386. self.con.rollback()
  387. def truncate(self, table_name):
  388. sql = 'truncate table {}'.format(table_name)
  389. self.cursor.execute(sql)
  390. self.con.commit()
  391. def close(self):
  392. self.cursor.close()
  393. self.con.close()
  394. @app.route('/behavioral_statistics', methods=['GET', 'POST'])
  395. def behavioral_statistics():
  396. city = request.args.get('city', default=None, type=str)
  397. age = request.args.get('age', default=None, type=str)
  398. crowd = request.args.get('crowd', default=None, type=str)
  399. print(city, age, crowd)
  400. mvp = Mvp()
  401. scores = mvp.query_behavioral_info(city, age, crowd)
  402. mvp.shangju_db.close()
  403. mvp.marketing_db.close()
  404. return json.dumps(scores, ensure_ascii=False)
  405. @app.route('/infos', methods=["GET", 'POST'])
  406. def get_city_age_crowd():
  407. mvp = Mvp()
  408. infos = {'城市': mvp.citys, '年龄段': mvp.age}
  409. mvp.shangju_db.close()
  410. mvp.marketing_db.close()
  411. return json.dumps(infos, ensure_ascii=False)
  412. @app.route('/crowd_people', methods=['GET', 'POST'])
  413. def crowd_people():
  414. mvp = Mvp()
  415. people_count = mvp.get_crowd_people()
  416. return json.dumps(people_count, ensure_ascii=False)
  417. if __name__ == '__main__':
  418. app.run(
  419. host='0.0.0.0',
  420. port=5001
  421. )