flask_app.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. from flask import Flask, request, render_template
  2. import os
  3. from werkzeug.utils import secure_filename
  4. import time
  5. import json
  6. import pandas as pd
  7. import openpyxl as ox
  8. from itertools import groupby
  9. import os
  10. import pymysql as ps
  11. app = Flask(__name__)
  12. class Mvp:
  13. """
  14. ce mvp 答题数据统计
  15. 城市特例 北京市,上海市, 重庆市,天津市
  16. """
  17. age_dict = {
  18. '00-04年生': '00后',
  19. '05-09年生': '05后',
  20. '50-59年生': '50后',
  21. '60-69年生': '60后',
  22. '70-74年生': '70后',
  23. '75-79年生': '75后',
  24. '80-84年生': '80后',
  25. '85-89年生': '85后',
  26. '90-94年生': '90后',
  27. '95-99年生': '95后'
  28. }
  29. crowd = ['A', 'B', 'C', 'D', 'E', 'F']
  30. # 获取答题记录中城市列表
  31. sql_1 = 'select city from f_t_daren_score_2 group by city'
  32. # 获取父选项和父题id
  33. sql_2 = 'select a.id, a.content, b.id, b.name from bq_option a left join bq_question b on a.question_id = b.id ' \
  34. 'where a.serial_number = %s and b.serial_number = %s and a.status = b.status = 1 '
  35. # 数据插入表mvp_question_classification
  36. sql_3 = 'insert into mvp_question_classification(question_serial_number, question_content, ' \
  37. 'option_serial_number, option_content, tag, corr) values(%s, %s, %s, %s, %s, %s) '
  38. # 获取答题人的年龄段集合
  39. sql_4 = 'select nld from f_t_daren_score_2 group by nld'
  40. # 根据城市,年龄段,人群分类统计答题记录数
  41. sql_5 = 'select group_type, COUNT(uuid) from f_t_daren_score_2 where (city = %s or province = %s) and nld ' \
  42. '= %s and uuid in %s group by group_type '
  43. # 根据父选项获取子选项id列表
  44. sql_6 = 'SELECT c.id, c.sub_question_id, c.content FROM bq_sub_option c WHERE c.father_id in (SELECT a.id FROM ' \
  45. 'bq_option a ' \
  46. 'LEFT JOIN bq_question b ON a.question_id = b.id WHERE a.serial_number = %s AND b.serial_number = %s ' \
  47. 'and a.status = 1 and b.status = 1) and c.status = 1 '
  48. # 根据子题id获取包含子题id的测试
  49. sql_7 = 'select group_type from bq_testcase where status = 1 and FIND_IN_SET(%s, question_ids)'
  50. # 根据子选项id统计答题数
  51. sql_8 = 'SELECT count(uuid) FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.sub_question_id = ' \
  52. 'b.sub_question_id AND a.score = b.score WHERE a.testcase_id = b.testcase_id and b.sub_option_id in %s' \
  53. 'and (a.city = %s or a.province = %s) and a.nld = %s and a.uuid in %s'
  54. # 计算值写入表汇总
  55. sql_9 = 'insert into mvp_standard_score(city, age, tag, crowd_type, score) VALUES(%s, %s, %s, %s, %s)'
  56. # 获取一个uuid下答题的子选项id列表
  57. sql_10 = 'select DISTINCT uuid, GROUP_CONCAT(DISTINCT b.sub_option_id) from f_t_daren_score_2 a left join ' \
  58. 'd_shangju_tiku_02 b on a.sub_question_id = b.sub_question_id and a.score = b.score where a.status = ' \
  59. 'b.status = 1 group by uuid '
  60. def __init__(self, path=None):
  61. self.shangju_db = MysqlDB('shangju')
  62. self.marketing_db = MysqlDB('marketing_db')
  63. self.shangju_db.truncate('mvp_standard_score')
  64. self.tag_data = ExcelUtil(path=path).init_mvp_data()
  65. self.crowd_info = ExcelUtil(path=path, sheet_name='选项-人群分类对应表').init_crowd_info()
  66. self.citys = self.init_city()
  67. self.age = self.init_age()
  68. self.people_sub_option_ids = self.marketing_db.select(self.sql_10)
  69. self.crowd_contain_sub_option_ids = self.get_crowd_contain_sub_option_ids()
  70. def init_city(self):
  71. """
  72. 获取答题数据中的城市。
  73. :return:
  74. """
  75. citys = ['北京市', '上海市', '重庆市', '天津市']
  76. citys_info = self.marketing_db.select(self.sql_1)
  77. citys.extend([x[0] for x in citys_info if x[0] is not None])
  78. return citys
  79. def write_tag(self, city=None, age=None, crowd=None):
  80. """
  81. 将excel中的配置信息写入到数据库表中
  82. :return:
  83. """
  84. # datas = []
  85. # for key in self.tag_data.keys():
  86. # values = self.tag_data[key]
  87. # for value in values:
  88. # question = value[0].split('-')[0]
  89. # option = value[0].split('-')[1]
  90. # corr = value[1]
  91. # data = self.shangju_db.select(self.sql_2, [option, question])
  92. # if len(data) > 0:
  93. # print([question, option, data[0][3], data[0][1], key, corr])
  94. # datas.append([question, option, data[0][3], data[0][1], key, corr])
  95. # self.shangju_db.truncate('mvp_question_classification')
  96. # self.shangju_db.add_some(self.sql_3, datas)
  97. result = self.city_age_crowd(city, age, crowd)
  98. print('update finished!!!')
  99. return result
  100. def init_age(self):
  101. """
  102. 获取答题数据中的年龄
  103. """
  104. age_info = self.marketing_db.select(self.sql_4)
  105. # print([x[0] for x in age_info])
  106. return [x[0] for x in age_info if x[0] is not None]
  107. def city_age_crowd(self, city=None, age=None, crowd=None):
  108. result = []
  109. if city is not None and age is not None and crowd is not None:
  110. print('获取指定城市,年龄段,人群类型的数据...')
  111. people_uuids = self.get_people_uuid_by_type(crowd)
  112. if len(people_uuids) > 0:
  113. print('{}-{}-{}'.format(city, age, crowd))
  114. datas = self.behavior_tag_init(city, age, people_uuids)
  115. result.extend(self.calculation_standard_score(datas, city, age, crowd))
  116. pass
  117. else:
  118. print('获取所有case的数据...')
  119. for city in self.citys:
  120. for age in self.age:
  121. if city != '上海市' and age != '85-89年生':
  122. for crowd_type in self.crowd:
  123. # print(' {}{}'.format(city, age))
  124. people_uuids = self.get_people_uuid_by_type(crowd_type)
  125. if len(people_uuids) > 0:
  126. print('{}-{}-{}'.format(city, age, crowd_type))
  127. datas = self.behavior_tag_init(city, age, people_uuids)
  128. result.extend(self.calculation_standard_score(datas, city, age, crowd_type))
  129. return result
  130. def behavior_tag_init(self, city, age, people_uuids):
  131. result = {}
  132. self.group_type_count = self.marketing_db.select(self.sql_5, [city, city, age, people_uuids])
  133. for key in self.tag_data:
  134. values = self.tag_data[key]
  135. elements = []
  136. for value in values:
  137. question = value[0].split('-')[0]
  138. option = value[0].split('-')[1]
  139. corr = value[1]
  140. fz, fm = self.molecular_value(question, option, city, age, people_uuids)
  141. if fm == 0:
  142. c = 0
  143. else:
  144. c = fz / fm
  145. elements.append([question, option, corr, fz, fm, c])
  146. result[key] = elements
  147. return self.indicator_calculation_d_e(result)
  148. def molecular_value(self, queston, option, city, age, people_uuids):
  149. # 获取当前父选项包含的子选项id和子题id列表
  150. result = self.shangju_db.select(self.sql_6, [option, queston])
  151. sub_option_ids = []
  152. group_types = []
  153. for rt in result:
  154. sub_option_id, sub_question_id, content = rt[0], rt[1], rt[2]
  155. grouptypes = self.shangju_db.select(self.sql_7, [sub_question_id])
  156. for g_t in grouptypes:
  157. if g_t[0] not in group_types:
  158. group_types.append(g_t[0])
  159. sub_option_ids.append(sub_option_id)
  160. # 计算子选项在答题记录中的点击数
  161. sub_options_count = 0
  162. if len(sub_option_ids) > 0:
  163. result_1 = self.marketing_db.select(self.sql_8, [sub_option_ids, city, city, age, people_uuids])
  164. sub_options_count = result_1[0][0]
  165. # 计算父选项包含的子选项对应的子题所在的测试gt包含的点击数。
  166. denominator_value = 0
  167. for info in self.group_type_count:
  168. if info[0] in group_types:
  169. denominator_value += info[1]
  170. return sub_options_count, denominator_value
  171. def indicator_calculation_d_e(self, data):
  172. result = {}
  173. for key in data.keys():
  174. values = data[key]
  175. c_list = [x[5] for x in values]
  176. fm_list = [x[4] for x in values]
  177. sum_c = sum(fm_list)
  178. min_c = min(c_list)
  179. elements = []
  180. for value in values:
  181. _value = []
  182. c = value[5]
  183. if sum_c == 0:
  184. d = 0
  185. else:
  186. d = c / sum_c
  187. e = c - min_c
  188. _value.extend(value)
  189. _value.append(d)
  190. _value.append(e)
  191. elements.append(_value)
  192. result[key] = elements
  193. return result
  194. def calculation_standard_score(self, datas, city, age, crowd_type):
  195. scores = []
  196. for key in datas.keys():
  197. print(key)
  198. print(' 父题序号 父选项序号 相关系系数 分子值 分母值 百分比 人数权重 偏离值')
  199. values = [x[5] for x in datas[key]]
  200. min_c = min(values)
  201. f = min_c
  202. for value in datas[key]:
  203. print(' {}'.format(value))
  204. if value[2] is not None and value[7] is not None:
  205. f += float(value[2] * value[7])
  206. print(' 标准分:{}'.format(f))
  207. scores.append([city, age, key, crowd_type, f])
  208. # self.shangju_db.add_some(self.sql_9, scores)
  209. return scores
  210. def get_crowd_people(self):
  211. result = {}
  212. for type in self.crowd:
  213. uuids = self.get_people_uuid_by_type(type)
  214. result[type] = len(uuids)
  215. return result
  216. def get_people_uuid_by_type(self, type):
  217. uuids = []
  218. type_sub_option_ids = self.crowd_contain_sub_option_ids[type]
  219. for people in self.people_sub_option_ids:
  220. uuid = people[0]
  221. sub_option_ids = str(people[1]).split(',')
  222. # list(set(a).intersection(set(b)))
  223. if len(list(set(sub_option_ids).intersection(set(type_sub_option_ids)))) > 0:
  224. uuids.append(uuid)
  225. return uuids
  226. def get_crowd_contain_sub_option_ids(self):
  227. """
  228. 获取ABCDEF人群包含的子选项id
  229. :return:
  230. """
  231. infos = {}
  232. for key in self.crowd_info.keys():
  233. values = self.crowd_info[key]
  234. sub_option_ids = []
  235. for value in values:
  236. if value is not None:
  237. vals = str(value).split('-')
  238. option, question = vals[1], vals[0]
  239. query_result = self.shangju_db.select(self.sql_6, [option, question])
  240. for qr in query_result:
  241. sub_option_id, sub_question_id, content = qr[0], qr[1], qr[2]
  242. sub_option_ids.append(sub_option_id)
  243. infos[key] = sub_option_ids
  244. return infos
  245. class ExcelUtil:
  246. # 当前项目路径
  247. dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'/elab_mvp/resources'
  248. """
  249. 解析excel文件
  250. """
  251. def __init__(self, sheet_name=None, path=None):
  252. if path:
  253. self.path = path
  254. else:
  255. self.path = os.path.join(self.dir_path, 'mvp.xlsx')
  256. if sheet_name:
  257. self.sheet_name = sheet_name
  258. else:
  259. self.sheet_name = '硬标签+行为'
  260. def read_excel_by_pd(self):
  261. df = pd.read_excel(self.path)
  262. data = df.head()
  263. print('获取到的数据{}'.format(data))
  264. def read_excel_by_ox(self):
  265. work_book = ox.load_workbook(self.path, data_only=True)
  266. work_sheet = work_book.get_sheet_by_name(self.sheet_name)
  267. # print('max_row:{}, max_col:{}'.format(work_sheet.max_row, work_sheet.max_column))
  268. return work_sheet
  269. def init_crowd_info(self):
  270. """
  271. 整理不同人群包含的父选序号
  272. :return:
  273. """
  274. rows = [row for row in self.read_excel_by_ox().rows]
  275. crowd_a = []
  276. crowd_b = []
  277. crowd_c = []
  278. crowd_d = []
  279. crowd_e = []
  280. crowd_f = []
  281. for row in rows[2:]:
  282. option = row[4].value
  283. a = row[6].value
  284. if a is not None and a == 1 and option not in crowd_a:
  285. crowd_a.append(option)
  286. b = row[7].value
  287. if b is not None and b == 1 and option not in crowd_b:
  288. crowd_b.append(option)
  289. c = row[8].value
  290. if c is not None and c == 1 and option not in crowd_d:
  291. crowd_c.append(option)
  292. d = row[9].value
  293. if d is not None and d == 1 and option not in crowd_d:
  294. crowd_d.append(option)
  295. e = row[10].value
  296. if e is not None and e == 1 and option not in crowd_e:
  297. crowd_e.append(option)
  298. f = row[11].value
  299. if f is not None and f == 1 and option not in crowd_f:
  300. crowd_f.append(option)
  301. return {'A': crowd_a, 'B': crowd_b, 'C': crowd_c, 'D': crowd_d, 'E': crowd_e, 'F': crowd_f}
  302. def init_mvp_data(self):
  303. """
  304. 获取每个标签包括的父题父选项编号
  305. :return:
  306. """
  307. rows = [row for row in self.read_excel_by_ox().rows][24:]
  308. tag_name = None
  309. datas = []
  310. for row in rows:
  311. tag = row[1].value
  312. values = row[3].value
  313. corr = row[4].value
  314. if tag:
  315. tag_name = tag
  316. if values is not None:
  317. datas.append([tag_name, values, corr])
  318. result = {}
  319. for name, items in groupby(datas, key=lambda obj: obj[0]):
  320. orders = []
  321. for n in items:
  322. orders.append([n[1], n[2]])
  323. result[name] = orders
  324. return result
  325. class MysqlDB:
  326. """
  327. mysql操作
  328. """
  329. con = None
  330. cursor = None
  331. def __init__(self, db_name):
  332. self.db_name = db_name
  333. self.con = ps.connect(host='192.168.0.13', port=3306, user='root', password='elab@123',
  334. db=self.db_name, charset='utf8')
  335. self.cursor = self.con.cursor()
  336. def show_tables(self):
  337. self.cursor.execute('show tables')
  338. for talbe in self.cursor.fetchall():
  339. print(talbe)
  340. def select(self, sql, params=None):
  341. if params:
  342. self.cursor.execute(sql, params)
  343. else:
  344. self.cursor.execute(sql)
  345. return self.cursor.fetchall()
  346. def add_some(self, sql, data):
  347. try:
  348. self.cursor.executemany(sql, data)
  349. self.con.commit()
  350. except:
  351. print('数据插入异常...')
  352. self.con.rollback()
  353. def add_one(self, sql, data):
  354. try:
  355. self.cursor.execute(sql, data)
  356. self.con.commit()
  357. except:
  358. self.con.rollback()
  359. def truncate(self, table_name):
  360. sql = 'truncate table {}'.format(table_name)
  361. self.cursor.execute(sql)
  362. self.con.commit()
  363. def close(self):
  364. self.cursor.close()
  365. self.con.close()
  366. @app.route('/behavioral_statistics', methods=['GET', 'POST'])
  367. def behavioral_statistics():
  368. city = request.args.get('city', default=None, type=str)
  369. age = request.args.get('age', default=None, type=str)
  370. crowd = request.args.get('crowd', default=None, type=str)
  371. print(city, age, crowd)
  372. mvp = Mvp()
  373. scores = mvp.write_tag(city, age, crowd)
  374. return json.dumps(scores, ensure_ascii=False)
  375. @app.route('/infos', methods=["GET", 'POST'])
  376. def get_city_age_crowd():
  377. mvp = Mvp()
  378. infos = {'城市': mvp.citys, '年龄段': mvp.age}
  379. return json.dumps(infos, ensure_ascii=False)
  380. @app.route('/crowd_people', methods=['GET', 'POST'])
  381. def crowd_people():
  382. mvp = Mvp()
  383. people_count = mvp.get_crowd_people()
  384. return json.dumps(people_count, ensure_ascii=False)
  385. if __name__ == '__main__':
  386. app.run(
  387. host='0.0.0.0',
  388. port=5001
  389. )