瀏覽代碼

elab_mvp: 新增测试数据抽取测试接口

Signed-off-by: binren <zhangbr@elab-plus.com>
binren 5 年之前
當前提交
f0571fc580

+ 13 - 0
.idea/flask_demo.iml

@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/resources" isTestSource="false" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.7" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+  </component>
+</module>

+ 6 - 0
.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

+ 7 - 0
.idea/misc.xml

@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
+  <component name="PyCharmProfessionalAdvertiser">
+    <option name="shown" value="true" />
+  </component>
+</project>

+ 8 - 0
.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/flask_demo.iml" filepath="$PROJECT_DIR$/.idea/flask_demo.iml" />
+    </modules>
+  </component>
+</project>

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

二進制
__pycache__/flask_app.cpython-36.pyc


二進制
db/__pycache__/mysql_db.cpython-37.pyc


+ 60 - 0
db/mysql_db.py

@@ -0,0 +1,60 @@
+import pymysql as ps
+
+
+class MysqlDB:
+    """
+        Thin wrapper around one pymysql connection and its cursor.
+
+        The connection is opened in ``__init__`` and stays open until
+        ``close`` is called; the instance also works as a context manager.
+    """
+    # Class-level defaults so the attributes exist before a connection is made.
+    con = None
+    cursor = None
+
+    def __init__(self, db_name, host='192.168.0.13', port=3306, user='root', password='elab@123'):
+        """
+            Connect to the given database.
+        :param db_name: name of the database to select.
+        :param host: server address.
+        :param port: server port.
+        :param user: login name.
+        :param password: login password.
+        """
+        # NOTE(review): the credentials remain as defaults only to stay
+        # backward compatible; they should come from configuration.
+        self.db_name = db_name
+        self.con = ps.connect(host=host, port=port, user=user, password=password,
+                              db=self.db_name, charset='utf8')
+        self.cursor = self.con.cursor()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+        return False
+
+    def show_tables(self):
+        """Print every table of the connected database."""
+        self.cursor.execute('show tables')
+        for table in self.cursor.fetchall():
+            print(table)
+
+    def select(self, sql, params=None):
+        """
+            Run a query and return all rows.
+        :param sql: statement, optionally with ``%s`` placeholders.
+        :param params: placeholder values; an empty value means "no parameters".
+        :return: the rows returned by ``cursor.fetchall()``.
+        """
+        self.cursor.execute(sql, params or None)
+        return self.cursor.fetchall()
+
+    def add_some(self, sql, data):
+        """
+            Insert many rows in one transaction (best effort).
+        :return: True when committed, False when the batch was rolled back.
+        """
+        try:
+            self.cursor.executemany(sql, data)
+            self.con.commit()
+        except Exception as exc:
+            # Best effort is kept, but the cause is reported and
+            # KeyboardInterrupt/SystemExit are no longer swallowed.
+            print('数据插入异常...', exc)
+            self.con.rollback()
+            return False
+        return True
+
+    def add_one(self, sql, data):
+        """
+            Insert a single row (best effort).
+        :return: True when committed, False when the insert was rolled back.
+        """
+        try:
+            self.cursor.execute(sql, data)
+            self.con.commit()
+        except Exception as exc:
+            print('数据插入异常...', exc)
+            self.con.rollback()
+            return False
+        return True
+
+    def truncate(self, table_name):
+        """
+            Empty a table.
+        :param table_name: plain or schema-qualified table name.
+        :raises ValueError: if the name contains anything but letters, digits, ``_`` or ``.``.
+        """
+        # Identifiers cannot be bound as query parameters, so the name is
+        # validated before it is placed into the statement.
+        if not table_name.replace('_', '').replace('.', '').isalnum():
+            raise ValueError('invalid table name: {!r}'.format(table_name))
+        sql = 'truncate table {}'.format(table_name)
+        self.cursor.execute(sql)
+        self.con.commit()
+
+    def close(self):
+        """Close the cursor and the connection."""
+        self.cursor.close()
+        self.con.close()
+
+
+if __name__ == '__main__':
+    # Manual smoke test: print the distinct cities found in the answer records.
+    mysql_db = MysqlDB('marketing_db')
+    try:
+        # sql = 'select * from bq_question a left join bq_option b on a.id = b.question_id where a.status = 1 and b.status = 1'
+        sql = 'select city from f_t_daren_score_2 group by city'
+        for rt in mysql_db.select(sql):
+            print(rt)
+    finally:
+        # Release the connection even when the query fails.
+        mysql_db.close()

+ 461 - 0
flask_app.py

@@ -0,0 +1,461 @@
+from flask import Flask, request, render_template
+import os
+from werkzeug.utils import secure_filename
+import time
+import json
+import pandas as pd
+import openpyxl as ox
+from itertools import groupby
+import os
+import pymysql as ps
+
+
+app = Flask(__name__)
+
+
+class Mvp:
+    """
+     Statistics over the "ce mvp" questionnaire answers.
+     Cities handled as special cases: 北京市, 上海市, 重庆市, 天津市
+
+     Creating an instance opens two database connections, truncates
+     ``mvp_standard_score`` and loads the Excel configuration; call
+     ``close`` when finished to release the connections.
+    """
+
+    # Birth-year bracket label -> short label.
+    age_dict = {
+        '00-04年生': '00后',
+        '05-09年生': '05后',
+        '50-59年生': '50后',
+        '60-69年生': '60后',
+        '70-74年生': '70后',
+        '75-79年生': '75后',
+        '80-84年生': '80后',
+        '85-89年生': '85后',
+        '90-94年生': '90后',
+        '95-99年生': '95后'
+    }
+
+    # Crowd type codes.
+    crowd = ['A', 'B', 'C', 'D', 'E', 'F']
+
+    # Distinct cities present in the answer records.
+    sql_1 = 'select city from f_t_daren_score_2 group by city'
+
+    # Parent option and parent question for a pair of serial numbers.
+    # The two statuses are tested separately: MySQL evaluates the chained
+    # form "a.status = b.status = 1" as "(a.status = b.status) = 1".
+    sql_2 = 'select a.id, a.content, b.id, b.name from bq_option a left join bq_question b on a.question_id = b.id ' \
+            'where a.serial_number = %s and b.serial_number = %s and a.status = 1 and b.status = 1 '
+    # Insert into mvp_question_classification.
+    sql_3 = 'insert into mvp_question_classification(question_serial_number, question_content, ' \
+            'option_serial_number, option_content, tag, corr) values(%s, %s, %s, %s, %s, %s) '
+
+    # Distinct age brackets of the respondents.
+    sql_4 = 'select nld from f_t_daren_score_2 group by nld'
+
+    # Answer-record count per group_type for a city/province, an age bracket and a set of uuids.
+    sql_5 = 'select group_type, COUNT(uuid) from f_t_daren_score_2 where (city = %s or province = %s) and nld ' \
+            '= %s and uuid in %s group by group_type '
+
+    # Sub options that belong to a parent option.
+    sql_6 = 'SELECT c.id, c.sub_question_id, c.content FROM bq_sub_option c WHERE c.father_id in (SELECT a.id FROM ' \
+            'bq_option a ' \
+            'LEFT JOIN bq_question b ON a.question_id = b.id WHERE a.serial_number = %s AND b.serial_number = %s ' \
+            'and a.status = 1 and b.status = 1) and c.status = 1 '
+
+    # Group types of the test cases whose question_ids contain a sub question id.
+    sql_7 = 'select group_type from bq_testcase where status = 1 and FIND_IN_SET(%s, question_ids)'
+
+    # Answer count for a set of sub option ids (a space now separates the placeholder from "and").
+    sql_8 = 'SELECT count(uuid) FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.sub_question_id = ' \
+            'b.sub_question_id AND a.score  = b.score WHERE a.testcase_id = b.testcase_id and b.sub_option_id in %s ' \
+            'and (a.city = %s or a.province = %s) and a.nld = %s and a.uuid in %s'
+
+    # Insert a computed score into the summary table.
+    sql_9 = 'insert into mvp_standard_score(city, age, tag, crowd_type, score)  VALUES(%s, %s, %s, %s, %s)'
+
+    # Comma-joined sub option ids answered by each uuid (statuses tested separately, see sql_2).
+    sql_10 = 'select  DISTINCT uuid, GROUP_CONCAT(DISTINCT b.sub_option_id)  from f_t_daren_score_2 a left join ' \
+             'd_shangju_tiku_02 b on a.sub_question_id = b.sub_question_id and a.score = b.score where a.status = 1 ' \
+             'and b.status = 1 group by uuid '
+
+    def __init__(self, path=None):
+        """
+            Open the database connections and load the configuration.
+        :param path: optional path of the Excel workbook, forwarded to ExcelUtil.
+        """
+        self.shangju_db = MysqlDB('shangju')
+        self.marketing_db = MysqlDB('marketing_db')
+        # NOTE(review): every instantiation empties mvp_standard_score while the
+        # insert in calculation_standard_score is disabled - confirm this is intended.
+        self.shangju_db.truncate('mvp_standard_score')
+        self.tag_data = ExcelUtil(path=path).init_mvp_data()
+        self.crowd_info = ExcelUtil(path=path, sheet_name='选项-人群分类对应表').init_crowd_info()
+        self.citys = self.init_city()
+        self.age = self.init_age()
+        self.people_sub_option_ids = self.marketing_db.select(self.sql_10)
+        self.crowd_contain_sub_option_ids = self.get_crowd_contain_sub_option_ids()
+        # Rows of (group_type, count); refreshed by behavior_tag_init.
+        self.group_type_count = ()
+
+    def close(self):
+        """Close both database connections."""
+        self.shangju_db.close()
+        self.marketing_db.close()
+
+    def init_city(self):
+        """
+            Collect the cities to report on.
+        :return: the four special-case cities followed by every non-null city of the answer records.
+        """
+        citys = ['北京市', '上海市', '重庆市', '天津市']
+        citys.extend(row[0] for row in self.marketing_db.select(self.sql_1) if row[0] is not None)
+        return citys
+
+    def write_tag(self, city=None, age=None, crowd=None):
+        """
+            Compute the standard scores and return them.
+            Writing the Excel tag configuration into mvp_question_classification
+            (sql_2 / sql_3) is currently disabled, so nothing is persisted here.
+        :return: list of [city, age, tag, crowd_type, score] rows.
+        """
+        result = self.city_age_crowd(city, age, crowd)
+        print('update finished!!!')
+        return result
+
+    def init_age(self):
+        """
+           Collect the non-null age brackets of the answer records.
+        """
+        return [row[0] for row in self.marketing_db.select(self.sql_4) if row[0] is not None]
+
+    def city_age_crowd(self, city=None, age=None, crowd=None):
+        """
+            Compute scores for one case, or for every case when any argument is missing.
+        :return: list of [city, age, tag, crowd_type, score] rows.
+        """
+        if city is not None and age is not None and crowd is not None:
+            print('获取指定城市,年龄段,人群类型的数据...')
+            return self._scores_for(city, age, crowd, self.get_people_uuid_by_type(crowd))
+
+        print('获取所有case的数据...')
+        # The uuids of a crowd type do not depend on city or age, so look them up once.
+        uuids_by_crowd = {crowd_type: self.get_people_uuid_by_type(crowd_type) for crowd_type in self.crowd}
+        result = []
+        for each_city in self.citys:
+            for each_age in self.age:
+                # NOTE(review): this skips every 上海市 row and every 85-89年生 row,
+                # not only their combination - confirm that is the intent.
+                if each_city == '上海市' or each_age == '85-89年生':
+                    continue
+                for crowd_type in self.crowd:
+                    result.extend(self._scores_for(each_city, each_age, crowd_type, uuids_by_crowd[crowd_type]))
+        return result
+
+    def _scores_for(self, city, age, crowd_type, people_uuids):
+        """Return the score rows of one city/age/crowd case; empty when the crowd has no respondents."""
+        if not people_uuids:
+            return []
+        print('{}-{}-{}'.format(city, age, crowd_type))
+        datas = self.behavior_tag_init(city, age, people_uuids)
+        return self.calculation_standard_score(datas, city, age, crowd_type)
+
+    def behavior_tag_init(self, city, age, people_uuids):
+        """
+            Build, per tag, rows of [question, option, corr, fz, fm, c] where c = fz / fm
+            (0 when fm is 0), then extend them through indicator_calculation_d_e.
+        """
+        # molecular_value reads these counts from the instance.
+        self.group_type_count = self.marketing_db.select(self.sql_5, [city, city, age, people_uuids])
+        result = {}
+        for key, values in self.tag_data.items():
+            elements = []
+            for value in values:
+                # value[0] has the form "<question serial>-<option serial>".
+                parts = value[0].split('-')
+                question, option = parts[0], parts[1]
+                corr = value[1]
+                fz, fm = self.molecular_value(question, option, city, age, people_uuids)
+                if fm == 0:
+                    c = 0
+                else:
+                    c = fz / fm
+                elements.append([question, option, corr, fz, fm, c])
+            result[key] = elements
+        return self.indicator_calculation_d_e(result)
+
+    def molecular_value(self, queston, option, city, age, people_uuids):
+        """
+            Compute numerator and denominator for one parent option.
+        :return: (answer count of the option's sub options, summed record count of the
+                 group types whose test cases contain the matching sub questions).
+        """
+        # Sub options (and their sub questions) that belong to the parent option.
+        rows = self.shangju_db.select(self.sql_6, [option, queston])
+        sub_option_ids = []
+        group_types = set()
+        for row in rows:
+            sub_option_id, sub_question_id = row[0], row[1]
+            for group_row in self.shangju_db.select(self.sql_7, [sub_question_id]):
+                group_types.add(group_row[0])
+            sub_option_ids.append(sub_option_id)
+
+        # Number of answer records that picked one of the sub options.
+        sub_options_count = 0
+        if sub_option_ids:
+            counted = self.marketing_db.select(self.sql_8, [sub_option_ids, city, city, age, people_uuids])
+            sub_options_count = counted[0][0]
+
+        # Record count of the group types collected above.
+        denominator_value = sum(info[1] for info in self.group_type_count if info[0] in group_types)
+        return sub_options_count, denominator_value
+
+    def indicator_calculation_d_e(self, data):
+        """
+            Append d and e to every row: d = c / (sum of fm over the tag), 0 when that sum
+            is 0; e = c - (minimum c of the tag).
+        """
+        result = {}
+        for key, values in data.items():
+            fm_total = sum(row[4] for row in values)
+            min_c = min(row[5] for row in values)
+            elements = []
+            for row in values:
+                c = row[5]
+                d = 0 if fm_total == 0 else c / fm_total
+                elements.append(list(row) + [d, c - min_c])
+            result[key] = elements
+        return result
+
+    def calculation_standard_score(self, datas, city, age, crowd_type):
+        """
+            Compute the standard score of every tag: the minimum c of the tag plus the
+            sum of corr * e over its rows (rows with a missing corr or e are skipped).
+        :return: list of [city, age, tag, crowd_type, score] rows.
+        """
+        scores = []
+        for key, rows in datas.items():
+            print(key)
+            print('     父题序号 父选项序号 相关系系数 分子值 分母值 百分比 人数权重 偏离值')
+            f = min(row[5] for row in rows)
+            for row in rows:
+                print('     {}'.format(row))
+                if row[2] is not None and row[7] is not None:
+                    f += float(row[2] * row[7])
+            print('     标准分:{}'.format(f))
+            scores.append([city, age, key, crowd_type, f])
+        # self.shangju_db.add_some(self.sql_9, scores)
+        return scores
+
+    def get_crowd_people(self):
+        """Return {crowd type: number of respondents in that crowd}."""
+        return {crowd_type: len(self.get_people_uuid_by_type(crowd_type)) for crowd_type in self.crowd}
+
+    def get_people_uuid_by_type(self, type):
+        """
+            Return the uuids that answered at least one sub option of the crowd type.
+        :param type: crowd type code; an unknown code yields an empty list.
+        """
+        # The answered ids come from GROUP_CONCAT as text, so the configured ids
+        # are compared as text too; numeric ids would otherwise never match.
+        wanted = {str(sub_option_id) for sub_option_id in self.crowd_contain_sub_option_ids.get(type, ())}
+        if not wanted:
+            return []
+        uuids = []
+        for people in self.people_sub_option_ids:
+            answered = str(people[1]).split(',')
+            if not wanted.isdisjoint(answered):
+                uuids.append(people[0])
+        return uuids
+
+    def get_crowd_contain_sub_option_ids(self):
+        """
+            Resolve the sub option ids of every crowd type (A-F).
+        :return: {crowd type: [sub option id, ...]}
+        """
+        infos = {}
+        for key, values in self.crowd_info.items():
+            sub_option_ids = []
+            for value in values:
+                if value is None:
+                    continue
+                # value has the form "<question serial>-<option serial>".
+                vals = str(value).split('-')
+                question, option = vals[0], vals[1]
+                for row in self.shangju_db.select(self.sql_6, [option, question]):
+                    sub_option_ids.append(row[0])
+            infos[key] = sub_option_ids
+        return infos
+
+
+class ExcelUtil:
+    """
+        Parses the MVP Excel workbook.
+    """
+    # Default resources directory.
+    # NOTE(review): this resolves to <parent of this file's directory>/elab_mvp/resources,
+    # so it depends on the name of the checkout directory - confirm.
+    dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'/elab_mvp/resources'
+
+    # (crowd type, column index of its membership flag) on the crowd sheet.
+    _crowd_columns = (('A', 6), ('B', 7), ('C', 8), ('D', 9), ('E', 10), ('F', 11))
+
+    def __init__(self, sheet_name=None, path=None):
+        """
+        :param sheet_name: sheet to read; defaults to '硬标签+行为'.
+        :param path: workbook path; defaults to mvp.xlsx inside ``dir_path``.
+        """
+        self.path = path if path else os.path.join(self.dir_path, 'mvp.xlsx')
+        self.sheet_name = sheet_name if sheet_name else '硬标签+行为'
+
+    def read_excel_by_pd(self):
+        """Print the first rows of the workbook as read by pandas."""
+        df = pd.read_excel(self.path)
+        data = df.head()
+        print('获取到的数据{}'.format(data))
+
+    def read_excel_by_ox(self):
+        """
+            Open the workbook with openpyxl (cached formula values, not formulas).
+        :return: the worksheet named ``self.sheet_name``.
+        """
+        work_book = ox.load_workbook(self.path, data_only=True)
+        # Index access replaces the deprecated Workbook.get_sheet_by_name.
+        return work_book[self.sheet_name]
+
+    def init_crowd_info(self):
+        """
+            Collect, per crowd type, the parent options flagged with 1 (without duplicates).
+        :return: {'A': [...], ..., 'F': [...]}
+        """
+        rows = list(self.read_excel_by_ox().rows)
+        crowds = {name: [] for name, _ in self._crowd_columns}
+        # Data starts on the third row; column 4 holds the option.
+        for row in rows[2:]:
+            option = row[4].value
+            for name, column in self._crowd_columns:
+                # Each crowd is de-duplicated against its own list; the original
+                # checked crowd D while filling crowd C.
+                if row[column].value == 1 and option not in crowds[name]:
+                    crowds[name].append(option)
+        return crowds
+
+    def init_mvp_data(self):
+        """
+            Collect, per tag, the [question-option serial, corr] pairs from row 25 onwards.
+            A row without a tag belongs to the most recent tag above it.
+        :return: {tag: [[serial, corr], ...]}
+        """
+        rows = list(self.read_excel_by_ox().rows)[24:]
+        tag_name = None
+        result = {}
+        for row in rows:
+            tag = row[1].value
+            values = row[3].value
+            corr = row[4].value
+            if tag:
+                tag_name = tag
+            if values is not None:
+                # Accumulating by key keeps every row of a tag even when the tag
+                # reappears after another one; grouping consecutive runs kept only the last run.
+                result.setdefault(tag_name, []).append([values, corr])
+        return result
+
+
+class MysqlDB:
+    """
+        Thin wrapper around one pymysql connection and its cursor.
+
+        The connection is opened in ``__init__`` and stays open until
+        ``close`` is called; the instance also works as a context manager.
+    """
+    # Class-level defaults so the attributes exist before a connection is made.
+    con = None
+    cursor = None
+
+    def __init__(self, db_name, host='192.168.0.13', port=3306, user='root', password='elab@123'):
+        """
+            Connect to the given database.
+        :param db_name: name of the database to select.
+        :param host: server address.
+        :param port: server port.
+        :param user: login name.
+        :param password: login password.
+        """
+        # NOTE(review): the credentials remain as defaults only to stay
+        # backward compatible; they should come from configuration.
+        self.db_name = db_name
+        self.con = ps.connect(host=host, port=port, user=user, password=password,
+                              db=self.db_name, charset='utf8')
+        self.cursor = self.con.cursor()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+        return False
+
+    def show_tables(self):
+        """Print every table of the connected database."""
+        self.cursor.execute('show tables')
+        for table in self.cursor.fetchall():
+            print(table)
+
+    def select(self, sql, params=None):
+        """
+            Run a query and return all rows.
+        :param sql: statement, optionally with ``%s`` placeholders.
+        :param params: placeholder values; an empty value means "no parameters".
+        :return: the rows returned by ``cursor.fetchall()``.
+        """
+        self.cursor.execute(sql, params or None)
+        return self.cursor.fetchall()
+
+    def add_some(self, sql, data):
+        """
+            Insert many rows in one transaction (best effort).
+        :return: True when committed, False when the batch was rolled back.
+        """
+        try:
+            self.cursor.executemany(sql, data)
+            self.con.commit()
+        except Exception as exc:
+            # Best effort is kept, but the cause is reported and
+            # KeyboardInterrupt/SystemExit are no longer swallowed.
+            print('数据插入异常...', exc)
+            self.con.rollback()
+            return False
+        return True
+
+    def add_one(self, sql, data):
+        """
+            Insert a single row (best effort).
+        :return: True when committed, False when the insert was rolled back.
+        """
+        try:
+            self.cursor.execute(sql, data)
+            self.con.commit()
+        except Exception as exc:
+            print('数据插入异常...', exc)
+            self.con.rollback()
+            return False
+        return True
+
+    def truncate(self, table_name):
+        """
+            Empty a table.
+        :param table_name: plain or schema-qualified table name.
+        :raises ValueError: if the name contains anything but letters, digits, ``_`` or ``.``.
+        """
+        # Identifiers cannot be bound as query parameters, so the name is
+        # validated before it is placed into the statement.
+        if not table_name.replace('_', '').replace('.', '').isalnum():
+            raise ValueError('invalid table name: {!r}'.format(table_name))
+        sql = 'truncate table {}'.format(table_name)
+        self.cursor.execute(sql)
+        self.con.commit()
+
+    def close(self):
+        """Close the cursor and the connection."""
+        self.cursor.close()
+        self.con.close()
+
+
+#
+# @app.route('/', methods=['GET', 'POST'])
+# def home():
+#     return render_template('form.html')
+#
+#
+# @app.route('/excel_upload', methods=["GET", 'POST'])
+# def excel_upload():
+#     dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'\elab_mvp\resources\upload_files'
+#     print(dir_path)
+#     email = request.form['email']
+#     excel = request.files['mvp_excel']
+#     file_name = str(time.time()) + secure_filename(excel.filename)
+#     save_path = os.path.join(dir_path, file_name)
+#     excel.save(save_path)
+#     mvp = Mvp(save_path)
+#     mvp.write_tag()
+#     return '成功'
+
+
+def _json_response(payload):
+    """Serialize ``payload`` as non-ASCII-escaped JSON and label the response as JSON."""
+    return app.response_class(json.dumps(payload, ensure_ascii=False), mimetype='application/json')
+
+
+def _with_mvp(compute):
+    """
+        Build an Mvp, return ``compute(mvp)`` as a JSON response and always
+        close the two database connections the Mvp opened.
+    """
+    mvp = Mvp()
+    try:
+        return _json_response(compute(mvp))
+    finally:
+        # Each request opens fresh connections; without this they are never released.
+        for db in (mvp.shangju_db, mvp.marketing_db):
+            db.close()
+
+
+@app.route('/behavioral_statistics', methods=['GET', 'POST'])
+def behavioral_statistics():
+    """
+        Return the standard scores as JSON.
+        Query parameters ``city``, ``age`` and ``crowd`` select one case; when any
+        of them is missing the scores of every case are computed.
+    """
+    city = request.args.get('city', default=None, type=str)
+    age = request.args.get('age', default=None, type=str)
+    crowd = request.args.get('crowd', default=None, type=str)
+    print(city, age, crowd)
+    return _with_mvp(lambda mvp: mvp.write_tag(city, age, crowd))
+
+
+@app.route('/infos', methods=["GET", 'POST'])
+def get_city_age_crowd():
+    """Return the available cities and age brackets as JSON."""
+    return _with_mvp(lambda mvp: {'城市': mvp.citys, '年龄段': mvp.age})
+
+
+@app.route('/crowd_people', methods=['GET', 'POST'])
+def crowd_people():
+    """Return the number of respondents per crowd type as JSON."""
+    return _with_mvp(lambda mvp: mvp.get_crowd_people())
+
+
+if __name__ == '__main__':
+    # Start the Flask server on host 0.0.0.0, port 5001.
+    app.run(host='0.0.0.0', port=5001)

二進制
mvp/__pycache__/mvp.cpython-36.pyc


二進制
mvp/__pycache__/mvp.cpython-37.pyc


+ 267 - 0
mvp/mvp.py

@@ -0,0 +1,267 @@
+from db.mysql_db import MysqlDB
+from utils.excel_util import ExcelUtil
+
+
+class Mvp:
+    """
+     Statistics over the "ce mvp" questionnaire answers.
+     Cities handled as special cases: 北京市, 上海市, 重庆市, 天津市
+
+     Creating an instance opens two database connections, truncates
+     ``mvp_standard_score`` and loads the Excel configuration; call
+     ``close`` when finished to release the connections.
+    """
+
+    # Birth-year bracket label -> short label.
+    age_dict = {
+        '00-04年生': '00后',
+        '05-09年生': '05后',
+        '50-59年生': '50后',
+        '60-69年生': '60后',
+        '70-74年生': '70后',
+        '75-79年生': '75后',
+        '80-84年生': '80后',
+        '85-89年生': '85后',
+        '90-94年生': '90后',
+        '95-99年生': '95后'
+    }
+
+    # Crowd type codes.
+    crowd = ['A', 'B', 'C', 'D', 'E', 'F']
+
+    # Distinct cities present in the answer records.
+    sql_1 = 'select city from f_t_daren_score_2 group by city'
+
+    # Parent option and parent question for a pair of serial numbers.
+    # The two statuses are tested separately: MySQL evaluates the chained
+    # form "a.status = b.status = 1" as "(a.status = b.status) = 1".
+    sql_2 = 'select a.id, a.content, b.id, b.name from bq_option a left join bq_question b on a.question_id = b.id ' \
+            'where a.serial_number = %s and b.serial_number = %s and a.status = 1 and b.status = 1 '
+    # Insert into mvp_question_classification.
+    sql_3 = 'insert into mvp_question_classification(question_serial_number, question_content, ' \
+            'option_serial_number, option_content, tag, corr) values(%s, %s, %s, %s, %s, %s) '
+
+    # Distinct age brackets of the respondents.
+    sql_4 = 'select nld from f_t_daren_score_2 group by nld'
+
+    # Answer-record count per group_type for a city/province, an age bracket and a set of uuids.
+    sql_5 = 'select group_type, COUNT(uuid) from f_t_daren_score_2 where (city = %s or province = %s) and nld ' \
+            '= %s and uuid in %s group by group_type '
+
+    # Sub options that belong to a parent option.
+    sql_6 = 'SELECT c.id, c.sub_question_id, c.content FROM bq_sub_option c WHERE c.father_id in (SELECT a.id FROM ' \
+            'bq_option a ' \
+            'LEFT JOIN bq_question b ON a.question_id = b.id WHERE a.serial_number = %s AND b.serial_number = %s ' \
+            'and a.status = 1 and b.status = 1) and c.status = 1 '
+
+    # Group types of the test cases whose question_ids contain a sub question id.
+    sql_7 = 'select group_type from bq_testcase where status = 1 and FIND_IN_SET(%s, question_ids)'
+
+    # Answer count for a set of sub option ids (a space now separates the placeholder from "and").
+    sql_8 = 'SELECT count(uuid) FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.sub_question_id = ' \
+            'b.sub_question_id AND a.score  = b.score WHERE a.testcase_id = b.testcase_id and b.sub_option_id in %s ' \
+            'and (a.city = %s or a.province = %s) and a.nld = %s and a.uuid in %s'
+
+    # Insert a computed score into the summary table.
+    sql_9 = 'insert into mvp_standard_score(city, age, tag, crowd_type, score)  VALUES(%s, %s, %s, %s, %s)'
+
+    # Comma-joined sub option ids answered by each uuid (statuses tested separately, see sql_2).
+    sql_10 = 'select  DISTINCT uuid, GROUP_CONCAT(DISTINCT b.sub_option_id)  from f_t_daren_score_2 a left join ' \
+             'd_shangju_tiku_02 b on a.sub_question_id = b.sub_question_id and a.score = b.score where a.status = 1 ' \
+             'and b.status = 1 group by uuid '
+
+    def __init__(self, path=None):
+        """
+            Open the database connections and load the configuration.
+        :param path: optional path of the Excel workbook, forwarded to ExcelUtil.
+        """
+        self.shangju_db = MysqlDB('shangju')
+        self.marketing_db = MysqlDB('marketing_db')
+        # NOTE(review): every instantiation empties mvp_standard_score while the
+        # insert in calculation_standard_score is disabled - confirm this is intended.
+        self.shangju_db.truncate('mvp_standard_score')
+        self.tag_data = ExcelUtil(path=path).init_mvp_data()
+        self.crowd_info = ExcelUtil(path=path, sheet_name='选项-人群分类对应表').init_crowd_info()
+        self.citys = self.init_city()
+        self.age = self.init_age()
+        self.people_sub_option_ids = self.marketing_db.select(self.sql_10)
+        self.crowd_contain_sub_option_ids = self.get_crowd_contain_sub_option_ids()
+        # Rows of (group_type, count); refreshed by behavior_tag_init.
+        self.group_type_count = ()
+
+    def close(self):
+        """Close both database connections."""
+        self.shangju_db.close()
+        self.marketing_db.close()
+
+    def init_city(self):
+        """
+            Collect the cities to report on.
+        :return: the four special-case cities followed by every non-null city of the answer records.
+        """
+        citys = ['北京市', '上海市', '重庆市', '天津市']
+        citys.extend(row[0] for row in self.marketing_db.select(self.sql_1) if row[0] is not None)
+        return citys
+
+    def write_tag(self, city=None, age=None, crowd=None):
+        """
+            Compute the standard scores and return them.
+            Writing the Excel tag configuration into mvp_question_classification
+            (sql_2 / sql_3) is currently disabled, so nothing is persisted here.
+        :return: list of [city, age, tag, crowd_type, score] rows.
+        """
+        result = self.city_age_crowd(city, age, crowd)
+        print('update finished!!!')
+        return result
+
+    def init_age(self):
+        """
+           Collect the non-null age brackets of the answer records.
+        """
+        return [row[0] for row in self.marketing_db.select(self.sql_4) if row[0] is not None]
+
+    def city_age_crowd(self, city=None, age=None, crowd=None):
+        """
+            Compute scores for one case, or for every case when any argument is missing.
+        :return: list of [city, age, tag, crowd_type, score] rows.
+        """
+        if city is not None and age is not None and crowd is not None:
+            print('获取指定城市,年龄段,人群类型的数据...')
+            return self._scores_for(city, age, crowd, self.get_people_uuid_by_type(crowd))
+
+        print('获取所有case的数据...')
+        # The uuids of a crowd type do not depend on city or age, so look them up once.
+        uuids_by_crowd = {crowd_type: self.get_people_uuid_by_type(crowd_type) for crowd_type in self.crowd}
+        result = []
+        for each_city in self.citys:
+            for each_age in self.age:
+                # NOTE(review): this skips every 上海市 row and every 85-89年生 row,
+                # not only their combination - confirm that is the intent.
+                if each_city == '上海市' or each_age == '85-89年生':
+                    continue
+                for crowd_type in self.crowd:
+                    result.extend(self._scores_for(each_city, each_age, crowd_type, uuids_by_crowd[crowd_type]))
+        return result
+
+    def _scores_for(self, city, age, crowd_type, people_uuids):
+        """Return the score rows of one city/age/crowd case; empty when the crowd has no respondents."""
+        if not people_uuids:
+            return []
+        print('{}-{}-{}'.format(city, age, crowd_type))
+        datas = self.behavior_tag_init(city, age, people_uuids)
+        return self.calculation_standard_score(datas, city, age, crowd_type)
+
+    def behavior_tag_init(self, city, age, people_uuids):
+        """
+            Build, per tag, rows of [question, option, corr, fz, fm, c] where c = fz / fm
+            (0 when fm is 0), then extend them through indicator_calculation_d_e.
+        """
+        # molecular_value reads these counts from the instance.
+        self.group_type_count = self.marketing_db.select(self.sql_5, [city, city, age, people_uuids])
+        result = {}
+        for key, values in self.tag_data.items():
+            elements = []
+            for value in values:
+                # value[0] has the form "<question serial>-<option serial>".
+                parts = value[0].split('-')
+                question, option = parts[0], parts[1]
+                corr = value[1]
+                fz, fm = self.molecular_value(question, option, city, age, people_uuids)
+                if fm == 0:
+                    c = 0
+                else:
+                    c = fz / fm
+                elements.append([question, option, corr, fz, fm, c])
+            result[key] = elements
+        return self.indicator_calculation_d_e(result)
+
+    def molecular_value(self, queston, option, city, age, people_uuids):
+        """
+            Compute numerator and denominator for one parent option.
+        :return: (answer count of the option's sub options, summed record count of the
+                 group types whose test cases contain the matching sub questions).
+        """
+        # Sub options (and their sub questions) that belong to the parent option.
+        rows = self.shangju_db.select(self.sql_6, [option, queston])
+        sub_option_ids = []
+        group_types = set()
+        for row in rows:
+            sub_option_id, sub_question_id = row[0], row[1]
+            for group_row in self.shangju_db.select(self.sql_7, [sub_question_id]):
+                group_types.add(group_row[0])
+            sub_option_ids.append(sub_option_id)
+
+        # Number of answer records that picked one of the sub options.
+        sub_options_count = 0
+        if sub_option_ids:
+            counted = self.marketing_db.select(self.sql_8, [sub_option_ids, city, city, age, people_uuids])
+            sub_options_count = counted[0][0]
+
+        # Record count of the group types collected above.
+        denominator_value = sum(info[1] for info in self.group_type_count if info[0] in group_types)
+        return sub_options_count, denominator_value
+
+    def indicator_calculation_d_e(self, data):
+        """
+            Append d and e to every row: d = c / (sum of fm over the tag), 0 when that sum
+            is 0; e = c - (minimum c of the tag).
+        """
+        result = {}
+        for key, values in data.items():
+            fm_total = sum(row[4] for row in values)
+            min_c = min(row[5] for row in values)
+            elements = []
+            for row in values:
+                c = row[5]
+                d = 0 if fm_total == 0 else c / fm_total
+                elements.append(list(row) + [d, c - min_c])
+            result[key] = elements
+        return result
+
+    def calculation_standard_score(self, datas, city, age, crowd_type):
+        """
+            Compute the standard score of every tag: the minimum c of the tag plus the
+            sum of corr * e over its rows (rows with a missing corr or e are skipped).
+        :return: list of [city, age, tag, crowd_type, score] rows.
+        """
+        scores = []
+        for key, rows in datas.items():
+            print(key)
+            print('     父题序号 父选项序号 相关系系数 分子值 分母值 百分比 人数权重 偏离值')
+            f = min(row[5] for row in rows)
+            for row in rows:
+                print('     {}'.format(row))
+                if row[2] is not None and row[7] is not None:
+                    f += float(row[2] * row[7])
+            print('     标准分:{}'.format(f))
+            scores.append([city, age, key, crowd_type, f])
+        # self.shangju_db.add_some(self.sql_9, scores)
+        return scores
+
+    def get_crowd_people(self):
+        """Return {crowd type: number of respondents in that crowd}."""
+        return {crowd_type: len(self.get_people_uuid_by_type(crowd_type)) for crowd_type in self.crowd}
+
+    def get_people_uuid_by_type(self, type):
+        """
+            Return the uuids that answered at least one sub option of the crowd type.
+        :param type: crowd type code; an unknown code yields an empty list.
+        """
+        # The answered ids come from GROUP_CONCAT as text, so the configured ids
+        # are compared as text too; numeric ids would otherwise never match.
+        wanted = {str(sub_option_id) for sub_option_id in self.crowd_contain_sub_option_ids.get(type, ())}
+        if not wanted:
+            return []
+        uuids = []
+        for people in self.people_sub_option_ids:
+            answered = str(people[1]).split(',')
+            if not wanted.isdisjoint(answered):
+                uuids.append(people[0])
+        return uuids
+
+    def get_crowd_contain_sub_option_ids(self):
+        """
+            Resolve the sub option ids of every crowd type (A-F).
+        :return: {crowd type: [sub option id, ...]}
+        """
+        infos = {}
+        for key, values in self.crowd_info.items():
+            sub_option_ids = []
+            for value in values:
+                if value is None:
+                    continue
+                # value has the form "<question serial>-<option serial>".
+                vals = str(value).split('-')
+                question, option = vals[0], vals[1]
+                for row in self.shangju_db.select(self.sql_6, [option, question]):
+                    sub_option_ids.append(row[0])
+            infos[key] = sub_option_ids
+        return infos
+
+
+if __name__ == '__main__':
+    # Build the statistics object and run write_tag with its default arguments.
+    Mvp().write_tag()

+ 30 - 0
resources/mvp.sql

@@ -0,0 +1,30 @@
+-- mvp standard score table: stores a score together with its city, age and tag.
+-- The statement is terminated with ';' so the file can be run as a script.
+create table `mvp_standard_score` (
+    `id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'id',
+    `score` VARCHAR(10) NOT NULL COMMENT '标准分',
+    `city` VARCHAR(10) DEFAULT NULL COMMENT '城市',
+    `age` varchar(10) default NULL COMMENT '年龄',
+    `tag` VARCHAR(10) DEFAULT NULL COMMENT '标签',
+    `status` int(11) DEFAULT '1' COMMENT '1:EFFECTIVE:有效\r\n            -1:INVALID:无效',
+    `created` datetime DEFAULT NULL COMMENT '创建时间',
+    `creator` varchar(100) DEFAULT NULL COMMENT '创建人',
+    `updated` datetime DEFAULT NULL COMMENT '修改时间',
+    `updator` varchar(100) DEFAULT NULL COMMENT '修改人',
+    PRIMARY KEY (`id`)
+) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COMMENT='mvp标准分';
+
+
+-- mvp parent-question classification table: links a tag to a parent question /
+-- parent option pair and stores the correlation coefficient between them.
+-- The statement is terminated with ';' so the file can be run as a script.
+create table `mvp_question_classification` (
+    `id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'id',
+    `tag` VARCHAR(50) DEFAULT NULL COMMENT '标签名',
+    `question_serial_number` varchar(50) DEFAULT NULL COMMENT '父题编号',
+    `option_serial_number` varchar(50) DEFAULT NULL COMMENT '父选项编号',
+    `question_content` VARCHAR(50) DEFAULT NULL COMMENT '父题内容',
+    `option_content` VARCHAR(50) DEFAULT NULL COMMENT '父选项内容',
+    `corr` DOUBLE(16,2) DEFAULT NULL COMMENT '关联度系数',
+    `status` int(11) DEFAULT '1' COMMENT '1:EFFECTIVE:有效,-1:INVALID:无效',
+    `created` datetime DEFAULT NULL COMMENT '创建时间',
+    `creator` varchar(100) DEFAULT NULL COMMENT '创建人',
+    `updated` datetime DEFAULT NULL COMMENT '修改时间',
+    `updator` varchar(100) DEFAULT NULL COMMENT '修改人',
+    PRIMARY KEY (`id`)
+) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COMMENT='mvp父题分类';

二進制
resources/mvp.xlsx


+ 3 - 0
start_shell.sh

@@ -0,0 +1,3 @@
+#!/bin/bash
+# 开发环境部署
+python3 service.py >>/tmp/huxingbao.log 2>&1 &

+ 176 - 0
templates/form.html

@@ -0,0 +1,176 @@
+<html>
+<head>
+  <title>文件生成</title>
+   <style>
+    /* Banner bar at the top of the page. */
+    .header {
+      width: 100%;
+      height: 50px;
+      background: rgb(44, 58, 74);
+      font-size: 25px;
+      /* font-weight is a unitless number; the former "500px" was invalid and ignored. */
+      font-weight: 500;
+      line-height: 50px;
+      color: #fff;
+      padding-left: 15px;
+      margin-bottom: 25px;
+    }
+
+    .message {
+      color: red;
+      padding: 20px 0px;
+    }
+
+    .form {
+      width: 500px;
+      margin: 0 auto;
+      background: #fff;
+      box-shadow: 4px 4px 40px rgba(0, 0, 0, .2);
+      border-color: rgba(0, 0, 0, .2);
+      padding: 0px 20px;
+      display: flex;
+      flex-direction: column;
+      align-items: center;
+      padding-bottom: 20px;
+    }
+
+    .title {
+      font-size: 14px;
+      color: #606266;
+      line-height: 20px;
+      box-sizing: border-box;
+      margin-right: 20px;
+    }
+
+    .file-row {
+      display: flex;
+      flex-direction: row;
+      align-items: center;
+      padding-bottom: 20px;
+      width: 100%;
+    }
+
+    .file-name {
+      font-size: 14px;
+      color: #606266;
+      line-height: 20px;
+      margin-left: 10px;
+    }
+
+    .file {
+      position: relative;
+      display: inline-block;
+      background: #409eff;
+      border: 1px solid #409eff;
+      border-radius: 4px;
+      padding: 4px 12px;
+      overflow: hidden;
+      color: #fff;
+      text-decoration: none;
+      text-indent: 0;
+      line-height: 20px;
+      font-size: 14px;
+    }
+
+    .file input {
+      position: absolute;
+      font-size: 100px;
+      right: 0;
+      top: 0;
+      opacity: 0;
+    }
+
+    .file:hover {
+      background: #409eff;
+      border-color: #409eff;
+      color: #fff;
+      text-decoration: none;
+    }
+
+    .number input {
+      -webkit-appearance: none;
+      background-color: #fff;
+      background-image: none;
+      border-radius: 4px;
+      border: 1px solid #dcdfe6;
+      box-sizing: border-box;
+      color: #606266;
+      display: inline-block;
+      font-size: inherit;
+      height: 40px;
+      line-height: 40px;
+      outline: 0;
+      padding: 0 15px;
+      transition: border-color .2s cubic-bezier(.645, .045, .355, 1);
+      width: 100%;
+    }
+
+    .submitBtn {
+      display: inline-block;
+      line-height: 1;
+      white-space: nowrap;
+      cursor: pointer;
+      color: #fff;
+      background-color: #409eff;
+      border-color: #409eff;
+      -webkit-appearance: none;
+      text-align: center;
+      box-sizing: border-box;
+      outline: 0;
+      margin: 20px 0px;
+      transition: .1s;
+      font-weight: 500;
+      padding: 12px 20px;
+      font-size: 14px;
+      border-radius: 4px;
+    }
+  </style>
+    <script src="http://libs.baidu.com/jquery/2.0.0/jquery.js"></script>
+    <script>
+    $(document).ready(function () {
+      $(".file").on("change", "input[type='file']", function () {
+        var filePath = $(this).val();
+        var arr = filePath.split('\\');
+        var fileName = arr[arr.length - 1];
+        document.getElementById('showFileName').innerHTML = fileName;
+      })
+    });
+   </script>
+</head>
+<!--<body>-->
+<!--  {% if message %}-->
+<!--  <p style="color:red">{{ message }}</p>-->
+<!--  {% endif %}-->
+<!--  <form action="/generation" method="post" enctype="multipart/form-data">-->
+<!--    <legend>Please sign in:</legend>-->
+<!--    <p><input name="dxf_file" placeholder="dxf_file" type="file"></p>-->
+<!--    <p><input name="area" placeholder="面积" type="number"></p>-->
+<!--    <p><button type="submit">提交户型轮廓文件</button></p>-->
+<!--  </form>-->
+<!--</body>-->
+
+<body style="margin: 0;padding: 0; background: #F2F6FC;">
+  <div class="header">mvp</div>
+  <!-- Upload form: posts the chosen excel file and an email address to /excel_upload as multipart form data. -->
+  <form action="/excel_upload" method="post" enctype="multipart/form-data">
+  <div class="form">
+    <!-- Optional message passed in by the template context. -->
+    <div class="message">
+        {% if message %}
+        {{ message }}
+        {% endif %}
+    </div>
+    <div class="file-row">
+      <div class="title">文件:</div>
+      <a href="javascript:;" class="file">选择文件
+        <input name="mvp_excel" placeholder="mvp_excel" type="file">
+      </a>
+      <div class="file-name" id="showFileName"></div>
+    </div>
+    <div class="file-row">
+      <div class="title">邮箱:</div>
+      <div class="number">
+        <!-- type="email": this field collects an address; type="number" rejected every valid one. -->
+        <input name="email" placeholder="请输入邮箱" type="email">
+      </div>
+    </div>
+    <button type="submit" class="submitBtn">确认更新</button>
+  </div>
+  </form>
+</body>
+</html>

二進制
utils/__pycache__/excel_util.cpython-37.pyc


+ 103 - 0
utils/excel_util.py

@@ -0,0 +1,103 @@
+import pandas as pd
+import openpyxl as ox
+from itertools import groupby
+import os
+
+
+class ExcelUtil:
+    # 当前项目路径
+    dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'\resources'
+    """
+        解析excel文件
+    """
+    def __init__(self, sheet_name=None, path=None):
+        if path:
+            self.path = path
+        else:
+            self.path = os.path.join(self.dir_path, 'mvp.xlsx')
+        if sheet_name:
+            self.sheet_name = sheet_name
+        else:
+            self.sheet_name = '硬标签+行为'
+
+    def read_excel_by_pd(self):
+        df = pd.read_excel(self.path)
+        data = df.head()
+        print('获取到的数据{}'.format(data))
+
+    def read_excel_by_ox(self):
+        work_book = ox.load_workbook(self.path, data_only=True)
+        work_sheet = work_book.get_sheet_by_name(self.sheet_name)
+        # print('max_row:{}, max_col:{}'.format(work_sheet.max_row, work_sheet.max_column))
+        return work_sheet
+
+    def init_crowd_info(self):
+        """
+            整理不同人群包含的父选序号
+        :return:
+        """
+        rows = [row for row in self.read_excel_by_ox().rows]
+        crowd_a = []
+        crowd_b = []
+        crowd_c = []
+        crowd_d = []
+        crowd_e = []
+        crowd_f = []
+
+        for row in rows[2:]:
+            option = row[4].value
+            a = row[6].value
+            if a is not None and a == 1 and option not in crowd_a:
+                crowd_a.append(option)
+            b = row[7].value
+            if b is not None and b == 1 and option not in crowd_b:
+                crowd_b.append(option)
+            c = row[8].value
+            if c is not None and c == 1 and option not in crowd_d:
+                crowd_c.append(option)
+            d = row[9].value
+            if d is not None and d == 1 and option not in crowd_d:
+                crowd_d.append(option)
+            e = row[10].value
+            if e is not None and e == 1 and option not in crowd_e:
+                crowd_e.append(option)
+            f = row[11].value
+            if f is not None and f == 1 and option not in crowd_f:
+                crowd_f.append(option)
+        return {'A': crowd_a, 'B': crowd_b, 'C': crowd_c, 'D': crowd_d, 'E': crowd_e, 'F': crowd_f}
+
+    def init_mvp_data(self):
+        """
+            获取每个标签包括的父题父选项编号
+        :return:
+        """
+        rows = [row for row in self.read_excel_by_ox().rows][24:]
+        tag_name = None
+        datas = []
+        for row in rows:
+            tag = row[1].value
+            values = row[3].value
+            corr = row[4].value
+            if tag:
+                tag_name = tag
+            if values is not None:
+                datas.append([tag_name, values, corr])
+        result = {}
+        for name, items in groupby(datas, key=lambda obj: obj[0]):
+            orders = []
+            for n in items:
+                orders.append([n[1], n[2]])
+            result[name] = orders
+        return result
+
+
+if __name__ == '__main__':
+    # eu = ExcelUtil()
+    # results = eu.init_mvp_data()
+    # for key in results.keys():
+    #     print(key)
+    #     print('     {}'.format(results[key]))
+    #     print('-'*40)
+    import os
+    dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    print(dir_path)