Forráskód Böngészése

elab_mvp: 调整项目目录结构

Signed-off-by: binren <zhangbr@elab-plus.com>
binren 5 éve
szülő
commit
09e846ed77

+ 7 - 0
.gitignore

@@ -0,0 +1,7 @@
+.idea/
+/out/
+*/__pycache__/
+/venv/
+/全部户型-镜像/*
+/algorithm/file/
+/resources/need_finish_files/*

BIN
__pycache__/excel_util.cpython-36.pyc


BIN
__pycache__/excel_util.cpython-37.pyc


BIN
__pycache__/flask_app.cpython-36.pyc


BIN
__pycache__/mvp.cpython-36.pyc


BIN
__pycache__/mvp.cpython-37.pyc


BIN
__pycache__/mysql_db.cpython-36.pyc


BIN
__pycache__/mysql_db.cpython-37.pyc


BIN
db/__pycache__/mysql_db.cpython-37.pyc


+ 62 - 20
utils/excel_util.py

@@ -6,13 +6,14 @@ import os
 
 class ExcelUtil:
     # 当前项目路径
-    dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'\resources'
+    dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'/elab_mvp/resources'
     """
         解析excel文件
     """
-    def __init__(self, sheet_name=None, path=None):
-        if path:
-            self.path = path
+
+    def __init__(self, sheet_name=None, file_name=None):
+        if file_name:
+            self.path = os.path.join(self.dir_path, file_name)
         else:
             self.path = os.path.join(self.dir_path, 'mvp.xlsx')
         if sheet_name:
@@ -73,31 +74,72 @@ class ExcelUtil:
         """
         rows = [row for row in self.read_excel_by_ox().rows][24:]
         tag_name = None
+        tag_type = None
         datas = []
         for row in rows:
+            tag_type_1 = row[0].value
             tag = row[1].value
             values = row[3].value
             corr = row[4].value
+            if tag_type_1:
+                tag_type = tag_type_1
             if tag:
                 tag_name = tag
             if values is not None:
-                datas.append([tag_name, values, corr])
+                datas.append([tag_type, tag_name, values, corr])
         result = {}
-        for name, items in groupby(datas, key=lambda obj: obj[0]):
-            orders = []
-            for n in items:
-                orders.append([n[1], n[2]])
-            result[name] = orders
+        datas.sort(key=lambda obj: obj[0])
+        for tag_type, sub_datas in groupby(datas, key=lambda obj: obj[0]):
+            sub_list = [x for x in sub_datas]
+            sub_list.sort(key=lambda obj: obj[1])
+            sub_result = {}
+            for name, items in groupby(sub_list, key=lambda obj: obj[1]):
+                orders = []
+                for n in items:
+                    orders.append([n[2], n[3]])
+                sub_result[name] = orders
+            result[tag_type] = sub_result
         return result
 
+    def init_scores(self):
+        """
+            Load the score rows from the configured worksheet.
+
+            The first row of the sheet is skipped. Any row whose first cell
+            is empty is dropped. Every remaining row is returned as a list
+            holding the values of its first five cells, in column order.
+        :return: list of five-element lists, one per retained row
+        """
+        sheet = self.read_excel_by_ox()
+        # Materialise the rows first so the leading row can be sliced away.
+        body_rows = list(sheet.rows)[1:]
+        return [
+            [record[col].value for col in range(5)]
+            for record in body_rows
+            if record[0].value is not None
+        ]
 
-if __name__ == '__main__':
-    # eu = ExcelUtil()
-    # results = eu.init_mvp_data()
-    # for key in results.keys():
-    #     print(key)
-    #     print('     {}'.format(results[key]))
-    #     print('-'*40)
-    import os
-    dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-    print(dir_path)
+    def init_module_info(self):
+        """
+            Build a two-level lookup from the configured worksheet.
+
+            Rows are read starting at the fourth row (index 3). For each row:
+              * cell 1 is the crowd name; an empty cell reuses the last
+                non-empty crowd name seen above it,
+              * cell 2 is the behavior and cell 4 is its score,
+              * from cell 6 onwards the columns are consumed in pairs of
+                (module name, weight); pairs with an empty module name are
+                ignored.
+        :return: dict of crowd name -> dict of module name ->
+                 list of [behavior, score, weight]
+        """
+        sheet = self.read_excel_by_ox()
+        column_count = sheet.max_column
+        current_crowd = None
+        records = []
+        for cells in list(sheet.rows)[3:]:
+            crowd_cell = cells[1].value
+            if crowd_cell is not None:
+                current_crowd = crowd_cell
+            behavior = cells[2].value
+            score = cells[4].value
+            for col in range(6, column_count - 1, 2):
+                module_name = cells[col].value
+                if module_name is None:
+                    continue
+                records.append([current_crowd, behavior, score, module_name, cells[col + 1].value])
+
+        # groupby only merges adjacent items, so order by the grouping key first.
+        records.sort(key=lambda rec: rec[0])
+        results = {}
+        for crowd, crowd_records in groupby(records, key=lambda rec: rec[0]):
+            # Copy each record, then order by module name for the inner grouping.
+            by_module = sorted((list(rec) for rec in crowd_records), key=lambda rec: rec[3])
+            results[crowd] = {
+                module: [[rec[1], rec[2], rec[4]] for rec in module_records]
+                for module, module_records in groupby(by_module, key=lambda rec: rec[3])
+            }
+        return results

+ 16 - 597
flask_app.py

@@ -1,599 +1,10 @@
-from flask import Flask, request, render_template, jsonify
-import os
-from werkzeug.utils import secure_filename
-import time
+from flask import Flask, request
+from mvp import Mvp
 import json
-import pandas as pd
-import openpyxl as ox
-from itertools import groupby
-import os
-import pymysql as ps
 
 app = Flask(__name__)
 
 
-class Mvp:
-    """
-     ce mvp 答题数据统计
-     城市特例 北京市,上海市, 重庆市,天津市
-    """
-
-    age_dict = {
-        '00-04年生': '00后',
-        '05-09年生': '05后',
-        '50-59年生': '50后',
-        '60-69年生': '60后',
-        '70-74年生': '70后',
-        '75-79年生': '75后',
-        '80-84年生': '80后',
-        '85-89年生': '85后',
-        '90-94年生': '90后',
-        '95-99年生': '95后'
-    }
-
-    crowd = ['A', 'B', 'C', 'D', 'E', 'F']
-
-    # 获取答题记录中城市列表
-    sql_1 = 'select city from f_t_daren_score_2 group by city'
-
-    # 获取父选项和父题id
-    sql_2 = 'select a.id, a.content, b.id, b.name from bq_option a left join bq_question b on a.question_id = b.id ' \
-            'where a.serial_number = %s and b.serial_number = %s and a.status = b.status = 1 '
-
-    # 获取答题人的年龄段集合
-    sql_4 = 'select nld from f_t_daren_score_2 group by nld'
-
-    # 根据城市,年龄段,人群分类统计答题记录数
-    sql_5 = 'select group_type, COUNT(uuid) from f_t_daren_score_2 where (city = %s or province = %s) and nld ' \
-            '= %s and uuid in %s group by group_type '
-
-    # 根据父选项获取子选项id列表
-    sql_6 = 'SELECT c.id, c.sub_question_id, c.content FROM bq_sub_option c WHERE c.father_id in (SELECT a.id FROM ' \
-            'bq_option a ' \
-            'LEFT JOIN bq_question b ON a.question_id = b.id WHERE a.serial_number = %s AND b.serial_number = %s ' \
-            'and a.status = 1 and b.status = 1) and c.status = 1 '
-
-    # 根据子题id获取包含子题id的测试
-    sql_7 = 'select group_type from bq_testcase where status = 1 and FIND_IN_SET(%s, question_ids)'
-
-    # 根据子选项id统计答题数
-    sql_8 = 'SELECT count(1) FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.sub_question_id = ' \
-            'b.sub_question_id AND a.score  = b.score and a.testcase_id = b.testcase_id WHERE b.sub_option_id in %s' \
-            'and (a.city = %s or a.province = %s) and a.nld = %s and a.uuid in %s'
-
-    # 获取一个uuid下答题的子选项id列表
-    sql_10 = 'select  DISTINCT uuid, GROUP_CONCAT(DISTINCT b.sub_option_id)  from f_t_daren_score_2 a left join ' \
-             'd_shangju_tiku_02 b on a.sub_question_id = b.sub_question_id and a.score = b.score where a.status = ' \
-             'b.status = 1 group by uuid '
-
-    # 向表mvp_crowd_info插入数据
-    sql_11 = 'insert into mvp_crowd_info(age_area, city_name, crowd_type, status) values(%s, %s, %s, 1)'
-
-    # 向表mvp_crowd_info_behavior中插入数据
-    sql_12 = 'insert into mvp_crowd_info_behavior(crowd_info_id, behavioral_interest, standard_value, status) values(' \
-             '%s, %s, ' \
-             '%s, 1) '
-
-    # 向表mvp_crowd_info_module中插入数据
-    sql_13 = 'insert into mvp_crowd_info_module(crowd_info_id, module_name, standard_value, status) values (%s, %s, ' \
-             '%s, 1) '
-
-    sql_14 = 'select a.id, a.age_area, a.city_name, a.crowd_type from mvp_crowd_info a where a.status = 1'
-
-    def __init__(self, path=None):
-        self.shangju_db = MysqlDB('shangju')
-        self.marketing_db = MysqlDB('bi_report')
-        # self.shangju_db.truncate('mvp_standard_score')
-        self.tag_data = ExcelUtil(file_name=path).init_mvp_data()
-        self.crowd_info = ExcelUtil(file_name=path, sheet_name='选项-人群分类对应表').init_crowd_info()
-        self.citys = self.init_city()
-        self.age = self.init_age()
-        self.people_sub_option_ids = self.marketing_db.select(self.sql_10)
-        self.crowd_contain_sub_option_ids = self.get_crowd_contain_sub_option_ids()
-        self.module_scores = ExcelUtil(file_name='set-behavior-tag.xlsx', sheet_name='算法关系表').init_module_info()
-        # self.scores_tag = ExcelUtil(file_name='行为与模块分值汇总.xlsx', sheet_name='行为').init_scores()
-        # self.score_module = ExcelUtil(file_name='行为与模块分值汇总.xlsx', sheet_name='模块').init_scores()
-        self.scores_tag = None
-        self.score_module = None
-
-
-    def init_city(self):
-        """
-            获取答题数据中的城市。
-        :return:
-        """
-        citys = ['北京市', '上海市', '重庆市', '天津市']
-        citys_info = self.marketing_db.select(self.sql_1)
-        citys.extend([x[0] for x in citys_info if x[0] is not None])
-        return citys
-
-    def query_behavioral_info(self, city=None, age=None, crowd=None):
-        """
-            查询行为兴趣信息
-        :return:
-        """
-        # datas = []
-        # for key in self.tag_data.keys():
-        #     values = self.tag_data[key]
-        #     for value in values:
-        #         question = value[0].split('-')[0]
-        #         option = value[0].split('-')[1]
-        #         corr = value[1]
-        #         data = self.shangju_db.select(self.sql_2, [option, question])
-        #         if len(data) > 0:
-        #             print([question, option, data[0][3], data[0][1], key, corr])
-        #             datas.append([question, option, data[0][3], data[0][1], key, corr])
-        # self.shangju_db.truncate('mvp_question_classification')
-        # self.shangju_db.add_some(self.sql_3, datas)
-        scores_behavioral = self.city_age_crowd(city, age, crowd)
-        # scores_module = self.module_score(crowd, city, age, scores_behavioral['score'])
-        # result = {'行为兴趣分值': scores_behavioral['score'], '模块分值': scores_module}
-        print('update finished!!!')
-        return scores_behavioral
-
-    def module_score(self, crowd, city, age, scores):
-        """
-            模块分数计算
-            城市 年龄 人群分类 模块名称 分数
-        :return:
-        """
-        modules = self.module_scores[crowd]
-        result = []
-        for key in modules.keys():
-            values = modules[key]
-            module_name = key
-            score = 0
-            for value in values:
-                behavioral_name = value[0]
-                weight = float(value[2])
-                standard_score = [x[4] for x in scores if x[2] == behavioral_name]
-                if len(standard_score) > 0:
-                    score += standard_score[0] * weight
-            result.append([city, age, crowd, module_name, score])
-        return result
-
-    # def insert_data(self, scores_behavioral, scores_module):
-    def insert(self):
-        """
-            计算数据写入数据库中,供接口查看
-        :return:
-        """
-        infos = []
-        for city in ['上海市', '宁波市', '苏州市', '杭州市', ' 无锡市']:
-            for age in ['50-59年生', '60-69年生', '70-74年生', '75-79年生', '80-84年生', '85-89年生', '90-94年生', '95-99年生', '00'
-                                                                                                                '-04年生', '05-09年生']:
-                for c_type in ['A', 'B', 'C', 'D', 'E', 'F']:
-                    age_area = self.age_dict.get(age)
-                    if age_area:
-                        infos.append([age_area, city, c_type])
-        self.shangju_db.add_some(self.sql_11, infos)
-
-    def query_data(self):
-        ids = self.shangju_db.select(self.sql_14)
-        return ids
-
-    def shanghai_85_module_score_insert(self):
-        """
-            上海市,85后模块分数计算
-        :return:
-        """
-        result = []
-        for crowd in self.crowd:
-            modules = self.module_scores[crowd]
-            for key in modules.keys():
-                values = modules[key]
-                module_name = key
-                score = 0
-                for value in values:
-                    behavioral_name = value[0]
-                    weight = float(value[2])
-                    # standard_score = [x[4] for x in scores if x[2] == behavioral_name]
-                    standard_score = float(value[1])
-                    if standard_score is not None:
-                        score += standard_score * weight
-                result.append(['上海市', '85后', crowd, module_name, score])
-        return {'score': result, 'data': self.module_scores}
-
-    def tag_module_score_insert(self):
-        """
-            标签模块分数写入数据库
-        :return:
-        """
-        ids = self.query_data()
-        insert_data = []
-        insert_data_1 = []
-        for tag, module in zip(self.scores_tag, self.score_module):
-            city = tag[0]
-            age = tag[1]
-            crowd = tag[2]
-            tag_name = tag[3]
-            tag_score = tag[4]
-
-            city_2 = module[0]
-            age_2 = module[1]
-            crowd_2 = module[2]
-            module_name_2 = module[3]
-            module_score_2 = module[4]
-
-            for id in ids:
-                city_1 = id[2]
-                age_1 = id[1]
-                crowd_1 = id[3]
-                id_1 = id[0]
-                if city == city_1 and self.age_dict[age] == age_1 and crowd == crowd_1:
-                    insert_data.append([id_1, tag_name, tag_score])
-                if city_2 == city_1 and self.age_dict[age_2] == age_1 and crowd_2 == crowd_1:
-                    insert_data_1.append([id_1, module_name_2, module_score_2])
-        self.shangju_db.add_some(self.sql_12, insert_data)
-        self.shangju_db.add_some(self.sql_13, insert_data_1)
-
-    def init_age(self):
-        """
-           获取答题数据中的年龄
-        """
-        age_info = self.marketing_db.select(self.sql_4)
-        # print([x[0] for x in age_info])
-        return [x[0] for x in age_info if x[0] is not None]
-
-    def city_age_crowd(self, city=None, age=None, crowd=None):
-        data_start = []
-        result = []
-        module_scores = []
-        if city is not None and age is not None and crowd is not None:
-            print('获取指定城市,年龄段,人群类型的数据...')
-            people_uuids = self.get_people_uuid_by_type(crowd)
-            if len(people_uuids) > 0:
-                print('{}-{}-{}'.format(city, age, crowd))
-                datas = self.behavior_tag_init(city, age, people_uuids)
-                data_start.append(datas)
-                result.extend(self.calculation_standard_score(datas, city, age, crowd))
-            module_scores.extend(self.module_score(crowd, city, age, result))
-            pass
-        else:
-            print('获取所有case的数据...')
-            # for city in self.citys:
-            # for city in [city]:
-            for age in self.age:
-                for crowd_type in self.crowd:
-                    if age == '85-89年生' and city == '上海市':
-                        print('上海市85后数据导入人工值,无需计算...')
-                        pass
-                    else:
-                        # print(' {}{}'.format(city, age))
-                        people_uuids = self.get_people_uuid_by_type(crowd_type)
-                        if len(people_uuids) > 0:
-                            print('{}-{}-{}'.format(city, age, crowd_type))
-                            datas = self.behavior_tag_init(city, age, people_uuids)
-                            data_start.append(datas)
-                            result.extend(self.calculation_standard_score(datas, city, age, crowd_type))
-                        module_scores.extend(self.module_score(crowd_type, city, age, result))
-        # return result
-        # data_list = []
-        # for e in data_start:
-        #     for key in e.keys():
-        #         values = e[key]
-        #         for sub_e in values:
-        #             ele = [key]
-        #             ele.extend(sub_e)
-        #             data_list.append(ele)
-        #     pass
-        return {'tag_score': result, 'module_score': module_scores}
-        # return {'score': result, 'data': data_list}
-
-    def behavior_tag_init(self, city, age, people_uuids):
-        result = {}
-        self.group_type_count = self.marketing_db.select(self.sql_5, [city, city, age, people_uuids])
-        for key in self.tag_data:
-            values = self.tag_data[key]
-            elements = []
-            for value in values:
-                question = value[0].split('-')[0]
-                option = value[0].split('-')[1]
-                corr = value[1]
-                fz, fm = self.molecular_value(question, option, city, age, people_uuids)
-                if fm == 0:
-                    c = 0
-                else:
-                    c = fz / fm
-                elements.append([question, option, corr, fz, fm, c])
-            result[key] = elements
-        return self.indicator_calculation_d_e(result)
-
-    def molecular_value(self, queston, option, city, age, people_uuids):
-        # 获取当前父选项包含的子选项id和子题id列表
-        result = self.shangju_db.select(self.sql_6, [option, queston])
-        sub_option_ids = []
-        group_types = []
-        for rt in result:
-            sub_option_id, sub_question_id, content = rt[0], rt[1], rt[2]
-            grouptypes = self.shangju_db.select(self.sql_7, [sub_question_id])
-            for g_t in grouptypes:
-                if g_t[0] not in group_types:
-                    group_types.append(g_t[0])
-            sub_option_ids.append(sub_option_id)
-        # 计算子选项在答题记录中的点击数
-        sub_options_count = 0
-        if len(sub_option_ids) > 0:
-            result_1 = self.marketing_db.select(self.sql_8, [sub_option_ids, city, city, age, people_uuids])
-            sub_options_count = result_1[0][0]
-        # 计算父选项包含的子选项对应的子题所在的测试gt包含的点击数。
-        denominator_value = 0
-        for info in self.group_type_count:
-            if info[0] in group_types:
-                denominator_value += info[1]
-        return sub_options_count, denominator_value
-
-    def indicator_calculation_d_e(self, data):
-        result = {}
-        for key in data.keys():
-            values = data[key]
-            c_list = []
-            for x in values:
-                _x = x[5]
-                if _x is not None and x != 0:
-                    c_list.append(_x)
-            fm_list = [x[4] for x in values]
-            sum_c = sum(fm_list)
-            if len(c_list) == 0:
-                min_c = 0
-            else:
-                min_c = min(c_list)
-            elements = []
-            for value in values:
-                _value = []
-                c = value[5]
-                if sum_c == 0:
-                    d = 0
-                else:
-                    d = c / sum_c
-                e = c - min_c
-                _value.extend(value)
-                _value.append(d)
-                _value.append(e)
-                elements.append(_value)
-            result[key] = elements
-        return result
-
-    def calculation_standard_score(self, datas, city, age, crowd_type):
-        scores = []
-        for key in datas.keys():
-            print(key)
-            print('     父题序号 父选项序号 相关系系数 分子值 分母值 百分比 人数权重 偏离值')
-            values = [x[5] for x in datas[key]]
-            min_c = min(values)
-            f = min_c
-            for value in datas[key]:
-                print('     {}'.format(value))
-                if value[2] is not None and value[7] is not None:
-                    f += float(value[2] * value[7])
-            print('     标准分:{}'.format(f))
-            scores.append([city, age, key, crowd_type, f])
-        # self.shangju_db.add_some(self.sql_9, scores)
-        return scores
-
-    def get_crowd_people(self):
-        result = {}
-        for type in self.crowd:
-            uuids = self.get_people_uuid_by_type(type)
-            result[type] = len(uuids)
-        return result
-
-    def get_people_uuid_by_type(self, type):
-        uuids = []
-        type_sub_option_ids = self.crowd_contain_sub_option_ids[type]
-        for people in self.people_sub_option_ids:
-            uuid = people[0]
-            sub_option_ids = list(map(int, str(people[1]).split(',')))
-            # list(set(a).intersection(set(b)))
-            if len(list(set(sub_option_ids).intersection(set(type_sub_option_ids)))) > 0 and uuid not in uuids:
-                uuids.append(uuid)
-        return uuids
-
-    def get_crowd_contain_sub_option_ids(self):
-        """
-            获取ABCDEF人群包含的子选项id
-        :return:
-        """
-        infos = {}
-        for key in self.crowd_info.keys():
-            values = self.crowd_info[key]
-            sub_option_ids = []
-            for value in values:
-                if value is not None:
-                    vals = str(value).split('-')
-                    option, question = vals[1], vals[0]
-                    query_result = self.shangju_db.select(self.sql_6, [option, question])
-                    for qr in query_result:
-                        sub_option_id, sub_question_id, content = qr[0], qr[1], qr[2]
-                        sub_option_ids.append(int(sub_option_id))
-            infos[key] = sub_option_ids
-        print(infos)
-        return infos
-
-
-class ExcelUtil:
-    # 当前项目路径
-    dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'/elab_mvp/resources'
-    """
-        解析excel文件
-    """
-
-    def __init__(self, sheet_name=None, file_name=None):
-        if file_name:
-            self.path = os.path.join(self.dir_path, file_name)
-        else:
-            self.path = os.path.join(self.dir_path, 'mvp.xlsx')
-        if sheet_name:
-            self.sheet_name = sheet_name
-        else:
-            self.sheet_name = '硬标签+行为'
-
-    def read_excel_by_pd(self):
-        df = pd.read_excel(self.path)
-        data = df.head()
-        print('获取到的数据{}'.format(data))
-
-    def read_excel_by_ox(self):
-        work_book = ox.load_workbook(self.path, data_only=True)
-        work_sheet = work_book.get_sheet_by_name(self.sheet_name)
-        # print('max_row:{}, max_col:{}'.format(work_sheet.max_row, work_sheet.max_column))
-        return work_sheet
-
-    def init_crowd_info(self):
-        """
-            整理不同人群包含的父选序号
-        :return:
-        """
-        rows = [row for row in self.read_excel_by_ox().rows]
-        crowd_a = []
-        crowd_b = []
-        crowd_c = []
-        crowd_d = []
-        crowd_e = []
-        crowd_f = []
-
-        for row in rows[2:]:
-            option = row[4].value
-            a = row[6].value
-            if a is not None and a == 1 and option not in crowd_a:
-                crowd_a.append(option)
-            b = row[7].value
-            if b is not None and b == 1 and option not in crowd_b:
-                crowd_b.append(option)
-            c = row[8].value
-            if c is not None and c == 1 and option not in crowd_d:
-                crowd_c.append(option)
-            d = row[9].value
-            if d is not None and d == 1 and option not in crowd_d:
-                crowd_d.append(option)
-            e = row[10].value
-            if e is not None and e == 1 and option not in crowd_e:
-                crowd_e.append(option)
-            f = row[11].value
-            if f is not None and f == 1 and option not in crowd_f:
-                crowd_f.append(option)
-        return {'A': crowd_a, 'B': crowd_b, 'C': crowd_c, 'D': crowd_d, 'E': crowd_e, 'F': crowd_f}
-
-    def init_mvp_data(self):
-        """
-            获取每个标签包括的父题父选项编号
-        :return:
-        """
-        rows = [row for row in self.read_excel_by_ox().rows][24:]
-        tag_name = None
-        datas = []
-        for row in rows:
-            tag = row[1].value
-            values = row[3].value
-            corr = row[4].value
-            if tag:
-                tag_name = tag
-            if values is not None:
-                datas.append([tag_name, values, corr])
-        result = {}
-        for name, items in groupby(datas, key=lambda obj: obj[0]):
-            orders = []
-            for n in items:
-                orders.append([n[1], n[2]])
-            result[name] = orders
-        return result
-
-    def init_scores(self):
-        work_sheet = self.read_excel_by_ox()
-        rows = [row for row in work_sheet.rows]
-        datas = []
-        for row in rows[1:]:
-            if row[0].value is not None:
-                datas.append([row[0].value, row[1].value, row[2].value, row[3].value, row[4].value])
-        return datas
-
-    def init_module_info(self):
-        work_sheet = self.read_excel_by_ox()
-        max_column = work_sheet.max_column
-        rows = [row for row in work_sheet.rows][3:]
-        crowd_name = None
-        datas = []
-        for row in rows:
-            crowd = row[1].value
-            if crowd is not None:
-                crowd_name = crowd
-            behavior = row[2].value
-            score = row[4].value
-            for index in range(6, max_column - 1, 2):
-                module_name = row[index].value
-                if module_name is not None:
-                    weight = row[index + 1].value
-                    datas.append([crowd_name, behavior, score, module_name, weight])
-        results = {}
-        datas.sort(key=lambda obj: obj[0])
-        for name, items in groupby(datas, key=lambda obj: obj[0]):
-            sub_results = {}
-            sub_list = []
-            for it in items:
-                sub_list.append([x for x in it])
-            sub_list.sort(key=lambda obj: obj[3])
-            for name_1, itmes_1 in groupby(sub_list, key=lambda obj: obj[3]):
-                sub_data = []
-                for n in itmes_1:
-                    # print('         {}'.format(n[1]))
-                    sub_data.append([n[1], n[2], n[4]])
-                sub_results[name_1] = sub_data
-            results[name] = sub_results
-        return results
-
-
-class MysqlDB:
-    """
-        mysql操作
-    """
-    con = None
-    cursor = None
-
-    def __init__(self, db_name):
-        self.db_name = db_name
-        self.con = ps.connect(host='172.19.189.136', port=3306, user='bi_etl', password='XPtpswuU5lwGo4kx',
-                              db=self.db_name, charset='utf8')
-        # self.con = ps.connect(host='192.168.0.13', port=3306, user='root', password='elab@123'
-        #                       , db=self.db_name, charset='utf8')
-        self.cursor = self.con.cursor()
-
-    def show_tables(self):
-        self.cursor.execute('show tables')
-        for talbe in self.cursor.fetchall():
-            print(talbe)
-
-    def select(self, sql, params=None):
-        if params:
-            self.cursor.execute(sql, params)
-        else:
-            self.cursor.execute(sql)
-        return self.cursor.fetchall()
-
-    def add_some(self, sql, data):
-        try:
-            self.cursor.executemany(sql, data)
-            self.con.commit()
-        except:
-            print('数据插入异常...')
-            self.con.rollback()
-
-    def add_one(self, sql, data):
-        try:
-            self.cursor.execute(sql, data)
-            self.con.commit()
-        except:
-            self.con.rollback()
-
-    def truncate(self, table_name):
-        sql = 'truncate table {}'.format(table_name)
-        self.cursor.execute(sql)
-        self.con.commit()
-
-    def close(self):
-        self.cursor.close()
-        self.con.close()
-
-
 @app.route('/behavioral_statistics', methods=['GET', 'POST'])
 def behavioral_statistics():
     """
@@ -606,8 +17,7 @@ def behavioral_statistics():
     print(city, age, crowd)
     mvp = Mvp()
     scores = mvp.query_behavioral_info(city, age, crowd)
-    mvp.shangju_db.close()
-    mvp.marketing_db.close()
+    mvp.close()
     return json.dumps(scores, ensure_ascii=False)
 
 
@@ -619,8 +29,7 @@ def get_city_age_crowd():
     """
     mvp = Mvp()
     infos = {'城市': mvp.citys, '年龄段': mvp.age, '人群分类': mvp.crowd}
-    mvp.shangju_db.close()
-    mvp.marketing_db.close()
+    mvp.close()
     return json.dumps(infos, ensure_ascii=False)
 
 
@@ -632,8 +41,7 @@ def crowd_people():
     """
     mvp = Mvp()
     people_count = mvp.get_crowd_people()
-    mvp.shangju_db.close()
-    mvp.marketing_db.close()
+    mvp.close()
     return json.dumps(people_count, ensure_ascii=False)
 
 
@@ -644,6 +52,7 @@ def set_behavior_tag():
     :return:
     """
     mvp = Mvp()
+    mvp.close()
     return json.dumps(mvp.module_scores, ensure_ascii=False)
 
 
@@ -652,6 +61,7 @@ def insert_info():
     mvp = Mvp()
     mvp.insert()
     query_data = mvp.query_data()
+    mvp.close()
     return json.dumps(query_data, ensure_ascii=False)
 
 
@@ -659,6 +69,7 @@ def insert_info():
 def insert_score():
     mvp = Mvp()
     mvp.tag_module_score_insert()
+    mvp.close()
     return '!!!分数添加成功!!!'
 
 
@@ -666,9 +77,17 @@ def insert_score():
 def shanghai_85():
     mvp = Mvp()
     data = mvp.shanghai_85_module_score_insert()
+    mvp.close()
     return json.dumps(data, ensure_ascii=False)
 
 
+@app.route('/tag_tree', methods=['GET', 'POST'])
+def tag_tree():
+    """
+        Return the tag data loaded by Mvp as a JSON string.
+    :return: JSON text of mvp.tag_data (non-ASCII characters kept as-is)
+    """
+    mvp = Mvp()
+    tags = mvp.tag_data
+    # Release the Mvp resources like every other route does; without this
+    # each request to this endpoint leaves its connections open.
+    mvp.close()
+    return json.dumps(tags, ensure_ascii=False)
+
+
 if __name__ == '__main__':
     app.run(
         host='0.0.0.0',

+ 419 - 0
mvp.py

@@ -0,0 +1,419 @@
+from mysql_db import MysqlDB
+from excel_util import ExcelUtil
+
+
+class Mvp:
+    """
+     Statistics over the CE MVP questionnaire answers.
+     Special-case cities: Beijing, Shanghai, Chongqing, Tianjin.
+    """
+
+    # Birth-year bracket label -> short generation label (the value that
+    # insert() stores as age_area in mvp_crowd_info).
+    age_dict = {
+        '00-04年生': '00后',
+        '05-09年生': '05后',
+        '50-59年生': '50后',
+        '60-69年生': '60后',
+        '70-74年生': '70后',
+        '75-79年生': '75后',
+        '80-84年生': '80后',
+        '85-89年生': '85后',
+        '90-94年生': '90后',
+        '95-99年生': '95后'
+    }
+
+    # Crowd type codes used as keys throughout this class.
+    crowd = ['A', 'B', 'C', 'D', 'E', 'F']
+
+    # List the cities present in the answer records
+    sql_1 = 'select city from f_t_daren_score_2 group by city'
+
+    # Fetch the parent option and the parent question ids
+    # NOTE(review): MySQL evaluates `a.status = b.status = 1` left to right, i.e.
+    # `(a.status = b.status) = 1` — confirm both statuses were meant to equal 1
+    # (sql_6 spells the two conditions out separately).
+    sql_2 = 'select a.id, a.content, b.id, b.name from bq_option a left join bq_question b on a.question_id = b.id ' \
+            'where a.serial_number = %s and b.serial_number = %s and a.status = b.status = 1 '
+
+    # Distinct age brackets of the respondents
+    sql_4 = 'select nld from f_t_daren_score_2 group by nld'
+
+    # Count answer records per group_type, filtered by city/province, age bracket and respondent uuids
+    sql_5 = 'select group_type, COUNT(uuid) from f_t_daren_score_2 where (city = %s or province = %s) and nld ' \
+            '= %s and uuid in %s group by group_type '
+
+    # Sub-options (id, sub_question_id, content) that belong to a parent option
+    sql_6 = 'SELECT c.id, c.sub_question_id, c.content FROM bq_sub_option c WHERE c.father_id in (SELECT a.id FROM ' \
+            'bq_option a ' \
+            'LEFT JOIN bq_question b ON a.question_id = b.id WHERE a.serial_number = %s AND b.serial_number = %s ' \
+            'and a.status = 1 and b.status = 1) and c.status = 1 '
+
+    # Test cases whose question_ids contain the given sub-question id
+    sql_7 = 'select group_type from bq_testcase where status = 1 and FIND_IN_SET(%s, question_ids)'
+
+    # Count the answers given for a set of sub-option ids
+    # NOTE(review): the concatenated literals produce `in %sand (a.city ...` with no
+    # space before `and` — confirm the substituted statement still parses as intended.
+    sql_8 = 'SELECT count(1) FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.sub_question_id = ' \
+            'b.sub_question_id AND (a.score  = b.score or a.score = b.sub_option_id) and a.testcase_id = ' \
+            'b.testcase_id WHERE b.sub_option_id in %s' \
+            'and (a.city = %s or a.province = %s) and a.nld = %s and a.uuid in %s'
+
+    # Per uuid, the comma-joined list of answered sub-option ids
+    # NOTE(review): same chained comparison `a.status = b.status = 1` as in sql_2.
+    sql_10 = 'select  DISTINCT uuid, GROUP_CONCAT(DISTINCT b.sub_option_id)  from f_t_daren_score_2 a left join ' \
+             'd_shangju_tiku_02 b on a.sub_question_id = b.sub_question_id and (a.score = b.score or a.score = ' \
+             'b.sub_option_id) where a.status = ' \
+             'b.status = 1 group by uuid '
+
+    # Insert a row into mvp_crowd_info
+    sql_11 = 'insert into mvp_crowd_info(age_area, city_name, crowd_type, status) values(%s, %s, %s, 1)'
+
+    # Insert a row into mvp_crowd_info_behavior
+    sql_12 = 'insert into mvp_crowd_info_behavior(crowd_info_id, behavioral_interest, standard_value, status) values(' \
+             '%s, %s, ' \
+             '%s, 1) '
+
+    # Insert a row into mvp_crowd_info_module
+    sql_13 = 'insert into mvp_crowd_info_module(crowd_info_id, module_name, standard_value, status) values (%s, %s, ' \
+             '%s, 1) '
+
+    # Active rows of mvp_crowd_info (id, age_area, city_name, crowd_type)
+    sql_14 = 'select a.id, a.age_area, a.city_name, a.crowd_type from mvp_crowd_info a where a.status = 1'
+
+    def __init__(self, path=None):
+        """
+            Open both database connections and preload the Excel and database data.
+
+            Everything below runs eagerly: constructing an Mvp performs the
+            Excel reads and the database queries immediately.
+        :param path: workbook file name forwarded to ExcelUtil as file_name
+        """
+        self.shangju_db = MysqlDB('shangju')
+        self.marketing_db = MysqlDB('bi_report')
+        # self.shangju_db.truncate('mvp_standard_score')
+        # Tag configuration read from the workbook.
+        self.tag_data = ExcelUtil(file_name=path).init_mvp_data()
+        # Option -> crowd type mapping read from its own sheet.
+        self.crowd_info = ExcelUtil(file_name=path, sheet_name='选项-人群分类对应表').init_crowd_info()
+        self.citys = self.init_city()
+        self.age = self.init_age()
+        # Rows of (uuid, comma-joined sub-option ids), see sql_10.
+        self.people_sub_option_ids = self.marketing_db.select(self.sql_10)
+        # Must run after crowd_info is loaded: it resolves that mapping to sub-option ids.
+        self.crowd_contain_sub_option_ids = self.get_crowd_contain_sub_option_ids()
+        self.module_scores = ExcelUtil(file_name='set-behavior-tag.xlsx', sheet_name='算法关系表').init_module_info()
+        # self.scores_tag = ExcelUtil(file_name='行为与模块分值汇总.xlsx', sheet_name='行为').init_scores()
+        # self.score_module = ExcelUtil(file_name='行为与模块分值汇总.xlsx', sheet_name='模块').init_scores()
+        # The two score tables are not loaded here; both stay None.
+        self.scores_tag = None
+        self.score_module = None
+
+    def close(self):
+        self.shangju_db.close()
+        self.marketing_db.close()
+
+    def init_city(self):
+        """
+            获取答题数据中的城市。
+        :return:
+        """
+        citys = ['宁波市', '上海市', '苏州市', '无锡市', '宁波市']
+        # citys_info = self.marketing_db.select(self.sql_1)
+        # citys.extend([x[0] for x in citys_info if x[0] is not None])
+        return citys
+
+    def query_behavioral_info(self, city=None, age=None, crowd=None):
+        """
+            查询行为兴趣信息
+        :return:
+        """
+        # datas = []
+        # for key in self.tag_data.keys():
+        #     values = self.tag_data[key]
+        #     for value in values:
+        #         question = value[0].split('-')[0]
+        #         option = value[0].split('-')[1]
+        #         corr = value[1]
+        #         data = self.shangju_db.select(self.sql_2, [option, question])
+        #         if len(data) > 0:
+        #             print([question, option, data[0][3], data[0][1], key, corr])
+        #             datas.append([question, option, data[0][3], data[0][1], key, corr])
+        # self.shangju_db.truncate('mvp_question_classification')
+        # self.shangju_db.add_some(self.sql_3, datas)
+        scores_behavioral = self.city_age_crowd(city, age, crowd)
+        # scores_module = self.module_score(crowd, city, age, scores_behavioral['score'])
+        # result = {'行为兴趣分值': scores_behavioral['score'], '模块分值': scores_module}
+        print('update finished!!!')
+        return scores_behavioral
+
+    def module_score(self, crowd, city, age, scores):
+        """
+            模块分数计算
+            城市 年龄 人群分类 模块名称 分数
+        :return:
+        """
+        behavioral_score = scores['用户画像-行为兴趣']
+        modules = self.module_scores[crowd]
+        result = []
+        for key in modules.keys():
+            values = modules[key]
+            module_name = key
+            score = 0
+            for value in values:
+                behavioral_name = value[0]
+                weight = float(value[2])
+                standard_score = [x[4] for x in behavioral_score if x[2] == behavioral_name]
+                if len(standard_score) > 0:
+                    score += standard_score[0] * weight
+            result.append([city, age, crowd, module_name, score])
+        return result
+
+    # def insert_data(self, scores_behavioral, scores_module):
+    def insert(self):
+        """
+            计算数据写入数据库中,供接口查看
+        :return:
+        """
+        infos = []
+        for city in ['上海市', '宁波市', '苏州市', '杭州市', ' 无锡市']:
+            for age in ['50-59年生', '60-69年生', '70-74年生', '75-79年生', '80-84年生', '85-89年生', '90-94年生', '95-99年生', '00'
+                                                                                                                '-04年生',
+                        '05-09年生']:
+                for c_type in ['A', 'B', 'C', 'D', 'E', 'F']:
+                    age_area = self.age_dict.get(age)
+                    if age_area:
+                        infos.append([age_area, city, c_type])
+        self.shangju_db.add_some(self.sql_11, infos)
+
+    def query_data(self):
+        ids = self.shangju_db.select(self.sql_14)
+        return ids
+
+    def shanghai_85_module_score_insert(self):
+        """
+            上海市,85后模块分数计算
+        :return:
+        """
+        result = []
+        for crowd in self.crowd:
+            modules = self.module_scores[crowd]
+            for key in modules.keys():
+                values = modules[key]
+                module_name = key
+                score = 0
+                for value in values:
+                    behavioral_name = value[0]
+                    weight = float(value[2])
+                    # standard_score = [x[4] for x in scores if x[2] == behavioral_name]
+                    standard_score = float(value[1])
+                    if standard_score is not None:
+                        score += standard_score * weight
+                result.append(['上海市', '85后', crowd, module_name, score])
+        return {'score': result, 'data': self.module_scores}
+
+    def tag_module_score_insert(self):
+        """
+            标签模块分数写入数据库
+        :return:
+        """
+        ids = self.query_data()
+        insert_data = []
+        insert_data_1 = []
+        for tag, module in zip(self.scores_tag, self.score_module):
+            city = tag[0]
+            age = tag[1]
+            crowd = tag[2]
+            tag_name = tag[3]
+            tag_score = tag[4]
+
+            city_2 = module[0]
+            age_2 = module[1]
+            crowd_2 = module[2]
+            module_name_2 = module[3]
+            module_score_2 = module[4]
+
+            for id in ids:
+                city_1 = id[2]
+                age_1 = id[1]
+                crowd_1 = id[3]
+                id_1 = id[0]
+                if city == city_1 and self.age_dict[age] == age_1 and crowd == crowd_1:
+                    insert_data.append([id_1, tag_name, tag_score])
+                if city_2 == city_1 and self.age_dict[age_2] == age_1 and crowd_2 == crowd_1:
+                    insert_data_1.append([id_1, module_name_2, module_score_2])
+        self.shangju_db.add_some(self.sql_12, insert_data)
+        self.shangju_db.add_some(self.sql_13, insert_data_1)
+
+    def init_age(self):
+        """
+           获取答题数据中的年龄
+        """
+        age_info = self.marketing_db.select(self.sql_4)
+        # print([x[0] for x in age_info])
+        return [x[0] for x in age_info if x[0] is not None]
+
+    def city_age_crowd(self, city=None, age=None, crowd=None):
+        data_start = []
+        result = []
+        module_scores = []
+        if city is not None and age is not None and crowd is not None:
+            print('获取指定城市,年龄段,人群类型的数据...')
+            people_uuids = self.get_people_uuid_by_type(crowd)
+            if len(people_uuids) > 0:
+                print('{}-{}-{}'.format(city, age, crowd))
+                datas = self.behavior_tag_init(city, age, people_uuids)
+                data_start.append(datas)
+                result.extend(self.calculation_standard_score(datas, city, age, crowd))
+            module_scores.extend(self.module_score(crowd, city, age, result))
+            pass
+        else:
+            print('获取所有case的数据...')
+            # for city in self.citys:
+            # for city in [city]:
+            for age in self.age:
+                for crowd_type in self.crowd:
+                    if age == '85-89年生' and city == '上海市':
+                        print('上海市85后数据导入人工值,无需计算...')
+                        pass
+                    else:
+                        # print(' {}{}'.format(city, age))
+                        people_uuids = self.get_people_uuid_by_type(crowd_type)
+                        if len(people_uuids) > 0:
+                            print('{}-{}-{}'.format(city, age, crowd_type))
+                            datas = self.behavior_tag_init(city, age, people_uuids)
+                            data_start.append(datas)
+                            result.extend(self.calculation_standard_score(datas, city, age, crowd_type))
+                        module_scores.extend(self.module_score(crowd_type, city, age, result))
+        # return result
+        # data_list = []
+        # for e in data_start:
+        #     for key in e.keys():
+        #         values = e[key]
+        #         for sub_e in values:
+        #             ele = [key]
+        #             ele.extend(sub_e)
+        #             data_list.append(ele)
+        #     pass
+        return {'tag_score': result, 'module_score': module_scores}
+        # return {'score': result, 'data': data_list}
+
+    def behavior_tag_init(self, city, age, people_uuids):
+        result = {}
+        self.group_type_count = self.marketing_db.select(self.sql_5, [city, city, age, people_uuids])
+        # 表名
+        for key in self.tag_data:
+            values = self.tag_data[key]
+            result_sub = {}
+            # 标签
+            for key_tag_name in values.keys():
+                questions = values[key_tag_name]
+                elements = []
+                for value in questions:
+                    question = value[0].split('-')[0]
+                    option = value[0].split('-')[1]
+                    corr = value[1]
+                    fz, fm = self.molecular_value(question, option, city, age, people_uuids)
+                    if fm == 0:
+                        c = 0
+                    else:
+                        c = fz / fm
+                    elements.append([question, option, corr, fz, fm, c])
+                result_sub[key_tag_name] = elements
+            result[key] = self.indicator_calculation_d_e(result_sub)
+        return result
+
+    def molecular_value(self, queston, option, city, age, people_uuids):
+        # 获取当前父选项包含的子选项id和子题id列表
+        result = self.shangju_db.select(self.sql_6, [option, queston])
+        sub_option_ids = []
+        group_types = []
+        for rt in result:
+            sub_option_id, sub_question_id, content = rt[0], rt[1], rt[2]
+            grouptypes = self.shangju_db.select(self.sql_7, [sub_question_id])
+            for g_t in grouptypes:
+                if g_t[0] not in group_types:
+                    group_types.append(g_t[0])
+            sub_option_ids.append(sub_option_id)
+        # 计算子选项在答题记录中的点击数
+        sub_options_count = 0
+        if len(sub_option_ids) > 0:
+            result_1 = self.marketing_db.select(self.sql_8, [sub_option_ids, city, city, age, people_uuids])
+            sub_options_count = result_1[0][0]
+        # 计算父选项包含的子选项对应的子题所在的测试gt包含的点击数。
+        denominator_value = 0
+        for info in self.group_type_count:
+            if info[0] in group_types:
+                denominator_value += info[1]
+        return sub_options_count, denominator_value
+
+    def indicator_calculation_d_e(self, data):
+        result = {}
+        for key in data.keys():
+            values = data[key]
+            c_list = []
+            for x in values:
+                _x = x[5]
+                if _x is not None and x != 0:
+                    c_list.append(_x)
+            fm_list = [x[4] for x in values]
+            sum_c = sum(fm_list)
+            if len(c_list) == 0:
+                min_c = 0
+            else:
+                min_c = min(c_list)
+            elements = []
+            for value in values:
+                _value = []
+                c = value[5]
+                if sum_c == 0:
+                    d = 0
+                else:
+                    d = c / sum_c
+                e = c - min_c
+                _value.extend(value)
+                _value.append(d)
+                _value.append(e)
+                elements.append(_value)
+            result[key] = elements
+        return result
+
+    def calculation_standard_score(self, datas, city, age, crowd_type):
+        scores = {}
+        for key_tag_type in datas.keys():
+            tag_type_data = datas[key_tag_type]
+            scores_sub = []
+            for key_tag in tag_type_data.keys():
+                key_tag_data = tag_type_data[key_tag]
+                print(key_tag)
+                print('     父题序号 父选项序号 相关系系数 分子值 分母值 百分比 人数权重 偏离值')
+                values = [x[5] for x in key_tag_data]
+                min_c = min(values)
+                f = min_c
+                for value in key_tag_data:
+                    print('     {}'.format(value))
+                    if value[2] is not None and value[7] is not None:
+                        f += float(value[2] * value[7])
+                print('     标准分:{}'.format(f))
+                scores_sub.append([city, age, key_tag, crowd_type, f])
+            scores[key_tag_type] = scores_sub
+            # self.shangju_db.add_some(self.sql_9, scores)
+        return scores
+
+    def get_crowd_people(self):
+        result = {}
+        for type in self.crowd:
+            uuids = self.get_people_uuid_by_type(type)
+            result[type] = len(uuids)
+        return result
+
+    def get_people_uuid_by_type(self, type):
+        uuids = []
+        type_sub_option_ids = self.crowd_contain_sub_option_ids[type]
+        for people in self.people_sub_option_ids:
+            uuid = people[0]
+            sub_option_ids = list(map(int, str(people[1]).split(',')))
+            # list(set(a).intersection(set(b)))
+            if len(list(set(sub_option_ids).intersection(set(type_sub_option_ids)))) > 0 and uuid not in uuids:
+                uuids.append(uuid)
+        return uuids
+
+    def get_crowd_contain_sub_option_ids(self):
+        """
+            获取ABCDEF人群包含的子选项id
+        :return:
+        """
+        infos = {}
+        for key in self.crowd_info.keys():
+            values = self.crowd_info[key]
+            sub_option_ids = []
+            for value in values:
+                if value is not None:
+                    vals = str(value).split('-')
+                    option, question = vals[1], vals[0]
+                    query_result = self.shangju_db.select(self.sql_6, [option, question])
+                    for qr in query_result:
+                        sub_option_id, sub_question_id, content = qr[0], qr[1], qr[2]
+                        sub_option_ids.append(int(sub_option_id))
+            infos[key] = sub_option_ids
+        print(infos)
+        return infos

BIN
mvp/__pycache__/mvp.cpython-36.pyc


BIN
mvp/__pycache__/mvp.cpython-37.pyc


+ 0 - 267
mvp/mvp.py

@@ -1,267 +0,0 @@
-from db.mysql_db import MysqlDB
-from utils.excel_util import ExcelUtil
-
-
-class Mvp:
-    """
-     ce mvp 答题数据统计
-     城市特例 北京市,上海市, 重庆市,天津市
-    """
-
-    age_dict = {
-        '00-04年生': '00后',
-        '05-09年生': '05后',
-        '50-59年生': '50后',
-        '60-69年生': '60后',
-        '70-74年生': '70后',
-        '75-79年生': '75后',
-        '80-84年生': '80后',
-        '85-89年生': '85后',
-        '90-94年生': '90后',
-        '95-99年生': '95后'
-    }
-
-    crowd = ['A', 'B', 'C', 'D', 'E', 'F']
-
-    # 获取答题记录中城市列表
-    sql_1 = 'select city from f_t_daren_score_2 group by city'
-
-    # 获取父选项和父题id
-    sql_2 = 'select a.id, a.content, b.id, b.name from bq_option a left join bq_question b on a.question_id = b.id ' \
-            'where a.serial_number = %s and b.serial_number = %s and a.status = b.status = 1 '
-    # 数据插入表mvp_question_classification
-    sql_3 = 'insert into mvp_question_classification(question_serial_number, question_content, ' \
-            'option_serial_number, option_content, tag, corr) values(%s, %s, %s, %s, %s, %s) '
-
-    # 获取答题人的年龄段集合
-    sql_4 = 'select nld from f_t_daren_score_2 group by nld'
-
-    # 根据城市,年龄段,人群分类统计答题记录数
-    sql_5 = 'select group_type, COUNT(uuid) from f_t_daren_score_2 where (city = %s or province = %s) and nld ' \
-            '= %s and uuid in %s group by group_type '
-
-    # 根据父选项获取子选项id列表
-    sql_6 = 'SELECT c.id, c.sub_question_id, c.content FROM bq_sub_option c WHERE c.father_id in (SELECT a.id FROM ' \
-            'bq_option a ' \
-            'LEFT JOIN bq_question b ON a.question_id = b.id WHERE a.serial_number = %s AND b.serial_number = %s ' \
-            'and a.status = 1 and b.status = 1) and c.status = 1 '
-
-    # 根据子题id获取包含子题id的测试
-    sql_7 = 'select group_type from bq_testcase where status = 1 and FIND_IN_SET(%s, question_ids)'
-
-    # 根据子选项id统计答题数
-    sql_8 = 'SELECT count(uuid) FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.sub_question_id = ' \
-            'b.sub_question_id AND a.score  = b.score WHERE a.testcase_id = b.testcase_id and b.sub_option_id in %s' \
-            'and (a.city = %s or a.province = %s) and a.nld = %s and a.uuid in %s'
-
-    # 计算值写入表汇总
-    sql_9 = 'insert into mvp_standard_score(city, age, tag, crowd_type, score)  VALUES(%s, %s, %s, %s, %s)'
-
-    # 获取一个uuid下答题的子选项id列表
-    sql_10 = 'select  DISTINCT uuid, GROUP_CONCAT(DISTINCT b.sub_option_id)  from f_t_daren_score_2 a left join ' \
-             'd_shangju_tiku_02 b on a.sub_question_id = b.sub_question_id and a.score = b.score where a.status = ' \
-             'b.status = 1 group by uuid '
-
-    def __init__(self, path=None):
-        self.shangju_db = MysqlDB('shangju')
-        self.marketing_db = MysqlDB('marketing_db')
-        self.shangju_db.truncate('mvp_standard_score')
-        self.tag_data = ExcelUtil(path=path).init_mvp_data()
-        self.crowd_info = ExcelUtil(path=path, sheet_name='选项-人群分类对应表').init_crowd_info()
-        self.citys = self.init_city()
-        self.age = self.init_age()
-        self.people_sub_option_ids = self.marketing_db.select(self.sql_10)
-        self.crowd_contain_sub_option_ids = self.get_crowd_contain_sub_option_ids()
-
-    def init_city(self):
-        """
-            获取答题数据中的城市。
-        :return:
-        """
-        citys = ['北京市', '上海市', '重庆市', '天津市']
-        citys_info = self.marketing_db.select(self.sql_1)
-        citys.extend([x[0] for x in citys_info if x[0] is not None])
-        return citys
-
-    def write_tag(self, city=None, age=None, crowd=None):
-        """
-            将excel中的配置信息写入到数据库表中
-        :return:
-        """
-        # datas = []
-        # for key in self.tag_data.keys():
-        #     values = self.tag_data[key]
-        #     for value in values:
-        #         question = value[0].split('-')[0]
-        #         option = value[0].split('-')[1]
-        #         corr = value[1]
-        #         data = self.shangju_db.select(self.sql_2, [option, question])
-        #         if len(data) > 0:
-        #             print([question, option, data[0][3], data[0][1], key, corr])
-        #             datas.append([question, option, data[0][3], data[0][1], key, corr])
-        # self.shangju_db.truncate('mvp_question_classification')
-        # self.shangju_db.add_some(self.sql_3, datas)
-        result = self.city_age_crowd(city, age, crowd)
-        print('update finished!!!')
-        return result
-
-    def init_age(self):
-        """
-           获取答题数据中的年龄
-        """
-        age_info = self.marketing_db.select(self.sql_4)
-        # print([x[0] for x in age_info])
-        return [x[0] for x in age_info if x[0] is not None]
-
-    def city_age_crowd(self, city=None, age=None, crowd=None):
-        result = []
-        if city is not None and age is not None and crowd is not None:
-            print('获取指定城市,年龄段,人群类型的数据...')
-            people_uuids = self.get_people_uuid_by_type(crowd)
-            if len(people_uuids) > 0:
-                print('{}-{}-{}'.format(city, age, crowd))
-                datas = self.behavior_tag_init(city, age, people_uuids)
-                result.extend(self.calculation_standard_score(datas, city, age, crowd))
-            pass
-        else:
-            print('获取所有case的数据...')
-            for city in self.citys:
-                for age in self.age:
-                    if city != '上海市' and age != '85-89年生':
-                        for crowd_type in self.crowd:
-                            # print(' {}{}'.format(city, age))
-                            people_uuids = self.get_people_uuid_by_type(crowd_type)
-                            if len(people_uuids) > 0:
-                                print('{}-{}-{}'.format(city, age, crowd_type))
-                                datas = self.behavior_tag_init(city, age, people_uuids)
-                                result.extend(self.calculation_standard_score(datas, city, age, crowd_type))
-        return result
-
-    def behavior_tag_init(self, city, age, people_uuids):
-        result = {}
-        self.group_type_count = self.marketing_db.select(self.sql_5, [city, city, age, people_uuids])
-        for key in self.tag_data:
-            values = self.tag_data[key]
-            elements = []
-            for value in values:
-                question = value[0].split('-')[0]
-                option = value[0].split('-')[1]
-                corr = value[1]
-                fz, fm = self.molecular_value(question, option, city, age, people_uuids)
-                if fm == 0:
-                    c = 0
-                else:
-                    c = fz / fm
-                elements.append([question, option, corr, fz, fm, c])
-            result[key] = elements
-        return self.indicator_calculation_d_e(result)
-
-    def molecular_value(self, queston, option, city, age, people_uuids):
-        # 获取当前父选项包含的子选项id和子题id列表
-        result = self.shangju_db.select(self.sql_6, [option, queston])
-        sub_option_ids = []
-        group_types = []
-        for rt in result:
-            sub_option_id, sub_question_id, content = rt[0], rt[1], rt[2]
-            grouptypes = self.shangju_db.select(self.sql_7, [sub_question_id])
-            for g_t in grouptypes:
-                if g_t[0] not in group_types:
-                    group_types.append(g_t[0])
-            sub_option_ids.append(sub_option_id)
-        # 计算子选项在答题记录中的点击数
-
-        sub_options_count = 0
-        if len(sub_option_ids) > 0:
-            result_1 = self.marketing_db.select(self.sql_8, [sub_option_ids, city, city, age, people_uuids])
-            sub_options_count = result_1[0][0]
-        # 计算父选项包含的子选项对应的子题所在的测试gt包含的点击数。
-        denominator_value = 0
-        for info in self.group_type_count:
-            if info[0] in group_types:
-                denominator_value += info[1]
-        return sub_options_count, denominator_value
-
-    def indicator_calculation_d_e(self, data):
-        result = {}
-        for key in data.keys():
-            values = data[key]
-            c_list = [x[5] for x in values]
-            fm_list = [x[4] for x in values]
-            sum_c = sum(fm_list)
-            min_c = min(c_list)
-            elements = []
-            for value in values:
-                _value = []
-                c = value[5]
-                if sum_c == 0:
-                    d = 0
-                else:
-                    d = c / sum_c
-                e = c - min_c
-                _value.extend(value)
-                _value.append(d)
-                _value.append(e)
-                elements.append(_value)
-            result[key] = elements
-        return result
-
-    def calculation_standard_score(self, datas, city, age, crowd_type):
-        scores = []
-        for key in datas.keys():
-            print(key)
-            print('     父题序号 父选项序号 相关系系数 分子值 分母值 百分比 人数权重 偏离值')
-            values = [x[5] for x in datas[key]]
-            min_c = min(values)
-            f = min_c
-            for value in datas[key]:
-                print('     {}'.format(value))
-                if value[2] is not None and value[7] is not None:
-                    f += float(value[2] * value[7])
-            print('     标准分:{}'.format(f))
-            scores.append([city, age, key, crowd_type, f])
-        # self.shangju_db.add_some(self.sql_9, scores)
-        return scores
-
-    def get_crowd_people(self):
-        result = {}
-        for type in self.crowd:
-            uuids = self.get_people_uuid_by_type(type)
-            result[type] = len(uuids)
-        return result
-
-    def get_people_uuid_by_type(self, type):
-        # 获取每个答题者所答题的子选项id
-        uuids = []
-        type_sub_option_ids = self.crowd_contain_sub_option_ids[type]
-        for people in self.people_sub_option_ids:
-            uuid = people[0]
-            sub_option_ids = str(people[1]).split(',')
-            # list(set(a).intersection(set(b)))
-            if len(list(set(sub_option_ids).intersection(set(type_sub_option_ids)))) > 0:
-                uuids.append(uuid)
-        return uuids
-
-    def get_crowd_contain_sub_option_ids(self):
-        """
-            获取ABCDEF人群包含的子选项id
-        :return:
-        """
-        infos = {}
-        for key in self.crowd_info.keys():
-            values = self.crowd_info[key]
-            sub_option_ids = []
-            for value in values:
-                if value is not None:
-                    vals = str(value).split('-')
-                    option, question = vals[1], vals[0]
-                    query_result = self.shangju_db.select(self.sql_6, [option, question])
-                    for qr in query_result:
-                        sub_option_id, sub_question_id, content = qr[0], qr[1], qr[2]
-                        sub_option_ids.append(sub_option_id)
-            infos[key] = sub_option_ids
-        return infos
-
-
-if __name__ == '__main__':
-    mvp = Mvp()
-    mvp.write_tag()

+ 5 - 12
db/mysql_db.py

@@ -10,8 +10,10 @@ class MysqlDB:
 
     def __init__(self, db_name):
         self.db_name = db_name
-        self.con = ps.connect(host='192.168.0.13', port=3306, user='root', password='elab@123',
-                              db=self.db_name, charset='utf8')
+        # self.con = ps.connect(host='172.19.189.136', port=3306, user='bi_etl', password='XPtpswuU5lwGo4kx',
+        #                       db=self.db_name, charset='utf8')
+        self.con = ps.connect(host='192.168.0.13', port=3306, user='root', password='elab@123'
+                              , db=self.db_name, charset='utf8')
         self.cursor = self.con.cursor()
 
     def show_tables(self):
@@ -48,13 +50,4 @@ class MysqlDB:
 
     def close(self):
         self.cursor.close()
-        self.con.close()
-
-
-if __name__ == '__main__':
-    mysql_db = MysqlDB('marketing_db')
-    # sql = 'select * from bq_question a left join bq_option b on a.id = b.question_id where a.status = 1 and b.status = 1'
-    sql = 'select city from f_t_daren_score_2 group by city'
-    result = mysql_db.select(sql)
-    for rt in result:
-        print(rt)
+        self.con.close()

+ 16 - 30
resources/mvp.sql

@@ -1,30 +1,16 @@
-create table `mvp_standard_score` (
-    `id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'id',
-    `score` VARCHAR(10) NOT NULL COMMENT '标准分',
-	`city` VARCHAR(10) DEFAULT NULL COMMENT '城市',
-	`age` varchar(10) default NULL COMMENT '年龄',
-    `tag` VARCHAR(10) DEFAULT NULL COMMENT '标签',
-	`status` int(11) DEFAULT '1' COMMENT '1:EFFECTIVE:有效\r\n            -1:INVALID:无效',
-    `created` datetime DEFAULT NULL COMMENT '创建时间',
-    `creator` varchar(100) DEFAULT NULL COMMENT '创建人',
-    `updated` datetime DEFAULT NULL COMMENT '修改时间',
-    `updator` varchar(100) DEFAULT NULL COMMENT '修改人',
-    PRIMARY KEY (`id`)
-)ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COMMENT='mvp标准分'
-
-
-create table `mvp_question_classification` (
-    `id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'id',
-	`tag` VARCHAR(50) DEFAULT NULL COMMENT '标签名',
-	`question_serial_number` varchar(50) DEFAULT NULL COMMENT '父题编号',
-	`option_serial_number` varchar(50) DEFAULT NULL COMMENT '父选项编号',
-	`question_content` VARCHAR(50) DEFAULT NULL COMMENT '父题内容',
-	`option_content` VARCHAR(50) DEFAULT NULL COMMENT '父选项内容',
-	`corr` DOUBLE(16,2) DEFAULT NULL COMMENT '关联度系数',
-    `status` int(11) DEFAULT '1' COMMENT '1:EFFECTIVE:有效,-1:INVALID:无效',
-    `created` datetime DEFAULT NULL COMMENT '创建时间',
-    `creator` varchar(100) DEFAULT NULL COMMENT '创建人',
-    `updated` datetime DEFAULT NULL COMMENT '修改时间',
-    `updator` varchar(100) DEFAULT NULL COMMENT '修改人',
-	PRIMARY KEY (`id`)
-) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COMMENT='mvp父题分类'
+CREATE TABLE `mvp_crowd_info_module` (
+  `id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'id',
+  `crowd_info_id` int(11) DEFAULT NULL COMMENT '用户人群信息id',
+  `module_name` varchar(50) DEFAULT NULL COMMENT '模块名称',
+  `default_value` float(5,2) DEFAULT NULL COMMENT '原始数值',
+  `standard_value` float(5,2) DEFAULT NULL COMMENT '标准值',
+  `content` varchar(50) DEFAULT NULL COMMENT '内容',
+  `url` varchar(300) DEFAULT NULL COMMENT '链接',
+  `remarks` varchar(500) DEFAULT NULL COMMENT '备注',
+  `status` int(11) DEFAULT '1' COMMENT '1:EFFECTIVE:有效 -1:INVALID:无效',
+  `created` datetime DEFAULT NULL COMMENT '创建时间',
+  `creator` varchar(100) DEFAULT NULL COMMENT '创建人',
+  `updated` datetime DEFAULT NULL COMMENT '修改时间',
+  `updator` varchar(100) DEFAULT NULL COMMENT '修改人',
+  PRIMARY KEY (`id`)
+) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COMMENT='模块分数'

BIN
resources/mvp.xlsx


BIN
resources/行为与模块分值汇总.xlsx


+ 2 - 0
start_shell.sh

@@ -1,3 +1,5 @@
 #!/bin/bash
 # 开发环境部署
 python3 flask_app.py >>/tmp/huxingbao.log 2>&1 &
+# 生产环境部署方式,使用gunicorn进行部署。
+# gunicorn -w 1 -b 0.0.0.0:5001 service:app

BIN
utils/__pycache__/excel_util.cpython-37.pyc