فهرست منبع

elab_mvp: 修改人群分类城市信息获取逻辑

Signed-off-by: binren <zhangbr@elab-plus.com>
binren 5 سال پیش
والد
کامیت
e8dd9d4102
3 فایلهای تغییر یافته به همراه 142 افزوده شده و 8 حذف شده
  1. 16 0
      entity.py
  2. 126 8
      mvp.py
  3. BIN
      resources/mvp.xlsx

+ 16 - 0
entity.py

@@ -0,0 +1,16 @@
+class PeopleInfo:
+    """
+    Personal information of one questionnaire respondent.
+
+    The raw city is bucketed on construction: '上海市' and '一线' are stored
+    as '上海市'; '无锡市', '杭州市', '苏州市' and '宁波市' are stored as
+    '上海周边'; any other value (including None) is stored unchanged.
+    """
+
+    def __init__(self, uuid, city=None, age=None, sex=None, crowd=None):
+        # Raw values folded into the Shanghai bucket.
+        shanghai_aliases = ('上海市', '一线')
+        # Raw values folded into the "around Shanghai" bucket.
+        shanghai_nearby = ('无锡市', '杭州市', '苏州市', '宁波市')
+
+        if city in shanghai_aliases:
+            bucket = '上海市'
+        elif city in shanghai_nearby:
+            bucket = '上海周边'
+        else:
+            bucket = city
+
+        self.uuid = uuid
+        self.city = bucket
+        self.age = age
+        self.sex = sex
+        self.crowd = crowd

+ 126 - 8
mvp.py

@@ -1,6 +1,7 @@
 from mysql_db import MysqlDB
 from excel_util import ExcelUtil
 import time
+from entity import PeopleInfo
 
 
 class Mvp:
@@ -43,6 +44,15 @@ class Mvp:
         '空间需求图谱-空间拓普图': ['mvp_innovate_space_space_top', ''],
         '模块分数': ['mvp_crowd_info_module', 'module_name']
     }
+    # Maps a sub_option_id (second column of the rows returned by sql_17) to a
+    # crowd-type letter. people_info() uses it for testcases 75, 76 and 77.
+    crowd_info = {
+        1973: 'A',
+        1974: 'B',
+        1975: 'C',
+        1976: 'D',
+        1977: 'E',
+        1978: 'F',
+        1979: 'G',
+    }
     base_insert_sql = 'insert into {}(crowd_info_id, {}, standard_value, status) values(%s, %s, %s, '\
                       '1) '
 
@@ -79,8 +89,7 @@ class Mvp:
     sql_4 = 'select nld from f_t_daren_score_2 group by nld'
 
     # 根据城市,年龄段,人群分类统计答题记录数
-    sql_5 = 'select group_type, COUNT(uuid) from f_t_daren_score_2 where (city = %s or province = %s) and nld ' \
-            '= %s and uuid in %s group by group_type '
+    sql_5 = 'select group_type, COUNT(uuid) from f_t_daren_score_2 where uuid in %s group by group_type '
 
     # 根据父选项获取子选项id列表
     sql_6 = 'SELECT c.id, c.sub_question_id, c.content FROM bq_sub_option c WHERE c.father_id in (SELECT a.id FROM ' \
@@ -94,8 +103,7 @@ class Mvp:
     # 根据子选项id统计答题数
     sql_8 = 'SELECT count(1) FROM f_t_daren_score_2 a LEFT JOIN d_shangju_tiku_02 b ON a.sub_question_id = ' \
             'b.sub_question_id AND (a.score  = b.score or a.score = b.sub_option_id) and a.testcase_id = ' \
-            'b.testcase_id WHERE b.sub_option_id in %s' \
-            'and (a.city = %s or a.province = %s) and a.nld = %s and a.uuid in %s'
+            'b.testcase_id WHERE b.sub_option_id in %s and a.uuid in %s'
 
     # 获取一个uuid下答题的子选项id列表
     sql_10 = 'select  DISTINCT uuid, GROUP_CONCAT(DISTINCT b.sub_option_id)  from f_t_daren_score_2 a left join ' \
@@ -117,6 +125,64 @@ class Mvp:
 
     sql_14 = 'select a.id, a.age_area, a.city_name, a.crowd_type from mvp_crowd_info a where a.status = 1'
 
+    # Per-uuid respondent profile: city/province, age band (nld), sex, matched
+    # sub_option ids and testcase ids, each collapsed with GROUP_CONCAT.
+    # people_info() reads the six columns by position.
+    # NOTE(review): GROUP_CONCAT(DISTINCT a.city, a.province) joins the two
+    # columns of each row into one value (rows where either is NULL are
+    # presumably dropped -- verify). Confirm this is the intended "city",
+    # since PeopleInfo compares it against plain city names.
+    # NOTE(review): the WHERE clause filters on b.* columns, so the LEFT JOIN
+    # behaves like an inner join; a.testcase_id = b.testcase_id is repeated.
+    sql_15 = '''
+        SELECT
+            a.uuid,
+            GROUP_CONCAT(DISTINCT a.city, a.province) AS city,
+            GROUP_CONCAT(DISTINCT a.nld) AS nld,
+            GROUP_CONCAT(DISTINCT a.sex) AS sex,
+            GROUP_CONCAT(DISTINCT b.sub_option_id),
+            GROUP_CONCAT(DISTINCT a.testcase_id)
+        FROM
+            f_t_daren_score_2 a
+        LEFT JOIN d_shangju_tiku_02 b ON a.testcase_id = b.testcase_id
+        WHERE
+            a.testcase_id = b.testcase_id
+        AND a.sub_question_id = b.sub_question_id
+        AND (
+            a.score = b.score
+            OR a.score = b.sub_option_id
+        )
+        GROUP BY
+            a.uuid
+    '''
+
+    # Fetch the answer row(s) one uuid gave to sub_question 303, joined with
+    # the question bank. people_info() uses it as a fallback when sql_15
+    # returned no city.
+    # NOTE(review): SELECT * is read positionally (column index 1 of the first
+    # row) -- confirm that index really holds the city for this table layout.
+    sql_16 = '''
+        SELECT
+            *
+        FROM
+            f_t_daren_score_2 a
+        LEFT JOIN d_shangju_tiku_02 b ON a.testcase_id = b.testcase_id
+        WHERE
+            a.sub_question_id = b.sub_question_id
+        AND (
+            a.score = b.score
+            OR a.score = b.sub_option_id
+        )
+        AND a.uuid = %s
+        AND a.sub_question_id = 303
+    '''
+
+    # Crowd-classification answer of one respondent: returns (uuid,
+    # sub_option_id) for the option chosen on sub_question 286. Takes the
+    # uuid as its single parameter.
+    # Fixes: the literal used to open with four quotes, which put a stray
+    # single quote at the start of the SQL text; and the status filter was a
+    # chained comparison (a.status = b.status = 1), which MySQL evaluates as
+    # (a.status = b.status) = 1 rather than "both are 1".
+    sql_17 = '''
+        SELECT
+            a.uuid,
+            b.sub_option_id
+        FROM
+            f_t_daren_score_2 a
+        LEFT JOIN d_shangju_tiku_02 b ON a.testcase_id = b.testcase_id
+        WHERE
+            a.sub_question_id = b.sub_question_id
+        AND (
+            a.score = b.score
+            OR a.score = b.sub_option_id
+        )
+        AND a.uuid = %s
+        AND a.sub_question_id = 286
+        AND a.status = 1
+        AND b.status = 1
+    '''
+
     def __init__(self, path=None):
         self.shangju_db = MysqlDB('shangju')
         self.marketing_db = MysqlDB('bi_report')
@@ -132,6 +198,7 @@ class Mvp:
         # self.score_module = ExcelUtil(file_name='行为与模块分值汇总.xlsx', sheet_name='模块').init_scores()
         self.scores_tag = None
         self.score_module = None
+        self.people_info_1 = self.people_info()
 
     def close(self):
         self.shangju_db.close()
@@ -171,6 +238,55 @@ class Mvp:
         print('update finished!!!')
         return scores_behavioral
 
+    def people_info(self):
+        """
+        Build the list of respondent profiles later used by people_filter.
+
+        Runs sql_15 to get one row per uuid (city, age band, sex, matched
+        sub_option ids, testcase ids), fills in a missing city from sql_16,
+        resolves the respondent's crowd types and wraps each row in a
+        PeopleInfo object.
+
+        :return: list of PeopleInfo, one per row returned by sql_15
+        """
+        # Testcases whose crowd type is read from the answer to a dedicated
+        # question (sql_17) instead of being derived from sub_option ids.
+        special_testcases = {75, 76, 77}
+        people_infos = []
+        for row in self.marketing_db.select(self.sql_15):
+            uuid, city, nld, sex, sub_option_ids, raw_testcase_ids = row[:6]
+            # The testcase column is a GROUP_CONCAT result (comma separated
+            # text), so it has to be parsed before any numeric comparison;
+            # comparing the raw string with an int raises TypeError on
+            # Python 3 and a membership test against ints never matches.
+            testcase_ids = self._parse_id_list(raw_testcase_ids)
+            if city is None and any(tid > 75 for tid in testcase_ids):
+                # Fall back to the city given in the questionnaire answers.
+                # NOTE(review): index 1 of a SELECT * row -- confirm column.
+                citys = self.marketing_db.select(self.sql_16, [uuid])
+                if citys:
+                    city = citys[0][1]
+            if testcase_ids & special_testcases:
+                # Unknown option ids are skipped so that one unexpected row
+                # does not abort construction of the whole cache.
+                crowd = [
+                    self.crowd_info[option[1]]
+                    for option in self.marketing_db.select(self.sql_17, [uuid])
+                    if option[1] in self.crowd_info
+                ]
+            else:
+                crowd = list(
+                    self.get_people_uuid_by_sub_option_ids(sub_option_ids))
+            people_infos.append(PeopleInfo(uuid, city, nld, sex, crowd))
+        return people_infos
+
+    @staticmethod
+    def _parse_id_list(value):
+        """Parse a GROUP_CONCAT result such as '75,76' into a set of ints."""
+        if value is None:
+            return set()
+        if isinstance(value, (bytes, bytearray)):
+            value = value.decode()
+        return {int(part) for part in str(value).split(',') if part.strip()}
+
+    def people_filter(self, city, nld, crowd):
+        """
+        Select the uuids of cached respondents matching all three criteria.
+
+        :param city: compared for equality with each person's city
+        :param nld: compared for equality with each person's age
+        :param crowd: crowd label that must be contained in the person's
+            crowd list
+        :return: matching uuids, in the order of self.people_info_1
+        """
+        # NOTE(review): the call sites visible in this change pass `city` as
+        # the third argument; a crowd label is what is tested here.
+        return [
+            person.uuid
+            for person in self.people_info_1
+            if person.city == city
+            and person.age == nld
+            and crowd in person.crowd
+        ]
+
+    def get_people_uuid_by_sub_option_ids(self, sub_option_ids):
+        """
+        Resolve the crowd types a respondent belongs to.
+
+        A crowd type matches when at least one of the respondent's
+        sub_option ids appears in that type's id list in
+        self.crowd_contain_sub_option_ids.
+
+        :param sub_option_ids: comma separated id string (e.g. '12,34'), a
+            single id, an iterable of ids, or None
+        :return: list of matching crowd-type keys, in mapping order
+        """
+        if sub_option_ids is None:
+            return []
+        if isinstance(sub_option_ids, (bytes, bytearray)):
+            sub_option_ids = sub_option_ids.decode()
+        # Parse once, before the loop: re-parsing the already converted list
+        # on every iteration stringified it ('[1, 2]') and broke int().
+        if isinstance(sub_option_ids, (list, tuple, set, frozenset)):
+            answered = {int(item) for item in sub_option_ids}
+        else:
+            answered = {
+                int(part)
+                for part in str(sub_option_ids).split(',')
+                if part.strip()
+            }
+        # Mapping keys are unique, so no duplicate check is needed (the old
+        # one tested membership in the builtin `type` and raised TypeError).
+        return [
+            key
+            for key, type_ids in self.crowd_contain_sub_option_ids.items()
+            if answered.intersection(type_ids)
+        ]
+
     def update_data(self):
         """
             定时更新分值
@@ -356,7 +472,8 @@ class Mvp:
         module_scores = []
         if city is not None and age is not None and crowd is not None:
             print('获取指定城市,年龄段,人群类型的数据...')
-            people_uuids = self.get_people_uuid_by_type(crowd)
+            # people_uuids = self.get_people_uuid_by_type(crowd)
+            people_uuids = self.people_filter(city, age, city)
             behavior_data = None
             if len(people_uuids) > 0:
                 print('{}-{}-{}'.format(city, age, crowd))
@@ -378,7 +495,8 @@ class Mvp:
                         pass
                     else:
                         # print(' {}{}'.format(city, age))
-                        people_uuids = self.get_people_uuid_by_type(crowd_type)
+                        # people_uuids = self.get_people_uuid_by_type(crowd_type)
+                        people_uuids = self.people_filter(city, age, city)
                         behavior_data = None
                         if len(people_uuids) > 0:
                             print('{}-{}-{}'.format(city, age, crowd_type))
@@ -404,7 +522,7 @@ class Mvp:
 
     def behavior_tag_init(self, city, age, people_uuids):
         result = {}
-        self.group_type_count = self.marketing_db.select(self.sql_5, [city, city, age, people_uuids])
+        self.group_type_count = self.marketing_db.select(self.sql_5, [people_uuids])
         # 表名
         for key in self.tag_data:
             values = self.tag_data[key]
@@ -442,7 +560,7 @@ class Mvp:
         # 计算子选项在答题记录中的点击数
         sub_options_count = 0
         if len(sub_option_ids) > 0:
-            result_1 = self.marketing_db.select(self.sql_8, [sub_option_ids, city, city, age, people_uuids])
+            result_1 = self.marketing_db.select(self.sql_8, [sub_option_ids, people_uuids])
             sub_options_count = result_1[0][0]
         # 计算父选项包含的子选项对应的子题所在的测试gt包含的点击数。
         denominator_value = 0

BIN
resources/mvp.xlsx