Browse Source

mvp: 增加人工调整数据的程序

Signed-off-by: binren <zhangbr@elab-plus.com>
binren 5 years ago
parent
commit
71d59c9b09
3 changed files with 65 additions and 3 deletions
  1. 10 0
      excel_util.py
  2. 55 3
      mvp.py
  3. BIN
      resources/mvp.xlsx

+ 10 - 0
excel_util.py

@@ -73,6 +73,16 @@ class ExcelUtil:
                 crowd_f.append(option)
         return {'A': crowd_a, 'B': crowd_b, 'C': crowd_c, 'D': crowd_d, 'E': crowd_e, 'F': crowd_f}
 
+
+    def init_out_way(self):
+        """
+            Load the manually maintained scores from the '用户画像-出行方式' sheet.
+
+            Every row contributes one entry. The key is the concatenation of
+            columns 3 and 4, the literal '市', and columns 6 and 7 (0-based);
+            the value is column 9 converted to ``float``.
+
+            Rows in which any of those five cells is empty are skipped, so
+            blank padding rows in the sheet do not abort the whole load.
+        :return: dict mapping the concatenated key to its float score
+        """
+        work_sheet = self.read_excel_by_ox_name('用户画像-出行方式')
+        result = {}
+        for row in work_sheet.rows:
+            key_parts = [row[3].value, row[4].value, row[6].value, row[7].value]
+            raw_score = row[9].value
+            if raw_score is None or any(part is None for part in key_parts):
+                # Incomplete row: nothing usable to register.
+                continue
+            # '市' is appended to column 4 so the key matches city labels
+            # such as '上海市' on the lookup side.
+            key = key_parts[0] + key_parts[1] + '市' + key_parts[2] + key_parts[3]
+            result[key] = float(raw_score)
+        return result
+
     def init_mvp_data(self):
         """
             获取每个标签包括的父题父选项编号

+ 55 - 3
mvp.py

@@ -363,6 +363,7 @@ class Mvp:
         # self.scores_tag = ExcelUtil(file_name='行为与模块分值汇总.xlsx', sheet_name='行为').init_scores()
         # self.score_module = ExcelUtil(file_name='行为与模块分值汇总.xlsx', sheet_name='模块').init_scores()
         self.people_info_1 = self.people_info()
+        # Manually maintained scores read from the '用户画像-出行方式' sheet (see ExcelUtil.init_out_way).
+        self.out_way_datas = ExcelUtil(file_name=path).init_out_way()
 
     def close(self):
         self.shangju_db.close()
@@ -538,8 +539,11 @@ class Mvp:
                     crowd_info_id = self.get_crowd_info_id([city, age, crowd])
                     if crowd_info_id and (boy + girl) > 0:
                         boy_rate = boy / (boy + girl)
-                        insert_data.append([crowd_info_id, 1, boy_rate])
                         girl_rate = girl / (boy + girl)
+                        if age == '95后' and city == '上海市':
+                            boy_rate = random.uniform(0.4, 0.6)
+                            girl_rate = 1 - boy_rate
+                        insert_data.append([crowd_info_id, 1, boy_rate])
                         insert_data.append([crowd_info_id, 0, girl_rate])
         if len(insert_data) > 0:
             self.linshi_db.truncate('mvp_crowd_info_gender_rate')
@@ -607,7 +611,7 @@ class Mvp:
                     insert_data = []
                     score = b_score[key]
                     for data in score:
-                        insert_data_element = self.need_inert(data)
+                        insert_data_element = self.need_inert(data, key)
                         if insert_data_element:
                             insert_data.append(insert_data_element)
                     if len(insert_data) > 0:
@@ -623,7 +627,7 @@ class Mvp:
                     print('未找到对应的表,数据无法插入...')
             print('行为分数更新完成...')
 
-    def need_inert(self, data):
+    def need_inert(self, data, table=None):
         city = data[0]
         age = data[1]
         crowd = data[2]
@@ -638,8 +642,56 @@ class Mvp:
             crowd_1 = id_data[3]
             id_1 = id_data[0]
             if city == city_1 and age == age_1 and crowd == crowd_1:
+                if table:
+                    people_tag_score = self.think_adjustment_data(table, city, age, tag_name, tag_score, crowd)
+                    tag_score = people_tag_score if people_tag_score is not None else tag_score
                 return [id_1, tag_name, tag_score]
 
+    def think_adjustment_data(self, table, city, age, tag_name, score, crowd):
+        """
+            Apply the manual (hand-tuned) adjustments to a single tag score.
+
+            Only three age/city segments are adjusted: '85后' in
+            '上海市'/'上海周边', '95后' in '上海市' and '95后' in '上海周边'.
+            Any other combination returns ``score`` unchanged. Depending on
+            the table, the score is either scaled by a random factor in
+            [0.8, 1.0], replaced by a random value in [0, 0.5], or replaced by
+            the value stored in ``self.out_way_datas``.
+        :param table: table name the score belongs to, e.g. '用户画像-行业'
+        :param city: city label of the crowd
+        :param age: age-group label of the crowd
+        :param tag_name: name of the tag being scored
+        :param score: score before adjustment
+        :param crowd: crowd label; only used for the '用户画像-出行方式' lookup
+        :return: the adjusted score, or ``score`` itself when no rule applies
+        """
+        # Tags whose '用户画像-消费观念' score is replaced by a random low value.
+        randomized_tags = ('高端奢侈', '国潮国货', '小众品牌',
+                           '亲民平价', '私人定制', '抽象艺术', '街头艺术',
+                           '非遗艺术', '古典艺术', '颜控', '养成类',
+                           '实力派', '黑科技', '实用科技')
+        is_randomized_concept = table == '用户画像-消费观念' and tag_name in randomized_tags
+
+        if age == '85后' and city in ('上海市', '上海周边'):
+            # NOTE(review): '用户画像-' looks like a truncated table name — confirm.
+            if table in ('用户画像-行业', '用户画像-生活方式', '用户画像-消费结构', '用户画像-'):
+                score = score * random.uniform(0.8, 1.0)
+            elif table == '用户画像-审美偏好' or is_randomized_concept:
+                score = random.uniform(0, 0.5)
+        elif age == '95后' and city == '上海市':
+            # Exact comparison: `in ('...')` without a trailing comma is a
+            # substring test on a plain string, not tuple membership.
+            if table == '用户画像-社交模式':
+                score = random.uniform(0.8, 1.0) * score
+            elif is_randomized_concept:
+                score = random.uniform(0, 0.5)
+            elif table == '用户画像-出行方式':
+                # Use the simulated data
+                people_score = self.out_way_datas.get(age + city + crowd + tag_name)
+                # `is not None` so that a stored score of 0.0 is still applied.
+                if people_score is not None:
+                    score = people_score
+        elif age == '95后' and city == '上海周边':
+            if table in ('用户画像-出行方式', '用户画像-行业', '用户画像-审美偏好',
+                         '用户画像-消费观念', '用户画像-消费结构', '用户画像-社交模式'):
+                score = score * random.uniform(0.8, 1.0)
+            elif table == '用户画像-生活方式':
+                score = random.uniform(0, 0.5)
+        return score
+
     def module_score(self, crowd, city, age, scores):
         """
             模块分数计算

BIN
resources/mvp.xlsx