Переглянути джерело

mvp: 修改清洗规则

Signed-off-by: binren <zhangbr@elab-plus.com>
binren 5 роки тому
батько
коміт
d2acec74e9
1 змінених файлів з 48 додано та 58 видалено
  1. 48 58
      mvp.py

+ 48 - 58
mvp.py

@@ -288,6 +288,23 @@ class Mvp:
             (%s, %s, %s, 1, 'binren', now())
     '''
 
+    sql_23 = '''
+        DELETE
+        FROM
+            mvp_crowd_info_module
+        WHERE
+            crowd_info_id NOT IN (
+                SELECT
+                    id
+                FROM
+                    mvp_crowd_info
+                WHERE
+                    city_name = '上海市'
+                AND age_area = '85后'
+                AND STATUS = 1
+            )
+    '''  # Deletes module rows whose crowd_info_id is not a matching mvp_crowd_info id (one id per subquery row; GROUP_CONCAT would collapse them into a single string).
+
     """
         数据debug SQL
         1:
@@ -343,8 +360,6 @@ class Mvp:
         self.module_scores = ExcelUtil(file_name='set-behavior-tag.xlsx', sheet_name='算法关系表').init_module_info()
         # self.scores_tag = ExcelUtil(file_name='行为与模块分值汇总.xlsx', sheet_name='行为').init_scores()
         # self.score_module = ExcelUtil(file_name='行为与模块分值汇总.xlsx', sheet_name='模块').init_scores()
-        self.scores_tag = None
-        self.score_module = None
         self.people_info_1 = self.people_info()
 
     def close(self):
@@ -477,6 +492,9 @@ class Mvp:
                     result = self.city_age_crowd(city, age, crowd)
                     self.insert_score_to_db(result)
         print('{}数据更新完成...'.format(time.time()))
+        insert_data = self.shanghai_85_module_score_insert()
+        self.insert_score_to_db(insert_data)
+        print('上海市-85后模块分值模拟数据入库成功...')
         self.update_gender_rate()
         self.update_icon()
 
@@ -558,7 +576,7 @@ class Mvp:
             if len(module_insert_data) > 0:
                 table_name = self.get_table_name('模块分数')
                 if table_name is not None and table_name not in self.insert_table:
-                    self.linshi_db.truncate(table_name)
+                    self.linshi_db.delete(self.sql_23)
                 self.linshi_db.add_some(module_insert_sql, module_insert_data)
                 self.insert_table.append(table_name)
                 print('模块分数更新完成...')
@@ -576,10 +594,10 @@ class Mvp:
                     if len(insert_data) > 0:
                         table_name = self.get_table_name(key)
                         if table_name and table_name not in self.insert_table:
-                            # if table_name == 'mvp_crowd_info_behavior':
-                            #     self.linshi_db.delete(self.sql_18)
-                            # else:
-                            self.linshi_db.truncate(table_name)
+                            if table_name == 'mvp_crowd_info_behavior':
+                                self.linshi_db.delete(self.sql_18)
+                            else:
+                                self.linshi_db.truncate(table_name)
                         self.linshi_db.add_some(insert_sql, insert_data)
                         self.insert_table.append(table_name)
                 else:
@@ -658,47 +676,15 @@ class Mvp:
                 module_name = key
                 score = 0
                 for value in values:
-                    behavioral_name = value[0]
+                    # behavioral_name = value[0]
                     weight = float(value[2])
                     # standard_score = [x[4] for x in scores if x[2] == behavioral_name]
                     standard_score = float(value[1])
                     if standard_score is not None:
                         score += standard_score * weight
                 result.append(['上海市', '85后', crowd, module_name, score])
-        return {'score': result, 'data': self.module_scores}
-
-    def tag_module_score_insert(self):
-        """
-            标签模块分数写入数据库
-        :return:
-        """
-        ids = self.query_data()
-        insert_data = []
-        insert_data_1 = []
-        for tag, module in zip(self.scores_tag, self.score_module):
-            city = tag[0]
-            age = tag[1]
-            crowd = tag[2]
-            tag_name = tag[3]
-            tag_score = tag[4]
-
-            city_2 = module[0]
-            age_2 = module[1]
-            crowd_2 = module[2]
-            module_name_2 = module[3]
-            module_score_2 = module[4]
-
-            for id in ids:
-                city_1 = id[2]
-                age_1 = id[1]
-                crowd_1 = id[3]
-                id_1 = id[0]
-                if city == city_1 and self.age_dict[age] == age_1 and crowd == crowd_1:
-                    insert_data.append([id_1, tag_name, tag_score])
-                if city_2 == city_1 and self.age_dict[age_2] == age_1 and crowd_2 == crowd_1:
-                    insert_data_1.append([id_1, module_name_2, module_score_2])
-        self.shangju_db.add_some(self.sql_12, insert_data)
-        self.shangju_db.add_some(self.sql_13, insert_data_1)
+        # return result
+        return {'behavior_score': [], 'module_score': result}
 
     def init_age(self):
         """
@@ -757,22 +743,26 @@ class Mvp:
         for key in self.tag_data.keys():
             values = self.tag_data[key]
             result_sub = {}
-            # 标签
-            for key_tag_name in values.keys():
-                questions = values[key_tag_name]
-                elements = []
-                for value in questions:
-                    question = value[0].split('-')[0]
-                    option = value[0].split('-')[1]
-                    corr = value[1]
-                    fz, fm = self.molecular_value(question, option, city, age, people_uuids)
-                    if fm == 0:
-                        c = 0
-                    else:
-                        c = fz / fm
-                    elements.append([question, option, corr, fz, fm, c])
-                result_sub[key_tag_name] = elements
-            result[key] = self.indicator_calculation_d_e(result_sub)
+            if key == '用户画像-行为兴趣' and city == '上海市' and age == '85后':
+                print('用户画像-行为兴趣,上海市,85后无需计算。')
+                pass
+            else:
+                # 标签
+                for key_tag_name in values.keys():
+                    questions = values[key_tag_name]
+                    elements = []
+                    for value in questions:
+                        question = value[0].split('-')[0]
+                        option = value[0].split('-')[1]
+                        corr = value[1]
+                        fz, fm = self.molecular_value(question, option, city, age, people_uuids)
+                        if fm == 0:
+                            c = 0
+                        else:
+                            c = fz / fm
+                        elements.append([question, option, corr, fz, fm, c])
+                    result_sub[key_tag_name] = elements
+                result[key] = self.indicator_calculation_d_e(result_sub)
         return result
 
     def molecular_value(self, queston, option, city, age, people_uuids):