123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145 |
- import pandas as pd
- import openpyxl as ox
- from itertools import groupby
- import os
class ExcelUtil:
    """Parse the project's Excel workbook into plain Python structures.

    Every ``init_*`` method loads the worksheet named ``self.sheet_name``
    from the file at ``self.path`` and reads cells by fixed column index
    after skipping a fixed number of leading rows; the layout each method
    relies on is spelled out in its own docstring.
    """

    # Resource directory: the parent of the directory containing this module,
    # followed by ``/elab_mvp/resources``.
    dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'/elab_mvp/resources'

    def __init__(self, sheet_name=None, file_name=None):
        """Select the workbook file and worksheet to read.

        :param sheet_name: worksheet name; any falsy value selects '硬标签+行为'.
        :param file_name: file name inside ``dir_path``; any falsy value
            selects 'mvp.xlsx'.
        """
        self.path = os.path.join(self.dir_path, file_name or 'mvp.xlsx')
        self.sheet_name = sheet_name or '硬标签+行为'

    def read_excel_by_pd(self):
        """Print the head of the workbook as loaded by pandas.

        pandas is called without a sheet name, so it reads its default sheet
        rather than ``self.sheet_name``. Nothing is returned.
        """
        preview = pd.read_excel(self.path).head()
        print('获取到的数据{}'.format(preview))

    def read_excel_by_ox(self):
        """Load the workbook with openpyxl and return the selected worksheet.

        ``data_only=True`` makes formula cells yield their cached values.

        :return: the worksheet named ``self.sheet_name``.
        :raises KeyError: if the workbook has no worksheet with that name.
        """
        work_book = ox.load_workbook(self.path, data_only=True)
        # Index access is the supported replacement for the deprecated
        # ``get_sheet_by_name``.
        return work_book[self.sheet_name]

    @staticmethod
    def _group_two_levels(records, outer_index, inner_index, payload_indexes):
        """Nest records as ``{outer: {inner: [[payload, ...], ...]}}``.

        Records are sorted by the outer key, then (within each outer group) by
        the inner key; both sorts are stable, so records sharing both keys keep
        their original relative order.

        :param records: list of equally shaped lists.
        :param outer_index: position of the first-level grouping key.
        :param inner_index: position of the second-level grouping key.
        :param payload_indexes: positions copied, in order, into each entry.
        :return: nested dict as described above.
        """
        def outer_key(record):
            return record[outer_index]

        def inner_key(record):
            return record[inner_index]

        nested = {}
        for outer, outer_group in groupby(sorted(records, key=outer_key), key=outer_key):
            inner_sorted = sorted(outer_group, key=inner_key)
            nested[outer] = {
                inner: [[record[i] for i in payload_indexes] for record in inner_group]
                for inner, inner_group in groupby(inner_sorted, key=inner_key)
            }
        return nested

    def init_crowd_info(self):
        """Collect, for each crowd, the distinct options flagged for it.

        The first two rows are skipped. In every remaining row the option is
        read from column index 4 and columns 6..11 are the flags for crowds
        'A'..'F'; a flag equal to 1 assigns the option to that crowd.

        :return: ``{'A': [...], ..., 'F': [...]}`` with each list holding the
            crowd's options without duplicates, in first-seen order.
        """
        crowd_labels = 'ABCDEF'
        first_flag_column = 6
        rows = list(self.read_excel_by_ox().rows)
        crowds = {label: [] for label in crowd_labels}
        for row in rows[2:]:
            option = row[4].value
            for column, label in enumerate(crowd_labels, start=first_flag_column):
                members = crowds[label]
                # Each crowd is de-duplicated against its own list (the
                # original checked crowd C against crowd D's list by mistake).
                if row[column].value == 1 and option not in members:
                    members.append(option)
        return crowds

    def init_mvp_data(self):
        """Collect the value/correlation pairs listed under every tag.

        The first 24 rows are skipped. Column 0 is the tag type, column 1 the
        tag name, column 3 the value and column 4 its companion ``corr`` cell.
        A blank type or name cell inherits the last non-blank value above it
        (presumably because those cells are merged -- confirm against the
        workbook). Rows whose value is empty or equals '找不到' are dropped.

        :return: ``{tag_type: {tag_name: [[value, corr], ...]}}``.
        """
        rows = list(self.read_excel_by_ox().rows)[24:]
        tag_type = None
        tag_name = None
        records = []
        for row in rows:
            row_type = row[0].value
            row_tag = row[1].value
            values = row[3].value
            corr = row[4].value
            if row_type:
                tag_type = row_type
            if row_tag:
                tag_name = row_tag
            if values is not None and values != '找不到':
                records.append([tag_type, tag_name, values, corr])
        return self._group_two_levels(records, 0, 1, (2, 3))

    def init_scores(self):
        """Return the first five cell values of every populated data row.

        The first row is skipped, as is any row whose first cell is empty.

        :return: list of five-element lists.
        """
        rows = list(self.read_excel_by_ox().rows)
        return [
            [row[index].value for index in range(5)]
            for row in rows[1:]
            if row[0].value is not None
        ]

    def init_module_info(self):
        """Collect per-crowd module weights for every behavior row.

        The first three rows are skipped. Column 1 is the crowd name (a blank
        cell inherits the last non-blank crowd above), column 2 the behavior
        and column 4 the score. From column 6 onwards the cells are read as
        ``(module_name, weight)`` pairs; pairs with an empty module name are
        ignored.

        :return: ``{crowd: {module_name: [[behavior, score, weight], ...]}}``.
        """
        work_sheet = self.read_excel_by_ox()
        max_column = work_sheet.max_column
        crowd_name = None
        records = []
        for row in list(work_sheet.rows)[3:]:
            crowd = row[1].value
            if crowd is not None:
                crowd_name = crowd
            behavior = row[2].value
            score = row[4].value
            # Stop one short of the last column so ``index + 1`` stays in range.
            for index in range(6, max_column - 1, 2):
                module_name = row[index].value
                if module_name is not None:
                    weight = row[index + 1].value
                    records.append([crowd_name, behavior, score, module_name, weight])
        return self._group_two_levels(records, 0, 3, (1, 2, 4))
|