import pandas as pd import openpyxl as ox from itertools import groupby import os class ExcelUtil: # 当前项目路径 dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'\resources' """ 解析excel文件 """ def __init__(self, sheet_name=None, path=None): if path: self.path = path else: self.path = os.path.join(self.dir_path, 'mvp.xlsx') if sheet_name: self.sheet_name = sheet_name else: self.sheet_name = '硬标签+行为' def read_excel_by_pd(self): df = pd.read_excel(self.path) data = df.head() print('获取到的数据{}'.format(data)) def read_excel_by_ox(self): work_book = ox.load_workbook(self.path, data_only=True) work_sheet = work_book.get_sheet_by_name(self.sheet_name) # print('max_row:{}, max_col:{}'.format(work_sheet.max_row, work_sheet.max_column)) return work_sheet def init_crowd_info(self): """ 整理不同人群包含的父选序号 :return: """ rows = [row for row in self.read_excel_by_ox().rows] crowd_a = [] crowd_b = [] crowd_c = [] crowd_d = [] crowd_e = [] crowd_f = [] for row in rows[2:]: option = row[4].value a = row[6].value if a is not None and a == 1 and option not in crowd_a: crowd_a.append(option) b = row[7].value if b is not None and b == 1 and option not in crowd_b: crowd_b.append(option) c = row[8].value if c is not None and c == 1 and option not in crowd_d: crowd_c.append(option) d = row[9].value if d is not None and d == 1 and option not in crowd_d: crowd_d.append(option) e = row[10].value if e is not None and e == 1 and option not in crowd_e: crowd_e.append(option) f = row[11].value if f is not None and f == 1 and option not in crowd_f: crowd_f.append(option) return {'A': crowd_a, 'B': crowd_b, 'C': crowd_c, 'D': crowd_d, 'E': crowd_e, 'F': crowd_f} def init_mvp_data(self): """ 获取每个标签包括的父题父选项编号 :return: """ rows = [row for row in self.read_excel_by_ox().rows][24:] tag_name = None datas = [] for row in rows: tag = row[1].value values = row[3].value corr = row[4].value if tag: tag_name = tag if values is not None: datas.append([tag_name, values, corr]) result = {} for name, items in groupby(datas, key=lambda obj: obj[0]): orders = [] for n in items: orders.append([n[1], n[2]]) result[name] = orders return result if __name__ == '__main__': # eu = ExcelUtil() # results = eu.init_mvp_data() # for key in results.keys(): # print(key) # print(' {}'.format(results[key])) # print('-'*40) import os dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) print(dir_path)