zhangbr
/
elab_mvp


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
							import pandas as pd
import openpyxl as ox
from itertools import groupby
import os


class ExcelUtil:
    # 当前项目路径
    dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'\resources'
    """
        解析excel文件
    """
    def __init__(self, sheet_name=None, path=None):
        if path:
            self.path = path
        else:
            self.path = os.path.join(self.dir_path, 'mvp.xlsx')
        if sheet_name:
            self.sheet_name = sheet_name
        else:
            self.sheet_name = '硬标签+行为'

    def read_excel_by_pd(self):
        df = pd.read_excel(self.path)
        data = df.head()
        print('获取到的数据{}'.format(data))

    def read_excel_by_ox(self):
        work_book = ox.load_workbook(self.path, data_only=True)
        work_sheet = work_book.get_sheet_by_name(self.sheet_name)
        # print('max_row:{}, max_col:{}'.format(work_sheet.max_row, work_sheet.max_column))
        return work_sheet

    def init_crowd_info(self):
        """
            整理不同人群包含的父选序号
        :return:
        """
        rows = [row for row in self.read_excel_by_ox().rows]
        crowd_a = []
        crowd_b = []
        crowd_c = []
        crowd_d = []
        crowd_e = []
        crowd_f = []

        for row in rows[2:]:
            option = row[4].value
            a = row[6].value
            if a is not None and a == 1 and option not in crowd_a:
                crowd_a.append(option)
            b = row[7].value
            if b is not None and b == 1 and option not in crowd_b:
                crowd_b.append(option)
            c = row[8].value
            if c is not None and c == 1 and option not in crowd_d:
                crowd_c.append(option)
            d = row[9].value
            if d is not None and d == 1 and option not in crowd_d:
                crowd_d.append(option)
            e = row[10].value
            if e is not None and e == 1 and option not in crowd_e:
                crowd_e.append(option)
            f = row[11].value
            if f is not None and f == 1 and option not in crowd_f:
                crowd_f.append(option)
        return {'A': crowd_a, 'B': crowd_b, 'C': crowd_c, 'D': crowd_d, 'E': crowd_e, 'F': crowd_f}

    def init_mvp_data(self):
        """
            获取每个标签包括的父题父选项编号
        :return:
        """
        rows = [row for row in self.read_excel_by_ox().rows][24:]
        tag_name = None
        datas = []
        for row in rows:
            tag = row[1].value
            values = row[3].value
            corr = row[4].value
            if tag:
                tag_name = tag
            if values is not None:
                datas.append([tag_name, values, corr])
        result = {}
        for name, items in groupby(datas, key=lambda obj: obj[0]):
            orders = []
            for n in items:
                orders.append([n[1], n[2]])
            result[name] = orders
        return result


if __name__ == '__main__':
    # eu = ExcelUtil()
    # results = eu.init_mvp_data()
    # for key in results.keys():
    #     print(key)
    #     print('     {}'.format(results[key]))
    #     print('-'*40)
    import os
    dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    print(dir_path)