excel_util.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. import pandas as pd
  2. import openpyxl as ox
  3. from itertools import groupby
  4. import os
  5. class ExcelUtil:
  6. # 当前项目路径
  7. dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'/elab_mvp/resources'
  8. """
  9. 解析excel文件
  10. """
  11. def __init__(self, sheet_name=None, file_name=None):
  12. if file_name:
  13. self.path = os.path.join(self.dir_path, file_name)
  14. else:
  15. self.path = os.path.join(self.dir_path, 'mvp.xlsx')
  16. if sheet_name:
  17. self.sheet_name = sheet_name
  18. else:
  19. self.sheet_name = '硬标签+行为'
  20. def read_excel_by_pd(self):
  21. df = pd.read_excel(self.path)
  22. data = df.head()
  23. print('获取到的数据{}'.format(data))
  24. def read_excel_by_ox(self):
  25. work_book = ox.load_workbook(self.path, data_only=True)
  26. work_sheet = work_book.get_sheet_by_name(self.sheet_name)
  27. # print('max_row:{}, max_col:{}'.format(work_sheet.max_row, work_sheet.max_column))
  28. return work_sheet
  29. def init_crowd_info(self):
  30. """
  31. 整理不同人群包含的父选序号
  32. :return:
  33. """
  34. rows = [row for row in self.read_excel_by_ox().rows]
  35. crowd_a = []
  36. crowd_b = []
  37. crowd_c = []
  38. crowd_d = []
  39. crowd_e = []
  40. crowd_f = []
  41. for row in rows[2:]:
  42. option = row[4].value
  43. a = row[6].value
  44. if a is not None and a == 1 and option not in crowd_a:
  45. crowd_a.append(option)
  46. b = row[7].value
  47. if b is not None and b == 1 and option not in crowd_b:
  48. crowd_b.append(option)
  49. c = row[8].value
  50. if c is not None and c == 1 and option not in crowd_d:
  51. crowd_c.append(option)
  52. d = row[9].value
  53. if d is not None and d == 1 and option not in crowd_d:
  54. crowd_d.append(option)
  55. e = row[10].value
  56. if e is not None and e == 1 and option not in crowd_e:
  57. crowd_e.append(option)
  58. f = row[11].value
  59. if f is not None and f == 1 and option not in crowd_f:
  60. crowd_f.append(option)
  61. return {'A': crowd_a, 'B': crowd_b, 'C': crowd_c, 'D': crowd_d, 'E': crowd_e, 'F': crowd_f}
  62. def init_mvp_data(self):
  63. """
  64. 获取每个标签包括的父题父选项编号
  65. :return:
  66. """
  67. rows = [row for row in self.read_excel_by_ox().rows][24:]
  68. tag_name = None
  69. tag_type = None
  70. datas = []
  71. for row in rows:
  72. tag_type_1 = row[0].value
  73. tag = row[1].value
  74. values = row[3].value
  75. corr = row[4].value
  76. if tag_type_1:
  77. tag_type = tag_type_1
  78. if tag:
  79. tag_name = tag
  80. if values is not None and values != '找不到':
  81. datas.append([tag_type, tag_name, values, corr])
  82. result = {}
  83. datas.sort(key=lambda obj: obj[0])
  84. for tag_type, sub_datas in groupby(datas, key=lambda obj: obj[0]):
  85. sub_list = [x for x in sub_datas]
  86. sub_list.sort(key=lambda obj: obj[1])
  87. sub_result = {}
  88. for name, items in groupby(sub_list, key=lambda obj: obj[1]):
  89. orders = []
  90. for n in items:
  91. orders.append([n[2], n[3]])
  92. sub_result[name] = orders
  93. result[tag_type] = sub_result
  94. return result
  95. def init_scores(self):
  96. work_sheet = self.read_excel_by_ox()
  97. rows = [row for row in work_sheet.rows]
  98. datas = []
  99. for row in rows[1:]:
  100. if row[0].value is not None:
  101. datas.append([row[0].value, row[1].value, row[2].value, row[3].value, row[4].value])
  102. return datas
  103. def init_module_info(self):
  104. work_sheet = self.read_excel_by_ox()
  105. max_column = work_sheet.max_column
  106. rows = [row for row in work_sheet.rows][3:]
  107. crowd_name = None
  108. datas = []
  109. for row in rows:
  110. crowd = row[1].value
  111. if crowd is not None:
  112. crowd_name = crowd
  113. behavior = row[2].value
  114. score = row[4].value
  115. for index in range(6, max_column - 1, 2):
  116. module_name = row[index].value
  117. if module_name is not None:
  118. weight = row[index + 1].value
  119. datas.append([crowd_name, behavior, score, module_name, weight])
  120. results = {}
  121. datas.sort(key=lambda obj: obj[0])
  122. for name, items in groupby(datas, key=lambda obj: obj[0]):
  123. sub_results = {}
  124. sub_list = []
  125. for it in items:
  126. sub_list.append([x for x in it])
  127. sub_list.sort(key=lambda obj: obj[3])
  128. for name_1, itmes_1 in groupby(sub_list, key=lambda obj: obj[3]):
  129. sub_data = []
  130. for n in itmes_1:
  131. # print(' {}'.format(n[1]))
  132. sub_data.append([n[1], n[2], n[4]])
  133. sub_results[name_1] = sub_data
  134. results[name] = sub_results
  135. return results