excel_util.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. import pandas as pd
  2. import openpyxl as ox
  3. from itertools import groupby
  4. import os
  5. import tablib
  6. class ExcelUtil:
  7. # 当前项目路径
  8. dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'/elab_mvp/resources'
  9. """
  10. 解析excel文件
  11. """
  12. def __init__(self, sheet_name=None, file_name=None):
  13. if file_name:
  14. self.path = os.path.join(self.dir_path, file_name)
  15. else:
  16. self.path = os.path.join(self.dir_path, 'mvp.xlsx')
  17. if sheet_name:
  18. self.sheet_name = sheet_name
  19. else:
  20. self.sheet_name = '测试数据'
  21. def read_excel_by_pd(self):
  22. df = pd.read_excel(self.path)
  23. data = df.head()
  24. print('获取到的数据{}'.format(data))
  25. def read_excel_by_ox(self):
  26. work_book = ox.load_workbook(self.path, data_only=True)
  27. work_sheet = work_book.get_sheet_by_name(self.sheet_name)
  28. # print('max_row:{}, max_col:{}'.format(work_sheet.max_row, work_sheet.max_column))
  29. return work_sheet
  30. def read_excel_by_ox_name(self, sheet_name):
  31. work_book = ox.load_workbook(self.path, data_only=True)
  32. work_sheet = work_book.get_sheet_by_name(sheet_name)
  33. # print('max_row:{}, max_col:{}'.format(work_sheet.max_row, work_sheet.max_column))
  34. return work_sheet
  35. def init_crowd_info(self):
  36. """
  37. 整理不同人群包含的父选序号
  38. :return:
  39. """
  40. rows = [row for row in self.read_excel_by_ox().rows]
  41. crowd_a = []
  42. crowd_b = []
  43. crowd_c = []
  44. crowd_d = []
  45. crowd_e = []
  46. crowd_f = []
  47. for row in rows[2:]:
  48. option = row[4].value
  49. a = row[6].value
  50. if a is not None and a == 1 and option not in crowd_a:
  51. crowd_a.append(option)
  52. b = row[7].value
  53. if b is not None and b == 1 and option not in crowd_b:
  54. crowd_b.append(option)
  55. c = row[8].value
  56. if c is not None and c == 1 and option not in crowd_d:
  57. crowd_c.append(option)
  58. d = row[9].value
  59. if d is not None and d == 1 and option not in crowd_d:
  60. crowd_d.append(option)
  61. e = row[10].value
  62. if e is not None and e == 1 and option not in crowd_e:
  63. crowd_e.append(option)
  64. f = row[11].value
  65. if f is not None and f == 1 and option not in crowd_f:
  66. crowd_f.append(option)
  67. return {'A': crowd_a, 'B': crowd_b, 'C': crowd_c, 'D': crowd_d, 'E': crowd_e, 'F': crowd_f}
  68. def init_out_way(self):
  69. result = {}
  70. work_sheet = self.read_excel_by_ox_name('用户画像-出行方式')
  71. rows = work_sheet.rows
  72. for row in rows:
  73. key = row[3].value + row[4].value + '市' + row[6].value + row[7].value
  74. result[key] = float(row[9].value)
  75. return result
  76. def init_mvp_data(self):
  77. """
  78. 获取每个标签包括的父题父选项编号
  79. :return:
  80. """
  81. no_need_module = ['空间需求图谱-单品偏好', '空间需求图谱-精装关注点', '空间需求图谱-空间特性偏好', '空间需求-材质',
  82. '空间需求-色调', '空间需求-色相']
  83. rows = [row for row in self.read_excel_by_ox().rows][36:]
  84. tag_name = None
  85. tag_type = None
  86. datas = []
  87. for row in rows:
  88. tag_type_1 = row[0].value
  89. tag = row[1].value
  90. values = row[3].value
  91. corr = row[4].value
  92. if tag_type_1:
  93. tag_type = tag_type_1
  94. if tag:
  95. tag_name = tag
  96. if values is not None and values != '找不到':
  97. datas.append([tag_type, tag_name, values, corr])
  98. result = {}
  99. datas.sort(key=lambda obj: obj[0])
  100. for tag_type, sub_datas in groupby(datas, key=lambda obj: obj[0]):
  101. if tag_type not in no_need_module:
  102. sub_list = [x for x in sub_datas]
  103. sub_list.sort(key=lambda obj: obj[1])
  104. sub_result = {}
  105. for name, items in groupby(sub_list, key=lambda obj: obj[1]):
  106. orders = []
  107. for n in items:
  108. orders.append([n[2], n[3]])
  109. sub_result[name] = orders
  110. result[tag_type] = sub_result
  111. return result
  112. def init_scores(self):
  113. work_sheet = self.read_excel_by_ox()
  114. rows = [row for row in work_sheet.rows]
  115. datas = []
  116. for row in rows[1:]:
  117. if row[0].value is not None:
  118. datas.append([row[0].value, row[1].value, row[2].value, row[3].value, row[4].value])
  119. return datas
  120. def init_module_info(self):
  121. work_sheet = self.read_excel_by_ox()
  122. max_column = work_sheet.max_column
  123. rows = [row for row in work_sheet.rows][3:]
  124. crowd_name = None
  125. datas = []
  126. for row in rows:
  127. crowd = row[1].value
  128. if crowd is not None:
  129. crowd_name = crowd
  130. behavior = row[2].value
  131. score = row[4].value
  132. for index in range(6, max_column - 1, 2):
  133. module_name = row[index].value
  134. if module_name is not None:
  135. weight = row[index + 1].value
  136. datas.append([crowd_name, behavior, score, module_name, weight])
  137. results = {}
  138. datas.sort(key=lambda obj: obj[0])
  139. for name, items in groupby(datas, key=lambda obj: obj[0]):
  140. sub_results = {}
  141. sub_list = []
  142. for it in items:
  143. sub_list.append([x for x in it])
  144. sub_list.sort(key=lambda obj: obj[3])
  145. for name_1, itmes_1 in groupby(sub_list, key=lambda obj: obj[3]):
  146. sub_data = []
  147. for n in itmes_1:
  148. # print(' {}'.format(n[1]))
  149. sub_data.append([n[1], n[2], n[4]])
  150. sub_results[name_1] = sub_data
  151. results[name] = sub_results
  152. return results
  153. def module_behavior_info(self):
  154. """
  155. 构建模块和行为的关联信息
  156. :return:
  157. """
  158. work_sheet = self.read_excel_by_ox_name('行为-模块映射表')
  159. max_column = work_sheet.max_column
  160. rows = [row for row in work_sheet.rows][1:]
  161. infos = []
  162. for row in rows:
  163. behavior_name = row[1].value
  164. for i in range(2, max_column - 1):
  165. module_name = row[i].value
  166. if module_name:
  167. if i == 2:
  168. weight = 1
  169. else:
  170. weight = 0.5
  171. infos.append([row[i].value, behavior_name, weight])
  172. infos.sort(key=lambda obj: obj[0])
  173. result = {}
  174. for key, data in groupby(infos, key=lambda obj: obj[0]):
  175. behavior_data = []
  176. for dt in data:
  177. dt_list = [x for x in dt]
  178. if len(behavior_data) <= 14:
  179. behavior_data.append([dt_list[1], dt_list[2]])
  180. result[key] = behavior_data
  181. return result
  182. def read_options_info(self):
  183. """
  184. 获取选项的配置信息
  185. :return:
  186. """
  187. work_sheet = self.read_excel_by_ox()
  188. rows = [row for row in work_sheet.rows][1:]
  189. info = {}
  190. for row in rows:
  191. key = str(int(row[1].value)) + str(int(row[9].value))
  192. # tag, title, name
  193. info[key] = [row[15].value, row[13].value, row[14].value, row[6].value]
  194. return info
  195. def create_excle(self, file_name, header, data):
  196. data_set = tablib.Dataset(data, header=header)
  197. save_path = os.path.join(self.dir_path, file_name)
  198. with open(save_path, 'wb', encoding='utf8') as f:
  199. f.write(data_set.xlsx)
  200. def wenjuanxin_84(self):
  201. work_sheet = self.read_excel_by_ox()
  202. question_work_sheet = self.read_excel_by_ox_name('Sheet2')
  203. rows = [row for row in work_sheet.rows][1:]
  204. question_rows = [row for row in question_work_sheet.rows][1:]
  205. question_data = []
  206. for qr in question_rows:
  207. question_data.append([str(qr[0].value), str(qr[1].value), str(qr[2].value)])
  208. question_dict = {}
  209. question_data.sort(key=lambda obj: obj[0])
  210. for key, data in groupby(question_data, key=lambda obj: obj[0]):
  211. data_list = []
  212. for dt in data:
  213. data_list.append(dt[1:])
  214. question_dict[str(key)] = data_list
  215. # print(json.dumps(question_dict, indent=4, ensure_ascii=False))
  216. def get_sub_option_id(sub_question_id, sub_option_content):
  217. sub_option_contents = question_dict.get(str(sub_question_id))
  218. if sub_option_contents:
  219. for sc in sub_option_contents:
  220. if sc[1] == sub_option_content:
  221. return sc[0]
  222. else:
  223. # print(sub_question_id, sub_option_content)
  224. pass
  225. # uuid,score(sub_option_id),created,sub_question_id
  226. insert_data = []
  227. for row in rows:
  228. uuid = row[0].value + '1000'
  229. date = row[1].value
  230. question_1 = str(row[6].value).split('.')[1]
  231. id_1 = get_sub_option_id(20, question_1)
  232. if id_1:
  233. insert_data.append([uuid, id_1, date, 20])
  234. question_2 = str(row[7].value).split('.')[1]
  235. id_2 = get_sub_option_id(29, question_2)
  236. if id_2:
  237. insert_data.append([uuid, id_2, date, 29])
  238. question_3 = str(row[8].value).split('┋')
  239. for q3 in question_3:
  240. content = str(q3.split('.')[1])
  241. id_3 = get_sub_option_id(370, content)
  242. if id_3:
  243. insert_data.append([uuid, id_3, date, 370])
  244. question_4 = str(row[9].value).split('┋')
  245. for q4 in question_4:
  246. content = q4.split('.')[1]
  247. id_4 = get_sub_option_id(371, content)
  248. if id_4:
  249. insert_data.append([uuid, id_4, date, 371])
  250. question_5 = str(row[10].value).split('┋')
  251. for q5 in question_5:
  252. content = q5.split('、')[1]
  253. id_5 = get_sub_option_id(372, content)
  254. if id_5:
  255. insert_data.append([uuid, id_5, date, 372])
  256. question_6 = str(row[11].value).split('┋')
  257. for q6 in question_6:
  258. if q6.find('E') == 0:
  259. content = q6.replace('E', '')
  260. else:
  261. content = q6.split('.')[1]
  262. id_6 = get_sub_option_id(379, content)
  263. if id_6:
  264. insert_data.append([uuid, id_6, date, 379])
  265. question_7 = str(row[12].value).split('┋')
  266. for q7 in question_7:
  267. content = q7.split('.')[1]
  268. id_7 = get_sub_option_id(373, content)
  269. if id_7:
  270. insert_data.append([uuid, id_7, date, 373])
  271. question_8 = str(row[13]).split('┋')
  272. for q8 in question_8:
  273. content = q8.split('.')[1]
  274. id_8 = get_sub_option_id(374, content)
  275. if id_8:
  276. insert_data.append([uuid, id_8, date, 374])
  277. question_9 = str(row[14].value).split('┋')
  278. for q9 in question_9:
  279. content = q9.split('.')[1]
  280. id_9 = get_sub_option_id(375, content)
  281. if id_9:
  282. insert_data.append([uuid, id_9, date, 375])
  283. question_10 = str(row[15].value).split('.')
  284. id_10 = get_sub_option_id(376, question_10)
  285. if id_10:
  286. insert_data.append([uuid, id_10, date, 376])
  287. question_11 = str(row[16].value).split('、')
  288. id_11 = get_sub_option_id(379, question_11)
  289. if id_11:
  290. insert_data.append([uuid, id_11, date, 379])
  291. question_12 = str(row[17].value).split('┋')
  292. for q12 in question_12:
  293. content = q12.split('.')
  294. id_12 = get_sub_option_id(380, content)
  295. if id_12:
  296. insert_data.append([uuid, id_12, date, 380])
  297. question_13 = str(row[18].value).split('┋')
  298. for q13 in question_13:
  299. content = q13.split('、')
  300. id_13 = get_sub_option_id(381, content)
  301. if id_13:
  302. insert_data.append([uuid, id_13, date, 381])
  303. question_14 = str(row[19].value).split('、')
  304. id_14 = get_sub_option_id(395, question_14)
  305. if id_14:
  306. insert_data.append([uuid, id_14, date, 395])
  307. city = str(row[20].value).split('-')[1]
  308. id_city = get_sub_option_id(377, city)
  309. if id_city:
  310. insert_data.append([uuid, id_city, date, 377])
  311. return insert_data
  312. def get_table_type_info(self):
  313. work_sheet = self.read_excel_by_ox()
  314. rows = [row for row in work_sheet.rows][1:]
  315. result = []
  316. for row in rows:
  317. question_name = row[3].value
  318. info = row[8].value
  319. infos = str(info).split('+')
  320. if len(infos) == 2:
  321. table_type = infos[1]
  322. table_size = infos[0].split('p')[0]
  323. result.append([table_type, table_size, question_name])
  324. pass
  325. else:
  326. pass
  327. return result
  328. sql_jianye_1 = """insert into report_push_customer_info(name,house_or_region,customer_type,mail,status,creator,created) values(%s,%s,%s,%s,1,'binren', now())"""
  329. sql_jianye_2 = """
  330. insert into report_customer_authority_info (customer_id, house_or_brand_id, status, creator, created) values (%s, %s, 1, 'binren', now())
  331. """
  332. sql_jianye_3 = """
  333. insert into report_task_info(customer_id, task_key, status, creator, created) values (%s, %s, 1, 'binren', now())
  334. """
  335. def jinaye_report(self):
  336. mail_list_sheet = self.read_excel_by_ox_name('邮箱列表')
  337. mail_rows =[row for row in mail_list_sheet.rows][1:]
  338. customers = []
  339. for row in mail_rows:
  340. mail = row[0].value
  341. name = row[1].value
  342. customer_type = row[4].value
  343. house_or_region = row[2].value
  344. house_or_brand_id = row[6].value
  345. # if house_or_brand_id:
  346. customers.append([name, house_or_region, customer_type, mail, house_or_brand_id])
  347. houses = {}
  348. # region_list = self.read_excel_by_ox_name('区域映射关系')
  349. # region_rows = [row for row in region_list.rows][1:]
  350. # region_names = []
  351. # for row in region_rows:
  352. # name = row[2].value
  353. # if name not in region_names:
  354. # region_names.append(name)
  355. # for name in region_names:
  356. # ids = []
  357. # for row in region_rows:
  358. # if row[2].value == name:
  359. # ids.append(row[0].value)
  360. # houses[name] = ids
  361. return customers
  362. if __name__ == '__main__':
  363. from mysql_db import MysqlDB
  364. eu = ExcelUtil('邮箱列表', r'D:\elab\elab_mvp\resources\建业邮件推送数据收件信息汇总(1).xlsx')
  365. mysql = MysqlDB('linshi', 1)
  366. customers = eu.jinaye_report()
  367. result1 = mysql.select("select * from report_push_customer_info where created like '2020-04-17%' and customer_type != 3")
  368. # mysql.add_some(eu.sql_jianye_1, customers)
  369. insert = []
  370. for r in result1:
  371. for c in customers:
  372. if r[1] == c[0] and r[3] == c[2]:
  373. insert.append([r[0], c[4]])
  374. print(insert)