python对于合并excel本篇文章给出合并xlsx文件的方法,xls的方法不在本文介绍
拆分数据可以适用于xlsx或xls
先读取所有的excel,得到df列表,在将这个df列表 按行进行合并
- import pandas as pd
-
-
- # 读取所有xlsx
- def read_xlsx(xlsx_list):
- """
- :param xlsx_list: xlsx文件列表
- :return:
- """
- if len(xlsx_list) == 0 or xlsx_list is None:
- print("没有任何xlsx文件,无法读取数据")
- return
-
- df_list = []
- for xlsx in xlsx_list:
- df = pd.read_excel(xlsx)
- df_list.append(df)
- return df_list
-
-
- # 合并xlsx
- def merge_xlsx(df_list):
- """
- :param df_list: df列表
- :return:
- """
- if len(df_list) == 0 or df_list is None:
- print("没有任何df,无法合并数据")
- return
-
- merged_df = pd.concat(df_list, axis=0)
- xlsx_path = "文件保存路径"
- merged_df.to_excel(xlsx_path, index=False)
- return xlsx_path
-
-
- file_list = ["xlsx文件路径1","xlsx文件路径2","xlsx文件路径3","xlsx文件路径4"]
- read_df_list = read_xlsx(file_list)
- merged_file = merge_xlsx(read_df_list)
- print(f"合并后的文件路径:{merged_file}")
按照指定的行数对原表的数据进行拆分,拆分的数据每一个表格会保留表头(标题行)
- import xlrd
- import xlwt
- import pandas as pd
-
-
- # 拆分数据
- def split_file(file_path):
- """
- 将传入的xls文件格式的表格进行,按照指定行数进行拆分
- :return: 返回拆分后的文件路径列表
- """
-
- if file_path is None:
- print("没有需要拆分的表格")
- return
-
- split_rows = 500 # 每个表拆分数据的行数
- wb = xlrd.open_workbook(file_path)
- sheet = wb.sheet_by_index(0)
- total_rows = sheet.nrows
- split_size = split_rows + 1
- header = sheet.row_values(0)
-
- # 拆分大于指定的长度的文件
- first_file = True
- for i in range(total_rows // split_size):
- if first_file: # 如果是第一个文件则从第二行开始(跳过表头)
- start_row = i * split_size + 1
- first_file = False
- else:
- start_row = i * split_size
-
- end_row = start_row + split_rows
- data = [sheet.row_values(j) for j in range(start_row, end_row)]
- filename = f'拆分的文件_{i + 1}.xls'
-
- workbook = xlwt.Workbook()
- sheet_new = workbook.add_sheet("Sheet1")
- for col_index, value in enumerate(header):
- sheet_new.write(0, col_index, value) # 写入表头
- for row_index, row in enumerate(data):
- for col_index, value in enumerate(row):
- sheet_new.write(row_index + 1, col_index, value) # 写入数据
- workbook.save(filename)
- print(f"拆分的文件路径:{filename}")
-
-
- your_file_path = "需要拆分的表格路径"
- split_file(your_file_path )