请先确保 Python 环境中已安装以下依赖项: pandas、openpyxl、xlrd
pip install pandas openpyxl xlrd
将以下代码保存为 merge_csv_files.py 文件:
import os
import pandas as pd
from pathlib import Path
from datetime import datetime
def is_valid_file(file_path):
    """Return True if pandas can parse *file_path* as CSV or as an Excel workbook.

    The file is probed as CSV with each candidate encoding in turn, and then
    as an Excel workbook with each engine. Every probe requests only the
    first row (``nrows=1``).

    Args:
        file_path: Path of the file to probe (``str`` or path-like).

    Returns:
        bool: True as soon as one probe succeeds, False if every probe fails
        (including when the file does not exist or is empty).
    """
    encodings = ['utf-8', 'gbk', 'gb2312', 'latin1', 'cp1252', 'utf-16', 'big5', 'shift_jis']

    # Try CSV parsing first.
    for encoding in encodings:
        try:
            pd.read_csv(file_path, nrows=1, encoding=encoding)
        except Exception:
            # `Exception` rather than a bare `except` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit are not swallowed.
            continue
        return True

    # Fall back to Excel parsing, trying each engine.
    for engine in ['openpyxl', 'xlrd']:
        try:
            pd.read_excel(file_path, nrows=1, engine=engine)
        except Exception:
            # Also covers an engine that is not installed (ImportError).
            continue
        return True

    return False
def read_file(file_path):
    """Load *file_path* into a DataFrame, trying CSV encodings, then Excel engines.

    CSV parsing is attempted with each candidate encoding in order; the first
    one that parses wins. If the file starts with a UTF-16 or UTF-32
    byte-order mark, the matching codec is tried before the regular list.
    If every CSV attempt fails, the file is read as an Excel workbook with
    each engine in turn.

    Args:
        file_path: Path of the file to read (``str`` or path-like).

    Returns:
        pandas.DataFrame | None: The parsed data, or None when no encoding or
        Excel engine could read the file (a message is printed in that case).
    """
    encodings = ['utf-8', 'gbk', 'gb2312', 'latin1', 'cp1252', 'utf-16', 'big5', 'shift_jis']

    # 'latin1' maps every possible byte to a character, so it never raises a
    # decode error. Without this check a BOM-marked UTF-16/UTF-32 file would
    # be accepted as latin1 mojibake and the later 'utf-16' entry would never
    # be reached.
    try:
        with open(file_path, 'rb') as handle:
            head = handle.read(4)
    except OSError:
        # Missing or unreadable file: the attempts below fail and report it.
        head = b''
    if head.startswith(b'\xff\xfe\x00\x00'):
        # UTF-32 LE BOM; it also begins with the UTF-16 LE BOM, so keep
        # 'utf-16' as the next candidate in case the UTF-32 guess is wrong.
        bom_encodings = ['utf-32', 'utf-16']
    elif head.startswith(b'\x00\x00\xfe\xff'):
        bom_encodings = ['utf-32']
    elif head.startswith((b'\xff\xfe', b'\xfe\xff')):
        bom_encodings = ['utf-16']
    else:
        bom_encodings = []
    candidates = bom_encodings + [enc for enc in encodings if enc not in bom_encodings]

    # Try CSV parsing first.
    for encoding in candidates:
        try:
            return pd.read_csv(file_path, encoding=encoding)
        except Exception:
            continue

    # Fall back to Excel parsing, trying each engine.
    for engine in ['openpyxl', 'xlrd']:
        try:
            return pd.read_excel(file_path, engine=engine)
        except Exception as e:
            print(f"使用 {engine} 引擎读取 Excel 文件 {file_path} 失败: {e}")
            continue

    print(f"读取文件 {file_path} 错误: 无法使用可用编码或 Excel 引擎解码")
    return None
def merge_csv_files():
    """Merge every readable ``*.csv`` file in the current directory into one CSV.

    Each matching file is checked with ``is_valid_file`` and loaded with
    ``read_file``. The loaded frames are concatenated row-wise with a fresh
    index and written to ``merged_output_<YYYYmmdd_HHMMSS>.csv`` in the same
    directory, encoded as UTF-8. Progress and errors are reported with
    ``print``; nothing is returned.
    """
    current_dir = Path.cwd()
    output_prefix = 'merged_output_'
    dfs = []

    # glob() yields files in arbitrary order; sorting makes the row order of
    # the merged output reproducible between runs.
    for file_path in sorted(current_dir.glob('*.[cC][sS][vV]')):
        if file_path.name.startswith(output_prefix):
            # Results of earlier runs are written to this same directory and
            # match the glob; merging them again would duplicate every row.
            print(f"跳过之前的合并结果: {file_path}")
            continue
        if is_valid_file(file_path):
            df = read_file(file_path)
            if df is not None:
                dfs.append(df)
                print(f"成功处理: {file_path}")
        else:
            print(f"跳过无效文件: {file_path}")

    if dfs:
        try:
            merged_df = pd.concat(dfs, ignore_index=True)
            # Timestamped file name so successive runs do not overwrite each other.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = current_dir / f'{output_prefix}{timestamp}.csv'
            merged_df.to_csv(output_path, index=False, encoding='utf-8')
            print(f"合并文件已保存为: {output_path}")
            print(f"总行数: {len(merged_df)}")
        except Exception as e:
            print(f"合并文件时出错: {e}")
    else:
        print("未找到可合并的有效 CSV 文件")
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    merge_csv_files()

