Python合并当前文件夹下的CSV

请先确保当前 Python 环境中已安装以下依赖项：pandas、openpyxl、xlrd

pip install pandas openpyxl xlrd

把以下代码保存为merge_csv_files.py文件：

import os
import pandas as pd
from pathlib import Path
from datetime import datetime

def is_valid_file(file_path):
    """Report whether pandas can parse the start of ``file_path``.

    The file is probed first as CSV under each candidate encoding, then as
    an Excel workbook under each engine. Only the first data row is read,
    so the check stays cheap for large files.

    Args:
        file_path: Path (or path string) of the file to probe.

    Returns:
        True as soon as any CSV encoding or Excel engine parses the file,
        False when every attempt fails.
    """
    encodings = ['utf-8', 'gbk', 'gb2312', 'latin1', 'cp1252', 'utf-16', 'big5', 'shift_jis']
    # Try CSV parsing first.
    for encoding in encodings:
        try:
            pd.read_csv(file_path, nrows=1, encoding=encoding)
            return True
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # and SystemExit still propagate while probing.
        except Exception:
            continue
    # Fall back to Excel parsing with each available engine.
    for engine in ['openpyxl', 'xlrd']:
        try:
            pd.read_excel(file_path, nrows=1, engine=engine)
            return True
        except Exception:
            continue
    return False

def read_file(file_path):
    """Load ``file_path`` into a DataFrame, trying CSV first and then Excel.

    Each candidate text encoding is tried in turn with ``pd.read_csv``; the
    first one that parses wins. If none does, each Excel engine is tried
    with ``pd.read_excel`` and every engine failure is printed.

    Args:
        file_path: Path (or path string) of the file to load.

    Returns:
        The parsed DataFrame, or None when no encoding or engine succeeds
        (a summary message is printed in that case).
    """
    csv_encodings = ('utf-8', 'gbk', 'gb2312', 'latin1', 'cp1252', 'utf-16', 'big5', 'shift_jis')
    excel_engines = ('openpyxl', 'xlrd')

    # CSV attempts: failures are silent, the next encoding is simply tried.
    for codec in csv_encodings:
        try:
            frame = pd.read_csv(file_path, encoding=codec)
        except Exception:
            pass
        else:
            return frame

    # Excel attempts: each failure is reported before moving on.
    for excel_engine in excel_engines:
        try:
            frame = pd.read_excel(file_path, engine=excel_engine)
        except Exception as err:
            print(f"使用 {excel_engine} 引擎读取 Excel 文件 {file_path} 失败: {err}")
        else:
            return frame

    print(f"读取文件 {file_path} 错误: 无法使用可用编码或 Excel 引擎解码")
    return None

def _is_previous_output(file_path):
    """Return True when ``file_path`` is named like a file this script wrote."""
    prefix = 'merged_output_'
    stamp = file_path.stem[len(prefix):]
    # Outputs are named merged_output_YYYYmmdd_HHMMSS.csv (15-char stamp).
    if not file_path.stem.startswith(prefix) or len(stamp) != 15:
        return False
    try:
        datetime.strptime(stamp, "%Y%m%d_%H%M%S")
    except ValueError:
        return False
    return True


def merge_csv_files():
    """Merge every CSV file in the current working directory into one file.

    Files matching ``*.csv`` (any letter case) are validated with
    ``is_valid_file`` and loaded with ``read_file``. The loaded frames are
    concatenated with a fresh index and written, UTF-8 encoded and without
    the index column, to ``merged_output_<timestamp>.csv`` in the same
    directory. Progress and errors are reported with ``print``.

    Files named like an earlier output of this function are skipped, so
    running the script repeatedly in one folder does not fold previous
    results back in and duplicate rows. Inputs are processed in sorted
    path order so the row order of the result is reproducible.

    Returns:
        None.
    """
    current_dir = Path.cwd()
    dfs = []

    for file_path in sorted(current_dir.glob('*.[cC][sS][vV]')):
        if _is_previous_output(file_path):
            print(f"跳过先前的合并结果: {file_path}")
            continue
        if is_valid_file(file_path):
            df = read_file(file_path)
            if df is not None:
                dfs.append(df)
                print(f"成功处理: {file_path}")
        else:
            print(f"跳过无效文件: {file_path}")

    if not dfs:
        print("未找到可合并的有效 CSV 文件")
        return

    try:
        merged_df = pd.concat(dfs, ignore_index=True)
        # Timestamped name so successive runs never overwrite each other.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = current_dir / f'merged_output_{timestamp}.csv'
        merged_df.to_csv(output_path, index=False, encoding='utf-8')
        print(f"合并文件已保存为: {output_path}")
        print(f"总行数: {len(merged_df)}")
    except Exception as e:
        # Best-effort script: report the failure instead of a traceback.
        print(f"合并文件时出错: {e}")

# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    merge_csv_files()

Python合并当前文件夹下的CSV

相关文章

随机推荐

热门标签