确保Python已安装依赖项:pandas和openpyxl
pip install pandas openpyxl
把以下代码保存为replace_words.py文件:
import pandas as pd
import re
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
def replace_words(html_text, replace_text):
# Function to replace whole words, preserving case and punctuation
def replacement(match):
word = match.group(0)
# Preserve the punctuation
punctuation = ''
if word and word[-1] in ',.!?':
punctuation = word[-1]
word = word[:-1]
# Only replace if it's an exact word match (case sensitive)
if word.lower() == replace_text.lower():
return f'it{punctuation}'
return match.group(0)
# Use word boundaries to match whole words only
pattern = r'\b' + re.escape(replace_text) + r'\b[,.\!\?]?|\b' + re.escape(replace_text) + r'\b'
return re.sub(pattern, replacement, html_text, flags=re.IGNORECASE)
def process_excel(input_file, output_file):
try:
# Read Excel sheets
sheet1 = pd.read_excel(input_file, sheet_name='Sheet1')
sheet2 = pd.read_excel(input_file, sheet_name='Sheet2')
# Ensure required columns exist
if not all(col in sheet1.columns for col in ['Variant SKU', 'Body (HTML)']):
raise ValueError("Sheet1 must contain 'Variant SKU' and 'Body (HTML)' columns")
if not all(col in sheet2.columns for col in ['Variant SKU', 'Replace Text']):
raise ValueError("Sheet2 must contain 'Variant SKU' and 'Replace Text' columns")
# Create a dictionary for SKU to Replace Text mapping
replace_dict = dict(zip(sheet2['Variant SKU'], sheet2['Replace Text']))
# Process replacements
sheet1['Body (HTML)'] = sheet1.apply(
lambda row: replace_words(row['Body (HTML)'], replace_dict.get(row['Variant SKU'], ''))
if row['Variant SKU'] in replace_dict else row['Body (HTML)'],
axis=1
)
# Save to new Excel file while preserving formatting
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
sheet1.to_excel(writer, sheet_name='Sheet1', index=False)
sheet2.to_excel(writer, sheet_name='Sheet2', index=False)
# Adjust column widths
wb = writer.book
for sheet_name in ['Sheet1', 'Sheet2']:
ws = wb[sheet_name]
for col in ws.columns:
max_length = 0
column = col[0].column_letter
for cell in col:
try:
if len(str(cell.value)) > max_length:
max_length = len(str(cell.value))
except:
pass
adjusted_width = max_length + 2
ws.column_dimensions[column].width = adjusted_width
except Exception as e:
print(f"Error processing Excel file: {str(e)}")
if __name__ == "__main__":
input_file = "title.xlsx" # Replace with your input file path
output_file = "output.xlsx" # Replace with your output file path
process_excel(input_file, output_file)
print("Excel processing completed. Check the output file.")

