Files
coco df489d5640 a
2026-07-03 16:05:30 +08:00

353 lines
12 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
文件编码转换工具
功能:检测并转换GBK/GB2312编码的源代码文件为UTF-8(无BOM)格式
使用方法:
python convert_encoding.py
"""
import os
import sys
from pathlib import Path
from datetime import datetime
# File extensions that are treated as source files to inspect.
SOURCE_EXTENSIONS = ['.cpp', '.c', '.h', '.hpp', '.cc', '.cxx']
# Name of the conversion list file, stored directly under the scan root.
CONVERSION_LIST_FILE = 'encoding_conversion_list.txt'
# Encodings to try, in priority order. Strict UTF-8 must come first: ASCII
# and many UTF-8 byte sequences also decode "successfully" as GBK, so trying
# GBK first misreports UTF-8 files as GBK. A UTF-8 BOM is detected from the
# raw bytes in EncodingConverter.detect_encoding, not from this order.
ENCODINGS_TO_TRY = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'gb18030']


class EncodingConverter:
    """Find GBK-family / UTF-8-with-BOM source files and rewrite them as UTF-8 without BOM.

    Attributes are plain instance state:
      root_dir         -- Path of the directory tree that is scanned.
      conversion_list  -- list of dicts with the keys 'path', 'relative_path',
                          'encoding', 'reason', 'has_chinese', 'converted', 'content'.
      list_file        -- Path of the persisted conversion list inside root_dir.
    """

    # Byte sequence of the UTF-8 byte order mark (EF BB BF).
    _UTF8_BOM = b'\xef\xbb\xbf'
    # Directory names (exact match, anywhere below the root) that are never scanned.
    _SKIP_DIRS = frozenset(['.git', '__pycache__', 'node_modules', '.vs'])
    # Encoding names that mark a file as needing a GBK-family conversion.
    _GB_ENCODINGS = ('gbk', 'gb2312', 'gb18030')

    def __init__(self, root_dir):
        """Create a converter rooted at *root_dir* (str or Path)."""
        self.root_dir = Path(root_dir)
        self.conversion_list = []
        self.list_file = self.root_dir / CONVERSION_LIST_FILE

    def detect_encoding(self, file_path):
        """Detect a file's encoding by trial decoding of its raw bytes.

        The file is read once in binary mode, so the returned text keeps the
        original line endings and any BOM is recognised reliably.

        Args:
            file_path: path of the file to inspect.

        Returns:
            A tuple ``(encoding, has_chinese, content)``. ``encoding`` is
            'utf-8-sig' when the file starts with a UTF-8 BOM (the BOM is not
            part of ``content``), otherwise the first entry of
            ENCODINGS_TO_TRY that decodes the data. ``has_chinese`` tells
            whether any character lies in U+4E00..U+9FFF. If the file cannot
            be read or decoded, ``(None, False, None)`` is returned.
        """
        try:
            with open(file_path, 'rb') as f:
                raw = f.read()
        except OSError:
            # Unreadable files are reported as "encoding unknown" so that a
            # single bad file does not abort a whole scan.
            return None, False, None

        candidates = list(ENCODINGS_TO_TRY)
        if raw.startswith(self._UTF8_BOM):
            # Plain 'utf-8' would keep the BOM as U+FEFF inside the text, and
            # it would then be written straight back on conversion.
            candidates.insert(0, 'utf-8-sig')

        for encoding in candidates:
            try:
                content = raw.decode(encoding)
            except (UnicodeDecodeError, LookupError):
                continue
            has_chinese = any('\u4e00' <= char <= '\u9fff' for char in content)
            return encoding, has_chinese, content
        return None, False, None

    def has_bom(self, file_path):
        """Return True if the file starts with the UTF-8 BOM, False otherwise or on read errors."""
        try:
            with open(file_path, 'rb') as f:
                return f.read(3) == self._UTF8_BOM
        except OSError:
            return False

    def scan_files(self):
        """Scan root_dir recursively and collect the files that need conversion.

        Matching entries are appended to ``self.conversion_list``.

        Returns:
            The length of ``self.conversion_list`` after the scan.
        """
        print(f"开始扫描目录: {self.root_dir}")
        print(f"搜索文件类型: {', '.join(SOURCE_EXTENSIONS)}")
        print("-" * 80)
        file_count = 0
        for ext in SOURCE_EXTENSIONS:
            for file_path in self.root_dir.rglob(f"*{ext}"):
                relative_path = file_path.relative_to(self.root_dir)
                # Compare whole directory names below the root; a substring
                # test on the full path would also skip '.github', '.vscode'
                # or everything under a root whose own path contains '.git'.
                if any(part in self._SKIP_DIRS for part in relative_path.parts[:-1]):
                    continue
                # rglob also yields directories whose name matches the pattern.
                if not file_path.is_file():
                    continue
                file_count += 1
                if file_count % 50 == 0:
                    print(f"已扫描 {file_count} 个文件...", end='\r')
                encoding, has_chinese, content = self.detect_encoding(file_path)
                if not encoding:
                    continue
                if encoding in self._GB_ENCODINGS:
                    reason = f"{encoding.upper()}"
                elif encoding == 'utf-8-sig':
                    reason = "UTF-8 with BOM"
                else:
                    # Already UTF-8 without BOM: nothing to do.
                    continue
                self.conversion_list.append({
                    'path': str(file_path),
                    'relative_path': str(relative_path),
                    'encoding': encoding,
                    'reason': reason,
                    'has_chinese': has_chinese,
                    'converted': False,
                    'content': content,
                })
                print(f"\n找到待转换: {relative_path} [{reason}]")
        print("\n" + "-" * 80)
        print(f"扫描完成!共扫描 {file_count} 个文件,找到 {len(self.conversion_list)} 个需要转换")
        return len(self.conversion_list)

    def save_list(self):
        """Write the conversion list to ``self.list_file`` as UTF-8 text.

        Returns:
            True on success, False if writing failed (the error is printed).
        """
        try:
            with open(self.list_file, 'w', encoding='utf-8') as f:
                f.write("# 文件编码转换列表\n")
                f.write(f"# 生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"# 总计: {len(self.conversion_list)} 个文件\n")
                f.write("# 格式: 相对路径 | 原编码 | 状态\n")
                f.write("#" + "=" * 78 + "\n\n")
                for item in self.conversion_list:
                    status = "✓ 已转换" if item['converted'] else "✗ 待转换"
                    f.write(f"{item['relative_path']} | {item['reason']} | {status}\n")
            print(f"转换列表已保存: {self.list_file}")
            return True
        except Exception as e:
            print(f"保存列表失败: {e}")
            return False

    def load_list(self):
        """Load a previously saved conversion list from ``self.list_file``.

        Lines that are empty, start with '#', or have fewer than three
        '|'-separated fields are ignored, as are entries whose file no longer
        exists. Each remaining file is re-read to refresh its detected
        encoding and cached content.

        Returns:
            True if the list file existed and was parsed, False otherwise.
        """
        if not self.list_file.exists():
            return False
        try:
            self.conversion_list = []
            with open(self.list_file, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    parts = [p.strip() for p in line.split('|')]
                    if len(parts) < 3:
                        continue
                    relative_path = parts[0]
                    file_path = self.root_dir / relative_path
                    if not file_path.exists():
                        continue
                    encoding, has_chinese, content = self.detect_encoding(file_path)
                    self.conversion_list.append({
                        'path': str(file_path),
                        'relative_path': relative_path,
                        'encoding': encoding if encoding else 'unknown',
                        'reason': parts[1],
                        'has_chinese': has_chinese,
                        'converted': '已转换' in parts[2],
                        'content': content,
                    })
            print(f"已加载转换列表: {len(self.conversion_list)} 个文件")
            return True
        except Exception as e:
            print(f"加载列表失败: {e}")
            return False

    def convert_file(self, file_info):
        """Rewrite one file in place as UTF-8 without BOM.

        Args:
            file_info: an entry of ``self.conversion_list``. Its cached
                'content' is used when present; otherwise the file is re-read
                with ``file_info['encoding']``.

        Returns:
            ``(True, "成功")`` on success (and ``file_info['converted']`` is
            set), otherwise ``(False, message)`` describing the failure.
        """
        file_path = Path(file_info['path'])
        try:
            content = file_info.get('content')
            if content is None:
                # newline='' disables newline translation so that the file's
                # CRLF/LF line endings survive the round trip.
                with open(file_path, 'r', encoding=file_info['encoding'], newline='') as f:
                    content = f.read()
            # A BOM decoded as plain UTF-8 shows up as a leading U+FEFF;
            # drop it, otherwise it is encoded right back into the file.
            if content.startswith('\ufeff'):
                content = content[1:]
            with open(file_path, 'w', encoding='utf-8', newline='') as f:
                f.write(content)
            # Verify that the result is readable as UTF-8 and carries no BOM.
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    f.read()
            except UnicodeDecodeError:
                return False, "无法用UTF-8读取"
            if self.has_bom(file_path):
                return False, "仍有BOM"
            file_info['converted'] = True
            return True, "成功"
        except Exception as e:
            return False, str(e)

    def convert_one(self, index=0):
        """Convert the single list entry at *index* (intended as a trial run).

        Returns:
            True if the file is (or already was) converted, False if the
            index is past the end of the list or the conversion failed.
        """
        if index >= len(self.conversion_list):
            print("索引超出范围")
            return False
        file_info = self.conversion_list[index]
        if file_info['converted']:
            print(f"\n文件已转换: {file_info['relative_path']}")
            return True
        print("\n准备转换测试文件:")
        print(f" 路径: {file_info['relative_path']}")
        print(f" 编码: {file_info['reason']}")
        print(f" 包含中文: {'是' if file_info['has_chinese'] else '否'}")
        success, msg = self.convert_file(file_info)
        if not success:
            print(f" ✗ 转换失败: {msg}")
            return False
        print(f" ✓ 转换{msg}")
        self.save_list()
        print("\n请用 Visual Studio 打开以下文件检查:")
        print(f" {file_info['path']}")
        return True

    def convert_all(self, skip_converted=True):
        """Convert every entry of the conversion list.

        Args:
            skip_converted: when True, entries already marked as converted
                are skipped instead of being rewritten.

        Returns:
            A tuple ``(success_count, fail_count)``.
        """
        total = len(self.conversion_list)
        success_count = 0
        fail_count = 0
        skip_count = 0
        print(f"\n开始批量转换 {total} 个文件...")
        print("=" * 80)
        try:
            for i, file_info in enumerate(self.conversion_list, 1):
                rel_path = file_info['relative_path']
                if skip_converted and file_info['converted']:
                    print(f"[{i}/{total}] 跳过: {rel_path}")
                    skip_count += 1
                    continue
                print(f"[{i}/{total}] 转换: {rel_path}", end=" ... ")
                success, msg = self.convert_file(file_info)
                if success:
                    print("✓")
                    success_count += 1
                else:
                    print(f"✗ {msg}")
                    fail_count += 1
            print("=" * 80)
            print("转换完成!")
            print(f" 成功: {success_count}")
            print(f" 失败: {fail_count}")
            print(f" 跳过: {skip_count}")
        finally:
            # Persist progress even when the batch is interrupted part-way,
            # so already converted files are not offered for conversion again.
            self.save_list()
        return success_count, fail_count
def main():
    """Run the interactive conversion menu.

    The project root is taken to be the parent of the directory containing
    this script. An existing conversion list is loaded if present; otherwise
    the tree is scanned and a new list is saved. The function returns when
    the user chooses to exit, when standard input is closed, or when an
    initial scan finds nothing to convert.
    """
    # resolve() first: when __file__ is a bare relative name (script started
    # from its own directory on interpreters that do not make it absolute),
    # Path(__file__).parent.parent would collapse to '.', i.e. the script
    # directory itself instead of its parent.
    script_dir = Path(__file__).resolve().parent
    root_dir = script_dir.parent  # the GeomativeStudio directory
    print("=" * 80)
    print("文件编码转换工具 - GBK/GB2312 转 UTF-8(无BOM)")
    print("=" * 80)
    print(f"项目根目录: {root_dir}\n")
    converter = EncodingConverter(root_dir)

    # Reuse an existing list when there is one; otherwise scan from scratch.
    if not converter.load_list():
        print("未找到转换列表,开始扫描...\n")
        if converter.scan_files() == 0:
            print("\n未找到需要转换的文件!")
            return
        converter.save_list()

    # Main menu loop.
    while True:
        print("\n" + "=" * 80)
        print("请选择操作:")
        print(" 1. 转换一个文件(测试)")
        print(" 2. 批量转换所有文件")
        print(" 3. 重新扫描")
        print(" 4. 查看待转换列表")
        print(" 0. 退出")
        print("=" * 80)
        try:
            choice = input("\n输入选项 (0-4): ").strip()
        except EOFError:
            # Closed stdin (e.g. piped input ran out): leave like option 0.
            choice = '0'

        if choice == '1':
            # Pick the first entry that has not been converted yet.
            index = next(
                (i for i, item in enumerate(converter.conversion_list)
                 if not item['converted']),
                -1,
            )
            if index == -1:
                print("\n所有文件都已转换!")
            else:
                converter.convert_one(index)
        elif choice == '2':
            print("\n⚠️ 警告: 即将批量转换所有文件!")
            try:
                confirm = input("确认继续? (输入 yes 确认): ").strip().lower()
            except EOFError:
                # No answer available: treat as "not confirmed".
                confirm = ''
            if confirm == 'yes':
                converter.convert_all()
            else:
                print("已取消")
        elif choice == '3':
            print("\n重新扫描...")
            # scan_files appends, so start from an empty list.
            converter.conversion_list = []
            converter.scan_files()
            converter.save_list()
        elif choice == '4':
            print("\n待转换文件列表:")
            print("-" * 80)
            for i, item in enumerate(converter.conversion_list, 1):
                status = "✓" if item['converted'] else "✗"
                print(f"{status} [{i}] {item['relative_path']} ({item['reason']})")
            print("-" * 80)
        elif choice == '0':
            print("\n退出程序")
            break
        else:
            print("\n无效选项!")
if __name__ == '__main__':
    # Script entry point: run the menu, exit with status 0 on Ctrl+C and
    # with status 1 (after printing the traceback) on any other error.
    import traceback

    try:
        main()
    except KeyboardInterrupt:
        print("\n\n程序被中断")
        raise SystemExit(0)
    except Exception as exc:
        print(f"\n程序出错: {exc}")
        traceback.print_exc()
        raise SystemExit(1)