#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
File encoding conversion tool.

Detects source files encoded as GBK/GB2312/GB18030 (or as UTF-8 with a BOM)
and rewrites them as UTF-8 without a BOM. Line endings are preserved.

Usage:
    python convert_encoding.py
"""

import os
import sys
from pathlib import Path
from datetime import datetime

# File-name suffixes that identify the source files to process
# (matched case-insensitively).
SOURCE_EXTENSIONS = ['.cpp', '.c', '.h', '.hpp', '.cc', '.cxx']

# Name of the file (stored in the root directory) that persists the
# conversion list between runs.
CONVERSION_LIST_FILE = 'encoding_conversion_list.txt'

# Encodings to try, in priority order.  UTF-8 must come before the GB family:
# UTF-8 decoding is strict, whereas GBK/GB18030 accept many byte sequences
# that are really UTF-8 and would turn them into mojibake.
ENCODINGS_TO_TRY = ['utf-8-sig', 'utf-8', 'gbk', 'gb2312', 'gb18030']

# Directory names that are never descended into while scanning.
SKIP_DIRS = {'.git', '__pycache__', 'node_modules', '.vs'}

# Byte sequence of the UTF-8 byte order mark.
UTF8_BOM = b'\xef\xbb\xbf'


class EncodingConverter:
    """Find legacy-encoded source files under a root and convert them to UTF-8.

    Attributes:
        root_dir: Path of the directory tree to scan.
        conversion_list: List of dicts, one per file needing conversion, with
            keys 'path', 'relative_path', 'encoding', 'reason',
            'has_chinese' and 'converted'.
        list_file: Path of the persisted conversion list.
    """

    def __init__(self, root_dir):
        self.root_dir = Path(root_dir)
        self.conversion_list = []
        self.list_file = self.root_dir / CONVERSION_LIST_FILE

    @staticmethod
    def _decode_bytes(raw):
        """Decode *raw* with the first encoding in ENCODINGS_TO_TRY that fits.

        'utf-8-sig' is only attempted when the data starts with a BOM and
        plain 'utf-8' only when it does not, so the returned encoding name
        tells the caller whether a BOM was present.

        Returns:
            (encoding, text), or (None, None) when no encoding fits.
        """
        starts_with_bom = raw.startswith(UTF8_BOM)
        for encoding in ENCODINGS_TO_TRY:
            if encoding == 'utf-8-sig' and not starts_with_bom:
                continue
            if encoding == 'utf-8' and starts_with_bom:
                continue
            try:
                return encoding, raw.decode(encoding)
            except (UnicodeDecodeError, LookupError):
                continue
        return None, None

    def detect_encoding(self, file_path):
        """Detect the encoding of a file by trial decoding.

        The file is read as bytes, so the returned text keeps the file's
        original line endings.

        Returns:
            (encoding, has_chinese, content).  When the file cannot be read
            or decoded the result is (None, False, None).
        """
        try:
            raw = Path(file_path).read_bytes()
        except OSError:
            # Unreadable entries are reported as "unknown" instead of
            # aborting a whole scan.
            return None, False, None

        encoding, content = self._decode_bytes(raw)
        if content is None:
            return None, False, None

        # CJK Unified Ideographs block.
        has_chinese = any('\u4e00' <= char <= '\u9fff' for char in content)
        return encoding, has_chinese, content

    def has_bom(self, file_path):
        """Return True if the file starts with the UTF-8 BOM (EF BB BF)."""
        try:
            with open(file_path, 'rb') as f:
                return f.read(3) == UTF8_BOM
        except OSError:
            return False

    def scan_files(self):
        """Scan the root directory and rebuild the conversion list.

        Directories named in SKIP_DIRS are pruned from the walk.  A file is
        listed when it decodes as a GB-family encoding or is UTF-8 with a BOM.

        Returns:
            Number of files that need conversion.
        """
        print(f"开始扫描目录: {self.root_dir}")
        print(f"搜索文件类型: {', '.join(SOURCE_EXTENSIONS)}")
        print("-" * 80)

        # Start from a clean list so repeated scans do not duplicate entries.
        self.conversion_list = []
        suffixes = tuple(ext.lower() for ext in SOURCE_EXTENSIONS)
        file_count = 0

        for dirpath, dirnames, filenames in os.walk(self.root_dir):
            # Prune in place so os.walk never descends into skipped trees;
            # sorting makes the scan order deterministic.
            dirnames[:] = sorted(d for d in dirnames if d not in SKIP_DIRS)

            for name in sorted(filenames):
                if not name.lower().endswith(suffixes):
                    continue

                file_path = Path(dirpath) / name
                file_count += 1
                if file_count % 50 == 0:
                    print(f"已扫描 {file_count} 个文件...", end='\r')

                encoding, has_chinese, _ = self.detect_encoding(file_path)
                if encoding in ('gbk', 'gb2312', 'gb18030'):
                    reason = f"{encoding.upper()}"
                elif encoding == 'utf-8-sig':
                    reason = "UTF-8 with BOM"
                else:
                    # Already BOM-less UTF-8, or unreadable/undetectable.
                    continue

                relative_path = file_path.relative_to(self.root_dir)
                self.conversion_list.append({
                    'path': str(file_path),
                    'relative_path': str(relative_path),
                    'encoding': encoding,
                    'reason': reason,
                    'has_chinese': has_chinese,
                    'converted': False,
                })
                print(f"\n找到待转换: {relative_path} [{reason}]")

        print("\n" + "-" * 80)
        print(f"扫描完成!共扫描 {file_count} 个文件,找到 {len(self.conversion_list)} 个需要转换")
        return len(self.conversion_list)

    def save_list(self):
        """Write the conversion list to ``self.list_file``.

        Each entry becomes one line: ``relative path | reason | status``.

        Returns:
            True on success, False if the file could not be written.
        """
        try:
            with open(self.list_file, 'w', encoding='utf-8') as f:
                f.write("# 文件编码转换列表\n")
                f.write(f"# 生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"# 总计: {len(self.conversion_list)} 个文件\n")
                f.write("# 格式: 相对路径 | 原编码 | 状态\n")
                f.write("#" + "=" * 78 + "\n\n")

                for item in self.conversion_list:
                    status = "✓ 已转换" if item['converted'] else "✗ 待转换"
                    f.write(f"{item['relative_path']} | {item['reason']} | {status}\n")

            print(f"转换列表已保存: {self.list_file}")
            return True
        except (OSError, UnicodeError) as e:
            print(f"保存列表失败: {e}")
            return False

    def load_list(self):
        """Load the conversion list previously written by ``save_list``.

        Blank lines, ``#`` comment lines and entries whose file no longer
        exists are ignored.

        Returns:
            True if the list file existed and was read, False otherwise.
        """
        if not self.list_file.exists():
            return False

        try:
            self.conversion_list = []
            with open(self.list_file, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue

                    parts = [p.strip() for p in line.split('|')]
                    if len(parts) < 3:
                        continue

                    relative_path = parts[0]
                    file_path = self.root_dir / relative_path
                    if not file_path.exists():
                        continue

                    encoding, has_chinese, _ = self.detect_encoding(file_path)
                    self.conversion_list.append({
                        'path': str(file_path),
                        'relative_path': relative_path,
                        'encoding': encoding if encoding else 'unknown',
                        'reason': parts[1],
                        'has_chinese': has_chinese,
                        'converted': '已转换' in parts[2],
                    })

            print(f"已加载转换列表: {len(self.conversion_list)} 个文件")
            return True
        except (OSError, UnicodeError) as e:
            print(f"加载列表失败: {e}")
            return False

    def convert_file(self, file_info):
        """Convert one file to UTF-8 without BOM, in place.

        The file is always re-read from disk at conversion time; any
        'content' entry in *file_info* is ignored so that edits made after
        the scan are never overwritten with stale text.  On success
        ``file_info['converted']`` is set to True.

        Returns:
            (success, message).
        """
        file_path = Path(file_info['path'])

        try:
            raw = file_path.read_bytes()
            _, content = self._decode_bytes(raw)
            if content is None:
                return False, "无法识别文件编码"

            # A leading U+FEFF would be written back out as a UTF-8 BOM.
            if content.startswith('\ufeff'):
                content = content[1:]

            # Encode before touching the file, and write bytes so that line
            # endings are preserved exactly.
            data = content.encode('utf-8')
            if data != raw:
                file_path.write_bytes(data)

            # Verify what is now on disk.
            try:
                file_path.read_bytes().decode('utf-8')
            except UnicodeDecodeError:
                return False, "无法用UTF-8读取"

            if self.has_bom(file_path):
                return False, "仍有BOM"

            file_info['converted'] = True
            return True, "成功"
        except (OSError, UnicodeError) as e:
            return False, str(e)

    def convert_one(self, index=0):
        """Convert the list entry at *index* (intended as a trial run).

        Returns:
            True if the file is (or already was) converted, False otherwise.
        """
        if not 0 <= index < len(self.conversion_list):
            print("索引超出范围")
            return False

        file_info = self.conversion_list[index]

        if file_info['converted']:
            print(f"\n文件已转换: {file_info['relative_path']}")
            return True

        print("\n准备转换测试文件:")
        print(f"  路径: {file_info['relative_path']}")
        print(f"  编码: {file_info['reason']}")
        print(f"  包含中文: {'是' if file_info['has_chinese'] else '否'}")

        success, msg = self.convert_file(file_info)
        if not success:
            print(f"  ✗ 转换失败: {msg}")
            return False

        print(f"  ✓ 转换{msg}")
        self.save_list()
        print("\n请用 Visual Studio 打开以下文件检查:")
        print(f"  {file_info['path']}")
        return True

    def convert_all(self, skip_converted=True):
        """Convert every file in the list and save the updated list.

        Args:
            skip_converted: When True, entries already marked converted are
                skipped.

        Returns:
            (success_count, fail_count).
        """
        total = len(self.conversion_list)
        success_count = 0
        fail_count = 0
        skip_count = 0

        print(f"\n开始批量转换 {total} 个文件...")
        print("=" * 80)

        for i, file_info in enumerate(self.conversion_list, 1):
            rel_path = file_info['relative_path']

            if skip_converted and file_info['converted']:
                print(f"[{i}/{total}] 跳过: {rel_path}")
                skip_count += 1
                continue

            print(f"[{i}/{total}] 转换: {rel_path}", end=" ... ")
            success, msg = self.convert_file(file_info)
            if success:
                print("✓")
                success_count += 1
            else:
                print(f"✗ {msg}")
                fail_count += 1

        print("=" * 80)
        print("转换完成!")
        print(f"  成功: {success_count}")
        print(f"  失败: {fail_count}")
        print(f"  跳过: {skip_count}")

        self.save_list()
        return success_count, fail_count


def main():
    """Run the interactive menu.

    The project root is the parent of the directory containing this script.
    An existing conversion list is loaded if present; otherwise the tree is
    scanned first.
    """
    script_dir = Path(__file__).parent
    root_dir = script_dir.parent  # the GeomativeStudio directory

    print("=" * 80)
    print("文件编码转换工具 - GBK/GB2312 转 UTF-8(无BOM)")
    print("=" * 80)
    print(f"项目根目录: {root_dir}\n")

    converter = EncodingConverter(root_dir)

    # Reuse a previously saved list when there is one.
    if not converter.load_list():
        print("未找到转换列表,开始扫描...\n")
        count = converter.scan_files()
        if count == 0:
            print("\n未找到需要转换的文件!")
            return
        converter.save_list()

    # Main menu loop.
    while True:
        print("\n" + "=" * 80)
        print("请选择操作:")
        print("  1. 转换一个文件(测试)")
        print("  2. 批量转换所有文件")
        print("  3. 重新扫描")
        print("  4. 查看待转换列表")
        print("  0. 退出")
        print("=" * 80)

        choice = input("\n输入选项 (0-4): ").strip()

        if choice == '1':
            # Pick the first entry that has not been converted yet.
            index = next(
                (i for i, item in enumerate(converter.conversion_list)
                 if not item['converted']),
                -1,
            )
            if index == -1:
                print("\n所有文件都已转换!")
            else:
                converter.convert_one(index)

        elif choice == '2':
            print("\n⚠️  警告: 即将批量转换所有文件!")
            confirm = input("确认继续? (输入 yes 确认): ").strip().lower()
            if confirm == 'yes':
                converter.convert_all()
            else:
                print("已取消")

        elif choice == '3':
            print("\n重新扫描...")
            converter.scan_files()
            converter.save_list()

        elif choice == '4':
            print("\n待转换文件列表:")
            print("-" * 80)
            for i, item in enumerate(converter.conversion_list, 1):
                status = "✓" if item['converted'] else "✗"
                print(f"{status} [{i}] {item['relative_path']} ({item['reason']})")
            print("-" * 80)

        elif choice == '0':
            print("\n退出程序")
            break

        else:
            print("\n无效选项!")


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n程序被中断")
        sys.exit(0)
    except Exception as e:
        print(f"\n程序出错: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)