# Source change: backend/handlers/Rankings/rank_data_scraper.py
#
# Both functions below are methods of ``DouyinPlayVVScraper`` (they take
# ``self``); indent them one level when placing them in the class body.
# The accompanying change calls ``self._cleanup_chrome_cache_smart()`` right
# after ``self._cleanup_old_profiles()`` and before ``self._setup_mongodb()``,
# so the Chrome profile cache is trimmed only when it exceeds the threshold
# (50 MB by default).


def _get_directory_size(self, directory_path):
    """Return the combined size of all files under *directory_path* in MB.

    Args:
        directory_path: Directory to measure (walked recursively).

    Returns:
        float: Total size in megabytes (bytes / 1024 / 1024). Files that
        cannot be stat'ed (removed mid-walk, dangling links, permission
        errors) are skipped, so the result is a best-effort lower bound.
        A missing directory yields ``0.0``.
    """
    total_size = 0
    try:
        for dirpath, _dirnames, filenames in os.walk(directory_path):
            for filename in filenames:
                filepath = os.path.join(dirpath, filename)
                try:
                    total_size += os.path.getsize(filepath)
                except OSError:
                    # FileNotFoundError is a subclass of OSError; a file
                    # vanishing while we walk is expected and not an error.
                    continue
    except Exception as e:
        logging.warning(f'计算目录大小时出错: {e}')
    return total_size / (1024 * 1024)  # bytes -> MB


def _cleanup_chrome_cache_smart(self, size_threshold_mb=50, profile_dir=None):
    """Delete regenerable Chrome profile data when the profile grows too big.

    The profile is measured first; nothing is deleted unless its size is
    strictly greater than *size_threshold_mb*.

    Args:
        size_threshold_mb (int): Size in MB above which cleanup is triggered.
            Defaults to 50.
        profile_dir (str | None): Chrome profile directory to clean. When
            ``None`` (the default) the persistent profile located at
            ``config/chrome_profile/douyin_persistent`` next to this file
            is used.

    Returns:
        bool: ``True`` if a cleanup pass was executed, ``False`` if the
        profile does not exist, is below the threshold, or an unexpected
        error occurred. Never raises.
    """
    try:
        if profile_dir is None:
            script_dir = os.path.dirname(os.path.abspath(__file__))
            profile_dir = os.path.join(
                script_dir, 'config', 'chrome_profile', 'douyin_persistent'
            )

        if not os.path.exists(profile_dir):
            logging.info('Chrome配置文件目录不存在,跳过缓存清理')
            return False

        # Measure the profile before deciding whether to touch anything.
        current_size_mb = self._get_directory_size(profile_dir)
        logging.info(f'Chrome配置文件当前大小: {current_size_mb:.2f} MB')

        if current_size_mb <= size_threshold_mb:
            logging.info(f'配置文件大小 ({current_size_mb:.2f} MB) 未超过阈值 ({size_threshold_mb} MB),跳过清理')
            return False

        logging.info(f'配置文件大小 ({current_size_mb:.2f} MB) 超过阈值 ({size_threshold_mb} MB),开始清理缓存...')

        # Profile-relative directories and files removed during cleanup.
        # NOTE(review): 'Local Storage', 'IndexedDB' and 'Session Storage'
        # hold site data rather than pure cache; presumably the login state
        # of the persistent profile does not depend on them - confirm.
        cache_items = (
            'Default/Cache',
            'Default/Code Cache',
            'Default/GPUCache',
            'Default/Service Worker/CacheStorage',
            'Default/Service Worker/ScriptCache',
            'Default/IndexedDB',
            'Default/Local Storage',
            'Default/Session Storage',
            'Default/Web Data-journal',
            'Default/History-journal',
            'Default/Favicons-journal',
            'GrShaderCache',
            'optimization_guide_model_store',
            'BrowserMetrics',
        )

        cleaned_size = 0
        cleaned_items = 0

        for cache_item in cache_items:
            cache_path = os.path.join(profile_dir, cache_item)
            if not os.path.exists(cache_path):
                continue
            try:
                if os.path.islink(cache_path):
                    # shutil.rmtree refuses symlinks; drop only the link so
                    # nothing outside the profile is ever deleted.
                    item_size = 0
                    os.remove(cache_path)
                elif os.path.isdir(cache_path):
                    # Size must be taken before the tree disappears.
                    item_size = self._get_directory_size(cache_path)
                    shutil.rmtree(cache_path)
                else:
                    item_size = os.path.getsize(cache_path) / (1024 * 1024)
                    os.remove(cache_path)

                cleaned_size += item_size
                cleaned_items += 1
                logging.debug(f'已清理: {cache_item} ({item_size:.2f} MB)')

            except Exception as e:
                # Best effort: one locked or busy entry must not abort the
                # rest of the cleanup.
                logging.warning(f'清理 {cache_item} 时出错: {e}')

        # Re-measure so the log reflects what is really left on disk.
        final_size_mb = self._get_directory_size(profile_dir)

        logging.info('缓存清理完成:')
        logging.info(f' - 清理前大小: {current_size_mb:.2f} MB')
        logging.info(f' - 清理后大小: {final_size_mb:.2f} MB')
        logging.info(f' - 释放空间: {cleaned_size:.2f} MB')
        logging.info(f' - 清理项目: {cleaned_items} 个')

        return True

    except Exception as e:
        logging.error(f'智能缓存清理失败: {e}')
        return False