From 64455034bb64d1bf656d8a7cb34c6aa2172339df Mon Sep 17 00:00:00 2001
From: Qyir <13521889462@163.com>
Date: Wed, 29 Oct 2025 16:24:12 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86Chrome=E9=85=8D?=
 =?UTF-8?q?=E7=BD=AE=E6=96=87=E4=BB=B6=E6=B8=85=E7=90=86=E5=8A=9F=E8=83=BD?=
 =?UTF-8?q?=20=E6=AF=8F=E6=AC=A1=E8=BF=90=E8=A1=8C=E7=9A=84=E6=97=B6?=
 =?UTF-8?q?=E5=80=99=E9=83=BD=E4=BC=9A=E6=9F=A5=E7=9C=8B=E9=85=8D=E7=BD=AE?=
 =?UTF-8?q?=E6=96=87=E4=BB=B6=E4=B8=AD=E6=96=87=E4=BB=B6=E7=9A=84=E5=A4=A7?=
 =?UTF-8?q?=E5=B0=8F=EF=BC=8C=E8=B6=85=E8=BF=8750MB=E8=87=AA=E5=8A=A8?=
 =?UTF-8?q?=E6=B8=85=E7=90=86=20=E6=9A=B4=E9=9C=B2=E6=9C=80=E9=87=8D?=
 =?UTF-8?q?=E8=A6=81=E7=9A=84=E7=94=A8=E6=88=B7=E7=99=BB=E5=BD=95=E4=BF=A1?=
 =?UTF-8?q?=E6=81=AF=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../handlers/Rankings/rank_data_scraper.py    | 124 +++++++++++++++++++
 1 file changed, 124 insertions(+)

diff --git a/backend/handlers/Rankings/rank_data_scraper.py b/backend/handlers/Rankings/rank_data_scraper.py
index 4e6a289..289ce19 100644
--- a/backend/handlers/Rankings/rank_data_scraper.py
+++ b/backend/handlers/Rankings/rank_data_scraper.py
@@ -97,6 +97,8 @@ class DouyinPlayVVScraper:
             logging.info('[定时器模式] 使用批量存储')
 
         self._cleanup_old_profiles()
+        # Smart-clean the Chrome cache (only when the profile exceeds 50 MB).
+        self._cleanup_chrome_cache_smart()
         self._setup_mongodb()
         self._load_image_cache()
 
@@ -221,6 +223,128 @@ class DouyinPlayVVScraper:
             logging.warning(f'清理Chrome进程时出错: {e}')
             return False
 
+    def _get_directory_size(self, directory_path):
+        """Return the total size of the files under *directory_path* in MB.
+
+        Files that cannot be stat'ed (for example removed while walking) are
+        skipped, so the result is a best-effort figure.
+
+        Args:
+            directory_path (str): Root directory to measure.
+
+        Returns:
+            float: Combined file size in MB (bytes / 1024 / 1024).
+        """
+        total_size = 0
+        try:
+            for dirpath, _dirnames, filenames in os.walk(directory_path):
+                for filename in filenames:
+                    filepath = os.path.join(dirpath, filename)
+                    try:
+                        total_size += os.path.getsize(filepath)
+                    except OSError:
+                        # Covers FileNotFoundError too (an OSError subclass).
+                        continue
+        except Exception as e:
+            logging.warning(f'计算目录大小时出错: {e}')
+        return total_size / (1024 * 1024)  # bytes -> MB
+
+    def _cleanup_chrome_cache_smart(self, size_threshold_mb=50, profile_dir=None):
+        """Clean Chrome profile caches once the profile exceeds a size threshold.
+
+        Measures the profile directory and, only when it is larger than
+        *size_threshold_mb*, deletes a fixed list of cache items inside it.
+        A failure on one item is logged and the remaining items are still
+        processed.
+
+        Args:
+            size_threshold_mb (int): Size in MB above which cleanup runs.
+                Defaults to 50.
+            profile_dir (str | None): Profile directory to clean. Defaults to
+                config/chrome_profile/douyin_persistent next to this file.
+
+        Returns:
+            bool: True when a cleanup pass ran; False when the directory is
+            missing, the size is within the threshold, or an unexpected
+            error aborted the pass.
+        """
+        try:
+            if profile_dir is None:
+                script_dir = os.path.dirname(os.path.abspath(__file__))
+                profile_dir = os.path.join(
+                    script_dir, 'config', 'chrome_profile', 'douyin_persistent')
+
+            if not os.path.exists(profile_dir):
+                logging.info('Chrome配置文件目录不存在,跳过缓存清理')
+                return False
+
+            # Measure the whole profile, not just the cache items.
+            current_size_mb = self._get_directory_size(profile_dir)
+            logging.info(f'Chrome配置文件当前大小: {current_size_mb:.2f} MB')
+
+            if current_size_mb <= size_threshold_mb:
+                logging.info(f'配置文件大小 ({current_size_mb:.2f} MB) 未超过阈值 ({size_threshold_mb} MB),跳过清理')
+                return False
+
+            logging.info(f'配置文件大小 ({current_size_mb:.2f} MB) 超过阈值 ({size_threshold_mb} MB),开始清理缓存...')
+
+            # Items removed on every cleanup pass, relative to the profile root.
+            # SQLite "*-journal" files are deliberately not listed: deleting a
+            # hot rollback journal can corrupt the database it belongs to.
+            # NOTE(review): IndexedDB / Local Storage / Session Storage may hold
+            # site login state - confirm wiping them keeps the session alive.
+            cache_items = [
+                'Default/Cache',
+                'Default/Code Cache',
+                'Default/GPUCache',
+                'Default/Service Worker/CacheStorage',
+                'Default/Service Worker/ScriptCache',
+                'Default/IndexedDB',
+                'Default/Local Storage',
+                'Default/Session Storage',
+                'GrShaderCache',
+                'optimization_guide_model_store',
+                'BrowserMetrics',
+            ]
+
+            cleaned_size = 0
+            cleaned_items = 0
+
+            for cache_item in cache_items:
+                cache_path = os.path.join(profile_dir, cache_item)
+                if not os.path.exists(cache_path):
+                    continue
+                try:
+                    # Measure before deleting so the freed space can be reported.
+                    if os.path.isdir(cache_path):
+                        item_size = self._get_directory_size(cache_path)
+                        shutil.rmtree(cache_path)
+                    else:
+                        item_size = os.path.getsize(cache_path) / (1024 * 1024)
+                        os.remove(cache_path)
+                except Exception as e:
+                    logging.warning(f'清理 {cache_item} 时出错: {e}')
+                    continue
+
+                cleaned_size += item_size
+                cleaned_items += 1
+                logging.debug(f'已清理: {cache_item} ({item_size:.2f} MB)')
+
+            # Re-measure so the log shows the real size after cleanup.
+            final_size_mb = self._get_directory_size(profile_dir)
+
+            logging.info('缓存清理完成:')
+            logging.info(f' - 清理前大小: {current_size_mb:.2f} MB')
+            logging.info(f' - 清理后大小: {final_size_mb:.2f} MB')
+            logging.info(f' - 释放空间: {cleaned_size:.2f} MB')
+            logging.info(f' - 清理项目: {cleaned_items} 个')
+
+            return True
+
+        except Exception as e:
+            logging.error(f'智能缓存清理失败: {e}')
+            return False
+
     def setup_driver(self):
         logging.info('初始化Chrome WebDriver (启用CDP网络日志)')
 