修改了Chrome配置文件清理功能

每次运行时都会检查Chrome配置文件目录的大小,超过50MB时自动清理缓存,
保留最重要的用户登录信息。
This commit is contained in:
Qyir 2025-10-29 16:24:12 +08:00
parent 149ea00f1e
commit 64455034bb

View File

@ -97,6 +97,8 @@ class DouyinPlayVVScraper:
logging.info('[定时器模式] 使用批量存储')
self._cleanup_old_profiles()
# 智能清理Chrome缓存仅当超过50MB时
self._cleanup_chrome_cache_smart()
self._setup_mongodb()
self._load_image_cache()
@ -221,6 +223,101 @@ class DouyinPlayVVScraper:
logging.warning(f'清理Chrome进程时出错: {e}')
return False
def _get_directory_size(self, directory_path):
"""计算目录大小MB"""
total_size = 0
try:
for dirpath, dirnames, filenames in os.walk(directory_path):
for filename in filenames:
filepath = os.path.join(dirpath, filename)
try:
total_size += os.path.getsize(filepath)
except (OSError, FileNotFoundError):
continue
except Exception as e:
logging.warning(f'计算目录大小时出错: {e}')
return total_size / (1024 * 1024) # 转换为MB
def _cleanup_chrome_cache_smart(self, size_threshold_mb=50):
"""智能清理Chrome配置文件缓存
Args:
size_threshold_mb (int): 触发清理的大小阈值MB默认50MB
"""
try:
script_dir = os.path.dirname(os.path.abspath(__file__))
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile', 'douyin_persistent')
if not os.path.exists(profile_dir):
logging.info('Chrome配置文件目录不存在跳过缓存清理')
return False
# 计算当前配置文件大小
current_size_mb = self._get_directory_size(profile_dir)
logging.info(f'Chrome配置文件当前大小: {current_size_mb:.2f} MB')
# 检查是否超过阈值
if current_size_mb <= size_threshold_mb:
logging.info(f'配置文件大小 ({current_size_mb:.2f} MB) 未超过阈值 ({size_threshold_mb} MB),跳过清理')
return False
logging.info(f'配置文件大小 ({current_size_mb:.2f} MB) 超过阈值 ({size_threshold_mb} MB),开始清理缓存...')
# 定义需要清理的缓存目录和文件
cache_items = [
'Default/Cache',
'Default/Code Cache',
'Default/GPUCache',
'Default/Service Worker/CacheStorage',
'Default/Service Worker/ScriptCache',
'Default/IndexedDB',
'Default/Local Storage',
'Default/Session Storage',
'Default/Web Data-journal',
'Default/History-journal',
'Default/Favicons-journal',
'GrShaderCache',
'optimization_guide_model_store',
'BrowserMetrics'
]
cleaned_size = 0
cleaned_items = 0
for cache_item in cache_items:
cache_path = os.path.join(profile_dir, cache_item)
if os.path.exists(cache_path):
try:
# 计算要删除的大小
if os.path.isdir(cache_path):
item_size = self._get_directory_size(cache_path)
shutil.rmtree(cache_path)
else:
item_size = os.path.getsize(cache_path) / (1024 * 1024)
os.remove(cache_path)
cleaned_size += item_size
cleaned_items += 1
logging.debug(f'已清理: {cache_item} ({item_size:.2f} MB)')
except Exception as e:
logging.warning(f'清理 {cache_item} 时出错: {e}')
# 计算清理后的大小
final_size_mb = self._get_directory_size(profile_dir)
logging.info(f'缓存清理完成:')
logging.info(f' - 清理前大小: {current_size_mb:.2f} MB')
logging.info(f' - 清理后大小: {final_size_mb:.2f} MB')
logging.info(f' - 释放空间: {cleaned_size:.2f} MB')
logging.info(f' - 清理项目: {cleaned_items}')
return True
except Exception as e:
logging.error(f'智能缓存清理失败: {e}')
return False
def setup_driver(self):
logging.info('初始化Chrome WebDriver (启用CDP网络日志)')