修改了Chrome配置文件清理功能
每次运行时都会检查Chrome配置文件目录的大小,超过50MB时自动清理缓存,同时保留最重要的用户登录信息。
This commit is contained in:
parent
149ea00f1e
commit
64455034bb
@ -97,6 +97,8 @@ class DouyinPlayVVScraper:
|
|||||||
logging.info('[定时器模式] 使用批量存储')
|
logging.info('[定时器模式] 使用批量存储')
|
||||||
|
|
||||||
self._cleanup_old_profiles()
|
self._cleanup_old_profiles()
|
||||||
|
# 智能清理Chrome缓存(仅当超过50MB时)
|
||||||
|
self._cleanup_chrome_cache_smart()
|
||||||
self._setup_mongodb()
|
self._setup_mongodb()
|
||||||
self._load_image_cache()
|
self._load_image_cache()
|
||||||
|
|
||||||
@ -221,6 +223,101 @@ class DouyinPlayVVScraper:
|
|||||||
logging.warning(f'清理Chrome进程时出错: {e}')
|
logging.warning(f'清理Chrome进程时出错: {e}')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _get_directory_size(self, directory_path):
|
||||||
|
"""计算目录大小(MB)"""
|
||||||
|
total_size = 0
|
||||||
|
try:
|
||||||
|
for dirpath, dirnames, filenames in os.walk(directory_path):
|
||||||
|
for filename in filenames:
|
||||||
|
filepath = os.path.join(dirpath, filename)
|
||||||
|
try:
|
||||||
|
total_size += os.path.getsize(filepath)
|
||||||
|
except (OSError, FileNotFoundError):
|
||||||
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f'计算目录大小时出错: {e}')
|
||||||
|
return total_size / (1024 * 1024) # 转换为MB
|
||||||
|
|
||||||
|
def _cleanup_chrome_cache_smart(self, size_threshold_mb=50, profile_dir=None):
    """Remove Chrome cache data from the persistent profile once it grows too large.

    The profile directory is measured first. Nothing is deleted unless its
    total size exceeds ``size_threshold_mb``. When it does, only the entries
    listed in ``cache_items`` below are removed; every other file in the
    profile is left untouched.

    Args:
        size_threshold_mb (int | float): Size in MB above which the cleanup
            runs. Defaults to 50.
        profile_dir (str | None): Profile directory to clean. When omitted,
            ``config/chrome_profile/douyin_persistent`` next to this script
            is used.

    Returns:
        bool: True when the threshold was exceeded and a cleanup pass ran
        (individual items may still have failed; those are logged as
        warnings). False when the directory is missing, the size is within
        the threshold, or an unexpected error occurred.
    """
    try:
        if profile_dir is None:
            script_dir = os.path.dirname(os.path.abspath(__file__))
            profile_dir = os.path.join(
                script_dir, 'config', 'chrome_profile', 'douyin_persistent'
            )

        if not os.path.exists(profile_dir):
            logging.info('Chrome配置文件目录不存在,跳过缓存清理')
            return False

        # Measure the whole profile before deciding whether to clean.
        current_size_mb = self._get_directory_size(profile_dir)
        logging.info(f'Chrome配置文件当前大小: {current_size_mb:.2f} MB')

        if current_size_mb <= size_threshold_mb:
            logging.info(f'配置文件大小 ({current_size_mb:.2f} MB) 未超过阈值 ({size_threshold_mb} MB),跳过清理')
            return False

        logging.info(f'配置文件大小 ({current_size_mb:.2f} MB) 超过阈值 ({size_threshold_mb} MB),开始清理缓存...')

        # Directories and files, relative to the profile root, that get removed.
        # NOTE(review): IndexedDB, Local Storage and Session Storage hold site
        # data rather than pure cache; sites may keep login-related state
        # there. Confirm that removing them does not log the user out.
        cache_items = [
            'Default/Cache',
            'Default/Code Cache',
            'Default/GPUCache',
            'Default/Service Worker/CacheStorage',
            'Default/Service Worker/ScriptCache',
            'Default/IndexedDB',
            'Default/Local Storage',
            'Default/Session Storage',
            'Default/Web Data-journal',
            'Default/History-journal',
            'Default/Favicons-journal',
            'GrShaderCache',
            'optimization_guide_model_store',
            'BrowserMetrics',
        ]

        cleaned_size = 0
        cleaned_items = 0

        for cache_item in cache_items:
            cache_path = os.path.join(profile_dir, cache_item)
            if not os.path.exists(cache_path):
                continue

            try:
                # Record the size before deleting so freed space can be reported.
                if os.path.isdir(cache_path):
                    item_size = self._get_directory_size(cache_path)
                    shutil.rmtree(cache_path)
                else:
                    item_size = os.path.getsize(cache_path) / (1024 * 1024)
                    os.remove(cache_path)

                cleaned_size += item_size
                cleaned_items += 1
                logging.debug(f'已清理: {cache_item} ({item_size:.2f} MB)')
            except Exception as e:
                # One item failing must not abort the rest of the cleanup.
                logging.warning(f'清理 {cache_item} 时出错: {e}')

        # Re-measure so the log shows the actual size after cleanup.
        final_size_mb = self._get_directory_size(profile_dir)

        logging.info('缓存清理完成:')
        logging.info(f' - 清理前大小: {current_size_mb:.2f} MB')
        logging.info(f' - 清理后大小: {final_size_mb:.2f} MB')
        logging.info(f' - 释放空间: {cleaned_size:.2f} MB')
        logging.info(f' - 清理项目: {cleaned_items} 个')

        return True

    except Exception as e:
        # Cleanup is optional housekeeping; never let it break start-up.
        logging.error(f'智能缓存清理失败: {e}')
        return False
|
||||||
|
|
||||||
def setup_driver(self):
|
def setup_driver(self):
|
||||||
logging.info('初始化Chrome WebDriver (启用CDP网络日志)')
|
logging.info('初始化Chrome WebDriver (启用CDP网络日志)')
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user