目前后端和前端:
1.添加后台管理页面,使用网址进入后台管理页面:http://localhost:5174/admin 2.剧种分类完成,只要用户在后台管理页面选择类型之后就会一直显示 3.在两个脚本运行的时候可以同时启动两个浏览器页面不受影响(原因是:必须要管理数据库有了数据之后前端点赞才可以显示,但是主代码运行慢,为了不浪费时间,每次定时器运行的时候都会通过视频ID来同步短剧的详细信息) 前端可以稳定地显示数据。
This commit is contained in:
parent
3b95c52fcb
commit
d4d555cdb1
@ -80,39 +80,91 @@ class DouyinAutoScheduler:
|
|||||||
return 0
|
return 0
|
||||||
return play_vv
|
return play_vv
|
||||||
|
|
||||||
def _deduplicate_videos_by_mix_name(self, videos, include_rank=False):
|
def check_browser_login_status(self):
|
||||||
"""按短剧名称去重,保留播放量最高的记录"""
|
"""检查浏览器登录状态,如果没有登录则提示用户登录"""
|
||||||
unique_data = {}
|
try:
|
||||||
for video in videos:
|
import os
|
||||||
mix_name = video.get("mix_name", "").strip()
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_timer', 'douyin_persistent')
|
||||||
|
|
||||||
# 过滤掉空的或无效的mix_name
|
# # 检查配置文件目录是否存在
|
||||||
if not mix_name or mix_name == "" or mix_name.lower() == "null":
|
# if not os.path.exists(profile_dir):
|
||||||
self.logger.warning(f"跳过空的或无效的mix_name记录: {video.get('_id', 'unknown')}")
|
# print("⚠️ 检测到定时器浏览器配置目录不存在,需要首次登录")
|
||||||
continue
|
# print(" 请在浏览器中完成抖音登录,并导航到【我的】→【收藏】→【合集】页面")
|
||||||
|
# print(" 完成后按回车键继续...")
|
||||||
|
# input()
|
||||||
|
# return
|
||||||
|
|
||||||
# 标准化播放量数据类型
|
# 检查配置文件是否为空(可能未登录)
|
||||||
play_vv = self._normalize_play_vv(video.get("play_vv", 0))
|
import glob
|
||||||
|
profile_files = glob.glob(os.path.join(profile_dir, "*"))
|
||||||
|
if len(profile_files) < 5: # 如果文件太少,可能未登录
|
||||||
|
print("⚠️ 检测到定时器浏览器可能未登录")
|
||||||
|
print(" 请在浏览器中完成抖音登录,并导航到【我的】→【收藏】→【合集】页面")
|
||||||
|
print(" 完成后按回车键继续...")
|
||||||
|
input()
|
||||||
|
else:
|
||||||
|
print("✅ 定时器浏览器已配置,继续执行...")
|
||||||
|
|
||||||
# 确保播放量大于0,过滤无效数据
|
except Exception as e:
|
||||||
if play_vv <= 0:
|
logging.warning(f"检查浏览器登录状态时出错: {e}")
|
||||||
self.logger.warning(f"跳过播放量为0或无效的记录: mix_name={mix_name}, play_vv={video.get('play_vv', 0)}")
|
print("⚠️ 检查浏览器状态失败,请确保浏览器已正确配置")
|
||||||
continue
|
print(" 完成后按回车键继续...")
|
||||||
|
input()
|
||||||
|
|
||||||
if mix_name not in unique_data or play_vv > unique_data[mix_name].get("play_vv", 0):
|
def _cleanup_chrome_processes(self):
|
||||||
if include_rank:
|
"""清理可能占用配置文件的Chrome进程"""
|
||||||
# 用于昨天数据的格式
|
try:
|
||||||
unique_data[mix_name] = {
|
import psutil
|
||||||
"play_vv": play_vv,
|
import os
|
||||||
"video_id": str(video.get("_id", "")),
|
|
||||||
"rank": 0 # 稍后计算排名
|
# 获取当前配置文件路径
|
||||||
}
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_timer', 'douyin_persistent')
|
||||||
|
|
||||||
|
# 查找使用该配置文件的Chrome进程
|
||||||
|
killed_processes = []
|
||||||
|
for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
|
||||||
|
try:
|
||||||
|
if proc.info['name'] and 'chrome' in proc.info['name'].lower():
|
||||||
|
cmdline = proc.info['cmdline']
|
||||||
|
if cmdline and any(profile_dir in arg for arg in cmdline):
|
||||||
|
proc.terminate()
|
||||||
|
killed_processes.append(proc.info['pid'])
|
||||||
|
logging.info(f'终止占用配置文件的Chrome进程: PID {proc.info["pid"]}')
|
||||||
|
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 等待进程终止
|
||||||
|
if killed_processes:
|
||||||
|
import time
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
return len(killed_processes) > 0
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
# 如果没有psutil,使用系统命令
|
||||||
|
try:
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
|
||||||
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_timer', 'douyin_persistent')
|
||||||
|
|
||||||
|
# 使用taskkill命令终止Chrome进程
|
||||||
|
result = subprocess.run(['taskkill', '/F', '/IM', 'chrome.exe'], capture_output=True, text=True)
|
||||||
|
if result.returncode == 0:
|
||||||
|
logging.info('使用系统命令终止Chrome进程')
|
||||||
|
return True
|
||||||
else:
|
else:
|
||||||
# 用于今天数据的格式,直接更新原视频对象
|
logging.warning('无法终止Chrome进程')
|
||||||
video["play_vv"] = play_vv
|
return False
|
||||||
unique_data[mix_name] = video
|
except Exception as e:
|
||||||
|
logging.warning(f'系统命令清理Chrome进程失败: {e}')
|
||||||
return unique_data
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f'清理Chrome进程时出错: {e}')
|
||||||
|
return False
|
||||||
|
|
||||||
def run_douyin_scraper(self):
|
def run_douyin_scraper(self):
|
||||||
"""执行抖音播放量抓取任务"""
|
"""执行抖音播放量抓取任务"""
|
||||||
@ -126,14 +178,14 @@ class DouyinAutoScheduler:
|
|||||||
scraper = DouyinPlayVVScraper(
|
scraper = DouyinPlayVVScraper(
|
||||||
start_url="https://www.douyin.com/user/self?showTab=favorite_collection&showSubTab=compilation",
|
start_url="https://www.douyin.com/user/self?showTab=favorite_collection&showSubTab=compilation",
|
||||||
auto_continue=True,
|
auto_continue=True,
|
||||||
duration_s=60
|
duration_s=180 # 增加到180秒,给更多时间收集数据
|
||||||
)
|
)
|
||||||
|
|
||||||
print("📁 开始执行抓取任务...")
|
print("开始执行抓取任务...")
|
||||||
logging.info("📁 开始执行抓取任务...")
|
logging.info("📁 开始执行抓取任务...")
|
||||||
scraper.run()
|
scraper.run()
|
||||||
|
|
||||||
print("✅ 抖音播放量抓取任务执行成功")
|
print("抖音播放量抓取任务执行成功")
|
||||||
logging.info("✅ 抖音播放量抓取任务执行成功")
|
logging.info("✅ 抖音播放量抓取任务执行成功")
|
||||||
|
|
||||||
# 数据抓取完成后,自动生成当日榜单
|
# 数据抓取完成后,自动生成当日榜单
|
||||||
@ -381,7 +433,7 @@ class DouyinAutoScheduler:
|
|||||||
# 🎯 核心榜单字段
|
# 🎯 核心榜单字段
|
||||||
"rank": rank, # 使用排名计数器
|
"rank": rank, # 使用排名计数器
|
||||||
"title": mix_name,
|
"title": mix_name,
|
||||||
"mix_name": mix_name, # 确保包含mix_name字段用于同步
|
"mix_name": mix_name,
|
||||||
"play_vv": current_play_vv,
|
"play_vv": current_play_vv,
|
||||||
"series_author": video.get("series_author", ""),
|
"series_author": video.get("series_author", ""),
|
||||||
"video_id": video_id,
|
"video_id": video_id,
|
||||||
@ -478,7 +530,7 @@ class DouyinAutoScheduler:
|
|||||||
if item.get("Copyright_field"):
|
if item.get("Copyright_field"):
|
||||||
items_with_copyright += 1
|
items_with_copyright += 1
|
||||||
|
|
||||||
print(f"📊 数据完整性统计:")
|
print(f"数据完整性统计:")
|
||||||
print(f" 总项目数: {total_items}")
|
print(f" 总项目数: {total_items}")
|
||||||
print(f" 从Rankings_management获取到详细信息: {items_with_management_data}")
|
print(f" 从Rankings_management获取到详细信息: {items_with_management_data}")
|
||||||
print(f" 包含Manufacturing_Field: {items_with_manufacturing}")
|
print(f" 包含Manufacturing_Field: {items_with_manufacturing}")
|
||||||
|
|||||||
@ -19,9 +19,13 @@
|
|||||||
{
|
{
|
||||||
"video_id": "7471924777410645283",
|
"video_id": "7471924777410645283",
|
||||||
"episode_num": 0
|
"episode_num": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"video_id": "7472791705268325641",
|
||||||
|
"episode_num": 0
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"total_count": 5,
|
"total_count": 6,
|
||||||
"last_update": "2025-10-22T09:55:36.943794",
|
"last_update": "2025-11-06T17:43:54.929209",
|
||||||
"mix_name": "《青蛇传》"
|
"mix_name": "《青蛇传》"
|
||||||
}
|
}
|
||||||
@ -47,9 +47,17 @@
|
|||||||
{
|
{
|
||||||
"video_id": "7548447317729234239",
|
"video_id": "7548447317729234239",
|
||||||
"episode_num": 0
|
"episode_num": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"video_id": "7568747381357808923",
|
||||||
|
"episode_num": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"video_id": "7568800392985791784",
|
||||||
|
"episode_num": 0
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"total_count": 12,
|
"total_count": 14,
|
||||||
"last_update": "2025-10-22T09:55:50.726907",
|
"last_update": "2025-11-06T17:48:06.014161",
|
||||||
"mix_name": "青云修仙传"
|
"mix_name": "青云修仙传"
|
||||||
}
|
}
|
||||||
@ -107,9 +107,17 @@
|
|||||||
{
|
{
|
||||||
"video_id": "7560551213957500195",
|
"video_id": "7560551213957500195",
|
||||||
"episode_num": 0
|
"episode_num": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"video_id": "7562056353343966464",
|
||||||
|
"episode_num": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"video_id": "7567981488823318927",
|
||||||
|
"episode_num": 0
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"total_count": 27,
|
"total_count": 29,
|
||||||
"last_update": "2025-10-22T09:56:16.947762",
|
"last_update": "2025-11-06T17:15:32.747557",
|
||||||
"mix_name": "绝境逆袭"
|
"mix_name": "绝境逆袭"
|
||||||
}
|
}
|
||||||
@ -181,9 +181,15 @@ class DouyinPlayVVScraper:
|
|||||||
"""清理超过一天的旧临时Chrome配置文件"""
|
"""清理超过一天的旧临时Chrome配置文件"""
|
||||||
try:
|
try:
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
profile_base_dir = os.path.join(script_dir, 'config', 'chrome_profile')
|
# 清理两个配置目录的旧文件
|
||||||
if not os.path.exists(profile_base_dir):
|
profile_dirs = [
|
||||||
return
|
os.path.join(script_dir, 'config', 'chrome_profile_scraper'),
|
||||||
|
os.path.join(script_dir, 'config', 'chrome_profile_timer')
|
||||||
|
]
|
||||||
|
|
||||||
|
for profile_base_dir in profile_dirs:
|
||||||
|
if not os.path.exists(profile_base_dir):
|
||||||
|
continue
|
||||||
|
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
one_day_ago = current_time - 24 * 60 * 60 # 24小时前
|
one_day_ago = current_time - 24 * 60 * 60 # 24小时前
|
||||||
@ -219,7 +225,7 @@ class DouyinPlayVVScraper:
|
|||||||
|
|
||||||
# 获取当前配置文件路径
|
# 获取当前配置文件路径
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile', 'douyin_persistent')
|
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_scraper', 'douyin_persistent')
|
||||||
|
|
||||||
# 查找使用该配置文件的Chrome进程
|
# 查找使用该配置文件的Chrome进程
|
||||||
killed_processes = []
|
killed_processes = []
|
||||||
@ -279,7 +285,13 @@ class DouyinPlayVVScraper:
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile', 'douyin_persistent')
|
# 根据运行模式选择对应的配置目录
|
||||||
|
is_timer_mode = os.environ.get('TIMER_MODE') == '1'
|
||||||
|
|
||||||
|
if is_timer_mode:
|
||||||
|
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_timer', 'douyin_persistent')
|
||||||
|
else:
|
||||||
|
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_scraper', 'douyin_persistent')
|
||||||
|
|
||||||
if not os.path.exists(profile_dir):
|
if not os.path.exists(profile_dir):
|
||||||
logging.info('Chrome配置文件目录不存在,跳过缓存清理')
|
logging.info('Chrome配置文件目录不存在,跳过缓存清理')
|
||||||
@ -368,9 +380,20 @@ class DouyinPlayVVScraper:
|
|||||||
chrome_options.add_argument('--remote-debugging-port=0')
|
chrome_options.add_argument('--remote-debugging-port=0')
|
||||||
chrome_options.add_argument('--start-maximized')
|
chrome_options.add_argument('--start-maximized')
|
||||||
chrome_options.add_argument('--lang=zh-CN')
|
chrome_options.add_argument('--lang=zh-CN')
|
||||||
# 使用固定的Chrome配置文件目录以保持登录状态
|
|
||||||
|
# 根据运行模式选择不同的Chrome配置文件目录
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile', 'douyin_persistent')
|
is_timer_mode = os.environ.get('TIMER_MODE') == '1'
|
||||||
|
|
||||||
|
if is_timer_mode:
|
||||||
|
# 定时器模式使用独立的配置目录
|
||||||
|
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_timer', 'douyin_persistent')
|
||||||
|
logging.info(f'[定时器模式] 使用独立Chrome配置文件: {profile_dir}')
|
||||||
|
else:
|
||||||
|
# 普通模式使用原有的配置目录
|
||||||
|
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_scraper', 'douyin_persistent')
|
||||||
|
logging.info(f'[普通模式] 使用独立Chrome配置文件: {profile_dir}')
|
||||||
|
|
||||||
os.makedirs(profile_dir, exist_ok=True)
|
os.makedirs(profile_dir, exist_ok=True)
|
||||||
chrome_options.add_argument(f'--user-data-dir={profile_dir}')
|
chrome_options.add_argument(f'--user-data-dir={profile_dir}')
|
||||||
logging.info(f'使用持久化Chrome配置文件: {profile_dir}')
|
logging.info(f'使用持久化Chrome配置文件: {profile_dir}')
|
||||||
@ -517,6 +540,24 @@ class DouyinPlayVVScraper:
|
|||||||
logging.warning(f'错误上下文: {error_details["context"]}')
|
logging.warning(f'错误上下文: {error_details["context"]}')
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# 定时器模式下的登录检查
|
||||||
|
is_timer_mode = os.environ.get('TIMER_MODE') == '1'
|
||||||
|
if is_timer_mode:
|
||||||
|
logging.info("定时器模式:检查浏览器登录状态...")
|
||||||
|
# 在定时器模式下,浏览器已经启动并导航到页面,现在检查登录状态
|
||||||
|
if not self._check_login_and_page():
|
||||||
|
logging.warning("定时器模式:检测到未登录状态,需要手动登录")
|
||||||
|
print("⚠️ 定时器浏览器未登录")
|
||||||
|
print(" 请在浏览器中完成抖音登录,并导航到【我的】→【收藏】→【合集】页面")
|
||||||
|
print(" 完成后按回车键继续...")
|
||||||
|
input()
|
||||||
|
# 重新检查登录状态
|
||||||
|
if not self._check_login_and_page():
|
||||||
|
logging.warning("定时器模式:登录确认后仍然未登录,继续执行...")
|
||||||
|
else:
|
||||||
|
logging.info("定时器模式:浏览器已登录,继续执行...")
|
||||||
|
return
|
||||||
|
|
||||||
logging.info("进入手动登录确认循环...")
|
logging.info("进入手动登录确认循环...")
|
||||||
while True:
|
while True:
|
||||||
# 要求用户输入特定文本确认
|
# 要求用户输入特定文本确认
|
||||||
@ -641,6 +682,16 @@ class DouyinPlayVVScraper:
|
|||||||
|
|
||||||
def trigger_loading(self):
|
def trigger_loading(self):
|
||||||
logging.info('触发数据加载:滚动 + 刷新')
|
logging.info('触发数据加载:滚动 + 刷新')
|
||||||
|
|
||||||
|
# 在auto_continue模式下增加页面加载等待时间
|
||||||
|
if self.auto_continue:
|
||||||
|
logging.info('自动继续模式:增加页面加载等待时间')
|
||||||
|
time.sleep(8) # 等待页面完全加载
|
||||||
|
else:
|
||||||
|
# 普通模式也需要增加页面加载等待时间
|
||||||
|
logging.info('普通模式:增加页面加载等待时间')
|
||||||
|
time.sleep(8) # 等待页面完全加载
|
||||||
|
|
||||||
# 滚动触发懒加载
|
# 滚动触发懒加载
|
||||||
for i in range(8):
|
for i in range(8):
|
||||||
self.driver.execute_script(f'window.scrollTo(0, {i * 900});')
|
self.driver.execute_script(f'window.scrollTo(0, {i * 900});')
|
||||||
@ -1218,6 +1269,7 @@ class DouyinPlayVVScraper:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f'[实时保存] 更新排名失败: {e}')
|
logging.error(f'[实时保存] 更新排名失败: {e}')
|
||||||
|
|
||||||
|
|
||||||
def extract_douyin_image_id(self, cover_url):
|
def extract_douyin_image_id(self, cover_url):
|
||||||
"""
|
"""
|
||||||
从抖音图片URL中提取唯一的图片ID
|
从抖音图片URL中提取唯一的图片ID
|
||||||
@ -2251,8 +2303,14 @@ class DouyinPlayVVScraper:
|
|||||||
}
|
}
|
||||||
|
|
||||||
all_videos = []
|
all_videos = []
|
||||||
|
# 使用服务端提供的游标进行分页,而不是使用 len(all_videos)
|
||||||
|
cursor = 0
|
||||||
|
seen_cursors = set()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
# 将当前游标设置到请求参数(字符串以兼容部分接口)
|
||||||
|
params['cursor'] = str(cursor)
|
||||||
|
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
'https://www.douyin.com/aweme/v1/web/mix/aweme/',
|
'https://www.douyin.com/aweme/v1/web/mix/aweme/',
|
||||||
params=params,
|
params=params,
|
||||||
@ -2267,8 +2325,10 @@ class DouyinPlayVVScraper:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
data = response.json()
|
data = response.json()
|
||||||
aweme_list = data.get('aweme_list', [])
|
# 兼容可能的列表字段名
|
||||||
|
aweme_list = data.get('aweme_list') or data.get('mix_aweme_list') or []
|
||||||
if not aweme_list:
|
if not aweme_list:
|
||||||
|
logging.info("当前页无视频,结束分页")
|
||||||
break
|
break
|
||||||
|
|
||||||
for aweme in aweme_list:
|
for aweme in aweme_list:
|
||||||
@ -2279,11 +2339,28 @@ class DouyinPlayVVScraper:
|
|||||||
'episode_num': int(aweme.get('episode_num', 0))
|
'episode_num': int(aweme.get('episode_num', 0))
|
||||||
})
|
})
|
||||||
|
|
||||||
has_more = data.get('has_more', False)
|
# 读取服务端分页标识
|
||||||
if not has_more:
|
has_more = data.get('has_more') or data.get('hasMore') or False
|
||||||
|
next_cursor = (
|
||||||
|
data.get('cursor') or
|
||||||
|
data.get('next_cursor') or
|
||||||
|
data.get('max_cursor') or
|
||||||
|
data.get('min_cursor')
|
||||||
|
)
|
||||||
|
|
||||||
|
logging.info(f"分页: cursor={cursor}, next_cursor={next_cursor}, has_more={has_more}, 本页视频={len(aweme_list)}, 累计={len(all_videos)}")
|
||||||
|
|
||||||
|
# 退出条件:没有更多或没有有效下一游标
|
||||||
|
if not has_more or not next_cursor:
|
||||||
break
|
break
|
||||||
|
|
||||||
params['cursor'] = str(len(all_videos))
|
# 防止重复游标导致的死循环
|
||||||
|
if next_cursor in seen_cursors:
|
||||||
|
logging.warning(f"检测到重复游标 {next_cursor},停止分页以避免死循环")
|
||||||
|
break
|
||||||
|
|
||||||
|
seen_cursors.add(next_cursor)
|
||||||
|
cursor = next_cursor
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
@ -3725,7 +3802,7 @@ class DouyinPlayVVScraper:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def cleanup_old_management_data(self, days_to_keep: int = 7):
|
def cleanup_old_management_data(self, days_to_keep: int = 7):
|
||||||
"""清理目标数据库中的旧数据,基于last_updated字段保留指定天数的数据"""
|
"""清理目标数据库Rankings_management中的旧数据,基于last_updated字段保留指定天数的数据"""
|
||||||
target_collection = self.collection # 使用根据模式选择的集合
|
target_collection = self.collection # 使用根据模式选择的集合
|
||||||
if target_collection is None:
|
if target_collection is None:
|
||||||
logging.warning('[数据清理] 目标集合未初始化,跳过清理')
|
logging.warning('[数据清理] 目标集合未初始化,跳过清理')
|
||||||
@ -3824,7 +3901,7 @@ if __name__ == '__main__':
|
|||||||
parser = argparse.ArgumentParser(description='Selenium+CDP 抖音play_vv抓取器')
|
parser = argparse.ArgumentParser(description='Selenium+CDP 抖音play_vv抓取器')
|
||||||
parser.add_argument('--url', default='https://www.douyin.com/user/self?showTab=favorite_collection&showSubTab=compilation', help='收藏合集列表页面URL')
|
parser.add_argument('--url', default='https://www.douyin.com/user/self?showTab=favorite_collection&showSubTab=compilation', help='收藏合集列表页面URL')
|
||||||
parser.add_argument('--auto', action='store_true', help='自动继续,跳过回车等待')
|
parser.add_argument('--auto', action='store_true', help='自动继续,跳过回车等待')
|
||||||
parser.add_argument('--duration', type=int, default=60, help='网络响应收集时长(秒)')
|
parser.add_argument('--duration', type=int, default=180, help='网络响应收集时长(秒)')
|
||||||
parser.add_argument('--driver', help='覆盖chromedriver路径')
|
parser.add_argument('--driver', help='覆盖chromedriver路径')
|
||||||
parser.add_argument('--timer', action='store_true', help='启用定时器模式,应用config.py中的定时器配置')
|
parser.add_argument('--timer', action='store_true', help='启用定时器模式,应用config.py中的定时器配置')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|||||||
@ -656,23 +656,19 @@ def get_top_mixes(limit=10):
|
|||||||
# 按播放量排序获取热门合集
|
# 按播放量排序获取热门合集
|
||||||
cursor = collection.find().sort("play_vv", -1).limit(limit)
|
cursor = collection.find().sort("play_vv", -1).limit(limit)
|
||||||
docs = list(cursor)
|
docs = list(cursor)
|
||||||
|
|
||||||
if not docs:
|
if not docs:
|
||||||
return {"success": False, "message": "暂无数据"}
|
return {"success": False, "message": "暂无数据"}
|
||||||
|
|
||||||
# 格式化数据
|
# 格式化数据
|
||||||
top_list = []
|
top_list = []
|
||||||
for doc in docs:
|
for doc in docs:
|
||||||
item = format_mix_item(doc)
|
item = format_mix_item(doc)
|
||||||
top_list.append(item)
|
top_list.append(item)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
"data": top_list,
|
"data": top_list,
|
||||||
"total": len(top_list),
|
"total": len(top_list),
|
||||||
"update_time": format_time(docs[0].get("batch_time")) if docs else ""
|
"update_time": format_time(docs[0].get("batch_time")) if docs else ""
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"获取热门合集失败: {e}")
|
logging.error(f"获取热门合集失败: {e}")
|
||||||
return {"success": False, "message": f"获取数据失败: {str(e)}"}
|
return {"success": False, "message": f"获取数据失败: {str(e)}"}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user