Merge branch 'main' of http://git.xintiao100.com/Qyir/rank_backend

commit 43ec2c397e
@@ -103,12 +103,6 @@ class DouyinAutoScheduler:
 
             # Set environment variable to ensure automatic mode
             os.environ['AUTO_CONTINUE'] = '1'
-            # Set the timer-mode environment variable to skip comment scraping and similar functions
-            os.environ['TIMER_MODE'] = '1'
-
-            # Enable quiet mode only in timer mode (not for tests, single runs, or rankings-only generation)
-            if hasattr(self, '_is_timer_mode') and self._is_timer_mode:
-                os.environ['QUIET_MODE'] = '1'
 
             # Create and run a DouyinPlayVVScraper instance directly
             scraper = DouyinPlayVVScraper(
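The hunk above drops the TIMER_MODE and QUIET_MODE setup, so only AUTO_CONTINUE is exported before the scraper starts. A minimal sketch of how a consumer could read such flags; resolve_mode is a hypothetical helper for illustration, not part of this repository:

import os

def resolve_mode() -> dict:
    # '1' enables a flag; unset or any other value leaves it disabled.
    return {
        "auto_continue": os.environ.get("AUTO_CONTINUE") == "1",
        "timer_mode": os.environ.get("TIMER_MODE") == "1",
        "quiet_mode": os.environ.get("QUIET_MODE") == "1",
    }

After this commit, such a check would report timer_mode and quiet_mode as disabled unless something else sets them.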
@@ -117,10 +111,10 @@ class DouyinAutoScheduler:
                 duration_s=60
             )
 
-            logging.warning("📁 Starting scraping task...")
+            logging.info("📁 Starting scraping task...")
             scraper.run()
 
-            logging.warning("✅ Douyin play-count scraping task completed successfully")
+            logging.info("✅ Douyin play-count scraping task completed successfully")
 
             # After scraping completes, automatically generate today's rankings
             self.generate_daily_rankings()
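The two changed lines demote progress messages from warning to info. Under Python's default root-logger threshold (WARNING), info records are discarded, so this demotion silences the messages unless logging is configured explicitly. A minimal sketch using only the standard library:

import logging

# Without this call the root logger stays at WARNING and the demoted
# info-level messages above would no longer appear in the output.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)
logging.info("📁 Starting scraping task...")  # emitted at INFO and above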
@@ -168,43 +162,35 @@ class DouyinAutoScheduler:
             today_videos_raw = list(douyin_collection.find({"batch_time": latest_batch_time}).sort("play_vv", -1))
             logging.info(f"📊 Latest batch record count: {len(today_videos_raw)}")
 
-            # Debug: inspect the raw data
-            if today_videos_raw:
-                sample_video = today_videos_raw[0]
-                logging.info(f"🔍 Sample data check:")
-                logging.info(f"  mix_name: {sample_video.get('mix_name')}")
-                logging.info(f"  play_vv: {sample_video.get('play_vv')} (type: {type(sample_video.get('play_vv'))})")
-                logging.info(f"  author: {sample_video.get('author')}")
-
-            # Deduplicate by series name and ensure correct data types
-            unique_videos = self._deduplicate_videos_by_mix_name(today_videos_raw, include_rank=False)
+            # Deduplicate by series name, keeping only the highest play-count entry per series
+            unique_videos = {}
+            for video in today_videos_raw:
+                mix_name = video.get("mix_name", "")
+                if mix_name and (mix_name not in unique_videos or video.get("play_vv", 0) > unique_videos[mix_name].get("play_vv", 0)):
+                    unique_videos[mix_name] = video
+
             today_videos = list(unique_videos.values())
 
             logging.info(f"📊 Today's data after deduplication: {len(today_videos)} unique series (raw records: {len(today_videos_raw)})")
 
-            # Fetch yesterday's last batch of data
-            yesterday_start = datetime(yesterday.year, yesterday.month, yesterday.day)
-            yesterday_end = yesterday_start + timedelta(days=1)
-            yesterday_batch = douyin_collection.find_one({
-                "batch_time": {"$gte": yesterday_start, "$lt": yesterday_end}
-            }, sort=[("batch_time", -1)])
+            # Fetch yesterday's rankings (if any), taking the most recent calculation
+            yesterday_ranking = rankings_collection.find_one({
+                "date": yesterday_str,
+                "type": "comprehensive"
+            }, sort=[("calculation_sequence", -1)])
 
             yesterday_data = {}
-            if yesterday_batch:
-                # Fetch all records from yesterday's last batch
-                yesterday_videos = list(douyin_collection.find({
-                    "batch_time": yesterday_batch["batch_time"]
-                }).sort("play_vv", -1))
-
-                # Deduplicate by series name, keep the highest play-count record, and ensure correct data types
-                yesterday_data = self._deduplicate_videos_by_mix_name(yesterday_videos, include_rank=True)
-
-                # Compute ranks
-                sorted_videos = sorted(yesterday_data.items(), key=lambda x: x[1]["play_vv"], reverse=True)
-                for rank, (mix_name, data) in enumerate(sorted_videos, 1):
-                    yesterday_data[mix_name]["rank"] = rank
-
-                logging.info(f"📊 Found yesterday's raw data: {len(yesterday_data)} series")
+            if yesterday_ranking and "data" in yesterday_ranking:
+                # Convert yesterday's data into a dict keyed by series name
+                for item in yesterday_ranking["data"]:
+                    title = item.get("title", "")
+                    if title:
+                        yesterday_data[title] = {
+                            "rank": item.get("rank", 0),
+                            "play_vv": item.get("play_vv", 0),
+                            "video_id": item.get("video_id", "")
+                        }
+                logging.info(f"📊 Found yesterday's rankings: {len(yesterday_data)} series")
             else:
                 logging.info("📊 No raw data found for yesterday; treating this as the first generation")
 
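This hunk makes two changes: deduplication becomes an inline keep-the-maximum pass over the raw batch instead of the _deduplicate_videos_by_mix_name helper, and yesterday's baseline now comes from the stored rankings document (rankings_collection) rather than re-aggregating raw batch data. A self-contained sketch of the same keep-the-maximum deduplication; the sample records are illustrative, not taken from the repository:

def dedupe_by_mix_name(videos):
    # Keep only the highest-play_vv record for each mix_name.
    unique = {}
    for video in videos:
        name = video.get("mix_name", "")
        if name and (name not in unique
                     or video.get("play_vv", 0) > unique[name].get("play_vv", 0)):
            unique[name] = video
    return unique

videos = [
    {"mix_name": "A", "play_vv": 120},
    {"mix_name": "A", "play_vv": 300},  # wins: higher play_vv for "A"
    {"mix_name": "B", "play_vv": 50},
]
assert [v["play_vv"] for v in dedupe_by_mix_name(videos).values()] == [300, 50]

Reading the baseline from the rankings collection also removes the need to recompute ranks: each stored item already carries its rank, play_vv, and video_id.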
File diff suppressed because it is too large