修复 bug:修复优化代码时出现的异常,以及新短剧无法正确计算播放量差值的问题

This commit is contained in:
Qyir 2025-11-06 10:08:10 +08:00
parent 39239c3e85
commit 3b95c52fcb

View File

@ -192,23 +192,24 @@ class DouyinAutoScheduler:
today_videos_raw = list(douyin_collection.find({"batch_time": latest_batch_time}).sort("play_vv", -1))
logging.info(f"📊 最新批次数据数量: {len(today_videos_raw)}")
# 按短剧名称去重,每个短剧只保留播放量最高的一条
# 🚫 过滤掉空的或无效的mix_name和播放量为0的记录
# 按短剧ID去重,每个短剧只保留播放量最高的一条
# 🚫 过滤掉空的或无效的mix_id和播放量为0的记录
unique_videos = {}
for video in today_videos_raw:
mix_id = video.get("mix_id", "").strip()
mix_name = video.get("mix_name", "").strip()
play_vv = video.get("play_vv", 0)
# 过滤掉空的或无效的mix_name
if not mix_name or mix_name == "" or mix_name.lower() == "null":
# 过滤掉空的或无效的mix_id
if not mix_id or mix_id == "" or mix_id.lower() == "null":
continue
# 过滤掉播放量为0或无效的记录
if play_vv <= 0:
continue
if mix_name not in unique_videos or play_vv > unique_videos[mix_name].get("play_vv", 0):
unique_videos[mix_name] = video
if mix_id not in unique_videos or play_vv > unique_videos[mix_id].get("play_vv", 0):
unique_videos[mix_id] = video
today_videos = list(unique_videos.values())
@ -236,27 +237,28 @@ class DouyinAutoScheduler:
"batch_time": yesterday_batch_time
}).sort("play_vv", -1))
# 按短剧名称去重,每个短剧只保留播放量最高的一条
# 🚫 过滤掉空的或无效的mix_name和播放量为0的记录
# 按短剧ID去重,每个短剧只保留播放量最高的一条
# 🚫 过滤掉空的或无效的mix_id和播放量为0的记录
unique_yesterday_videos = {}
for video in yesterday_videos_raw:
mix_id = video.get("mix_id", "").strip()
mix_name = video.get("mix_name", "").strip()
play_vv = video.get("play_vv", 0)
# 过滤掉空的或无效的mix_name
if not mix_name or mix_name == "" or mix_name.lower() == "null":
# 过滤掉空的或无效的mix_id
if not mix_id or mix_id == "" or mix_id.lower() == "null":
continue
# 过滤掉播放量为0或无效的记录
if play_vv <= 0:
continue
if mix_name not in unique_yesterday_videos or play_vv > unique_yesterday_videos[mix_name].get("play_vv", 0):
unique_yesterday_videos[mix_name] = video
if mix_id not in unique_yesterday_videos or play_vv > unique_yesterday_videos[mix_id].get("play_vv", 0):
unique_yesterday_videos[mix_id] = video
# 将昨天的数据转换为字典,以短剧名称为键
for mix_name, video in unique_yesterday_videos.items():
yesterday_data[mix_name] = {
# 将昨天的数据转换为字典,以短剧ID为键
for mix_id, video in unique_yesterday_videos.items():
yesterday_data[mix_id] = {
"rank": 0, # 原始数据没有排名设为0
"play_vv": video.get("play_vv", 0),
"video_id": str(video.get("_id", ""))
@ -278,10 +280,10 @@ class DouyinAutoScheduler:
play_vv_change_rate = 0
is_new = True
mix_name = video.get("mix_name", "")
if mix_name in yesterday_data:
mix_id = video.get("mix_id", "")
if mix_id in yesterday_data:
is_new = False
yesterday_play_vv = yesterday_data[mix_name]["play_vv"]
yesterday_play_vv = yesterday_data[mix_id]["play_vv"]
# 计算播放量变化
play_vv_change = current_play_vv - yesterday_play_vv
@ -294,7 +296,7 @@ class DouyinAutoScheduler:
"play_vv_change": play_vv_change,
"play_vv_change_rate": play_vv_change_rate,
"is_new": is_new,
"yesterday_data": yesterday_data.get(mix_name, {})
"yesterday_data": yesterday_data.get(mix_id, {})
}
videos_with_growth.append(video_with_growth)
@ -391,7 +393,7 @@ class DouyinAutoScheduler:
"batch_id": management_data.get("batch_id", "") if management_data else "",
"batch_time": management_data.get("batch_time") if management_data else None,
"item_sequence": management_data.get("item_sequence", 0) if management_data else 0,
"mix_id": management_data.get("mix_id", "") if management_data else "",
"mix_id": video.get("mix_id", ""), # 直接从原始数据获取mix_id
"playcount": management_data.get("playcount", "") if management_data else "",
"request_id": management_data.get("request_id", "") if management_data else "",
"cover_image_url_original": management_data.get("cover_image_url_original", "") if management_data else "",