修复 bug:修复优化代码时出现的异常,以及新短剧不能正确地计算播放量差值的问题

This commit is contained in:
Qyir 2025-11-06 10:08:10 +08:00
parent 39239c3e85
commit 3b95c52fcb

View File

@ -192,24 +192,25 @@ class DouyinAutoScheduler:
today_videos_raw = list(douyin_collection.find({"batch_time": latest_batch_time}).sort("play_vv", -1)) today_videos_raw = list(douyin_collection.find({"batch_time": latest_batch_time}).sort("play_vv", -1))
logging.info(f"📊 最新批次数据数量: {len(today_videos_raw)}") logging.info(f"📊 最新批次数据数量: {len(today_videos_raw)}")
# 按短剧名称去重,每个短剧只保留播放量最高的一条 # 按短剧ID去重,每个短剧只保留播放量最高的一条
# 🚫 过滤掉空的或无效的mix_name和播放量为0的记录 # 🚫 过滤掉空的或无效的mix_id和播放量为0的记录
unique_videos = {} unique_videos = {}
for video in today_videos_raw: for video in today_videos_raw:
mix_id = video.get("mix_id", "").strip()
mix_name = video.get("mix_name", "").strip() mix_name = video.get("mix_name", "").strip()
play_vv = video.get("play_vv", 0) play_vv = video.get("play_vv", 0)
# 过滤掉空的或无效的mix_name # 过滤掉空的或无效的mix_id
if not mix_name or mix_name == "" or mix_name.lower() == "null": if not mix_id or mix_id == "" or mix_id.lower() == "null":
continue continue
# 过滤掉播放量为0或无效的记录 # 过滤掉播放量为0或无效的记录
if play_vv <= 0: if play_vv <= 0:
continue continue
if mix_name not in unique_videos or play_vv > unique_videos[mix_name].get("play_vv", 0): if mix_id not in unique_videos or play_vv > unique_videos[mix_id].get("play_vv", 0):
unique_videos[mix_name] = video unique_videos[mix_id] = video
today_videos = list(unique_videos.values()) today_videos = list(unique_videos.values())
logging.info(f"📊 今日数据去重后:{len(today_videos)} 个独特短剧(原始数据:{len(today_videos_raw)} 条)") logging.info(f"📊 今日数据去重后:{len(today_videos)} 个独特短剧(原始数据:{len(today_videos_raw)} 条)")
@ -236,27 +237,28 @@ class DouyinAutoScheduler:
"batch_time": yesterday_batch_time "batch_time": yesterday_batch_time
}).sort("play_vv", -1)) }).sort("play_vv", -1))
# 按短剧名称去重,每个短剧只保留播放量最高的一条 # 按短剧ID去重,每个短剧只保留播放量最高的一条
# 🚫 过滤掉空的或无效的mix_name和播放量为0的记录 # 🚫 过滤掉空的或无效的mix_id和播放量为0的记录
unique_yesterday_videos = {} unique_yesterday_videos = {}
for video in yesterday_videos_raw: for video in yesterday_videos_raw:
mix_id = video.get("mix_id", "").strip()
mix_name = video.get("mix_name", "").strip() mix_name = video.get("mix_name", "").strip()
play_vv = video.get("play_vv", 0) play_vv = video.get("play_vv", 0)
# 过滤掉空的或无效的mix_name # 过滤掉空的或无效的mix_id
if not mix_name or mix_name == "" or mix_name.lower() == "null": if not mix_id or mix_id == "" or mix_id.lower() == "null":
continue continue
# 过滤掉播放量为0或无效的记录 # 过滤掉播放量为0或无效的记录
if play_vv <= 0: if play_vv <= 0:
continue continue
if mix_name not in unique_yesterday_videos or play_vv > unique_yesterday_videos[mix_name].get("play_vv", 0): if mix_id not in unique_yesterday_videos or play_vv > unique_yesterday_videos[mix_id].get("play_vv", 0):
unique_yesterday_videos[mix_name] = video unique_yesterday_videos[mix_id] = video
# 将昨天的数据转换为字典,以短剧名称为键 # 将昨天的数据转换为字典,以短剧ID为键
for mix_name, video in unique_yesterday_videos.items(): for mix_id, video in unique_yesterday_videos.items():
yesterday_data[mix_name] = { yesterday_data[mix_id] = {
"rank": 0, # 原始数据没有排名设为0 "rank": 0, # 原始数据没有排名设为0
"play_vv": video.get("play_vv", 0), "play_vv": video.get("play_vv", 0),
"video_id": str(video.get("_id", "")) "video_id": str(video.get("_id", ""))
@ -278,23 +280,23 @@ class DouyinAutoScheduler:
play_vv_change_rate = 0 play_vv_change_rate = 0
is_new = True is_new = True
mix_name = video.get("mix_name", "") mix_id = video.get("mix_id", "")
if mix_name in yesterday_data: if mix_id in yesterday_data:
is_new = False is_new = False
yesterday_play_vv = yesterday_data[mix_name]["play_vv"] yesterday_play_vv = yesterday_data[mix_id]["play_vv"]
# 计算播放量变化 # 计算播放量变化
play_vv_change = current_play_vv - yesterday_play_vv play_vv_change = current_play_vv - yesterday_play_vv
if yesterday_play_vv > 0: if yesterday_play_vv > 0:
play_vv_change_rate = round((play_vv_change / yesterday_play_vv) * 100, 2) play_vv_change_rate = round((play_vv_change / yesterday_play_vv) * 100, 2)
# 创建包含增长数据的视频项 # 创建包含增长数据的视频项
video_with_growth = { video_with_growth = {
"video": video, "video": video,
"play_vv_change": play_vv_change, "play_vv_change": play_vv_change,
"play_vv_change_rate": play_vv_change_rate, "play_vv_change_rate": play_vv_change_rate,
"is_new": is_new, "is_new": is_new,
"yesterday_data": yesterday_data.get(mix_name, {}) "yesterday_data": yesterday_data.get(mix_id, {})
} }
videos_with_growth.append(video_with_growth) videos_with_growth.append(video_with_growth)
@ -391,7 +393,7 @@ class DouyinAutoScheduler:
"batch_id": management_data.get("batch_id", "") if management_data else "", "batch_id": management_data.get("batch_id", "") if management_data else "",
"batch_time": management_data.get("batch_time") if management_data else None, "batch_time": management_data.get("batch_time") if management_data else None,
"item_sequence": management_data.get("item_sequence", 0) if management_data else 0, "item_sequence": management_data.get("item_sequence", 0) if management_data else 0,
"mix_id": management_data.get("mix_id", "") if management_data else "", "mix_id": video.get("mix_id", ""), # 直接从原始数据获取mix_id
"playcount": management_data.get("playcount", "") if management_data else "", "playcount": management_data.get("playcount", "") if management_data else "",
"request_id": management_data.get("request_id", "") if management_data else "", "request_id": management_data.get("request_id", "") if management_data else "",
"cover_image_url_original": management_data.get("cover_image_url_original", "") if management_data else "", "cover_image_url_original": management_data.get("cover_image_url_original", "") if management_data else "",