diff --git a/backend/Timer_worker.py b/backend/Timer_worker.py index 4fd819c..70f11aa 100644 --- a/backend/Timer_worker.py +++ b/backend/Timer_worker.py @@ -192,24 +192,25 @@ class DouyinAutoScheduler: today_videos_raw = list(douyin_collection.find({"batch_time": latest_batch_time}).sort("play_vv", -1)) logging.info(f"📊 最新批次数据数量: {len(today_videos_raw)}") - # 按短剧名称去重,每个短剧只保留播放量最高的一条 - # 🚫 过滤掉空的或无效的mix_name和播放量为0的记录 + # 按短剧ID去重,每个短剧只保留播放量最高的一条 + # 🚫 过滤掉空的或无效的mix_id和播放量为0的记录 unique_videos = {} for video in today_videos_raw: + mix_id = video.get("mix_id", "").strip() mix_name = video.get("mix_name", "").strip() play_vv = video.get("play_vv", 0) - - # 过滤掉空的或无效的mix_name - if not mix_name or mix_name == "" or mix_name.lower() == "null": + + # 过滤掉空的或无效的mix_id + if not mix_id or mix_id == "" or mix_id.lower() == "null": continue - + # 过滤掉播放量为0或无效的记录 if play_vv <= 0: continue - - if mix_name not in unique_videos or play_vv > unique_videos[mix_name].get("play_vv", 0): - unique_videos[mix_name] = video - + + if mix_id not in unique_videos or play_vv > unique_videos[mix_id].get("play_vv", 0): + unique_videos[mix_id] = video + today_videos = list(unique_videos.values()) logging.info(f"📊 今日数据去重后:{len(today_videos)} 个独特短剧(原始数据:{len(today_videos_raw)} 条)") @@ -236,27 +237,28 @@ class DouyinAutoScheduler: "batch_time": yesterday_batch_time }).sort("play_vv", -1)) - # 按短剧名称去重,每个短剧只保留播放量最高的一条 - # 🚫 过滤掉空的或无效的mix_name和播放量为0的记录 + # 按短剧ID去重,每个短剧只保留播放量最高的一条 + # 🚫 过滤掉空的或无效的mix_id和播放量为0的记录 unique_yesterday_videos = {} for video in yesterday_videos_raw: + mix_id = video.get("mix_id", "").strip() mix_name = video.get("mix_name", "").strip() play_vv = video.get("play_vv", 0) - - # 过滤掉空的或无效的mix_name - if not mix_name or mix_name == "" or mix_name.lower() == "null": + + # 过滤掉空的或无效的mix_id + if not mix_id or mix_id == "" or mix_id.lower() == "null": continue - + # 过滤掉播放量为0或无效的记录 if play_vv <= 0: continue - - if mix_name not in unique_yesterday_videos or play_vv > unique_yesterday_videos[mix_name].get("play_vv", 0): - unique_yesterday_videos[mix_name] = video - - # 将昨天的数据转换为字典,以短剧名称为键 - for mix_name, video in unique_yesterday_videos.items(): - yesterday_data[mix_name] = { + + if mix_id not in unique_yesterday_videos or play_vv > unique_yesterday_videos[mix_id].get("play_vv", 0): + unique_yesterday_videos[mix_id] = video + + # 将昨天的数据转换为字典,以短剧ID为键 + for mix_id, video in unique_yesterday_videos.items(): + yesterday_data[mix_id] = { "rank": 0, # 原始数据没有排名,设为0 "play_vv": video.get("play_vv", 0), "video_id": str(video.get("_id", "")) @@ -278,23 +280,23 @@ class DouyinAutoScheduler: play_vv_change_rate = 0 is_new = True - mix_name = video.get("mix_name", "") - if mix_name in yesterday_data: + mix_id = video.get("mix_id", "") + if mix_id in yesterday_data: is_new = False - yesterday_play_vv = yesterday_data[mix_name]["play_vv"] - + yesterday_play_vv = yesterday_data[mix_id]["play_vv"] + # 计算播放量变化 play_vv_change = current_play_vv - yesterday_play_vv if yesterday_play_vv > 0: play_vv_change_rate = round((play_vv_change / yesterday_play_vv) * 100, 2) - + # 创建包含增长数据的视频项 video_with_growth = { "video": video, "play_vv_change": play_vv_change, "play_vv_change_rate": play_vv_change_rate, "is_new": is_new, - "yesterday_data": yesterday_data.get(mix_name, {}) + "yesterday_data": yesterday_data.get(mix_id, {}) } videos_with_growth.append(video_with_growth) @@ -391,7 +393,7 @@ class DouyinAutoScheduler: "batch_id": management_data.get("batch_id", "") if management_data else "", "batch_time": management_data.get("batch_time") if management_data else None, "item_sequence": management_data.get("item_sequence", 0) if management_data else 0, - "mix_id": management_data.get("mix_id", "") if management_data else "", + "mix_id": video.get("mix_id", ""), # 直接从原始数据获取mix_id "playcount": management_data.get("playcount", "") if management_data else "", "request_id": management_data.get("request_id", "") if management_data else "", "cover_image_url_original": management_data.get("cover_image_url_original", "") if management_data else "",