Optimized the real-time saving feature

Qyir 2025-10-28 17:46:45 +08:00
parent 9295e77cf1
commit 149ea00f1e
3 changed files with 567 additions and 201 deletions


@@ -179,24 +179,44 @@ class DouyinAutoScheduler:
         logging.info(f"📊 Today's data after dedup: {len(today_videos)} unique short dramas (raw records: {len(today_videos_raw)})")

-        # Fetch yesterday's ranking data (if it exists), taking the latest calculation result
-        yesterday_ranking = rankings_collection.find_one({
-            "date": yesterday_str,
-            "type": "comprehensive"
-        }, sort=[("calculation_sequence", -1)])
+        # Fetch yesterday's last crawled data from Ranking_storage_list
+        yesterday_start = datetime.combine(yesterday, datetime.min.time())
+        yesterday_end = datetime.combine(yesterday, datetime.max.time())
+
+        # Get yesterday's last crawl batch: sort by batch_time and take the newest
+        yesterday_latest_batch = douyin_collection.find_one({
+            "batch_time": {
+                "$gte": yesterday_start,
+                "$lte": yesterday_end
+            }
+        }, sort=[("batch_time", -1)])

         yesterday_data = {}
-        if yesterday_ranking and "data" in yesterday_ranking:
+        if yesterday_latest_batch:
+            yesterday_batch_time = yesterday_latest_batch.get("batch_time")
+            logging.info(f"📊 Found yesterday's last crawl time: {yesterday_batch_time}")
+
+            # Fetch all records from yesterday's last crawl
+            yesterday_videos_raw = list(douyin_collection.find({
+                "batch_time": yesterday_batch_time
+            }).sort("play_vv", -1))
+
+            # Deduplicate by short-drama name, keeping only the highest-play record per drama
+            unique_yesterday_videos = {}
+            for video in yesterday_videos_raw:
+                mix_name = video.get("mix_name", "")
+                if mix_name and (mix_name not in unique_yesterday_videos or video.get("play_vv", 0) > unique_yesterday_videos[mix_name].get("play_vv", 0)):
+                    unique_yesterday_videos[mix_name] = video
+
             # Convert yesterday's data into a dict keyed by short-drama name
-            for item in yesterday_ranking["data"]:
-                title = item.get("title", "")
-                if title:
-                    yesterday_data[title] = {
-                        "rank": item.get("rank", 0),
-                        "play_vv": item.get("play_vv", 0),
-                        "video_id": item.get("video_id", "")
-                    }
-            logging.info(f"📊 Found yesterday's ranking data: {len(yesterday_data)} short dramas")
+            for mix_name, video in unique_yesterday_videos.items():
+                yesterday_data[mix_name] = {
+                    "rank": 0,  # raw data has no rank; default to 0
+                    "play_vv": video.get("play_vv", 0),
+                    "video_id": str(video.get("_id", ""))
+                }
+            logging.info(f"📊 Found yesterday's raw data: {len(yesterday_data)} short dramas (raw records: {len(yesterday_videos_raw)})")
         else:
             logging.info("📊 No raw data found for yesterday; this will be treated as the first generation")


@@ -3,9 +3,9 @@ import importlib

 # Database configuration
 MONGO_URI = "mongodb://localhost:27017"
-MONGO_DB_NAME = "kemeng_media"
+# MONGO_DB_NAME = "Rankings"
 # MONGO_URI = "mongodb://mongouser:Jdei2243afN@172.16.0.6:27017,172.16.0.4:27017/test?replicaSet=cmgo-r6qkaern_0&authSource=admin"
-# MONGO_DB_NAME = "kemeng_media"
+MONGO_DB_NAME = "kemeng_media"

 # Application configuration
 APP_ENV = os.getenv('APP_ENV', 'development')
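The config change above only swaps which MONGO_DB_NAME assignment is active and leaves a commented-out "Rankings" alternative. For reference, a minimal sketch of how these two settings are typically consumed with pymongo; the import name config and this usage pattern are assumptions, not shown in the diff.

# Hypothetical usage sketch; not part of this commit.
from pymongo import MongoClient

import config  # the settings module shown in the diff above (import name assumed)

client = MongoClient(config.MONGO_URI)
db = client[config.MONGO_DB_NAME]        # "kemeng_media" with the local URI above
print(db.list_collection_names())        # quick connectivity check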

File diff suppressed because it is too large