优化了实时保存的功能

This commit is contained in:
Qyir 2025-10-28 17:46:45 +08:00
parent 9295e77cf1
commit 149ea00f1e
3 changed files with 567 additions and 201 deletions

View File

@ -179,24 +179,44 @@ class DouyinAutoScheduler:
logging.info(f"📊 今日数据去重后:{len(today_videos)} 个独特短剧(原始数据:{len(today_videos_raw)} 条)")
# 获取昨天的榜单数据(如果存在),取最新的计算结果
yesterday_ranking = rankings_collection.find_one({
"date": yesterday_str,
"type": "comprehensive"
}, sort=[("calculation_sequence", -1)])
# 从Ranking_storage_list中获取昨天最后一次抓取的数据
yesterday_start = datetime.combine(yesterday, datetime.min.time())
yesterday_end = datetime.combine(yesterday, datetime.max.time())
# 获取昨天最后一次抓取的数据(按 batch_time 降序排序,取最新的一条)
yesterday_latest_batch = douyin_collection.find_one({
"batch_time": {
"$gte": yesterday_start,
"$lte": yesterday_end
}
}, sort=[("batch_time", -1)])
yesterday_data = {}
if yesterday_ranking and "data" in yesterday_ranking:
if yesterday_latest_batch:
yesterday_batch_time = yesterday_latest_batch.get("batch_time")
logging.info(f"📊 找到昨天最后一次抓取时间: {yesterday_batch_time}")
# 获取昨天最后一次抓取的所有数据
yesterday_videos_raw = list(douyin_collection.find({
"batch_time": yesterday_batch_time
}).sort("play_vv", -1))
# 按短剧名称去重,每个短剧只保留播放量最高的一条
unique_yesterday_videos = {}
for video in yesterday_videos_raw:
mix_name = video.get("mix_name", "")
if mix_name and (mix_name not in unique_yesterday_videos or video.get("play_vv", 0) > unique_yesterday_videos[mix_name].get("play_vv", 0)):
unique_yesterday_videos[mix_name] = video
# 将昨天的数据转换为字典,以短剧名称为键
for item in yesterday_ranking["data"]:
title = item.get("title", "")
if title:
yesterday_data[title] = {
"rank": item.get("rank", 0),
"play_vv": item.get("play_vv", 0),
"video_id": item.get("video_id", "")
for mix_name, video in unique_yesterday_videos.items():
yesterday_data[mix_name] = {
"rank": 0, # 原始数据没有排名设为0
"play_vv": video.get("play_vv", 0),
"video_id": str(video.get("_id", ""))
}
logging.info(f"📊 找到昨天的榜单数据,共 {len(yesterday_data)} 个短剧")
logging.info(f"📊 找到昨天的原始数据,共 {len(yesterday_data)} 个短剧(原始数据:{len(yesterday_videos_raw)} 条)")
else:
logging.info("📊 未找到昨天的原始数据,将作为首次生成")

View File

@ -3,9 +3,9 @@ import importlib
# 数据库配置
MONGO_URI = "mongodb://localhost:27017"
MONGO_DB_NAME = "kemeng_media"
# MONGO_DB_NAME = "Rankings"
# MONGO_URI = "mongodb://mongouser:Jdei2243afN@172.16.0.6:27017,172.16.0.4:27017/test?replicaSet=cmgo-r6qkaern_0&authSource=admin"
# MONGO_DB_NAME = "kemeng_media"
MONGO_DB_NAME = "kemeng_media"
# 应用配置
APP_ENV = os.getenv('APP_ENV', 'development')

File diff suppressed because it is too large Load Diff