优化了实时保存的功能
This commit is contained in:
parent
9295e77cf1
commit
149ea00f1e
@@ -179,24 +179,44 @@ class DouyinAutoScheduler:
|
|||||||
|
|
||||||
logging.info(f"📊 今日数据去重后:{len(today_videos)} 个独特短剧(原始数据:{len(today_videos_raw)} 条)")
|
logging.info(f"📊 今日数据去重后:{len(today_videos)} 个独特短剧(原始数据:{len(today_videos_raw)} 条)")
|
||||||
|
|
||||||
# 获取昨天的榜单数据(如果存在),取最新的计算结果
|
# 从Ranking_storage_list中获取昨天最后一次抓取的数据
|
||||||
yesterday_ranking = rankings_collection.find_one({
|
yesterday_start = datetime.combine(yesterday, datetime.min.time())
|
||||||
"date": yesterday_str,
|
yesterday_end = datetime.combine(yesterday, datetime.max.time())
|
||||||
"type": "comprehensive"
|
|
||||||
}, sort=[("calculation_sequence", -1)])
|
# 获取昨天的最后一次抓取数据(按batch_time排序取最新的)
|
||||||
|
yesterday_latest_batch = douyin_collection.find_one({
|
||||||
|
"batch_time": {
|
||||||
|
"$gte": yesterday_start,
|
||||||
|
"$lte": yesterday_end
|
||||||
|
}
|
||||||
|
}, sort=[("batch_time", -1)])
|
||||||
|
|
||||||
yesterday_data = {}
|
yesterday_data = {}
|
||||||
if yesterday_ranking and "data" in yesterday_ranking:
|
if yesterday_latest_batch:
|
||||||
|
yesterday_batch_time = yesterday_latest_batch.get("batch_time")
|
||||||
|
logging.info(f"📊 找到昨天最后一次抓取时间: {yesterday_batch_time}")
|
||||||
|
|
||||||
|
# 获取昨天最后一次抓取的所有数据
|
||||||
|
yesterday_videos_raw = list(douyin_collection.find({
|
||||||
|
"batch_time": yesterday_batch_time
|
||||||
|
}).sort("play_vv", -1))
|
||||||
|
|
||||||
|
# 按短剧名称去重,每个短剧只保留播放量最高的一条
|
||||||
|
unique_yesterday_videos = {}
|
||||||
|
for video in yesterday_videos_raw:
|
||||||
|
mix_name = video.get("mix_name", "")
|
||||||
|
if mix_name and (mix_name not in unique_yesterday_videos or video.get("play_vv", 0) > unique_yesterday_videos[mix_name].get("play_vv", 0)):
|
||||||
|
unique_yesterday_videos[mix_name] = video
|
||||||
|
|
||||||
# 将昨天的数据转换为字典,以短剧名称为键
|
# 将昨天的数据转换为字典,以短剧名称为键
|
||||||
for item in yesterday_ranking["data"]:
|
for mix_name, video in unique_yesterday_videos.items():
|
||||||
title = item.get("title", "")
|
yesterday_data[mix_name] = {
|
||||||
if title:
|
"rank": 0, # 原始数据没有排名,设为0
|
||||||
yesterday_data[title] = {
|
"play_vv": video.get("play_vv", 0),
|
||||||
"rank": item.get("rank", 0),
|
"video_id": str(video.get("_id", ""))
|
||||||
"play_vv": item.get("play_vv", 0),
|
|
||||||
"video_id": item.get("video_id", "")
|
|
||||||
}
|
}
|
||||||
logging.info(f"📊 找到昨天的榜单数据,共 {len(yesterday_data)} 个短剧")
|
|
||||||
|
logging.info(f"📊 找到昨天的原始数据,共 {len(yesterday_data)} 个短剧(原始数据:{len(yesterday_videos_raw)} 条)")
|
||||||
else:
|
else:
|
||||||
logging.info("📊 未找到昨天的原始数据,将作为首次生成")
|
logging.info("📊 未找到昨天的原始数据,将作为首次生成")
|
||||||
|
|
||||||
|
|||||||
@@ -3,9 +3,9 @@ import importlib
|
|||||||
|
|
||||||
# 数据库配置
|
# 数据库配置
|
||||||
MONGO_URI = "mongodb://localhost:27017"
|
MONGO_URI = "mongodb://localhost:27017"
|
||||||
MONGO_DB_NAME = "kemeng_media"
|
# MONGO_DB_NAME = "Rankings"
|
||||||
# MONGO_URI = "mongodb://mongouser:Jdei2243afN@172.16.0.6:27017,172.16.0.4:27017/test?replicaSet=cmgo-r6qkaern_0&authSource=admin"
|
# MONGO_URI = "mongodb://mongouser:Jdei2243afN@172.16.0.6:27017,172.16.0.4:27017/test?replicaSet=cmgo-r6qkaern_0&authSource=admin"
|
||||||
# MONGO_DB_NAME = "kemeng_media"
|
MONGO_DB_NAME = "kemeng_media"
|
||||||
|
|
||||||
# 应用配置
|
# 应用配置
|
||||||
APP_ENV = os.getenv('APP_ENV', 'development')
|
APP_ENV = os.getenv('APP_ENV', 'development')
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user