diff --git a/backend/Timer_worker.py b/backend/Timer_worker.py index 6e1fa88..dd4235c 100644 --- a/backend/Timer_worker.py +++ b/backend/Timer_worker.py @@ -435,11 +435,17 @@ class DouyinAutoScheduler: "realtime_saved": management_data.get("realtime_saved", True) if management_data else True, "created_at": management_data.get("created_at") if management_data else None, "last_updated": management_data.get("last_updated") if management_data else None, + # 🎬 评论总结字段 + "comments_summary": management_data.get("comments_summary", "") if management_data else "", + # 🔑 分类字段:区分今天数据和历史数据 # - 如果今天有数据:从今天的数据获取所有字段 # - 如果今天没有数据:只从历史记录获取分类字段和锁定状态,其他字段为空 - "Manufacturing_Field": management_data.get("Manufacturing_Field", "") if management_data else "", - "Copyright_field": management_data.get("Copyright_field", "") if management_data else "", + # 注意:使用 .get() 的第二个参数确保即使字段不存在也会返回空字符串 + "Manufacturing_Field": (management_data.get("Manufacturing_Field", "") if management_data else "") or "", + "Copyright_field": (management_data.get("Copyright_field", "") if management_data else "") or "", + "classification_type": (management_data.get("classification_type", "") if management_data else "") or "", # 新增:类型/元素(确保字段存在) + "release_date": (management_data.get("release_date", "") if management_data else "") or "", # 新增:上线日期(确保字段存在) "Novel_IDs": ( management_data.get("Novel_IDs", []) if management_data else (classification_data.get("Novel_IDs", []) if classification_data else []) diff --git a/backend/config.py b/backend/config.py index 1ba92a7..9db8d58 100644 --- a/backend/config.py +++ b/backend/config.py @@ -52,6 +52,18 @@ API_CONFIG = { 'OSS_HOST': TOS_CONFIG['self_domain'] } +# DeepSeek API 配置(用于评论总结功能) +DEEPSEEK_CONFIG = { + 'api_key': 'sk-7b47e34bdcb549e6b00115a99b9b5c4c', # DeepSeek API密钥 + 'api_base': 'https://api.deepseek.com/v1', # API基础URL + 'model': 'deepseek-chat', # 使用的模型 + 'max_retries': 3, # 最大重试次数 + 'retry_delays': [2, 5, 10], # 重试延迟(秒) + 'batch_size': 800, # 每批评论数量 + 'max_tokens': 15000, # 每批最大token数 + 'summary_max_length': 200 # 最终总结最大字数 +} + def apply_timer_environment(): """应用定时器环境变量配置""" for key, value in TIMER_ENV_CONFIG.items(): diff --git a/backend/handlers/Rankings/rank_data_scraper.py b/backend/handlers/Rankings/rank_data_scraper.py index 6cfcde8..cda7dc7 100644 --- a/backend/handlers/Rankings/rank_data_scraper.py +++ b/backend/handlers/Rankings/rank_data_scraper.py @@ -54,6 +54,236 @@ from handlers.Rankings.tos_client import oss_client import config +# ==================== 评论总结器类 ==================== +class CommentsSummarizer: + """评论总结器 - 支持大量评论的分批处理和汇总""" + + def __init__(self): + self.api_key = config.DEEPSEEK_CONFIG['api_key'] + self.api_base = config.DEEPSEEK_CONFIG['api_base'] + self.model = config.DEEPSEEK_CONFIG['model'] + self.max_retries = config.DEEPSEEK_CONFIG['max_retries'] + self.retry_delays = config.DEEPSEEK_CONFIG['retry_delays'] + self.batch_size = config.DEEPSEEK_CONFIG['batch_size'] + self.max_tokens = config.DEEPSEEK_CONFIG['max_tokens'] + self.summary_max_length = config.DEEPSEEK_CONFIG['summary_max_length'] + self.logger = logging.getLogger(__name__) + + def _call_deepseek_api(self, messages: List[Dict], retry_count: int = 0) -> Optional[str]: + """调用 DeepSeek API""" + try: + headers = { + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {self.api_key}' + } + + data = { + 'model': self.model, + 'messages': messages, + 'temperature': 0.7, + 'max_tokens': 2000 + } + + response = requests.post( + f'{self.api_base}/chat/completions', + headers=headers, + 
json=data, + timeout=60 + ) + + if response.status_code == 200: + result = response.json() + content = result['choices'][0]['message']['content'] + self.logger.info(f"✅ DeepSeek API 调用成功") + return content.strip() + else: + self.logger.error(f"❌ DeepSeek API 返回错误: {response.status_code} - {response.text}") + + if retry_count < self.max_retries: + delay = self.retry_delays[retry_count] + self.logger.info(f"⏳ {delay}秒后进行第 {retry_count + 1} 次重试...") + time.sleep(delay) + return self._call_deepseek_api(messages, retry_count + 1) + + return None + + except Exception as e: + self.logger.error(f"❌ DeepSeek API 调用异常: {e}") + + if retry_count < self.max_retries: + delay = self.retry_delays[retry_count] + self.logger.info(f"⏳ {delay}秒后进行第 {retry_count + 1} 次重试...") + time.sleep(delay) + return self._call_deepseek_api(messages, retry_count + 1) + + return None + + def _estimate_comment_length(self, comment: str) -> int: + """估算评论的字符长度""" + return len(comment) + + def _split_comments_into_batches(self, comments: List[str]) -> List[List[str]]: + """将评论智能分批,根据评论长度动态调整每批数量""" + if not comments: + return [] + + batches = [] + current_batch = [] + current_length = 0 + + avg_length = sum(self._estimate_comment_length(c) for c in comments[:100]) / min(100, len(comments)) + + if avg_length < 50: + batch_size = 1000 + elif avg_length < 200: + batch_size = 600 + else: + batch_size = 400 + + self.logger.info(f"📊 评论平均长度: {avg_length:.0f} 字,批次大小: {batch_size}") + + for comment in comments: + comment_length = self._estimate_comment_length(comment) + + if len(current_batch) >= batch_size or (current_length + comment_length > self.max_tokens * 3): + if current_batch: + batches.append(current_batch) + current_batch = [] + current_length = 0 + + current_batch.append(comment) + current_length += comment_length + + if current_batch: + batches.append(current_batch) + + return batches + + def _generate_analysis_prompt(self, content: str, max_length: int = 200) -> str: + """生成通用的分析提示词""" + return f"""你是一位资深的用户反馈分析师,擅长从海量评论中提炼真实观点,用客观自然的语言准确传达用户的声音和整体评价趋势。 + +请基于以下内容,写一份真实客观的观众反馈分析: + +{content} + +分析要求: +1. 识别高频话题和关键观点(如剧情、演技、制作、节奏等维度) +2. 准确判断整体情感倾向,如实反映好评或差评的比例和强度 +3. 用自然的语言描述观众的真实感受,避免模板化和官方措辞 +4. 明确指出观众最在意的亮点和槽点 +5. 负面评价要委婉表达,使用"有待提升"、"存在改进空间"、"部分观众认为"等温和措辞 +6. 字数控制在{max_length}字以内,语言简洁有力 + +输出格式要求(严格遵守): +必须使用【】符号标注每个部分,格式示例: + +【核心观点】用户普遍识别出AI制作属性,对技术应用表示惊叹,同时对作品质量提出了一些看法 + +【用户关注焦点】 + 优点:AI人物颜值高、特效精美、制作成本低 + 待提升:部分观众认为角色表情和动作的自然度有待改进,剧情逻辑存在优化空间 + +【情感分布】观众意见较为分散,约65%的观众提出了改进建议 + +【核心看法】技术创新获得认可,制作细节方面仍有提升空间 + +格式规则: +- 使用【】符号标注每个分析维度的标题(标题可以自由命名,不限于示例) +- 每个【】标题后直接跟内容,不要换行 +- 每个部分结束后换行,再开始下一个【】部分 +- 可以根据实际评论内容灵活组织分析维度 +- 不要添加其他前缀或后缀 +- 严格按照【标题】内容的格式输出""" + + def _summarize_batch(self, comments: List[str], batch_num: int, total_batches: int) -> Optional[str]: + """总结一批评论""" + self.logger.info(f"📝 正在总结第 {batch_num}/{total_batches} 批评论(共 {len(comments)} 条)...") + + comments_text = "\n".join([f"{i+1}. 
{comment}" for i, comment in enumerate(comments)]) + content = f"用户评论:\n{comments_text}" + + prompt = self._generate_analysis_prompt(content, max_length=200) + messages = [{"role": "user", "content": prompt}] + + return self._call_deepseek_api(messages) + + def _merge_summaries(self, batch_summaries: List[str]) -> Optional[str]: + """合并所有批次总结为最终总结""" + self.logger.info(f"🔄 正在合并 {len(batch_summaries)} 个批次总结...") + + if len(batch_summaries) == 1: + return batch_summaries[0] + + summaries_text = "\n\n".join([f"批次{i+1}总结:\n{summary}" for i, summary in enumerate(batch_summaries)]) + content = f"多个批次的评论总结:\n\n{summaries_text}" + + prompt = self._generate_analysis_prompt(content, max_length=self.summary_max_length) + messages = [{"role": "user", "content": prompt}] + + return self._call_deepseek_api(messages) + + def summarize_comments(self, comments: List[str], drama_name: str = "") -> Optional[str]: + """总结评论(主入口)""" + if not comments: + self.logger.warning("⚠️ 评论列表为空,无法总结") + return None + + self.logger.info(f"🚀 开始总结评论:{drama_name}(共 {len(comments)} 条评论)") + + # 过滤空评论,处理字符串和字典两种格式 + valid_comments = [] + for c in comments: + if isinstance(c, dict): + text = c.get('text', '').strip() + if text: + valid_comments.append(text) + elif isinstance(c, str): + text = c.strip() + if text: + valid_comments.append(text) + + if not valid_comments: + self.logger.warning("⚠️ 没有有效评论,无法总结") + return None + + self.logger.info(f"📊 有效评论数量: {len(valid_comments)}") + + # 分批处理 + batches = self._split_comments_into_batches(valid_comments) + self.logger.info(f"📦 评论已分为 {len(batches)} 批") + + # 逐批总结 + batch_summaries = [] + failed_batches = [] + + for i, batch in enumerate(batches, 1): + summary = self._summarize_batch(batch, i, len(batches)) + if summary: + batch_summaries.append(summary) + else: + self.logger.error(f"❌ 第 {i} 批总结失败") + failed_batches.append(i) + + if not batch_summaries: + self.logger.error(f"❌ 所有批次总结都失败了") + return None + + if failed_batches: + self.logger.warning(f"⚠️ 以下批次总结失败: {failed_batches}") + + # 合并批次总结 + final_summary = self._merge_summaries(batch_summaries) + + if final_summary: + self.logger.info(f"✅ 评论总结完成:{drama_name}") + self.logger.info(f"📝 总结长度: {len(final_summary)} 字") + return final_summary + else: + self.logger.error(f"❌ 最终总结合并失败:{drama_name}") + return None + + # 配置日志 # 确保logs目录存在 script_dir = os.path.dirname(os.path.abspath(__file__)) @@ -729,6 +959,22 @@ class DouyinPlayVVScraper: self._cleanup_chrome_cache_smart() self._setup_mongodb() self._load_image_cache() + + # 初始化评论总结器 + try: + # 检查配置是否存在 + if not hasattr(config, 'DEEPSEEK_CONFIG'): + logging.warning('⚠️ config.py 中未找到 DEEPSEEK_CONFIG 配置,将跳过评论总结功能') + self.comments_summarizer = None + else: + self.comments_summarizer = CommentsSummarizer() + logging.info('✅ 评论总结器初始化成功') + logging.info(f'📝 DeepSeek API 配置: model={config.DEEPSEEK_CONFIG.get("model")}, base={config.DEEPSEEK_CONFIG.get("api_base")}') + except Exception as e: + logging.warning(f'⚠️ 评论总结器初始化失败: {e},将跳过评论总结功能') + import traceback + logging.warning(f'详细错误: {traceback.format_exc()}') + self.comments_summarizer = None def _setup_mongodb(self): """设置MongoDB连接""" @@ -1580,10 +1826,38 @@ class DouyinPlayVVScraper: self.update_video_details_incrementally( document_id, episode_video_ids, mix_name, mix_id ) + + # 🎬 生成评论总结(在所有数据收集完成后) + self.generate_comments_summary(document_id, mix_name) except Exception as e: logging.error(f'[实时保存] 获取详细内容失败: {item_data.get("mix_name", "未知")} - {e}') logging.info(f'[实时保存] 所有数据处理完成,共 {len(self.saved_items)} 个合集') + + # 🔄 同步字段到 
Ranking_storage(包括评论总结) + try: + logging.info('[字段同步] 🔄 开始同步字段到 Ranking_storage') + + # 导入同步函数 + import sys + import os + sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'routers')) + from rank_api_routes import sync_ranking_storage_fields + + # 获取今天的日期 + today_str = datetime.now().strftime('%Y-%m-%d') + + # 执行同步(强制更新以确保评论总结被同步) + sync_result = sync_ranking_storage_fields(target_date=today_str, force_update=True) + + if sync_result.get("success", False): + logging.info(f'[字段同步] ✅ 同步成功: {sync_result.get("message", "")}') + else: + logging.info(f'[字段同步] ⚠️ 同步完成: {sync_result.get("message", "")}') + + except Exception as e: + logging.error(f'[字段同步] ❌ 同步失败: {e}') + # 同步失败不影响主流程 else: # 传统批量保存模式 self.save_to_mongodb() @@ -1918,6 +2192,30 @@ class DouyinPlayVVScraper: } for i in range(item.get('updated_to_episode', 0)) ] + # 生成评论总结 + comments_summary = '' + if self.comments_summarizer and episode_details: + try: + # 收集所有集的评论 + all_comments = [] + for episode in episode_details: + comments = episode.get('comments', []) + if comments: + all_comments.extend(comments) + + if all_comments: + logging.info(f'🎬 开始为短剧 {mix_name} 生成评论总结(共 {len(all_comments)} 条评论)') + comments_summary = self.comments_summarizer.summarize_comments(all_comments, mix_name) + if comments_summary: + logging.info(f'✅ 短剧 {mix_name} 评论总结生成成功') + else: + logging.warning(f'⚠️ 短剧 {mix_name} 评论总结生成失败') + else: + logging.info(f'ℹ️ 短剧 {mix_name} 没有评论,跳过总结') + except Exception as e: + logging.error(f'❌ 生成评论总结时出错: {e}') + comments_summary = '' + # 保留用户要求的7个字段 + cover_image_url作为合集封面图片完整链接 + 新增字段 doc = { 'batch_time': batch_time, @@ -1940,6 +2238,7 @@ class DouyinPlayVVScraper: 'episode_details': episode_details, # 每集的详细信息 'Manufacturing_Field': item.get('Manufacturing_Field', ''), # 承制信息 'Copyright_field': item.get('Copyright_field', ''), # 版权信息 + 'comments_summary': comments_summary, # AI生成的评论总结 } documents.append(doc) @@ -2048,6 +2347,8 @@ class DouyinPlayVVScraper: 'series_author': item_data.get('series_author', ''), 'Manufacturing_Field': item_data.get('Manufacturing_Field', ''), 'Copyright_field': item_data.get('Copyright_field', ''), + 'classification_type': '', # 新增:类型/元素(锁定字段,初始为空) + 'release_date': '', # 新增:上线日期(锁定字段,初始为空) 'desc': item_data.get('desc', ''), 'updated_to_episode': current_episode_count, 'episode_video_ids': [], # 稍后更新 @@ -2113,6 +2414,8 @@ class DouyinPlayVVScraper: existing_field_lock_status = existing_doc.get('field_lock_status', {}) existing_manufacturing = existing_doc.get('Manufacturing_Field', '') existing_copyright = existing_doc.get('Copyright_field', '') + existing_classification = existing_doc.get('classification_type', '') # 新增 + existing_release_date = existing_doc.get('release_date', '') # 新增 existing_novel_ids = existing_doc.get('Novel_IDs', []) existing_anime_ids = existing_doc.get('Anime_IDs', []) existing_drama_ids = existing_doc.get('Drama_IDs', []) @@ -2151,6 +2454,34 @@ class DouyinPlayVVScraper: logging.info(f'[锁定字段] 更新Copyright_field: {mix_name} -> "{new_copyright}"') # 如果现有为空且新数据也为空,则不设置该字段(保持为空) + # classification_type 保护逻辑(新增) + existing_classification = existing_doc.get('classification_type') + new_classification = target_doc.get('classification_type', '') + if existing_field_lock_status.get('classification_type_locked', False): + logging.info(f'[锁定字段] 跳过classification_type更新: {mix_name} -> 字段已被用户锁定') + elif existing_classification: + logging.info(f'[锁定字段] 跳过classification_type更新: {mix_name} -> 保持现有值 "{existing_classification}"') + else: + 
set_fields['classification_type'] = new_classification or '' + if new_classification: + logging.info(f'[锁定字段] 更新classification_type: {mix_name} -> "{new_classification}"') + else: + logging.info(f'[锁定字段] 初始化classification_type: {mix_name} -> 空值') + + # release_date 保护逻辑(新增) + existing_release_date = existing_doc.get('release_date') + new_release_date = target_doc.get('release_date', '') + if existing_field_lock_status.get('release_date_locked', False): + logging.info(f'[锁定字段] 跳过release_date更新: {mix_name} -> 字段已被用户锁定') + elif existing_release_date: + logging.info(f'[锁定字段] 跳过release_date更新: {mix_name} -> 保持现有值 "{existing_release_date}"') + else: + set_fields['release_date'] = new_release_date or '' + if new_release_date: + logging.info(f'[锁定字段] 更新release_date: {mix_name} -> "{new_release_date}"') + else: + logging.info(f'[锁定字段] 初始化release_date: {mix_name} -> 空值') + # Novel_IDs 保护逻辑 if existing_field_lock_status.get('Novel_IDs_locked', False): # 字段被用户锁定,跳过更新 @@ -2194,10 +2525,12 @@ class DouyinPlayVVScraper: # 新记录,只设置非分类字段 set_fields['Manufacturing_Field'] = target_doc.get('Manufacturing_Field', '') set_fields['Copyright_field'] = target_doc.get('Copyright_field', '') + set_fields['classification_type'] = target_doc.get('classification_type', '') # 新增 + set_fields['release_date'] = target_doc.get('release_date', '') # 新增 # 注意:不设置分类字段 Novel_IDs, Anime_IDs, Drama_IDs # 因为爬虫数据不包含这些用户手动设置的分类信息 # 新记录的分类字段将保持为空,等待用户手动设置 - logging.info(f'[锁定字段] 新记录,设置初始非分类字段: {mix_name}') + logging.info(f'[锁定字段] 新记录,设置初始非分类字段(包含新增的2个锁定字段): {mix_name}') # 使用upsert操作:如果存在则更新,不存在则插入 upsert_result = target_collection.update_one( @@ -2442,6 +2775,68 @@ class DouyinPlayVVScraper: logging.error(f'错误上下文: {error_details["context"]}') return False + def generate_comments_summary(self, document_id, mix_name: str): + """生成评论总结并保存到数据库""" + logging.info(f'[评论总结] 🔍 检查评论总结条件: comments_summarizer={self.comments_summarizer is not None}, document_id={document_id}') + + if not self.comments_summarizer or not document_id: + if not self.comments_summarizer: + logging.warning(f'[评论总结] ⚠️ 评论总结器未初始化,跳过: {mix_name}') + if not document_id: + logging.warning(f'[评论总结] ⚠️ document_id 为空,跳过: {mix_name}') + return + + try: + # 从数据库获取最新的 episode_details + target_collection = self.collection + doc = target_collection.find_one({'_id': document_id}) + logging.info(f'[评论总结] 从数据库查询文档: 找到={doc is not None}') + + if not doc or not doc.get('episode_details'): + logging.warning(f'[评论总结] 未找到文档或episode_details为空: {mix_name}') + return + + # 🔍 检查是否已有评论总结 + existing_summary = doc.get('comments_summary', '') + if existing_summary: + logging.info(f'[评论总结] ⏭️ 短剧 {mix_name} 已有评论总结,跳过生成') + return + + logging.info(f'[评论总结] 🎬 开始为短剧 {mix_name} 生成评论总结') + + # 收集所有集的评论 + all_comments = [] + for episode in doc['episode_details']: + comments = episode.get('comments', []) + if comments: + all_comments.extend(comments) + + if not all_comments: + logging.info(f'[评论总结] ℹ️ 短剧 {mix_name} 没有评论,跳过总结') + return + + logging.info(f'[评论总结] 共收集到 {len(all_comments)} 条评论') + comments_summary = self.comments_summarizer.summarize_comments(all_comments, mix_name) + + if comments_summary: + # 更新评论总结到数据库 + target_collection.update_one( + {'_id': document_id}, + {'$set': { + 'comments_summary': comments_summary, + 'last_updated': datetime.now() + }} + ) + logging.info(f'[评论总结] ✅ 短剧 {mix_name} 评论总结生成并保存成功') + logging.info(f'[评论总结] 📝 总结内容(前100字): {comments_summary[:100]}...') + else: + logging.warning(f'[评论总结] ⚠️ 短剧 {mix_name} 评论总结生成失败') + + except Exception as e: + logging.error(f'[评论总结] ❌ 
生成评论总结时出错: {mix_name} - {e}') + import traceback + logging.error(f'详细错误: {traceback.format_exc()}') + def save_single_item_realtime(self, item_data: dict): """分阶段实时保存合集数据(新版本)""" logging.info(f'[分阶段保存] 开始处理合集: {item_data.get("mix_name", "未知")}') @@ -2500,6 +2895,12 @@ class DouyinPlayVVScraper: logging.warning(f'[字段同步] 同步失败,但不影响数据保存: {mix_name} - {sync_error}') # 同步失败不影响数据保存的成功状态 + logging.info(f'[分阶段保存] ✅ 前四阶段完成,准备生成评论总结: {mix_name}') + + # 🎬 第五阶段:生成评论总结(在所有数据收集完成后) + self.generate_comments_summary(document_id, mix_name) + + logging.info(f'[分阶段保存] ✅ 所有阶段完成: {mix_name}') return True def update_video_details_incrementally(self, document_id, episode_video_ids: list, mix_name: str, mix_id: str = ''): diff --git a/backend/routers/rank_api_routes.py b/backend/routers/rank_api_routes.py index ccb4041..f83e5aa 100644 --- a/backend/routers/rank_api_routes.py +++ b/backend/routers/rank_api_routes.py @@ -196,10 +196,12 @@ def format_mix_item(doc, target_date=None): "request_id": doc.get("request_id", ""), "rank": doc.get("rank", 0), "cover_image_url": doc.get("cover_image_url", ""), - # 新增字段 + # 基础字段 "series_author": doc.get("series_author", ""), "Manufacturing_Field": doc.get("Manufacturing_Field", ""), "Copyright_field": doc.get("Copyright_field", ""), + "classification_type": doc.get("classification_type", ""), # 新增:类型/元素 + "release_date": doc.get("release_date", ""), # 新增:上线日期 "desc": doc.get("desc", ""), "updated_to_episode": doc.get("updated_to_episode", 0), "cover_backup_urls": doc.get("cover_backup_urls", []), @@ -213,6 +215,8 @@ def format_mix_item(doc, target_date=None): "total_comments_formatted": total_comments_formatted, # 播放量变化数据 "timeline_data": doc.get("timeline_data", []), + # 评论总结 + "comments_summary": doc.get("comments_summary", ""), } @@ -1156,6 +1160,14 @@ def update_drama_info(): update_fields['Copyright_field'] = data['Copyright_field'] # 标记版权方字段已被用户锁定 field_lock_updates['field_lock_status.Copyright_field_locked'] = True + if 'classification_type' in data: + update_fields['classification_type'] = data['classification_type'] + # 标记类型/元素字段已被用户锁定 + field_lock_updates['field_lock_status.classification_type_locked'] = True + if 'release_date' in data: + update_fields['release_date'] = data['release_date'] + # 标记上线日期字段已被用户锁定 + field_lock_updates['field_lock_status.release_date_locked'] = True if 'desc' in data: update_fields['desc'] = data['desc'] if 'play_vv' in data: @@ -1167,6 +1179,8 @@ def update_drama_info(): update_fields['cover_backup_urls'] = data['cover_backup_urls'] if 'timeline_data' in data: update_fields['timeline_data'] = data['timeline_data'] + if 'comments_summary' in data: + update_fields['comments_summary'] = data['comments_summary'] # 检查分类字段的锁定状态 if 'Novel_IDs' in data: @@ -1683,6 +1697,8 @@ def sync_ranking_storage_fields(target_date=None, force_update=False, max_retrie has_locked_fields = any([ field_lock_status.get('Manufacturing_Field_locked', False), field_lock_status.get('Copyright_field_locked', False), + field_lock_status.get('classification_type_locked', False), # 新增 + field_lock_status.get('release_date_locked', False), # 新增 field_lock_status.get('Novel_IDs_locked', False), field_lock_status.get('Anime_IDs_locked', False), field_lock_status.get('Drama_IDs_locked', False) @@ -1692,6 +1708,8 @@ def sync_ranking_storage_fields(target_date=None, force_update=False, max_retrie has_user_data = has_locked_fields or any([ data_item.get('Manufacturing_Field'), data_item.get('Copyright_field'), + data_item.get('classification_type'), # 新增 + 
data_item.get('release_date'), # 新增 data_item.get('Novel_IDs'), data_item.get('Anime_IDs'), data_item.get('Drama_IDs') @@ -1735,10 +1753,14 @@ def sync_ranking_storage_fields(target_date=None, force_update=False, max_retrie 'last_updated': data_item.get('last_updated'), 'Manufacturing_Field': data_item.get('Manufacturing_Field'), 'Copyright_field': data_item.get('Copyright_field'), + 'classification_type': data_item.get('classification_type', ''), # 新增:类型/元素 + 'release_date': data_item.get('release_date', ''), # 新增:上线日期 # 新增:内容分类字段 'Novel_IDs': data_item.get('Novel_IDs', []), 'Anime_IDs': data_item.get('Anime_IDs', []), 'Drama_IDs': data_item.get('Drama_IDs', []), + # 评论总结字段 + 'comments_summary': data_item.get('comments_summary', ''), # 计算字段 } @@ -1750,33 +1772,31 @@ def sync_ranking_storage_fields(target_date=None, force_update=False, max_retrie anime_ids_locked = field_lock_status.get('Anime_IDs_locked', False) drama_ids_locked = field_lock_status.get('Drama_IDs_locked', False) - # 检查哪些字段需要更新 + # 检查哪些字段需要更新(检查目标数据是否缺少字段) needs_update = False - for field_name, field_value in fields_to_check.items(): + for field_name, source_field_value in fields_to_check.items(): # 🔒 字段锁定保护:如果字段已锁定,跳过更新 if field_name == 'Manufacturing_Field' and manufacturing_locked: - logging.info(f"[字段锁定] 跳过Manufacturing_Field更新: {mix_name} (已锁定)") continue elif field_name == 'Copyright_field' and copyright_locked: - logging.info(f"[字段锁定] 跳过Copyright_field更新: {mix_name} (已锁定)") continue elif field_name == 'Novel_IDs' and novel_ids_locked: - logging.info(f"[字段锁定] 跳过Novel_IDs更新: {mix_name} (已锁定)") continue elif field_name == 'Anime_IDs' and anime_ids_locked: - logging.info(f"[字段锁定] 跳过Anime_IDs更新: {mix_name} (已锁定)") continue elif field_name == 'Drama_IDs' and drama_ids_locked: - logging.info(f"[字段锁定] 跳过Drama_IDs更新: {mix_name} (已锁定)") continue + # 🔑 关键修复:检查目标数据(data_item)中的字段值,而不是源数据 + current_value = data_item.get(field_name) + # 对于数组字段,检查是否为空数组 if field_name in ['cover_backup_urls', 'episode_video_ids', 'episode_details', 'Novel_IDs', 'Anime_IDs', 'Drama_IDs']: - if force_update or field_value is None or (isinstance(field_value, list) and len(field_value) == 0): + if force_update or current_value is None or (isinstance(current_value, list) and len(current_value) == 0): needs_update = True break - # 对于其他字段,使用原来的条件 - elif force_update or field_value is None or field_value == '' or field_value == 0: + # 对于其他字段,检查目标数据是否缺少或为空 + elif force_update or current_value is None or current_value == '': needs_update = True break @@ -1786,7 +1806,7 @@ def sync_ranking_storage_fields(target_date=None, force_update=False, max_retrie # 从源数据获取字段值并更新data_item item_updated = False - for field_name, current_value in fields_to_check.items(): + for field_name, source_field_value in fields_to_check.items(): # 🔒 字段锁定保护:如果字段已锁定,跳过更新 if field_name == 'Manufacturing_Field' and manufacturing_locked: logging.info(f"[字段锁定] 保护Manufacturing_Field不被覆盖: {mix_name}") @@ -1804,12 +1824,15 @@ def sync_ranking_storage_fields(target_date=None, force_update=False, max_retrie logging.info(f"[字段锁定] 保护Drama_IDs不被覆盖: {mix_name}") continue + # 🔑 关键修复:检查目标数据(data_item)中的字段值 + current_value = data_item.get(field_name) + # 对于数组字段,检查是否为空数组 should_update = False if field_name in ['cover_backup_urls', 'episode_video_ids', 'episode_details', 'Novel_IDs', 'Anime_IDs', 'Drama_IDs']: should_update = force_update or current_value is None or (isinstance(current_value, list) and len(current_value) == 0) else: - should_update = force_update or current_value is None or current_value 
== '' or current_value == 0 + should_update = force_update or current_value is None or current_value == '' if should_update: if field_name == 'episode_details': @@ -1869,6 +1892,16 @@ def sync_ranking_storage_fields(target_date=None, force_update=False, max_retrie # 当前也没有值,设置为空数组 data_item[field_name] = [] item_updated = True + elif field_name == 'comments_summary': + # 🎬 特殊处理评论总结字段:只有源数据有值时才更新,保护已有的总结 + source_value = source_data.get(field_name, '') + if source_value: # 只有当源数据有评论总结时才更新 + data_item[field_name] = source_value + item_updated = True + logging.info(f"[评论总结] 更新评论总结: {mix_name}") + else: + # 源数据没有总结,保留当前值(不覆盖) + logging.debug(f"[评论总结] 保留现有评论总结: {mix_name}") else: # 对于其他字段,直接从源数据获取 source_value = source_data.get(field_name, '') @@ -1978,4 +2011,251 @@ def validate_classification_exclusivity_api(): return jsonify({ "success": False, "message": f"验证分类互斥性失败: {str(e)}" - }), 500 \ No newline at end of file + }), 500 + + +@rank_bp.route('/get_comments_summary', methods=['GET']) +def get_comments_summary(): + """获取短剧的评论总结(优先使用 mix_id)""" + try: + mix_id = request.args.get('mix_id') + mix_name = request.args.get('mix_name') + date_str = request.args.get('date') + + if not mix_id and not mix_name: + return jsonify({"success": False, "message": "缺少必需参数 mix_id 或 mix_name"}) + + if not date_str: + from datetime import date + date_str = date.today().strftime('%Y-%m-%d') + + # 从 Ranking_storage 获取榜单数据 + ranking_doc = collection.find_one({ + "date": date_str, + "type": "comprehensive" + }, sort=[("created_at", -1)]) + + if not ranking_doc: + return jsonify({ + "success": False, + "message": f"未找到 {date_str} 的榜单数据" + }) + + # 在 data 数组中查找短剧(优先使用 mix_id) + data_items = ranking_doc.get("data", []) + drama_item = None + + for item in data_items: + # 优先使用 mix_id 匹配 + if mix_id and item.get("mix_id") == mix_id: + drama_item = item + break + # 备用:使用 mix_name 匹配 + elif mix_name and item.get("mix_name") == mix_name: + drama_item = item + # 继续查找,看是否有 mix_id 匹配的 + + if not drama_item: + return jsonify({ + "success": False, + "message": f"未找到短剧: {mix_name or mix_id}" + }) + + comments_summary = drama_item.get("comments_summary", "") + + if not comments_summary: + return jsonify({ + "success": False, + "message": "该短剧暂无评论总结" + }) + + return jsonify({ + "success": True, + "data": { + "mix_id": drama_item.get("mix_id"), + "mix_name": drama_item.get("mix_name"), + "date": date_str, + "comments_summary": comments_summary + } + }) + + except Exception as e: + logging.error(f"获取评论总结失败: {e}") + return jsonify({ + "success": False, + "message": f"获取评论总结失败: {str(e)}" + }), 500 + + +@rank_bp.route('/clear_comments_summary', methods=['POST']) +def clear_comments_summary(): + """清空短剧的评论总结(优先使用 mix_id)""" + try: + data = request.get_json() + mix_id = data.get('mix_id') + mix_name = data.get('mix_name') + date_str = data.get('date') + + if not mix_id and not mix_name: + return jsonify({"success": False, "message": "缺少必需参数 mix_id 或 mix_name"}) + + if not date_str: + from datetime import date + date_str = date.today().strftime('%Y-%m-%d') + + # 从 Ranking_storage 获取榜单数据 + ranking_doc = collection.find_one({ + "date": date_str, + "type": "comprehensive" + }, sort=[("created_at", -1)]) + + if not ranking_doc: + return jsonify({ + "success": False, + "message": f"未找到 {date_str} 的榜单数据" + }) + + # 在 data 数组中查找短剧并获取 mix_id + data_items = ranking_doc.get("data", []) + target_mix_id = None + target_mix_name = None + + for item in data_items: + if mix_id and item.get("mix_id") == mix_id: + target_mix_id = item.get("mix_id") + 
target_mix_name = item.get("mix_name")
+                break
+            elif mix_name and item.get("mix_name") == mix_name:
+                target_mix_id = item.get("mix_id")
+                target_mix_name = item.get("mix_name")
+
+        if not target_mix_id and not target_mix_name:
+            return jsonify({
+                "success": False,
+                "message": f"未找到短剧: {mix_name or mix_id}"
+            })
+
+        # Clear the comments summary field (prefer mix_id)
+        if target_mix_id:
+            result = collection.update_many(
+                {
+                    "date": date_str,
+                    "type": "comprehensive",
+                    "data.mix_id": target_mix_id
+                },
+                {
+                    "$set": {
+                        "data.$[elem].comments_summary": ""
+                    }
+                },
+                array_filters=[{"elem.mix_id": target_mix_id}]
+            )
+        else:
+            # Fallback: match by mix_name
+            result = collection.update_many(
+                {
+                    "date": date_str,
+                    "type": "comprehensive",
+                    "data.mix_name": target_mix_name
+                },
+                {
+                    "$set": {
+                        "data.$[elem].comments_summary": ""
+                    }
+                },
+                array_filters=[{"elem.mix_name": target_mix_name}]
+            )
+
+        # Also clear the summary in Rankings_management
+        management_result = None
+        if target_mix_id:
+            management_result = rankings_management_collection.update_one(
+                {"mix_id": target_mix_id},
+                {"$set": {"comments_summary": ""}}
+            )
+        elif target_mix_name:
+            management_result = rankings_management_collection.update_one(
+                {"mix_name": target_mix_name},
+                {"$set": {"comments_summary": ""}}
+            )
+
+        if result.modified_count > 0 or (management_result and management_result.modified_count > 0):
+            return jsonify({
+                "success": True,
+                "message": f"已清空短剧 {target_mix_name} 的评论总结(Ranking_storage: {result.modified_count}, Rankings_management: {management_result.modified_count if management_result else 0})",
+                "modified_count": result.modified_count
+            })
+        else:
+            return jsonify({
+                "success": False,
+                "message": "未找到需要清空的评论总结"
+            })
+
+    except Exception as e:
+        logging.error(f"清空评论总结失败: {e}")
+        return jsonify({
+            "success": False,
+            "message": f"清空评论总结失败: {str(e)}"
+        }), 500
+
+
+@rank_bp.route('/drama/<drama_id>')
+def get_drama_detail_by_id(drama_id):
+    """
+    Get drama details by ID (for the detail page).
+    Supports lookup by mix_id or _id.
+    """
+    try:
+        # Optional date parameter
+        date_str = request.args.get('date')
+        if not date_str:
+            date_str = datetime.now().date().strftime("%Y-%m-%d")
+
+        # Look in Ranking_storage first
+        ranking_doc = collection.find_one({
+            "date": date_str,
+            "type": "comprehensive"
+        }, sort=[("calculation_sequence", -1)])
+
+        drama_data = None
+
+        if ranking_doc and "data" in ranking_doc:
+            # Find the matching drama in the data array
+            for item in ranking_doc.get("data", []):
+                if item.get("mix_id") == drama_id or str(item.get("_id")) == drama_id:
+                    drama_data = item
+                    break
+
+        # Fall back to Rankings_management if not found in Ranking_storage
+        if not drama_data:
+            from bson import ObjectId
+            try:
+                mgmt_doc = rankings_management_collection.find_one({"mix_id": drama_id})
+                if not mgmt_doc:
+                    mgmt_doc = rankings_management_collection.find_one({"_id": ObjectId(drama_id)})
+                if mgmt_doc:
+                    drama_data = mgmt_doc
+            except Exception:
+                pass
+
+        if not drama_data:
+            return jsonify({
+                "success": False,
+                "message": f"未找到短剧: {drama_id}"
+            })
+
+        # Format the data (format_mix_item already covers all new fields)
+        formatted_data = format_mix_item(drama_data, date_str)
+
+        return jsonify({
+            "success": True,
+            "data": formatted_data,
+            "update_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        })
+
+    except Exception as e:
+        logging.error(f"获取短剧详情失败: {e}")
+        return jsonify({
+            "success": False,
+            "message": f"获取短剧详情失败: {str(e)}"
+        })
\ No newline at end of file
diff --git a/frontend/src/AdminPanel.vue b/frontend/src/AdminPanel.vue
index bcc2cc8..e553e52 100644
--- a/frontend/src/AdminPanel.vue
+++ b/frontend/src/AdminPanel.vue
@@ -19,6 +19,8 @@ const editForm = reactive({
series_author: '', Manufacturing_Field: '', Copyright_field: '', + classification_type: '', // 新增:女频/玄等 + release_date: '', // 新增:上线日期 play_vv: 0, total_likes_formatted: '', cover_image_url: '', @@ -30,11 +32,14 @@ const editForm = reactive({ // 分类字段 isNovel: false, isAnime: false, - isDrama: false + isDrama: false, + // 评论总结字段 + comments_summary: '' }) // API基础URL -const API_BASE_URL = 'http://159.75.150.210:8443/api' +// const API_BASE_URL = 'http://159.75.150.210:8443/api' // 远程服务器 +const API_BASE_URL = 'http://localhost:8443/api' // 本地服务器 // 格式化播放量 const formatPlayCount = (count) => { @@ -113,6 +118,8 @@ const editItem = async (item) => { editForm.series_author = item.series_author || '' editForm.Manufacturing_Field = item.Manufacturing_Field || '' editForm.Copyright_field = item.Copyright_field || '' + editForm.classification_type = item.classification_type || '' // 新增 + editForm.release_date = item.release_date || '' // 新增 editForm.play_vv = item.play_vv || 0 editForm.total_likes_formatted = item.total_likes_formatted || '' editForm.cover_image_url = item.cover_image_url || '' @@ -121,6 +128,7 @@ const editItem = async (item) => { play_vv_change: item.timeline_data?.play_vv_change || 0, play_vv_change_rate: item.timeline_data?.play_vv_change_rate || 0 } + editForm.comments_summary = item.comments_summary || '' // 加载分类状态(优先使用 mix_id,兼容 mix_name) await loadClassificationStatus(item.mix_id, item.mix_name) @@ -203,6 +211,39 @@ const updateClassification = async (classificationType, isChecked) => { } } +// 清空评论总结(优先使用 mix_id) +const clearCommentsSummary = async () => { + if (!confirm('确定要清空评论总结吗?清空后下次定时任务会重新生成。')) { + return + } + + try { + const today = new Date().toISOString().split('T')[0] + const requestData = { + date: today + } + + // 优先使用 mix_id,备用 mix_name + if (editForm.mix_id) { + requestData.mix_id = editForm.mix_id + } else { + requestData.mix_name = editForm.mix_name + } + + const response = await axios.post(`${API_BASE_URL}/rank/clear_comments_summary`, requestData) + + if (response.data.success) { + editForm.comments_summary = '' + alert('评论总结已清空') + } else { + alert(`清空失败: ${response.data.message}`) + } + } catch (error) { + console.error('清空评论总结失败:', error) + alert('清空评论总结失败,请检查网络连接') + } +} + // 删除项目 const deleteItem = async (item) => { if (!confirm(`确定要删除 "${item.title || item.mix_name}" 吗?`)) { @@ -246,11 +287,14 @@ const saveEdit = async () => { series_author: editForm.series_author, Manufacturing_Field: editForm.Manufacturing_Field, Copyright_field: editForm.Copyright_field, + classification_type: editForm.classification_type, + release_date: editForm.release_date, play_vv: editForm.play_vv, total_likes_formatted: editForm.total_likes_formatted, cover_image_url: editForm.cover_image_url, cover_backup_urls: editForm.cover_backup_urls, - timeline_data: editForm.timeline_data + timeline_data: editForm.timeline_data, + comments_summary: editForm.comments_summary } // 调用后端API更新数据 @@ -416,14 +460,26 @@ onMounted(() => {
[AdminPanel.vue template hunk bodies (here and at @@ -467,6 +523,29 @@) are not recoverable from this capture: the HTML markup was stripped on extraction. The surviving labels show the "制作信息" section retitled "制作信息(锁定字段)" with the Manufacturing_Field / Copyright_field inputs, a new "短剧详细信息(锁定字段)" section whose inputs appear to bind the added editForm.classification_type and editForm.release_date fields, a new "评论总结" section with a textarea bound to editForm.comments_summary and a button wired to clearCommentsSummary, and the existing "其他信息" section.]
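Not part of the patch — a minimal smoke-test sketch for the two summary endpoints the admin panel calls. It assumes the Flask app serves rank_bp under /api/rank on localhost:8443 (the API_BASE_URL the frontend now points at); the mix_id and date values below are made-up placeholders.

```python
# Hedged sketch, not in the diff: exercises /get_comments_summary and
# /clear_comments_summary as wired up by AdminPanel.vue.
import requests

BASE = "http://localhost:8443/api/rank"  # assumed blueprint mount point

def summary_roundtrip(mix_id: str, date_str: str) -> None:
    # Read the stored summary for one drama on a given ranking date
    resp = requests.get(
        f"{BASE}/get_comments_summary",
        params={"mix_id": mix_id, "date": date_str},
        timeout=10,
    )
    body = resp.json()
    print("get:", body.get("success"), body.get("message", ""))

    # Clear it; the next scheduled scrape will regenerate it
    resp = requests.post(
        f"{BASE}/clear_comments_summary",
        json={"mix_id": mix_id, "date": date_str},
        timeout=10,
    )
    print("clear:", resp.json().get("message", ""))

if __name__ == "__main__":
    summary_roundtrip("7400000000000000000", "2025-01-01")  # placeholder values
```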
diff --git a/frontend/src/App.vue b/frontend/src/App.vue index 521e2a8..046ccf9 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -17,6 +17,10 @@ const updateTime = ref('') // 添加更新时间字段 const showDatePicker = ref(false) // 控制日期选择器显示 const dateOptions = ref([]) // 日期选项列表 const selectedCategory = ref('all') // 当前选中的分类 +const showCommentsSummary = ref(false) // 控制评论总结弹窗显示 +const currentCommentsSummary = ref('') // 当前显示的评论总结 +const currentDramaName = ref('') // 当前短剧名称 +const currentDramaMixId = ref('') // 当前短剧ID // 初始化日期为今天 const initDate = () => { @@ -77,7 +81,8 @@ const fetchRankingData = async () => { params.classification_type = selectedCategory.value } - const response = await axios.get('http://159.75.150.210:8443/api/rank/videos', { + // const response = await axios.get('http://159.75.150.210:8443/api/rank/videos', { // 远程服务器 + const response = await axios.get('http://localhost:8443/api/rank/videos', { // 本地服务器 params: params }) @@ -257,6 +262,33 @@ const getRankBadgeClass = (rank) => { // router.push('/admin') // } +// 获取评论总结(优先使用 mix_id)- 改为直接跳转到详情页 +const fetchCommentsSummary = async (item, event) => { + // 阻止事件冒泡,避免触发卡片点击 + if (event) { + event.stopPropagation() + } + + // 直接跳转到详情页,并定位到评论区域 + const dramaId = item.mix_id || item._id + router.push(`/drama/${dramaId}#comments`) +} + +// 关闭评论总结弹窗 +const closeCommentsSummary = () => { + showCommentsSummary.value = false + currentCommentsSummary.value = '' + currentDramaName.value = '' + currentDramaMixId.value = '' +} + +// 跳转到短剧详情页 +const goToDramaDetail = (item) => { + // 使用 mix_id 作为路由参数 + const dramaId = item.mix_id || item._id + router.push(`/drama/${dramaId}`) +} + // 页面加载时初始化 onMounted(() => { initDate() @@ -331,6 +363,7 @@ onMounted(() => { v-for="(item, index) in rankingData" :key="item._id || index" class="ranking-item" + @click="goToDramaDetail(item)" >
@@ -381,8 +414,20 @@ onMounted(() => {
[The remainder of this App.vue template hunk did not survive extraction. From the surviving text: the 热度 (heat) label and {{ formatGrowth(item) || '300W' }} value are rewrapped in an inner div of .growth-section, a comments-summary button (class "comments-summary-btn", wired to fetchCommentsSummary) is added beneath them, and hunk @@ -414,6 +459,7 @@ adds one further template line whose content is not recoverable.]

@@ -612,6 +658,12 @@
   gap: 12px;
   position: relative;
   border-bottom: 1px solid #E1E3E5;
+  cursor: pointer;
+  transition: background-color 0.2s ease;
+}
+
+.ranking-item:hover {
+  background-color: #f8f9fa;
 }
 
 /* Rank badge */
@@ -742,10 +794,18 @@
 /* Growth data */
 .growth-section {
   display: flex;
-  align-items: center;
-  gap: 4px;
+  flex-direction: column;
+  align-items: flex-start;
+  gap: 2px;
   flex-shrink: 0;
   min-width: 60px;
+  justify-content: flex-end;
+}
+
+.growth-section > div:first-child {
+  display: flex;
+  align-items: center;
+  gap: 4px;
 }
 
 .growth-icon {
@@ -876,6 +936,27 @@
   color: rgba(255, 255, 255, 0.8);
 }
 
+/* Comments summary button styles */
+.comments-summary-btn {
+  background: none;
+  border: none;
+  color: #333;
+  font-size: 10px;
+  cursor: pointer;
+  padding: 0;
+  margin-top: 78px;
+  display: block;
+  text-align: left;
+  transition: color 0.2s ease;
+  white-space: nowrap;
+}
+
+.comments-summary-btn:hover {
+  color: #666;
+  text-decoration: underline;
+}
+
+
 /* Responsive design */
 @media (max-width: 480px) {
   .main-container {
diff --git a/frontend/src/DramaDetail.vue b/frontend/src/DramaDetail.vue
new file mode 100644
index 0000000..7fffb14
--- /dev/null
+++ b/frontend/src/DramaDetail.vue
@@ -0,0 +1,581 @@
+[581-line Vue single-file component for the drama detail page; its template, script, and styles were stripped on extraction and are not recoverable from this capture]
diff --git a/frontend/src/images/抖音icon.png b/frontend/src/images/抖音icon.png
new file mode 100644
index 0000000..5960603
Binary files /dev/null and b/frontend/src/images/抖音icon.png differ
diff --git a/frontend/src/router/index.js b/frontend/src/router/index.js
index ace39c5..dc2e2eb 100644
--- a/frontend/src/router/index.js
+++ b/frontend/src/router/index.js
@@ -1,11 +1,17 @@
 import { createRouter, createWebHistory } from 'vue-router'
 import AdminPanel from '../AdminPanel.vue'
+import DramaDetail from '../DramaDetail.vue'
 
 const routes = [
   {
     path: '/admin',
     name: 'Admin',
     component: AdminPanel
+  },
+  {
+    path: '/drama/:id',
+    name: 'DramaDetail',
+    component: DramaDetail
   }
 ]
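One note on config.py: the diff commits a live-looking DeepSeek API key. A minimal sketch of the same config with the key pulled from the environment instead — the DEEPSEEK_API_KEY variable name is an assumption; every other value mirrors the diff:

```python
# Sketch, not in the diff: same DEEPSEEK_CONFIG shape, key from the environment.
import os

DEEPSEEK_CONFIG = {
    'api_key': os.environ.get('DEEPSEEK_API_KEY', ''),  # keep the secret out of git
    'api_base': 'https://api.deepseek.com/v1',
    'model': 'deepseek-chat',
    'max_retries': 3,
    'retry_delays': [2, 5, 10],   # seconds before each retry
    'batch_size': 800,            # comments per batch
    'max_tokens': 15000,          # per-batch token budget
    'summary_max_length': 200     # cap (chars) on the final summary
}
```

And a usage sketch for the CommentsSummarizer added in rank_data_scraper.py — summarize_comments accepts plain strings or dicts with a 'text' key, filters out empty entries, and returns None if every batch fails:

```python
# Sketch, not in the diff: drive the summarizer outside the scraper.
from handlers.Rankings.rank_data_scraper import CommentsSummarizer

summarizer = CommentsSummarizer()
comments = [
    "节奏很快,一口气看完",           # plain-string comment
    {"text": "特效不错,剧情一般"},   # dict-style comment
    "",                               # empty entries are filtered out
]
summary = summarizer.summarize_comments(comments, drama_name="示例短剧")
print(summary if summary else "summary generation failed after retries")
```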