管理后台连接大模型API支持重新生成评论总结

添加锁定评论列表字段
This commit is contained in:
Qyir 2025-11-18 11:42:47 +08:00
parent 91761b6754
commit 9017cea25d
4 changed files with 456 additions and 72 deletions

View File

@ -69,6 +69,27 @@ class DouyinAutoScheduler:
# 创建logger实例
self.logger = logging.getLogger(__name__)
def _sync_episode_details_with_lock(self, episode_details, comments_summary):
"""
同步 episode_details 时处理评论锁定逻辑
如果有 comments_summary则保留评论内容只更新互动数据
Args:
episode_details: 管理数据库中的 episode_details
comments_summary: 评论总结字段
Returns:
处理后的 episode_details
"""
# 如果没有 comments_summary 或没有 episode_details直接返回原数据
if not comments_summary or not episode_details:
return episode_details
# 如果有 comments_summary说明评论内容已锁定直接返回管理数据库的数据
# 因为管理数据库中已经保存了锁定的评论内容
logging.info(f'🔒 检测到 comments_summaryepisode_details 将保持锁定状态(包含评论内容)')
return episode_details
def _normalize_play_vv(self, play_vv):
"""标准化播放量数据类型,将字符串转换为数字"""
if isinstance(play_vv, str):
@ -407,7 +428,11 @@ class DouyinAutoScheduler:
"desc": management_data.get("desc", "") if management_data else "",
"updated_to_episode": management_data.get("updated_to_episode", 0) if management_data else 0,
"episode_video_ids": management_data.get("episode_video_ids", []) if management_data else [],
"episode_details": management_data.get("episode_details", []) if management_data else [],
# 🔒 episode_details 同步逻辑:如果有 comments_summary保留评论内容但更新互动数据
"episode_details": self._sync_episode_details_with_lock(
management_data.get("episode_details", []) if management_data else [],
management_data.get("comments_summary", "") if management_data else ""
),
"data_status": management_data.get("data_status", "") if management_data else "",
"realtime_saved": management_data.get("realtime_saved", True) if management_data else True,
"created_at": management_data.get("created_at") if management_data else None,

View File

@ -2373,6 +2373,20 @@ class DouyinPlayVVScraper:
# 检查是否已存在该短剧的记录
existing_doc = target_collection.find_one({'mix_id': mix_id})
# 🔒 保护现有的 episode_details 中的评论数据
final_episode_details = target_doc.get('episode_details', [])
if existing_doc and existing_doc.get('episode_details'):
existing_episode_details = existing_doc.get('episode_details', [])
# 合并现有的评论数据到新的 episode_details
for i, new_episode in enumerate(final_episode_details):
if i < len(existing_episode_details):
existing_episode = existing_episode_details[i]
# 保留现有的评论数据(如果存在)
existing_comments = existing_episode.get('comments', [])
if existing_comments:
new_episode['comments'] = existing_comments
logging.info(f'[评论保护] 保留第 {i+1} 集的 {len(existing_comments)} 条现有评论: {mix_name}')
# 准备更新字段(不包含锁定字段,锁定字段将在后面单独处理)
set_fields = {
# 按照用户指定的字段顺序设置
@ -2394,7 +2408,7 @@ class DouyinPlayVVScraper:
'desc': target_doc.get('desc', ''),
'updated_to_episode': target_doc.get('updated_to_episode', 0),
'episode_video_ids': target_doc.get('episode_video_ids', []),
'episode_details': target_doc.get('episode_details', []),
'episode_details': final_episode_details, # 使用合并后的 episode_details
'data_status': target_doc.get('data_status', ''),
'realtime_saved': target_doc.get('realtime_saved', True),
'created_at': target_doc.get('created_at', datetime.now()),
@ -2617,7 +2631,7 @@ class DouyinPlayVVScraper:
logging.error(f'[增量更新] 更新视频ID列表失败: {mix_name} - 错误: {e}')
return []
def update_single_video_details(self, document_id, episode_number: int, video_id: str, video_details: dict, mix_name: str):
def update_single_video_details(self, document_id, episode_number: int, video_id: str, video_details: dict, mix_name: str, mix_id: str = ''):
"""更新单个视频的详细数据(第三阶段增量更新)"""
target_collection = self.collection # 使用根据模式选择的集合
if not self.realtime_save_enabled or target_collection is None or not document_id:
@ -2631,6 +2645,36 @@ class DouyinPlayVVScraper:
return False
try:
# 🔒 检查是否有 comments_summary如果有则保留现有评论
# 注意:始终检查 Rankings_management 数据库,因为这是锁定字段的唯一来源
existing_comments = []
if self.management_collection is not None:
# Prefer querying by mix_id (treated here as the stable identifier);
# when mix_id is empty, fall back to querying by _id = document_id.
query = {'mix_id': mix_id} if mix_id else {'_id': document_id}
logging.info(f'🔍 [评论锁定] 检查 Rankings_management 数据库: query={query}, episode_number={episode_number}')
doc = self.management_collection.find_one(query)
if doc:
logging.info(f'🔍 [评论锁定] 找到文档: mix_name={doc.get("mix_name")}, has_comments_summary={bool(doc.get("comments_summary"))}')
if doc.get('comments_summary'):
# 获取现有的评论数据
episode_details = doc.get('episode_details', [])
logging.info(f'🔍 [评论锁定] episode_details 长度: {len(episode_details)}')
if episode_number - 1 < len(episode_details):
existing_episode = episode_details[episode_number - 1]
existing_comments = existing_episode.get('comments', [])
logging.info(f'🔍 [评论锁定] 第 {episode_number} 集现有评论数: {len(existing_comments)}')
if existing_comments:
logging.info(f'🔒 检测到 comments_summary保留现有 {len(existing_comments)} 条评论')
else:
logging.warning(f'⚠️ [评论锁定] episode_number={episode_number} 超出 episode_details 范围(长度={len(episode_details)}')
else:
logging.info(f'🔍 [评论锁定] comments_summary 为空,将抓取新评论')
else:
logging.warning(f'⚠️ [评论锁定] 未找到 document_id={document_id} 的文档')
else:
logging.warning(f'⚠️ [评论锁定] management_collection 未初始化')
# 构建更新的视频详细信息
episode_info = {
'episode_number': episode_number,
@ -2641,7 +2685,8 @@ class DouyinPlayVVScraper:
'likes_formatted': self.format_interaction_count(video_details.get('likes', 0)),
'shares_formatted': self.format_interaction_count(video_details.get('shares', 0)),
'favorites_formatted': self.format_interaction_count(video_details.get('favorites', 0)),
'comments': video_details.get('comments', []),
# 🔒 如果有现有评论则保留,否则使用新抓取的评论
'comments': existing_comments if existing_comments else video_details.get('comments', []),
'data_status': 'completed'
}
@ -2919,13 +2964,13 @@ class DouyinPlayVVScraper:
if video_details and video_details.get('success', False):
# 立即更新到数据库
self.update_single_video_details(document_id, i, video_id, video_details, mix_name)
self.update_single_video_details(document_id, i, video_id, video_details, mix_name, mix_id)
else:
logging.warning(f'[增量更新] 第 {i} 集视频详细数据获取失败: {mix_name}')
# 添加随机延迟避免请求过快,模拟人类行为
if i < len(episode_video_ids): # 不是最后一个视频时才延迟
random_delay = self.anti_detection.get_human_like_delay()
random_delay = random.uniform(2.0, 5.0) # 2-5秒随机延迟
logging.info(f'🕐 [增量更新] 视频间隔等待时间: {random_delay:.1f}')
time.sleep(random_delay)
@ -3985,6 +4030,21 @@ class DouyinPlayVVScraper:
# 添加互动数据保存标记,避免重复保存
interaction_data_saved = False
# 🔒 检查是否应该跳过评论抓取(根据 comments_summary 字段判断)
# 注意:始终检查 Rankings_management 数据库,因为这是锁定字段的唯一来源
should_skip_comments = False
if document_id:
try:
# 使用 management_collection 而不是 self.collection
# 确保无论什么模式都检查管理数据库中的 comments_summary
if self.management_collection is not None:
doc = self.management_collection.find_one({'_id': document_id})
if doc and doc.get('comments_summary'):
should_skip_comments = True
logging.info(f'🔒 检测到 comments_summary 字段有内容,将跳过评论抓取(但仍会更新点赞、分享、收藏数)')
except Exception as e:
logging.warning(f'检查 comments_summary 字段时出错: {e}')
# 检查是否应该跳过详细数据获取(仅在定时器模式下跳过)
if os.environ.get('AUTO_CONTINUE') == '1':
logging.info(f'🚀 定时器模式:跳过视频 {video_id} 的详细数据获取(点赞、收藏、分享、评论)')
@ -3992,7 +4052,7 @@ class DouyinPlayVVScraper:
video_details['error'] = '定时器模式:跳过详细数据获取'
return video_details
logging.info(f'🔍 get_video_details 被调用: video_id={video_id}')
logging.info(f'🔍 get_video_details 被调用: video_id={video_id}, 跳过评论={should_skip_comments}')
try:
# 确保driver已初始化
@ -4086,6 +4146,8 @@ class DouyinPlayVVScraper:
except Exception as e:
continue
# 🔒 根据 should_skip_comments 标志决定是否抓取评论
if not should_skip_comments:
# 启动滑动机制加载更多评论
logging.info(f'开始为视频 {video_id} 启动滑动机制加载评论')
scrolled_comments = self._simulate_comment_scrolling(video_id, max_scroll_attempts=15, scroll_delay=2.0,
@ -4144,6 +4206,8 @@ class DouyinPlayVVScraper:
except Exception as e:
continue
else:
logging.info(f'🔒 跳过视频 {video_id} 的评论抓取comments_summary 已存在)')
# 如果网络日志没有获取到数据,尝试页面解析
if video_details['likes'] == 0 and video_details['shares'] == 0 and video_details['favorites'] == 0:
@ -4410,7 +4474,7 @@ class DouyinPlayVVScraper:
video_details_list.append(video_details)
# 添加随机延迟避免请求过快,模拟人类行为
random_delay = self.anti_detection.get_human_like_delay()
random_delay = random.uniform(2.0, 5.0) # 2-5秒随机延迟
logging.info(f'🕐 视频间隔等待时间: {random_delay:.1f}')
time.sleep(random_delay)
# exit(0)

View File

@ -2937,3 +2937,208 @@ def article_health_check():
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
}
})
# ==================== 评论总结管理API ====================
def _collect_comment_texts(episode_details):
    """Return the stripped, non-empty comment texts found across all episodes.

    A comment may be a dict carrying a ``text`` key or a plain string.
    Anything else (missing/None ``comments``, non-string ``text``,
    non-dict episodes) is skipped instead of raising.
    """
    texts = []
    for episode in episode_details or []:
        if not isinstance(episode, dict):
            continue
        for comment in episode.get('comments') or []:
            text = comment.get('text') if isinstance(comment, dict) else comment
            if isinstance(text, str):
                text = text.strip()
                if text:
                    texts.append(text)
    return texts


@rank_bp.route('/comments/regenerate-summary', methods=['POST'])
def regenerate_comments_summary():
    """
    Regenerate the comments summary of one short drama.

    Reads ``mix_id`` from the JSON body, gathers every stored comment text
    from the management document's ``episode_details``, asks
    ``CommentsSummarizer.summarize_comments`` for a new summary and writes it
    to ``comments_summary``. The comments themselves are not cleared.

    The summary is written to the management collection and, when a document
    containing this ``mix_id`` exists in ``collection``, to the matching
    ``data`` array item of the most recently created such document.

    Returns:
        A JSON response with ``success`` and ``message``; on success ``data``
        carries ``comments_summary`` and ``comments_count``. Failures are
        reported through ``success: False`` in the JSON body rather than
        through an explicit HTTP error status.
    """
    try:
        # silent=True: a missing or malformed JSON body yields None instead of
        # raising, so the caller gets the "missing mix_id" message below.
        payload = request.get_json(silent=True)
        data = payload if isinstance(payload, dict) else {}
        mix_id = data.get('mix_id', '')

        if not mix_id:
            return jsonify({
                "success": False,
                "message": "缺少必要参数: mix_id"
            })

        # Load the drama from the management collection.
        management_doc = rankings_management_collection.find_one({'mix_id': mix_id})
        if not management_doc:
            return jsonify({
                "success": False,
                "message": f"未找到 mix_id 为 {mix_id} 的数据"
            })

        episode_details = management_doc.get('episode_details', [])
        if not episode_details:
            return jsonify({
                "success": False,
                "message": "该短剧没有集数数据"
            })

        all_comments = _collect_comment_texts(episode_details)
        if not all_comments:
            return jsonify({
                "success": False,
                "message": "该短剧没有评论内容,无法生成总结"
            })

        mix_name = management_doc.get('mix_name', '')

        # Only the summarizer import and call are guarded by the "LLM API
        # failed" handler; database errors fall through to the outer handler
        # so they are not misreported as model failures.
        try:
            from handlers.Rankings.rank_data_scraper import CommentsSummarizer

            comments_summary = CommentsSummarizer().summarize_comments(all_comments, mix_name)
        except Exception as e:
            logging.error(f'调用大模型API失败: {e}')
            return jsonify({
                "success": False,
                "message": f"调用大模型API失败: {str(e)}"
            })

        if not comments_summary:
            return jsonify({
                "success": False,
                "message": "评论总结生成失败,请稍后重试"
            })

        # Update the management collection.
        rankings_management_collection.update_one(
            {'mix_id': mix_id},
            {
                '$set': {
                    'comments_summary': comments_summary,
                    'last_updated': datetime.now()
                }
            }
        )

        # Update the newest document in `collection` that contains this mix_id.
        latest_doc = collection.find_one(
            {'data.mix_id': mix_id},
            sort=[('created_at', -1)]
        )
        if latest_doc:
            # The positional `$` targets the `data` element matched by the filter.
            collection.update_one(
                {'_id': latest_doc['_id'], 'data.mix_id': mix_id},
                {'$set': {'data.$.comments_summary': comments_summary}}
            )

        logging.info(f'✅ 成功重新生成评论总结: mix_id={mix_id}, mix_name={mix_name}')

        return jsonify({
            "success": True,
            "message": "评论总结重新生成成功",
            "data": {
                "comments_summary": comments_summary,
                "comments_count": len(all_comments)
            }
        })

    except Exception as e:
        logging.error(f'重新生成评论总结失败: {e}')
        return jsonify({
            "success": False,
            "message": f"重新生成评论总结失败: {str(e)}"
        })
@rank_bp.route('/comments/clear-all', methods=['POST'])
def clear_all_comments():
    """
    Clear all comment data of one short drama.

    Reads ``mix_id`` from the JSON body, empties the ``comments`` list of
    every entry in the management document's ``episode_details`` and resets
    ``comments_summary`` to an empty string. The same cleared values are
    written to the matching ``data`` array item of the most recently created
    document in ``collection`` that contains this ``mix_id``, if one exists.

    Returns:
        A JSON response with ``success`` and ``message``; on success ``data``
        carries ``mix_id``, ``mix_name`` and ``cleared_episodes``. Failures
        are reported through ``success: False`` in the JSON body rather than
        through an explicit HTTP error status.
    """
    try:
        # silent=True: a missing or malformed JSON body yields None instead of
        # raising, so the caller gets the "missing mix_id" message below.
        payload = request.get_json(silent=True)
        data = payload if isinstance(payload, dict) else {}
        mix_id = data.get('mix_id', '')

        if not mix_id:
            return jsonify({
                "success": False,
                "message": "缺少必要参数: mix_id"
            })

        # Load the drama from the management collection.
        management_doc = rankings_management_collection.find_one({'mix_id': mix_id})
        if not management_doc:
            return jsonify({
                "success": False,
                "message": f"未找到 mix_id 为 {mix_id} 的数据"
            })

        # Empty the comments of every episode; the other episode fields are kept.
        episode_details = management_doc.get('episode_details') or []
        for episode in episode_details:
            if isinstance(episode, dict):
                episode['comments'] = []

        # Update the management collection.
        rankings_management_collection.update_one(
            {'mix_id': mix_id},
            {
                '$set': {
                    'comments_summary': '',
                    'episode_details': episode_details,
                    'last_updated': datetime.now()
                }
            }
        )

        # Update the newest document in `collection` that contains this mix_id.
        latest_doc = collection.find_one(
            {'data.mix_id': mix_id},
            sort=[('created_at', -1)]
        )
        if latest_doc:
            # The positional `$` targets the `data` element matched by the filter.
            collection.update_one(
                {'_id': latest_doc['_id'], 'data.mix_id': mix_id},
                {
                    '$set': {
                        'data.$.comments_summary': '',
                        'data.$.episode_details': episode_details
                    }
                }
            )

        mix_name = management_doc.get('mix_name', '')
        logging.info(f'✅ 成功清空所有评论数据: mix_id={mix_id}, mix_name={mix_name}')

        return jsonify({
            "success": True,
            "message": "评论数据已全部清空",
            "data": {
                "mix_id": mix_id,
                "mix_name": mix_name,
                "cleared_episodes": len(episode_details)
            }
        })

    except Exception as e:
        logging.error(f'清空评论数据失败: {e}')
        return jsonify({
            "success": False,
            "message": f"清空评论数据失败: {str(e)}"
        })

View File

@ -9,6 +9,7 @@ const router = useRouter()
const rankingData = ref([])
const loading = ref(false)
const showEditModal = ref(false)
const regenerating = ref(false) //
//
const editForm = reactive({
@ -244,6 +245,73 @@ const clearCommentsSummary = async () => {
}
}
//
/**
 * Ask the backend to regenerate the comments summary for the item being edited.
 *
 * Preconditions (no request already running, a mix_id is present) are checked
 * BEFORE the confirmation dialog, so the user is never asked to confirm a
 * request that cannot be sent. On success the returned summary is written to
 * editForm.comments_summary. `regenerating` is true for the duration of the
 * request and is always reset afterwards.
 */
const regenerateCommentsSummary = async () => {
  if (regenerating.value) {
    alert('正在生成中,请稍候...')
    return
  }

  if (!editForm.mix_id) {
    alert('缺少 mix_id无法重新生成')
    return
  }

  if (!confirm('确定要重新生成评论总结吗这将调用AI根据现有评论内容重新生成总结。')) {
    return
  }

  // Mark the request as in flight; the finally block below clears the flag.
  regenerating.value = true
  try {
    const response = await axios.post(`${API_BASE_URL}/rank/comments/regenerate-summary`, {
      mix_id: editForm.mix_id
    })

    // The backend reports failures via `success: false` in the response body.
    if (response.data.success) {
      editForm.comments_summary = response.data.data.comments_summary
      alert(`评论总结重新生成成功!共分析了 ${response.data.data.comments_count} 条评论`)
    } else {
      alert(`重新生成失败: ${response.data.message}`)
    }
  } catch (error) {
    console.error('重新生成评论总结失败:', error)
    alert('重新生成评论总结失败,请检查网络连接')
  } finally {
    regenerating.value = false
  }
}
//
/**
 * Ask the backend to clear the comments summary and all stored comments for
 * the item being edited.
 *
 * The mix_id check runs BEFORE the confirmation dialog, so the user is never
 * asked to confirm a destructive action that cannot be sent. On success the
 * local editForm.comments_summary is reset to an empty string.
 */
const clearAllComments = async () => {
  if (!editForm.mix_id) {
    alert('缺少 mix_id无法清空')
    return
  }

  if (!confirm('确定要清空所有评论数据吗?这将同时清空评论总结和所有评论内容,此操作不可恢复!')) {
    return
  }

  try {
    const response = await axios.post(`${API_BASE_URL}/rank/comments/clear-all`, {
      mix_id: editForm.mix_id
    })

    // The backend reports failures via `success: false` in the response body.
    if (response.data.success) {
      editForm.comments_summary = ''
      alert(`评论数据已全部清空!共清空了 ${response.data.data.cleared_episodes} 集的评论`)
    } else {
      alert(`清空失败: ${response.data.message}`)
    }
  } catch (error) {
    console.error('清空评论数据失败:', error)
    alert('清空评论数据失败,请检查网络连接')
  }
}
//
const deleteItem = async (item) => {
if (!confirm(`确定要删除 "${item.title || item.mix_name}" 吗?`)) {
@ -531,7 +599,6 @@ onMounted(() => {
</div>
</div>
<!-- 评论总结区域 -->
<div class="form-section">
<h4 class="section-title">评论总结</h4>
<div class="form-group">
@ -543,16 +610,27 @@ onMounted(() => {
placeholder="评论总结内容(可手动编辑或由系统自动生成)"
style="resize: vertical;"
></textarea>
<div style="margin-top: 8px; display: flex; gap: 8px;">
<button
class="btn btn-sm btn-primary"
@click="regenerateCommentsSummary"
:disabled="regenerating"
title="根据现有评论内容重新生成AI总结"
>
{{ regenerating ? '⏳ 正在生成中...' : '🔄 重新生成总结' }}
</button>
<button
v-if="editForm.comments_summary"
class="btn btn-sm btn-delete"
@click="clearCommentsSummary"
style="margin-top: 8px;"
@click="clearAllComments"
:disabled="regenerating"
title="清空评论总结和所有评论内容"
>
清空评论总结
🗑 清空所有
</button>
</div>
</div>
</div>
<div class="form-section">
<h4 class="section-title">其他信息</h4>
@ -666,11 +744,23 @@ export default {
background: #357abd;
}
/* Disabled primary button: alternate background, "not-allowed" cursor, reduced opacity. */
.btn-primary:disabled {
background: #a0c4e8;
cursor: not-allowed;
opacity: 0.7;
}
/* Secondary button: grey background with white text. */
.btn-secondary {
background: #6c757d;
color: white;
}
/* Disabled delete button: alternate background, "not-allowed" cursor, reduced opacity. */
.btn-delete:disabled {
background: #e89ca5;
cursor: not-allowed;
opacity: 0.7;
}
/* Secondary button hover state: darker grey background. */
.btn-secondary:hover {
background: #545b62;
}