去除了多余的日期过滤条件；前端新增传递 mix_id，使前后端统一优先按 mix_id 精确查找短剧

This commit is contained in:
qiaoyirui0819 2025-11-10 21:34:27 +08:00
parent 8f231a7c8e
commit 868163d84e
4 changed files with 49 additions and 109 deletions

View File

@ -136,24 +136,8 @@ class DouyinAutoScheduler:
return len(killed_processes) > 0 return len(killed_processes) > 0
except ImportError: except ImportError:
# 如果没有psutil使用系统命令 # 如果没有psutil跳过清理以避免影响其他脚本实例
try: logging.warning('psutil 不可用,跳过进程清理(避免全局终止 Chrome')
import subprocess
import os
script_dir = os.path.dirname(os.path.abspath(__file__))
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_timer', 'douyin_persistent')
# 使用taskkill命令终止Chrome进程
result = subprocess.run(['taskkill', '/F', '/IM', 'chrome.exe'], capture_output=True, text=True)
if result.returncode == 0:
logging.info('使用系统命令终止Chrome进程')
return True
else:
logging.warning('无法终止Chrome进程')
return False
except Exception as e:
logging.warning(f'系统命令清理Chrome进程失败: {e}')
return False return False
except Exception as e: except Exception as e:
logging.warning(f'清理Chrome进程时出错: {e}') logging.warning(f'清理Chrome进程时出错: {e}')

View File

@ -851,9 +851,12 @@ class DouyinPlayVVScraper:
def _cleanup_chrome_processes(self): def _cleanup_chrome_processes(self):
"""清理可能占用配置文件的Chrome进程""" """清理可能占用配置文件的Chrome进程"""
try: try:
# 获取当前配置文件路径(按模式隔离)
# 获取当前配置文件路径
script_dir = os.path.dirname(os.path.abspath(__file__)) script_dir = os.path.dirname(os.path.abspath(__file__))
is_timer_mode = os.environ.get('TIMER_MODE') == '1'
if is_timer_mode:
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_timer', 'douyin_persistent')
else:
profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_scraper', 'douyin_persistent') profile_dir = os.path.join(script_dir, 'config', 'chrome_profile_scraper', 'douyin_persistent')
# 查找使用该配置文件的Chrome进程 # 查找使用该配置文件的Chrome进程
@ -874,18 +877,9 @@ class DouyinPlayVVScraper:
time.sleep(2) time.sleep(2)
return len(killed_processes) > 0 return len(killed_processes) > 0
except ImportError: except ImportError:
# 如果没有psutil使用系统命令 # 如果没有psutil跳过清理以避免影响其他脚本实例
try: logging.warning('psutil 不可用,跳过进程清理(避免全局终止 Chrome')
result = subprocess.run(['taskkill', '/f', '/im', 'chrome.exe'],
capture_output=True, text=True, timeout=10)
if result.returncode == 0:
logging.info('使用taskkill清理Chrome进程')
time.sleep(2)
return True
except Exception as e:
logging.warning(f'清理Chrome进程失败: {e}')
return False return False
except Exception as e: except Exception as e:
logging.warning(f'清理Chrome进程时出错: {e}') logging.warning(f'清理Chrome进程时出错: {e}')

View File

@ -68,7 +68,7 @@ def find_management_data(query, target_date=None):
Args: Args:
query: 查询条件字典可以包含mix_id, mix_name等字段 query: 查询条件字典可以包含mix_id, mix_name等字段
target_date: 目标日期用于日期过滤 target_date: 目标日期已不用于管理库过滤保留参数兼容
Returns: Returns:
查询到的文档或None 查询到的文档或None
@ -78,20 +78,6 @@ def find_management_data(query, target_date=None):
if 'mix_id' in query and query['mix_id']: if 'mix_id' in query and query['mix_id']:
mix_id_query = {"mix_id": query['mix_id']} mix_id_query = {"mix_id": query['mix_id']}
# 添加日期过滤如果提供了target_date
if target_date:
if isinstance(target_date, str):
target_date = parse_date_string(target_date)
if target_date:
start_of_day = datetime.combine(target_date, datetime.min.time())
end_of_day = datetime.combine(target_date, datetime.max.time())
mix_id_query.update({
"$or": [
{"created_at": {"$gte": start_of_day, "$lte": end_of_day}},
{"last_updated": {"$gte": start_of_day, "$lte": end_of_day}}
]
})
result = rankings_management_collection.find_one(mix_id_query) result = rankings_management_collection.find_one(mix_id_query)
if result: if result:
logging.info(f"通过mix_id找到管理数据: {query['mix_id']}") logging.info(f"通过mix_id找到管理数据: {query['mix_id']}")
@ -100,20 +86,6 @@ def find_management_data(query, target_date=None):
# 如果通过mix_id没找到或者没有mix_id尝试其他查询条件 # 如果通过mix_id没找到或者没有mix_id尝试其他查询条件
fallback_query = {k: v for k, v in query.items() if k != 'mix_id'} fallback_query = {k: v for k, v in query.items() if k != 'mix_id'}
# 添加日期过滤如果提供了target_date
if target_date and fallback_query:
if isinstance(target_date, str):
target_date = parse_date_string(target_date)
if target_date:
start_of_day = datetime.combine(target_date, datetime.min.time())
end_of_day = datetime.combine(target_date, datetime.max.time())
fallback_query.update({
"$or": [
{"created_at": {"$gte": start_of_day, "$lte": end_of_day}},
{"last_updated": {"$gte": start_of_day, "$lte": end_of_day}}
]
})
if fallback_query: if fallback_query:
result = rankings_management_collection.find_one(fallback_query) result = rankings_management_collection.find_one(fallback_query)
if result: if result:
@ -1294,11 +1266,12 @@ def update_content_classification():
try: try:
data = request.get_json() data = request.get_json()
# 验证必需参数 # 验证必需参数(支持 mix_id 或 mix_name 任一)
if not data or 'mix_name' not in data or 'classification_type' not in data: if not data or ('mix_id' not in data and 'mix_name' not in data) or 'classification_type' not in data:
return jsonify({"success": False, "message": "缺少必需参数 mix_name 或 classification_type"}) return jsonify({"success": False, "message": "缺少必需参数:需要 mix_id 或 mix_name以及 classification_type"})
mix_name = data['mix_name'] mix_id_param = data.get('mix_id')
mix_name = data.get('mix_name')
classification_type = data['classification_type'] # 'novel', 'anime', 'drama' classification_type = data['classification_type'] # 'novel', 'anime', 'drama'
action = data.get('action', 'add') # 'add' 或 'remove' action = data.get('action', 'add') # 'add' 或 'remove'
exclusive = data.get('exclusive', True) # 默认启用互斥模式,确保每个短剧只能属于一个分类 exclusive = data.get('exclusive', True) # 默认启用互斥模式,确保每个短剧只能属于一个分类
@ -1316,24 +1289,14 @@ def update_content_classification():
} }
field_name = field_mapping[classification_type] field_name = field_mapping[classification_type]
# 首先从Rankings_management获取短剧的mix_id使用今天的日期 # 优先使用 mix_id 获取管理库文档,不做日期过滤
today = datetime.now().date() mgmt_doc = find_management_data({'mix_id': mix_id_param, 'mix_name': mix_name})
start_of_day = datetime.combine(today, datetime.min.time())
end_of_day = datetime.combine(today, datetime.max.time())
mgmt_doc = rankings_management_collection.find_one({
"mix_name": mix_name,
"$or": [
{"created_at": {"$gte": start_of_day, "$lte": end_of_day}},
{"last_updated": {"$gte": start_of_day, "$lte": end_of_day}}
]
})
if not mgmt_doc: if not mgmt_doc:
return jsonify({"success": False, "message": f"未找到短剧: {mix_name}"}) return jsonify({"success": False, "message": f"未找到短剧:{mix_name or mix_id_param}"})
mix_id = mgmt_doc.get('mix_id') mix_id = mgmt_doc.get('mix_id')
if not mix_id: if not mix_id:
return jsonify({"success": False, "message": f"短剧 {mix_name} 缺少 mix_id"}) return jsonify({"success": False, "message": f"短剧 {mix_name or '[未知名称]'} 缺少 mix_id"})
updated_count = 0 updated_count = 0
@ -1350,7 +1313,7 @@ def update_content_classification():
# 1. 从Rankings_management中移除其他分类 # 1. 从Rankings_management中移除其他分类
for other_field in other_fields: for other_field in other_fields:
result = rankings_management_collection.update_many( result = rankings_management_collection.update_many(
{"mix_name": mix_name, other_field: mix_id}, {"mix_id": mix_id, other_field: mix_id},
{"$pull": {other_field: mix_id}} {"$pull": {other_field: mix_id}}
) )
if result.modified_count > 0: if result.modified_count > 0:
@ -1375,7 +1338,7 @@ def update_content_classification():
# 添加到分类字段(使用$addToSet避免重复 # 添加到分类字段(使用$addToSet避免重复
# 1. 更新Rankings_management数据库 # 1. 更新Rankings_management数据库
result_mgmt = rankings_management_collection.update_many( result_mgmt = rankings_management_collection.update_many(
{"mix_name": mix_name}, {"mix_id": mix_id},
{"$addToSet": {field_name: mix_id}} {"$addToSet": {field_name: mix_id}}
) )
@ -1394,7 +1357,7 @@ def update_content_classification():
# 从分类字段中移除 # 从分类字段中移除
# 1. 更新Rankings_management数据库 # 1. 更新Rankings_management数据库
result_mgmt = rankings_management_collection.update_many( result_mgmt = rankings_management_collection.update_many(
{"mix_name": mix_name}, {"mix_id": mix_id},
{"$pull": {field_name: mix_id}} {"$pull": {field_name: mix_id}}
) )
@ -1412,14 +1375,8 @@ def update_content_classification():
logging.info(f"分类更新: {message}, Rankings_management({result_mgmt.modified_count}), Ranking_storage({result_storage.modified_count})") logging.info(f"分类更新: {message}, Rankings_management({result_mgmt.modified_count}), Ranking_storage({result_storage.modified_count})")
# 获取更新后的分类状态,使用今天的日期 # 获取更新后的分类状态(按 mix_id 直接查询,不做日期过滤)
updated_mgmt_doc = rankings_management_collection.find_one({ updated_mgmt_doc = rankings_management_collection.find_one({"mix_id": mix_id})
"mix_name": mix_name,
"$or": [
{"created_at": {"$gte": start_of_day, "$lte": end_of_day}},
{"last_updated": {"$gte": start_of_day, "$lte": end_of_day}}
]
})
classification_status = { classification_status = {
'novel': mix_id in updated_mgmt_doc.get('Novel_IDs', []) if updated_mgmt_doc else False, 'novel': mix_id in updated_mgmt_doc.get('Novel_IDs', []) if updated_mgmt_doc else False,
'anime': mix_id in updated_mgmt_doc.get('Anime_IDs', []) if updated_mgmt_doc else False, 'anime': mix_id in updated_mgmt_doc.get('Anime_IDs', []) if updated_mgmt_doc else False,
@ -1449,19 +1406,20 @@ def update_content_classification():
def get_content_classification(): def get_content_classification():
"""获取短剧的分类状态""" """获取短剧的分类状态"""
try: try:
mix_id_param = request.args.get('mix_id')
mix_name = request.args.get('mix_name') mix_name = request.args.get('mix_name')
if not mix_name: if not mix_id_param and not mix_name:
return jsonify({"success": False, "message": "缺少必需参数 mix_name"}) return jsonify({"success": False, "message": "缺少必需参数:需要 mix_id 或 mix_name"})
# 从Rankings_management获取短剧信息 # 优先使用 mix_id 获取管理库信息(不做日期过滤)
mgmt_doc = rankings_management_collection.find_one({"mix_name": mix_name}) mgmt_doc = find_management_data({'mix_id': mix_id_param, 'mix_name': mix_name})
if not mgmt_doc: if not mgmt_doc:
return jsonify({"success": False, "message": f"未找到短剧: {mix_name}"}) return jsonify({"success": False, "message": f"未找到短剧{mix_name or mix_id_param}"})
mix_id = mgmt_doc.get('mix_id') mix_id = mgmt_doc.get('mix_id')
if not mix_id: if not mix_id:
return jsonify({"success": False, "message": f"短剧 {mix_name} 缺少 mix_id"}) return jsonify({"success": False, "message": f"短剧 {mix_name or '[未知名称]'} 缺少 mix_id"})
# 检查短剧在各个分类中的状态 # 检查短剧在各个分类中的状态
novel_ids = mgmt_doc.get('Novel_IDs', []) novel_ids = mgmt_doc.get('Novel_IDs', [])
@ -1476,9 +1434,9 @@ def get_content_classification():
return jsonify({ return jsonify({
"success": True, "success": True,
"message": f"获取短剧 {mix_name} 分类状态成功", "message": f"获取短剧 {mgmt_doc.get('mix_name', mix_name)} 分类状态成功",
"data": { "data": {
"mix_name": mix_name, "mix_name": mgmt_doc.get('mix_name', mix_name),
"mix_id": mix_id, "mix_id": mix_id,
"classification_status": classification_status, "classification_status": classification_status,
"classification_details": { "classification_details": {

View File

@ -13,6 +13,7 @@ const showEditModal = ref(false)
// //
const editForm = reactive({ const editForm = reactive({
id: null, id: null,
mix_id: '',
title: '', title: '',
mix_name: '', mix_name: '',
series_author: '', series_author: '',
@ -106,6 +107,7 @@ const fetchRankingData = async () => {
// //
const editItem = async (item) => { const editItem = async (item) => {
editForm.id = item.id || item._id editForm.id = item.id || item._id
editForm.mix_id = item.mix_id || ''
editForm.title = item.title || '' editForm.title = item.title || ''
editForm.mix_name = item.mix_name || '' editForm.mix_name = item.mix_name || ''
editForm.series_author = item.series_author || '' editForm.series_author = item.series_author || ''
@ -120,17 +122,17 @@ const editItem = async (item) => {
play_vv_change_rate: item.timeline_data?.play_vv_change_rate || 0 play_vv_change_rate: item.timeline_data?.play_vv_change_rate || 0
} }
// // 使 mix_id mix_name
await loadClassificationStatus(item.mix_name) await loadClassificationStatus(item.mix_id, item.mix_name)
showEditModal.value = true showEditModal.value = true
} }
// //
const loadClassificationStatus = async (mixName) => { const loadClassificationStatus = async (mixId, mixName) => {
try { try {
const response = await axios.get(`${API_BASE_URL}/rank/get_content_classification`, { const response = await axios.get(`${API_BASE_URL}/rank/get_content_classification`, {
params: { mix_name: mixName } params: { mix_id: mixId, mix_name: mixName }
}) })
if (response.data.success) { if (response.data.success) {
@ -150,8 +152,8 @@ const loadClassificationStatus = async (mixName) => {
// //
const updateClassification = async (classificationType, isChecked) => { const updateClassification = async (classificationType, isChecked) => {
if (!editForm.mix_name) { if (!editForm.mix_id && !editForm.mix_name) {
alert('合集名不能为空') alert('缺少短剧标识mix_id 或 mix_name')
return return
} }
@ -172,6 +174,7 @@ const updateClassification = async (classificationType, isChecked) => {
try { try {
const response = await axios.post(`${API_BASE_URL}/rank/update_content_classification`, { const response = await axios.post(`${API_BASE_URL}/rank/update_content_classification`, {
mix_id: editForm.mix_id,
mix_name: editForm.mix_name, mix_name: editForm.mix_name,
classification_type: classificationType, classification_type: classificationType,
action: isChecked ? 'add' : 'remove', action: isChecked ? 'add' : 'remove',
@ -190,13 +193,13 @@ const updateClassification = async (classificationType, isChecked) => {
} else { } else {
alert(`分类更新失败: ${response.data.message}`) alert(`分类更新失败: ${response.data.message}`)
// checkbox // checkbox
await loadClassificationStatus(editForm.mix_name) await loadClassificationStatus(editForm.mix_id, editForm.mix_name)
} }
} catch (error) { } catch (error) {
console.error('分类更新失败:', error) console.error('分类更新失败:', error)
alert('分类更新失败,请检查网络连接') alert('分类更新失败,请检查网络连接')
// checkbox // checkbox
await loadClassificationStatus(editForm.mix_name) await loadClassificationStatus(editForm.mix_id, editForm.mix_name)
} }
} }
@ -237,6 +240,7 @@ const deleteItem = async (item) => {
const saveEdit = async () => { const saveEdit = async () => {
try { try {
const updateData = { const updateData = {
mix_id: editForm.mix_id,
title: editForm.title, title: editForm.title,
mix_name: editForm.mix_name, mix_name: editForm.mix_name,
series_author: editForm.series_author, series_author: editForm.series_author,