解决封面图片过期情况,存入TOS中media/rank/目录中
热播总榜选择日期之后永久显示图片。
This commit is contained in:
parent
06996967ca
commit
be44334960
@ -28,7 +28,7 @@ import config
|
|||||||
|
|
||||||
# 添加项目路径到 Python 路径
|
# 添加项目路径到 Python 路径
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__), 'handlers', 'Rankings'))
|
sys.path.append(os.path.join(os.path.dirname(__file__), 'handlers', 'Rankings'))
|
||||||
from rank_data_scraper import DouyinPlayVVScraper
|
from handlers.Rankings.rank_data_scraper import DouyinPlayVVScraper
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -18,4 +18,25 @@ LOG_DIR = 'logs'
|
|||||||
# 定时器配置
|
# 定时器配置
|
||||||
SCHEDULER_TIME = "24:00" # 定时器执行时间,格式为 HH:MM (24小时制)
|
SCHEDULER_TIME = "24:00" # 定时器执行时间,格式为 HH:MM (24小时制)
|
||||||
|
|
||||||
|
# TOS (Volcengine object storage) configuration.
#
# SECURITY: the access key pair is read from the environment only. A real key
# pair must never be embedded in source as a fallback value: anything committed
# here stays in version-control history and has to be treated as leaked
# (rotate it, then provide the new pair through TOS_ACCESS_KEY_ID and
# TOS_ACCESS_KEY_SECRET).
TOS_CONFIG = {
    'access_key_id': os.getenv('TOS_ACCESS_KEY_ID', ''),
    'access_key_secret': os.getenv('TOS_ACCESS_KEY_SECRET', ''),
    'endpoint': 'https://tos-cn-beijing.volces.com',
    'region': 'cn-beijing',
    'bucket_name': os.getenv('TOS_BUCKET_NAME', 'km1'),
    'self_domain': os.getenv('TOS_SELF_DOMAIN', 'oss.xintiao85.com'),
    'disable_ssl_warnings': True,
}

# Fail loudly (but do not crash the import) when credentials are missing, so a
# misconfigured deployment is visible in the startup output.
if not (TOS_CONFIG['access_key_id'] and TOS_CONFIG['access_key_secret']):
    print(
        "WARNING: TOS_ACCESS_KEY_ID / TOS_ACCESS_KEY_SECRET are not set; "
        "uploads to TOS will fail until they are provided."
    )

# API configuration (kept for compatibility with existing code that reads
# API_CONFIG instead of TOS_CONFIG). Every value mirrors TOS_CONFIG.
API_CONFIG = {
    'huoshan': {
        'AccessKey': TOS_CONFIG['access_key_id'],
        'SecretKey': TOS_CONFIG['access_key_secret'],
    },
    'OSS_BUCKET_NAME': TOS_CONFIG['bucket_name'],
    'OSS_HOST': TOS_CONFIG['self_domain'],
}
|
||||||
|
|
||||||
print(f"Successfully loaded configuration for environment: {APP_ENV}")
|
print(f"Successfully loaded configuration for environment: {APP_ENV}")
|
||||||
@ -35,7 +35,13 @@ import sys
|
|||||||
import os
|
import os
|
||||||
# 添加项目根目录到 Python 路径
|
# 添加项目根目录到 Python 路径
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||||
|
# 确保能找到backend目录下的模块
|
||||||
|
backend_dir = os.path.join(os.path.dirname(__file__), '..', '..')
|
||||||
|
sys.path.insert(0, backend_dir)
|
||||||
from database import db
|
from database import db
|
||||||
|
from tos_client import oss_client
|
||||||
|
import uuid
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
# 配置日志
|
# 配置日志
|
||||||
@ -65,8 +71,10 @@ class DouyinPlayVVScraper:
|
|||||||
self.captured_responses = []
|
self.captured_responses = []
|
||||||
self.db = None
|
self.db = None
|
||||||
self.collection = None
|
self.collection = None
|
||||||
|
self.image_cache = {} # 图片ID到TOS链接的缓存映射 {image_id: tos_url}
|
||||||
self._cleanup_old_profiles()
|
self._cleanup_old_profiles()
|
||||||
self._setup_mongodb()
|
self._setup_mongodb()
|
||||||
|
self._load_image_cache()
|
||||||
|
|
||||||
def _setup_mongodb(self):
|
def _setup_mongodb(self):
|
||||||
"""设置MongoDB连接"""
|
"""设置MongoDB连接"""
|
||||||
@ -81,10 +89,42 @@ class DouyinPlayVVScraper:
|
|||||||
logging.info(f'MongoDB连接成功,使用数据库: {self.db.name},集合: {mongo_collection}')
|
logging.info(f'MongoDB连接成功,使用数据库: {self.db.name},集合: {mongo_collection}')
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f'MongoDB设置出错: {e}')
|
logging.error(f'MongoDB连接失败: {e}')
|
||||||
self.db = None
|
self.db = None
|
||||||
self.collection = None
|
self.collection = None
|
||||||
|
|
||||||
|
def _load_image_cache(self):
|
||||||
|
"""从数据库加载已存在的图片ID到TOS链接的映射"""
|
||||||
|
if self.collection is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
# 查询所有有封面图片的记录
|
||||||
|
cursor = self.collection.find(
|
||||||
|
{
|
||||||
|
'cover_image_url_original': {'$exists': True, '$ne': ''},
|
||||||
|
'cover_image_url': {'$exists': True, '$ne': ''}
|
||||||
|
},
|
||||||
|
{'cover_image_url_original': 1, 'cover_image_url': 1}
|
||||||
|
)
|
||||||
|
|
||||||
|
cache_count = 0
|
||||||
|
for doc in cursor:
|
||||||
|
original_url = doc.get('cover_image_url_original', '')
|
||||||
|
tos_url = doc.get('cover_image_url', '')
|
||||||
|
|
||||||
|
if original_url and tos_url and original_url != tos_url:
|
||||||
|
# 提取图片ID
|
||||||
|
image_id = self.extract_douyin_image_id(original_url)
|
||||||
|
if image_id:
|
||||||
|
self.image_cache[image_id] = tos_url
|
||||||
|
cache_count += 1
|
||||||
|
|
||||||
|
logging.info(f'从数据库加载图片缓存: {cache_count} 个图片映射')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f'加载图片缓存失败: {e}')
|
||||||
|
|
||||||
def _cleanup_old_profiles(self):
|
def _cleanup_old_profiles(self):
|
||||||
"""清理超过一天的旧临时Chrome配置文件"""
|
"""清理超过一天的旧临时Chrome配置文件"""
|
||||||
try:
|
try:
|
||||||
@ -696,6 +736,109 @@ class DouyinPlayVVScraper:
|
|||||||
|
|
||||||
logging.info('结果已保存到MongoDB')
|
logging.info('结果已保存到MongoDB')
|
||||||
|
|
||||||
|
def extract_douyin_image_id(self, cover_url):
    """
    Extract the image ID from a Douyin cover URL.

    Two URL layouts are recognised:
        1. https://p{N}-sign.douyinpic.com/obj/tos-cn-i-dy/{image_id}?{params}
        2. https://p{N}-sign.douyinpic.com/obj/douyin-user-image-file/{image_id}?{params}

    The ID must be a complete run of lowercase hex characters. A hex run that
    is immediately followed by further alphanumeric characters is rejected,
    because returning only the hex prefix of a longer ID would let two
    different images share one cache key.

    Args:
        cover_url (str): Douyin image URL.

    Returns:
        str: The image ID, or an empty string when none can be extracted.
    """
    if not cover_url:
        return ''

    # The negative lookahead makes sure the captured hex run is the whole ID
    # segment and not just the leading part of a longer, non-hex identifier.
    patterns = (
        r'/obj/tos-cn-i-dy/([a-f0-9]+)(?![0-9A-Za-z])',
        r'/obj/douyin-user-image-file/([a-f0-9]+)(?![0-9A-Za-z])',
    )

    try:
        for pattern in patterns:
            match = re.search(pattern, cover_url)
            if match:
                image_id = match.group(1)
                logging.debug(f'提取图片ID成功: {image_id} from {cover_url}')
                return image_id
    except TypeError as e:
        # re.search raises TypeError when cover_url is not a str.
        logging.error(f'提取图片ID异常: {cover_url}, 错误: {e}')
        return ''

    logging.warning(f'无法从URL中提取图片ID: {cover_url}')
    return ''
|
||||||
|
|
||||||
|
def upload_cover_image(self, cover_url, mix_name):
    """
    Copy a cover image to TOS and return its permanent URL, with de-duplication.

    Images whose ID is already present in ``self.image_cache`` are not
    uploaded again; the cached URL is returned instead.

    Args:
        cover_url: Temporary cover image link.
        mix_name: Collection name; only used in log messages.

    Returns:
        str: The permanent URL, or ``cover_url`` unchanged when it is empty
        or when anything in the upload path raises.
    """
    if not cover_url:
        return cover_url

    try:
        image_id = self.extract_douyin_image_id(cover_url)

        # Cache hit: this image was uploaded before, reuse its URL.
        if image_id and image_id in self.image_cache:
            cached_url = self.image_cache[image_id]
            logging.info(f'使用缓存图片: {image_id} -> {cached_url} (合集: {mix_name})')
            return cached_url

        # Pick the extension from the last path segment (query string
        # stripped) when it is a known image type; otherwise fall back to
        # .jpg.
        last_segment = cover_url.split('?')[0].split('/')[-1]
        suffix = last_segment.split('.')[-1].lower() if '.' in last_segment else ''
        if suffix in ['jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp']:
            file_extension = '.' + suffix
        else:
            file_extension = '.jpg'

        # Random object name under media/rank/.
        object_key = f"media/rank/{uuid.uuid4().hex}{file_extension}"

        logging.info(f'开始上传封面图片: {mix_name}')
        logging.info(f'封面图片URL: {cover_url}')

        oss_url = oss_client.upload_from_url(
            url=cover_url,
            object_key=object_key,
            return_url=True
        )
        logging.info(f'封面图片上传成功: {mix_name} -> {oss_url}')

        # Remember the result so the same image is not uploaded twice.
        if image_id:
            self.image_cache[image_id] = oss_url
            logging.debug(f'图片缓存已更新: {image_id} -> {oss_url}')

        return oss_url

    except Exception as e:
        logging.error(f'封面图片上传失败: {mix_name} - {str(e)}')
        return cover_url  # fall back to the original link on failure
|
||||||
|
|
||||||
def save_to_mongodb(self):
|
def save_to_mongodb(self):
|
||||||
"""将数据保存到MongoDB"""
|
"""将数据保存到MongoDB"""
|
||||||
if self.collection is None:
|
if self.collection is None:
|
||||||
@ -711,16 +854,34 @@ class DouyinPlayVVScraper:
|
|||||||
documents = []
|
documents = []
|
||||||
|
|
||||||
for item in self.play_vv_items:
|
for item in self.play_vv_items:
|
||||||
|
# 获取原始封面图片URL
|
||||||
|
original_cover_url = item.get('cover_image_url', '')
|
||||||
|
mix_name = item.get('mix_name', '')
|
||||||
|
|
||||||
|
# 处理封面图片
|
||||||
|
permanent_cover_url = ''
|
||||||
|
if original_cover_url:
|
||||||
|
# 上传封面图片到TOS获取永久链接
|
||||||
|
permanent_cover_url = self.upload_cover_image(original_cover_url, mix_name)
|
||||||
|
|
||||||
|
# 如果上传失败且有原始链接,记录警告但继续保存
|
||||||
|
if permanent_cover_url == original_cover_url:
|
||||||
|
logging.warning(f'封面图片上传失败,使用原始链接: {mix_name}')
|
||||||
|
else:
|
||||||
|
# 没有封面图片,使用空字符串
|
||||||
|
permanent_cover_url = ''
|
||||||
|
|
||||||
# 保留用户要求的7个字段 + cover_image_url作为合集封面图片完整链接
|
# 保留用户要求的7个字段 + cover_image_url作为合集封面图片完整链接
|
||||||
doc = {
|
doc = {
|
||||||
'batch_time': batch_time,
|
'batch_time': batch_time,
|
||||||
'mix_name': item.get('mix_name', ''),
|
'mix_name': mix_name,
|
||||||
'video_url': item.get('video_url', ''),
|
'video_url': item.get('video_url', ''),
|
||||||
'playcount': item.get('formatted', ''),
|
'playcount': item.get('formatted', ''),
|
||||||
'play_vv': item.get('play_vv', 0),
|
'play_vv': item.get('play_vv', 0),
|
||||||
'request_id': item.get('request_id', ''),
|
'request_id': item.get('request_id', ''),
|
||||||
'rank': 0, # 临时设置,后面会重新计算
|
'rank': 0, # 临时设置,后面会重新计算
|
||||||
'cover_image_url': item.get('cover_image_url', ''), # 合集封面图片主链接(完整URL)
|
'cover_image_url_original': original_cover_url, # 保存原始临时链接用于调试
|
||||||
|
'cover_image_url': permanent_cover_url, # 合集封面图片永久链接
|
||||||
'cover_backup_urls': item.get('cover_backup_urls', []) # 封面图片备用链接列表
|
'cover_backup_urls': item.get('cover_backup_urls', []) # 封面图片备用链接列表
|
||||||
}
|
}
|
||||||
documents.append(doc)
|
documents.append(doc)
|
||||||
@ -739,12 +900,16 @@ class DouyinPlayVVScraper:
|
|||||||
max_play_vv = max(doc['play_vv'] for doc in documents) if documents else 0
|
max_play_vv = max(doc['play_vv'] for doc in documents) if documents else 0
|
||||||
|
|
||||||
logging.info(f'MongoDB保存统计: 总播放量={total_play_vv:,}, 最高播放量={max_play_vv:,}')
|
logging.info(f'MongoDB保存统计: 总播放量={total_play_vv:,}, 最高播放量={max_play_vv:,}')
|
||||||
logging.info(f'保存的字段: batch_time, mix_name, video_url, playcount, play_vv, request_id, rank, cover_image_url, cover_backup_urls')
|
logging.info(f'保存的字段: batch_time, mix_name, video_url, playcount, play_vv, request_id, rank, cover_image_url_original, cover_image_url')
|
||||||
|
|
||||||
# 统计封面图片提取情况
|
# 统计封面图片处理情况
|
||||||
cover_count = sum(1 for doc in documents if doc.get('cover_image_url'))
|
cover_count = sum(1 for doc in documents if doc.get('cover_image_url'))
|
||||||
backup_count = sum(1 for doc in documents if doc.get('cover_backup_urls'))
|
original_count = sum(1 for item in self.play_vv_items if item.get('cover_image_url'))
|
||||||
logging.info(f'封面图片统计: {cover_count}/{len(documents)} 个合集有主封面链接, {backup_count} 个合集有备用链接')
|
success_count = sum(1 for doc in documents if doc.get('cover_image_url') and doc.get('cover_image_url') != doc.get('cover_image_url_original', ''))
|
||||||
|
|
||||||
|
logging.info(f'封面图片统计: {cover_count}/{len(documents)} 个合集有封面链接')
|
||||||
|
logging.info(f'封面上传统计: {success_count}/{original_count} 个封面成功上传到TOS')
|
||||||
|
logging.info(f'图片缓存统计: 当前缓存 {len(self.image_cache)} 个图片映射')
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f'保存到MongoDB时出错: {e}')
|
logging.error(f'保存到MongoDB时出错: {e}')
|
||||||
|
|||||||
562
backend/handlers/Rankings/tos_client.py
Normal file
562
backend/handlers/Rankings/tos_client.py
Normal file
@ -0,0 +1,562 @@
|
|||||||
|
from typing import Any, Optional
|
||||||
|
import mimetypes
|
||||||
|
from io import StringIO
|
||||||
|
import os
|
||||||
|
import tos
|
||||||
|
import urllib3
|
||||||
|
from urllib3.exceptions import InsecureRequestWarning
|
||||||
|
from config import API_CONFIG
|
||||||
|
# 火山对象存储
|
||||||
|
class TOSClient:
    """Wrapper around ``tos.TosClientV2`` bound to one bucket and one custom domain."""

    def __init__(
        self,
        access_key_id: str,
        access_key_secret: str,
        endpoint: str,
        region: str,
        bucket_name: str,
        self_domain: str,
        disable_ssl_warnings: bool = True
    ):
        """
        Create the underlying SDK client.

        Args:
            access_key_id: Access key (AK).
            access_key_secret: Secret key (SK).
            endpoint: Regional endpoint; stored on ``self.endpoint`` only. The
                SDK client itself is built against ``self_domain``.
            region: Region name passed to the SDK.
            bucket_name: Bucket used by every operation of this client.
            self_domain: Custom domain; passed to the SDK as its endpoint
                (with ``is_custom_domain=True``) and used to build public URLs.
            disable_ssl_warnings: When true, silence urllib3's
                ``InsecureRequestWarning``.
        """
        if disable_ssl_warnings:
            urllib3.disable_warnings(InsecureRequestWarning)
        self.bucket_name = bucket_name
        self.self_domain = self_domain
        self.endpoint = endpoint
        self.client = tos.TosClientV2(
            ak=access_key_id,
            sk=access_key_secret,
            endpoint=self_domain,
            region=region,
            is_custom_domain=True,
            connection_time=30, socket_timeout=60, max_retry_count=3
        )

    def get_base_url(self, object_key: str) -> str:
        """Return the unsigned public URL of ``object_key`` on the custom domain."""
        return f"https://{self.self_domain}/{object_key}"

    def generate_url(self, object_key: str, expires: int = 3600) -> str:
        """Return a pre-signed GET URL for ``object_key``.

        Args:
            object_key: Object key (path) inside the bucket.
            expires: Lifetime handed to the SDK (default 3600, i.e. one hour).
        """
        pre_signed_url_output = self.client.pre_signed_url(
            tos.HttpMethodType.Http_Method_Get,
            bucket=self.bucket_name,
            key=object_key,
            expires=expires)
        return pre_signed_url_output.signed_url

    def upload_string(
        self,
        content_str: str,
        object_key: str,
        headers: Optional[dict] = None,
        return_url: bool = True,
    ) -> str:
        """
        Upload a text string as a ``text/plain`` object.

        The text is encoded to UTF-8 bytes before it is handed to the SDK so
        that non-ASCII content is transmitted correctly and the body length
        is the byte length, exactly like ``upload_bytes``.

        Args:
            content_str: Text to upload.
            object_key: Object key (path) inside the bucket.
            headers: Accepted for interface compatibility; currently unused.
            return_url: Return the public URL when true, else ``object_key``.

        Returns:
            str: Public URL of the object, or ``object_key``.

        Raises:
            Exception: If the upload fails or the status code is not 200.
        """
        try:
            result = self.client.put_object(
                bucket=self.bucket_name,
                key=object_key,
                content_type='text/plain',
                content=content_str.encode('utf-8'),
            )

            print('upload_string http status code:{}'.format(result.status_code))
            if result.status_code != 200:
                raise Exception(f"上传失败,HTTP状态码: {result.status_code}")

            return self.get_base_url(object_key) if return_url else object_key
        except Exception as e:
            raise Exception(f"上传文件到OSS失败: {str(e)}") from e

    def upload_file(
        self,
        local_file_path: str,
        object_key: Optional[str] = None,
        headers: Optional[dict] = None,
        return_url: bool = True,
        expires: int = 3600
    ) -> str:
        """
        Upload a local file.

        Args:
            local_file_path: Path of the file to upload.
            object_key: Object key (path); defaults to the file's base name.
            headers: Accepted for interface compatibility; currently unused.
            return_url: Return the public URL when true, else ``object_key``.
            expires: Accepted for interface compatibility; currently unused.

        Returns:
            str: Public URL of the object, or ``object_key``.

        Raises:
            FileNotFoundError: If ``local_file_path`` does not exist.
            Exception: If the upload fails or the status code is not 200.
        """
        if not os.path.exists(local_file_path):
            raise FileNotFoundError(f"本地文件不存在: {local_file_path}")

        if object_key is None:
            object_key = os.path.basename(local_file_path)

        # Content-Type is guessed from the file name; empty when unknown.
        content_type, _ = mimetypes.guess_type(local_file_path)

        try:
            result = self.client.put_object_from_file(
                bucket=self.bucket_name,
                key=object_key,
                content_type=content_type or '',
                file_path=local_file_path,
            )

            if result.status_code != 200:
                raise Exception(f"上传失败,HTTP状态码: {result.status_code}")

            return self.get_base_url(object_key) if return_url else object_key
        except Exception as e:
            raise Exception(f"上传文件到OSS失败: {str(e)}") from e

    def upload_bytes(
        self,
        data: bytes,
        object_key: str,
        content_type: Optional[str] = None,
        headers: Optional[dict] = None,
        return_url: bool = True,
        expires: int = 3600
    ) -> str:
        """
        Upload raw bytes.

        Args:
            data: Bytes to upload.
            object_key: Object key (path) inside the bucket.
            content_type: MIME type (e.g. ``image/jpeg``); defaults to
                ``application/octet-stream``.
            headers: Accepted for interface compatibility; currently unused.
            return_url: Return the public URL when true, else ``object_key``.
            expires: Accepted for interface compatibility; currently unused.

        Returns:
            str: Public URL of the object, or ``object_key``.

        Raises:
            Exception: If the upload fails or the status code is not 200.
        """
        try:
            result = self.client.put_object(
                bucket=self.bucket_name,
                key=object_key,
                content_type=content_type or 'application/octet-stream',
                content=data,
            )

            if result.status_code != 200:
                raise Exception(f"上传失败,HTTP状态码: {result.status_code}")

            return self.get_base_url(object_key) if return_url else object_key
        except Exception as e:
            raise Exception(f"上传字节数据到OSS失败: {str(e)}") from e

    def upload_from_url(
        self,
        url: str,
        object_key: str,
        headers: Optional[dict] = None,
        timeout: int = 30,
        return_url: bool = True,
        expires: int = 3600
    ) -> str:
        """
        Download a file over HTTP(S) and upload it to the bucket.

        Args:
            url: Source URL; must start with ``http://`` or ``https://``.
            object_key: Object key (path) inside the bucket.
            headers: Forwarded to ``upload_bytes`` (unused there); not sent
                with the download request.
            timeout: Download timeout in seconds.
            return_url: Return the public URL when true, else ``object_key``.
            expires: Forwarded to ``upload_bytes`` (unused there).

        Returns:
            str: Public URL of the object, or ``object_key``.

        Raises:
            ValueError: If ``url`` has no http/https scheme.
            Exception: If the download or the upload fails.
        """
        import requests

        if not url.startswith(('http://', 'https://')):
            raise ValueError("URL必须以http://或https://开头")

        try:
            # The whole body is needed in memory, so there is no point in
            # streaming; the context manager guarantees the connection is
            # released even when raise_for_status() fails.
            with requests.get(url, timeout=timeout) as response:
                response.raise_for_status()
                content_type = response.headers.get('Content-Type', '')
                data = response.content

            if not content_type:
                content_type = mimetypes.guess_type(url)[0] or 'application/octet-stream'

            return self.upload_bytes(
                data=data,
                object_key=object_key,
                content_type=content_type,
                headers=headers,
                return_url=return_url,
                expires=expires
            )
        except requests.exceptions.RequestException as e:
            raise Exception(f"下载网络文件失败: {str(e)}") from e
        except Exception as e:
            raise Exception(f"上传网络文件到OSS失败: {str(e)}") from e

    def _format_object_key(self, object_key: str) -> str:
        """Normalise ``object_key``: when it contains the custom domain (for
        example a full public URL), keep only the part after the domain with
        leading slashes removed; otherwise return it unchanged."""
        if self.self_domain and self.self_domain in object_key:
            domain_index = object_key.find(self.self_domain)
            if domain_index != -1:
                object_key = object_key[domain_index + len(self.self_domain):].lstrip('/')
        return object_key

    def delete_file(self, object_key: str) -> bool:
        """
        Delete an object.

        Args:
            object_key: Object key or full public URL of the object.

        Returns:
            bool: True on success; False when the SDK call raised (the error
            is printed, not propagated).
        """
        try:
            self.client.delete_object(
                bucket=self.bucket_name,
                key=self._format_object_key(object_key),
            )
            return True
        except Exception as e:
            print(f"删除文件失败: {str(e)}")
            return False

    def download_file(self, object_key: str) -> bytes:
        """
        Download an object and return its content.

        Args:
            object_key: Object key or full public URL of the object.

        Returns:
            bytes: The object's content.

        Raises:
            Exception: If the SDK call fails or the object is empty.
        """
        try:
            object_key = self._format_object_key(object_key)

            object_stream = self.client.get_object(
                bucket=self.bucket_name,
                key=object_key,
            )
            content = object_stream.read() or b''
            if not content:
                raise Exception(f"文件内容为空: {object_key}")
            return content
        except tos.exceptions.TosClientError as e:
            # Client-side failure: usually invalid parameters or a network error.
            print('TOS下载 fail with client error, message:{}, cause: {}'.format(e.message, e.cause))
            raise Exception(f"下载异常: {object_key} {e.message}") from e
        except tos.exceptions.TosServerError as e:
            # Server-side failure: details (including the request id, which
            # is what support needs to trace the call) come from the response.
            print('TOS下载 fail with server error, code: {}'.format(e.code))
            print('TOS下载 error with request id: {}'.format(e.request_id))
            print('TOS下载 error with message: {}'.format(e.message))
            print('TOS下载 error with http code: {}'.format(e.status_code))
            print('TOS下载 error with ec: {}'.format(e.ec))
            print('TOS下载 error with request url: {}'.format(e.request_url))
            raise Exception(f"下载异常: {object_key} {e.message}") from e
        except Exception as e:
            raise Exception(f"下载文件失败: {str(e)}") from e
|
||||||
|
|
||||||
|
|
||||||
|
class TOSChunkUploader:
    """Multipart (chunked) upload helper built on an existing ``TOSClient``."""

    def __init__(self, tos_client: "TOSClient"):
        """
        Reuse the SDK client, bucket and custom domain of ``tos_client``.

        Args:
            tos_client: Configured ``TOSClient`` instance.
        """
        self.client = tos_client.client
        self.bucket_name = tos_client.bucket_name
        self.self_domain = tos_client.self_domain

    def init_multipart_upload(self, object_key: str, content_type: Optional[str] = None) -> Optional[str]:
        """
        Start a multipart upload.

        Args:
            object_key: Object key.
            content_type: MIME type; guessed from ``object_key`` when omitted,
                falling back to ``application/octet-stream``.

        Returns:
            The upload ID reported by the SDK.

        Raises:
            Exception: If the SDK call fails.
        """
        try:
            if not content_type:
                content_type = mimetypes.guess_type(object_key)[0] or 'application/octet-stream'

            result = self.client.create_multipart_upload(
                bucket=self.bucket_name,
                key=object_key,
                content_type=content_type
            )
            return result.upload_id

        except tos.exceptions.TosClientError as e:
            raise Exception(f"初始化分片上传失败(客户端错误): {e.message}") from e
        except tos.exceptions.TosServerError as e:
            raise Exception(f"初始化分片上传失败(服务端错误): {e.message}") from e
        except Exception as e:
            raise Exception(f"初始化分片上传失败: {str(e)}") from e

    def upload_part(self, object_key: str, upload_id: str, part_number: int, data: bytes) -> dict:
        """
        Upload one part.

        Args:
            object_key: Object key.
            upload_id: Upload ID returned by ``init_multipart_upload``.
            part_number: Part number (starting at 1).
            data: Content of this part.

        Returns:
            dict: ``part_number``, ``etag``, ``part_size``, ``hash_crc64_ecma``
            (always None; no CRC is computed here) and ``is_completed`` —
            the shape ``complete_multipart_upload`` consumes.

        Raises:
            Exception: If the SDK call fails.
        """
        try:
            from io import BytesIO

            result = self.client.upload_part(
                bucket=self.bucket_name,
                key=object_key,
                upload_id=upload_id,
                part_number=part_number,
                content=BytesIO(data)
            )

            return {
                'part_number': part_number,
                'etag': result.etag,
                'part_size': len(data),
                'hash_crc64_ecma': None,
                'is_completed': True
            }

        except tos.exceptions.TosClientError as e:
            raise Exception(f"上传分片失败(客户端错误): {e.message}") from e
        except tos.exceptions.TosServerError as e:
            raise Exception(f"上传分片失败(服务端错误): {e.message}") from e
        except Exception as e:
            raise Exception(f"上传分片失败: {str(e)}") from e

    def complete_multipart_upload(self, object_key: str, upload_id: str, parts: list) -> str:
        """
        Finish a multipart upload.

        Args:
            object_key: Object key.
            upload_id: Upload ID.
            parts: Part dicts; each needs ``part_number`` and ``etag`` and may
                carry ``part_size``, ``hash_crc64_ecma`` and ``is_completed``.

        Returns:
            str: Public URL of the assembled object on the custom domain.

        Raises:
            Exception: If the SDK call fails.
        """
        try:
            # Parts are assembled in part-number order; each part's offset is
            # the sum of the sizes of the parts before it.
            part_list = []
            current_offset = 0
            for part in sorted(parts, key=lambda x: x['part_number']):
                part_list.append(tos.models2.PartInfo(
                    part_number=part['part_number'],
                    etag=part['etag'],
                    part_size=part.get('part_size'),
                    offset=current_offset,
                    hash_crc64_ecma=part.get('hash_crc64_ecma'),
                    is_completed=part.get('is_completed', True)
                ))
                if part.get('part_size'):
                    current_offset += part['part_size']

            self.client.complete_multipart_upload(
                bucket=self.bucket_name,
                key=object_key,
                upload_id=upload_id,
                parts=part_list
            )

            return f"https://{self.self_domain}/{object_key}"

        except tos.exceptions.TosClientError as e:
            raise Exception(f"完成分片上传失败(客户端错误): {e.message}") from e
        except tos.exceptions.TosServerError as e:
            raise Exception(f"完成分片上传失败(服务端错误): {e.message}") from e
        except Exception as e:
            raise Exception(f"完成分片上传失败: {str(e)}") from e

    def abort_multipart_upload(self, object_key: str, upload_id: str) -> bool:
        """
        Abort a multipart upload.

        Args:
            object_key: Object key.
            upload_id: Upload ID.

        Returns:
            bool: True on success; False when the SDK call raised (the error
            is printed, not propagated).
        """
        try:
            self.client.abort_multipart_upload(
                bucket=self.bucket_name,
                key=object_key,
                upload_id=upload_id
            )
            return True

        except tos.exceptions.TosClientError as e:
            print(f"取消分片上传失败(客户端错误): {e.message}")
            return False
        except tos.exceptions.TosServerError as e:
            print(f"取消分片上传失败(服务端错误): {e.message}")
            return False
        except Exception as e:
            print(f"取消分片上传失败: {str(e)}")
            return False

    def list_parts(self, object_key: str, upload_id: str) -> list:
        """
        List the parts uploaded so far.

        Args:
            object_key: Object key.
            upload_id: Upload ID.

        Returns:
            list: One dict per part with ``part_number``, ``etag``, ``size``
            and ``last_modified``; an empty list when the SDK call raised
            (the error is printed, not propagated).
        """
        try:
            result = self.client.list_parts(
                bucket=self.bucket_name,
                key=object_key,
                upload_id=upload_id
            )

            return [
                {
                    'part_number': part.part_number,
                    'etag': part.etag,
                    'size': part.size,
                    'last_modified': part.last_modified
                }
                for part in result.parts
            ]

        except Exception as e:
            print(f"列出分片失败: {str(e)}")
            return []
|
||||||
|
|
||||||
|
|
||||||
|
# 创建OSS客户端
|
||||||
|
from config import TOS_CONFIG

# Shared, module-level client built from the project configuration. Only the
# settings TOSClient's constructor accepts are forwarded, by keyword.
oss_client = TOSClient(**{
    option: TOS_CONFIG[option]
    for option in (
        'access_key_id',
        'access_key_secret',
        'endpoint',
        'region',
        'bucket_name',
        'self_domain',
        'disable_ssl_warnings',
    )
})

# Multipart uploader that reuses the shared client's connection settings.
chunk_uploader = TOSChunkUploader(oss_client)
|
||||||
@ -128,8 +128,7 @@ def format_mix_item(doc):
|
|||||||
"play_vv": doc.get("play_vv", 0),
|
"play_vv": doc.get("play_vv", 0),
|
||||||
"request_id": doc.get("request_id", ""),
|
"request_id": doc.get("request_id", ""),
|
||||||
"rank": doc.get("rank", 0),
|
"rank": doc.get("rank", 0),
|
||||||
"cover_image_url": doc.get("cover_image_url", ""),
|
"cover_image_url": doc.get("cover_image_url", "")
|
||||||
"cover_backup_urls": doc.get("cover_backup_urls", [])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_mix_list(page=1, limit=20, sort_by="playcount"):
|
def get_mix_list(page=1, limit=20, sort_by="playcount"):
|
||||||
|
|||||||
@ -42,8 +42,7 @@
|
|||||||
"play_vv": 120000000,
|
"play_vv": 120000000,
|
||||||
"request_id": "request_xxx",
|
"request_id": "request_xxx",
|
||||||
"rank": 1,
|
"rank": 1,
|
||||||
"cover_image_url": "https://p3.douyinpic.com/xxx",
|
"cover_image_url": "https://p3.douyinpic.com/xxx"
|
||||||
"cover_backup_urls": ["url1", "url2"]
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -199,7 +198,6 @@ GET /api/rank/videos?page=1&limit=20&sort=growth&start_date=2025-10-16&end_date=
|
|||||||
"request_id": "request_xxx",
|
"request_id": "request_xxx",
|
||||||
"rank": 1,
|
"rank": 1,
|
||||||
"cover_image_url": "https://p3.douyinpic.com/xxx",
|
"cover_image_url": "https://p3.douyinpic.com/xxx",
|
||||||
"cover_backup_urls": ["url1", "url2"],
|
|
||||||
"growth": 5000000,
|
"growth": 5000000,
|
||||||
"growth_rate": 4.35
|
"growth_rate": 4.35
|
||||||
}
|
}
|
||||||
@ -252,8 +250,7 @@ GET /api/rank/top?limit=10
|
|||||||
"play_vv": 120000000,
|
"play_vv": 120000000,
|
||||||
"request_id": "request_xxx",
|
"request_id": "request_xxx",
|
||||||
"rank": 1,
|
"rank": 1,
|
||||||
"cover_image_url": "https://p3.douyinpic.com/xxx",
|
"cover_image_url": "https://p3.douyinpic.com/xxx"
|
||||||
"cover_backup_urls": ["url1", "url2"]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"total": 10,
|
"total": 10,
|
||||||
@ -297,8 +294,7 @@ GET /api/rank/search?q=关键词&page=1&limit=10
|
|||||||
"play_vv": 120000000,
|
"play_vv": 120000000,
|
||||||
"request_id": "request_xxx",
|
"request_id": "request_xxx",
|
||||||
"rank": 1,
|
"rank": 1,
|
||||||
"cover_image_url": "https://p3.douyinpic.com/xxx",
|
"cover_image_url": "https://p3.douyinpic.com/xxx"
|
||||||
"cover_backup_urls": ["url1", "url2"]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"keyword": "关键词",
|
"keyword": "关键词",
|
||||||
@ -347,8 +343,7 @@ GET /api/rank/detail?id=674f1234567890abcdef
|
|||||||
"play_vv": 120000000,
|
"play_vv": 120000000,
|
||||||
"request_id": "request_xxx",
|
"request_id": "request_xxx",
|
||||||
"rank": 1,
|
"rank": 1,
|
||||||
"cover_image_url": "https://p3.douyinpic.com/xxx",
|
"cover_image_url": "https://p3.douyinpic.com/xxx"
|
||||||
"cover_backup_urls": ["url1", "url2"]
|
|
||||||
},
|
},
|
||||||
"update_time": "2025-10-17 15:30:00"
|
"update_time": "2025-10-17 15:30:00"
|
||||||
}
|
}
|
||||||
@ -887,8 +882,7 @@ wx.request({
|
|||||||
- 提供搜索建议
|
- 提供搜索建议
|
||||||
|
|
||||||
### 3. 图片加载
|
### 3. 图片加载
|
||||||
- 优先使用 `cover_image_url`
|
- 使用 `cover_image_url` 作为封面图片
|
||||||
- 备用 `cover_backup_urls` 作为备选
|
|
||||||
- 添加图片加载失败处理
|
- 添加图片加载失败处理
|
||||||
|
|
||||||
### 4. 数据更新
|
### 4. 数据更新
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user