解决合并冲突 - 接受远程版本
This commit is contained in:
commit
97c5fbe4df
@ -22,8 +22,7 @@ import sys
|
|||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import argparse
|
import argparse
|
||||||
from pathlib import Path
|
from datetime import datetime, date, timedelta
|
||||||
from datetime import datetime, date
|
|
||||||
import config
|
import config
|
||||||
|
|
||||||
# 添加项目路径到 Python 路径
|
# 添加项目路径到 Python 路径
|
||||||
@ -33,16 +32,18 @@ from handlers.Rankings.rank_data_scraper import DouyinPlayVVScraper
|
|||||||
|
|
||||||
|
|
||||||
# 配置日志的函数
|
# 配置日志的函数
|
||||||
def setup_logging():
|
def setup_logging(quiet_mode=False):
|
||||||
"""设置日志配置"""
|
"""设置日志配置"""
|
||||||
# 确保logs目录存在
|
# 确保logs目录存在
|
||||||
import os
|
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
logs_dir = os.path.join(script_dir, 'handlers', 'Rankings', 'logs')
|
logs_dir = os.path.join(script_dir, 'handlers', 'Rankings', 'logs')
|
||||||
os.makedirs(logs_dir, exist_ok=True)
|
os.makedirs(logs_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# 在安静模式下,只记录WARNING及以上级别的日志到控制台
|
||||||
|
console_level = logging.WARNING if quiet_mode else logging.INFO
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO,
|
level=logging.INFO, # 文件日志仍然记录所有INFO级别
|
||||||
format='%(asctime)s - %(levelname)s - %(message)s',
|
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||||
handlers=[
|
handlers=[
|
||||||
logging.FileHandler(os.path.join(logs_dir, 'scheduler.log'), encoding='utf-8'),
|
logging.FileHandler(os.path.join(logs_dir, 'scheduler.log'), encoding='utf-8'),
|
||||||
@ -50,19 +51,58 @@ def setup_logging():
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 如果是安静模式,调整控制台处理器的级别
|
||||||
|
if quiet_mode:
|
||||||
|
for handler in logging.getLogger().handlers:
|
||||||
|
if isinstance(handler, logging.StreamHandler) and not isinstance(handler, logging.FileHandler):
|
||||||
|
handler.setLevel(console_level)
|
||||||
|
|
||||||
class DouyinAutoScheduler:
|
class DouyinAutoScheduler:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.is_running = False
|
self.is_running = False
|
||||||
|
|
||||||
|
def _normalize_play_vv(self, play_vv):
|
||||||
|
"""标准化播放量数据类型,将字符串转换为数字"""
|
||||||
|
if isinstance(play_vv, str):
|
||||||
|
try:
|
||||||
|
return int(play_vv.replace(',', '').replace('万', '0000').replace('亿', '00000000'))
|
||||||
|
except:
|
||||||
|
return 0
|
||||||
|
elif not isinstance(play_vv, (int, float)):
|
||||||
|
return 0
|
||||||
|
return play_vv
|
||||||
|
|
||||||
|
def _deduplicate_videos_by_mix_name(self, videos, include_rank=False):
|
||||||
|
"""按短剧名称去重,保留播放量最高的记录"""
|
||||||
|
unique_data = {}
|
||||||
|
for video in videos:
|
||||||
|
mix_name = video.get("mix_name", "")
|
||||||
|
if mix_name:
|
||||||
|
# 标准化播放量数据类型
|
||||||
|
play_vv = self._normalize_play_vv(video.get("play_vv", 0))
|
||||||
|
|
||||||
|
if mix_name not in unique_data or play_vv > unique_data[mix_name].get("play_vv", 0):
|
||||||
|
if include_rank:
|
||||||
|
# 用于昨天数据的格式
|
||||||
|
unique_data[mix_name] = {
|
||||||
|
"play_vv": play_vv,
|
||||||
|
"video_id": str(video.get("_id", "")),
|
||||||
|
"rank": 0 # 稍后计算排名
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
# 用于今天数据的格式,直接更新原视频对象
|
||||||
|
video["play_vv"] = play_vv
|
||||||
|
unique_data[mix_name] = video
|
||||||
|
|
||||||
|
return unique_data
|
||||||
|
|
||||||
def run_douyin_scraper(self):
|
def run_douyin_scraper(self):
|
||||||
"""执行抖音播放量抓取任务"""
|
"""执行抖音播放量抓取任务"""
|
||||||
try:
|
try:
|
||||||
logging.info("🚀 开始执行抖音播放量抓取任务...")
|
logging.warning("🚀 开始执行抖音播放量抓取任务...")
|
||||||
|
|
||||||
# 设置环境变量,确保自动模式
|
# 设置环境变量,确保自动模式
|
||||||
os.environ['AUTO_CONTINUE'] = '1'
|
os.environ['AUTO_CONTINUE'] = '1'
|
||||||
# 设置定时器模式环境变量,跳过评论抓取等函数
|
|
||||||
os.environ['TIMER_MODE'] = '1'
|
|
||||||
|
|
||||||
# 直接创建并运行 DouyinPlayVVScraper 实例
|
# 直接创建并运行 DouyinPlayVVScraper 实例
|
||||||
scraper = DouyinPlayVVScraper(
|
scraper = DouyinPlayVVScraper(
|
||||||
@ -122,7 +162,7 @@ class DouyinAutoScheduler:
|
|||||||
today_videos_raw = list(douyin_collection.find({"batch_time": latest_batch_time}).sort("play_vv", -1))
|
today_videos_raw = list(douyin_collection.find({"batch_time": latest_batch_time}).sort("play_vv", -1))
|
||||||
logging.info(f"📊 最新批次数据数量: {len(today_videos_raw)}")
|
logging.info(f"📊 最新批次数据数量: {len(today_videos_raw)}")
|
||||||
|
|
||||||
# 按短剧名称去重(虽然同一批次应该不会有重复,但为了代码健壮性保留此逻辑)
|
# 按短剧名称去重,每个短剧只保留播放量最高的一条
|
||||||
unique_videos = {}
|
unique_videos = {}
|
||||||
for video in today_videos_raw:
|
for video in today_videos_raw:
|
||||||
mix_name = video.get("mix_name", "")
|
mix_name = video.get("mix_name", "")
|
||||||
@ -133,34 +173,24 @@ class DouyinAutoScheduler:
|
|||||||
|
|
||||||
logging.info(f"📊 今日数据去重后:{len(today_videos)} 个独特短剧(原始数据:{len(today_videos_raw)} 条)")
|
logging.info(f"📊 今日数据去重后:{len(today_videos)} 个独特短剧(原始数据:{len(today_videos_raw)} 条)")
|
||||||
|
|
||||||
# 获取昨天最后一批次的数据
|
# 获取昨天的榜单数据(如果存在),取最新的计算结果
|
||||||
yesterday_batch = douyin_collection.find_one({
|
yesterday_ranking = rankings_collection.find_one({
|
||||||
"batch_time": {"$regex": f"^{yesterday_str}"}
|
"date": yesterday_str,
|
||||||
}, sort=[("batch_time", -1)])
|
"type": "comprehensive"
|
||||||
|
}, sort=[("calculation_sequence", -1)])
|
||||||
|
|
||||||
yesterday_data = {}
|
yesterday_data = {}
|
||||||
if yesterday_batch:
|
if yesterday_ranking and "data" in yesterday_ranking:
|
||||||
# 获取昨天最后一批次的所有数据
|
# 将昨天的数据转换为字典,以短剧名称为键
|
||||||
yesterday_videos = list(douyin_collection.find({
|
for item in yesterday_ranking["data"]:
|
||||||
"batch_time": yesterday_batch["batch_time"]
|
title = item.get("title", "")
|
||||||
}).sort("play_vv", -1))
|
if title:
|
||||||
|
yesterday_data[title] = {
|
||||||
# 按短剧名称去重,保留播放量最高的记录
|
"rank": item.get("rank", 0),
|
||||||
for video in yesterday_videos:
|
"play_vv": item.get("play_vv", 0),
|
||||||
mix_name = video.get("mix_name", "")
|
"video_id": item.get("video_id", "")
|
||||||
if mix_name and (mix_name not in yesterday_data or video.get("play_vv", 0) > yesterday_data[mix_name].get("play_vv", 0)):
|
|
||||||
yesterday_data[mix_name] = {
|
|
||||||
"play_vv": video.get("play_vv", 0),
|
|
||||||
"video_id": str(video.get("_id", "")),
|
|
||||||
"rank": 0 # 稍后计算排名
|
|
||||||
}
|
}
|
||||||
|
logging.info(f"📊 找到昨天的榜单数据,共 {len(yesterday_data)} 个短剧")
|
||||||
# 计算排名
|
|
||||||
sorted_videos = sorted(yesterday_data.items(), key=lambda x: x[1]["play_vv"], reverse=True)
|
|
||||||
for rank, (mix_name, data) in enumerate(sorted_videos, 1):
|
|
||||||
yesterday_data[mix_name]["rank"] = rank
|
|
||||||
|
|
||||||
logging.info(f"📊 找到昨天的原始数据,共 {len(yesterday_data)} 个短剧")
|
|
||||||
else:
|
else:
|
||||||
logging.info("📊 未找到昨天的原始数据,将作为首次生成")
|
logging.info("📊 未找到昨天的原始数据,将作为首次生成")
|
||||||
|
|
||||||
@ -337,8 +367,6 @@ class DouyinAutoScheduler:
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""主函数"""
|
"""主函数"""
|
||||||
import argparse
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
parser = argparse.ArgumentParser(description='抖音播放量自动抓取定时器')
|
parser = argparse.ArgumentParser(description='抖音播放量自动抓取定时器')
|
||||||
parser.add_argument('--test', action='store_true', help='测试模式 - 立即执行一次')
|
parser.add_argument('--test', action='store_true', help='测试模式 - 立即执行一次')
|
||||||
@ -347,23 +375,51 @@ def main():
|
|||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
# 设置日志配置
|
# 设置日志配置 - 只在定时器模式下启用静默模式
|
||||||
setup_logging()
|
quiet_mode = not (args.test or args.once or args.ranking_only)
|
||||||
|
setup_logging(quiet_mode=quiet_mode)
|
||||||
|
|
||||||
print("正在初始化定时器...")
|
print("正在初始化定时器...")
|
||||||
scheduler = DouyinAutoScheduler()
|
scheduler = DouyinAutoScheduler()
|
||||||
|
|
||||||
if args.test:
|
if args.test:
|
||||||
|
scheduler._is_timer_mode = False
|
||||||
print("执行测试模式...")
|
print("执行测试模式...")
|
||||||
scheduler.run_test()
|
scheduler.run_test()
|
||||||
elif args.once:
|
elif args.once:
|
||||||
|
scheduler._is_timer_mode = False
|
||||||
print("执行单次模式...")
|
print("执行单次模式...")
|
||||||
scheduler.run_once()
|
scheduler.run_once()
|
||||||
elif args.ranking_only:
|
elif args.ranking_only:
|
||||||
|
scheduler._is_timer_mode = False
|
||||||
print("执行榜单生成模式...")
|
print("执行榜单生成模式...")
|
||||||
scheduler.run_ranking_only()
|
scheduler.run_ranking_only()
|
||||||
else:
|
else:
|
||||||
|
scheduler._is_timer_mode = True
|
||||||
print("启动定时器模式...")
|
print("启动定时器模式...")
|
||||||
|
|
||||||
|
# 显示定时器信息(使用print确保能看到)
|
||||||
|
from datetime import datetime
|
||||||
|
current_time = datetime.now()
|
||||||
|
print(f"🕐 当前时间:{current_time.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||||
|
print(f"⏰ 执行规则:每小时整点执行抖音播放量抓取")
|
||||||
|
|
||||||
|
# 计算下次执行时间
|
||||||
|
next_hour = current_time.replace(minute=0, second=0, microsecond=0)
|
||||||
|
if current_time.minute > 0 or current_time.second > 0:
|
||||||
|
next_hour = next_hour.replace(hour=next_hour.hour + 1)
|
||||||
|
if next_hour.hour >= 24:
|
||||||
|
from datetime import timedelta
|
||||||
|
next_hour = next_hour.replace(hour=0) + timedelta(days=1)
|
||||||
|
|
||||||
|
wait_seconds = (next_hour - current_time).total_seconds()
|
||||||
|
wait_minutes = int(wait_seconds // 60)
|
||||||
|
|
||||||
|
print(f"⏰ 下次执行时间:{next_hour.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||||
|
print(f"⏳ 距离下次执行:{wait_minutes} 分钟 ({int(wait_seconds)} 秒)")
|
||||||
|
print("💡 定时器正在等待中,将在整点自动执行任务...")
|
||||||
|
print("⏹️ 按 Ctrl+C 停止定时器")
|
||||||
|
|
||||||
scheduler.setup_schedule()
|
scheduler.setup_schedule()
|
||||||
scheduler.start_scheduler()
|
scheduler.start_scheduler()
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
@ -127,9 +127,17 @@
|
|||||||
{
|
{
|
||||||
"video_id": "7562121519012285755",
|
"video_id": "7562121519012285755",
|
||||||
"episode_num": 0
|
"episode_num": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"video_id": "7563238756892757307",
|
||||||
|
"episode_num": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"video_id": "7564756828878753061",
|
||||||
|
"episode_num": 0
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"total_count": 32,
|
"total_count": 34,
|
||||||
"last_update": "2025-10-22T09:55:21.390427",
|
"last_update": "2025-10-26T14:48:56.017802",
|
||||||
"mix_name": "【中式百妖集·阴医】"
|
"mix_name": "【中式百妖集·阴医】"
|
||||||
}
|
}
|
||||||
@ -47,9 +47,13 @@
|
|||||||
{
|
{
|
||||||
"video_id": "7563638353325821203",
|
"video_id": "7563638353325821203",
|
||||||
"episode_num": 0
|
"episode_num": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"video_id": "7564982296051338534",
|
||||||
|
"episode_num": 0
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"total_count": 12,
|
"total_count": 13,
|
||||||
"last_update": "2025-10-22T09:55:16.348610",
|
"last_update": "2025-10-25T12:53:08.640840",
|
||||||
"mix_name": "暗黑神话《葫芦兄弟》大电影"
|
"mix_name": "暗黑神话《葫芦兄弟》大电影"
|
||||||
}
|
}
|
||||||
@ -24,16 +24,24 @@ import os
|
|||||||
import shutil
|
import shutil
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import requests
|
import requests
|
||||||
|
import base64
|
||||||
|
import uuid
|
||||||
|
import sys
|
||||||
|
import psutil
|
||||||
|
import random
|
||||||
|
import threading
|
||||||
|
import argparse
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
import os
|
|
||||||
from selenium.webdriver.chrome.service import Service
|
from selenium.webdriver.chrome.service import Service
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
# 保留导入但默认不使用webdriver_manager,避免网络下载卡顿
|
# 保留导入但默认不使用webdriver_manager,避免网络下载卡顿
|
||||||
from webdriver_manager.chrome import ChromeDriverManager # noqa: F401
|
from webdriver_manager.chrome import ChromeDriverManager # noqa: F401
|
||||||
import chromedriver_autoinstaller
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
# 添加项目根目录到 Python 路径
|
# 添加项目根目录到 Python 路径
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||||
# 确保能找到backend目录下的模块
|
# 确保能找到backend目录下的模块
|
||||||
@ -41,13 +49,10 @@ backend_dir = os.path.join(os.path.dirname(__file__), '..', '..')
|
|||||||
sys.path.insert(0, backend_dir)
|
sys.path.insert(0, backend_dir)
|
||||||
from database import db
|
from database import db
|
||||||
from tos_client import oss_client
|
from tos_client import oss_client
|
||||||
import uuid
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
# 配置日志
|
# 配置日志
|
||||||
# 确保logs目录存在
|
# 确保logs目录存在
|
||||||
import os
|
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
logs_dir = os.path.join(script_dir, 'logs')
|
logs_dir = os.path.join(script_dir, 'logs')
|
||||||
os.makedirs(logs_dir, exist_ok=True)
|
os.makedirs(logs_dir, exist_ok=True)
|
||||||
@ -73,6 +78,7 @@ class DouyinPlayVVScraper:
|
|||||||
self.db = None
|
self.db = None
|
||||||
self.collection = None
|
self.collection = None
|
||||||
self.image_cache = {} # 图片ID到TOS链接的缓存映射 {image_id: tos_url}
|
self.image_cache = {} # 图片ID到TOS链接的缓存映射 {image_id: tos_url}
|
||||||
|
self.all_collected_comments = [] # 存储所有收集到的评论数据
|
||||||
self._cleanup_old_profiles()
|
self._cleanup_old_profiles()
|
||||||
self._setup_mongodb()
|
self._setup_mongodb()
|
||||||
self._load_image_cache()
|
self._load_image_cache()
|
||||||
@ -83,6 +89,9 @@ class DouyinPlayVVScraper:
|
|||||||
# 使用 database.py 中的连接
|
# 使用 database.py 中的连接
|
||||||
self.db = db
|
self.db = db
|
||||||
|
|
||||||
|
# 根据运行模式选择集合
|
||||||
|
is_timer_mode = os.environ.get('TIMER_MODE') == '1'
|
||||||
|
mongo_collection = 'Ranking_storage_list' if is_timer_mode else 'Rankings_list'
|
||||||
# 根据运行模式选择集合
|
# 根据运行模式选择集合
|
||||||
is_timer_mode = os.environ.get('TIMER_MODE') == '1'
|
is_timer_mode = os.environ.get('TIMER_MODE') == '1'
|
||||||
mongo_collection = 'Ranking_storage_list' if is_timer_mode else 'Rankings_list'
|
mongo_collection = 'Ranking_storage_list' if is_timer_mode else 'Rankings_list'
|
||||||
@ -90,6 +99,7 @@ class DouyinPlayVVScraper:
|
|||||||
|
|
||||||
logging.info(f'MongoDB连接成功,使用数据库: {self.db.name},集合: {mongo_collection}')
|
logging.info(f'MongoDB连接成功,使用数据库: {self.db.name},集合: {mongo_collection}')
|
||||||
logging.info(f'当前运行模式: {"定时器模式" if is_timer_mode else "普通模式"}')
|
logging.info(f'当前运行模式: {"定时器模式" if is_timer_mode else "普通模式"}')
|
||||||
|
logging.info(f'当前运行模式: {"定时器模式" if is_timer_mode else "普通模式"}')
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f'MongoDB连接失败: {e}')
|
logging.error(f'MongoDB连接失败: {e}')
|
||||||
@ -158,8 +168,6 @@ class DouyinPlayVVScraper:
|
|||||||
def _cleanup_chrome_processes(self):
|
def _cleanup_chrome_processes(self):
|
||||||
"""清理可能占用配置文件的Chrome进程"""
|
"""清理可能占用配置文件的Chrome进程"""
|
||||||
try:
|
try:
|
||||||
import subprocess
|
|
||||||
import psutil
|
|
||||||
|
|
||||||
# 获取当前配置文件路径
|
# 获取当前配置文件路径
|
||||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
@ -472,7 +480,39 @@ class DouyinPlayVVScraper:
|
|||||||
else:
|
else:
|
||||||
return str(n)
|
return str(n)
|
||||||
|
|
||||||
|
def save_comments_to_file(self, comments: list, video_id: str = None):
|
||||||
|
"""简单保存评论数据到JSON文件"""
|
||||||
|
try:
|
||||||
|
if not comments:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# 创建保存目录
|
||||||
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
save_dir = os.path.join(script_dir, 'saved_comments')
|
||||||
|
os.makedirs(save_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# 生成文件名
|
||||||
|
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||||
|
filename = f'comments_{video_id}_{timestamp}.json' if video_id else f'comments_{timestamp}.json'
|
||||||
|
file_path = os.path.join(save_dir, filename)
|
||||||
|
|
||||||
|
# 保存数据
|
||||||
|
save_data = {
|
||||||
|
'timestamp': datetime.now().isoformat(),
|
||||||
|
'video_id': video_id,
|
||||||
|
'total_comments': len(comments),
|
||||||
|
'comments': comments
|
||||||
|
}
|
||||||
|
|
||||||
|
with open(file_path, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(save_data, f, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
|
logging.info(f'保存 {len(comments)} 条评论到: {file_path}')
|
||||||
|
return file_path
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f'保存评论失败: {e}')
|
||||||
|
return None
|
||||||
|
|
||||||
def parse_play_vv_from_text(self, text: str, source_url: str, request_id: str = None):
|
def parse_play_vv_from_text(self, text: str, source_url: str, request_id: str = None):
|
||||||
"""解析文本中的play_vv、mix_name和watched_item信息"""
|
"""解析文本中的play_vv、mix_name和watched_item信息"""
|
||||||
@ -1174,9 +1214,7 @@ class DouyinPlayVVScraper:
|
|||||||
|
|
||||||
# 等待页面加载完成
|
# 等待页面加载完成
|
||||||
try:
|
try:
|
||||||
from selenium.webdriver.support.ui import WebDriverWait
|
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
|
||||||
from selenium.webdriver.common.by import By
|
|
||||||
|
|
||||||
WebDriverWait(self.driver, 10).until(
|
WebDriverWait(self.driver, 10).until(
|
||||||
EC.presence_of_element_located((By.TAG_NAME, "video"))
|
EC.presence_of_element_located((By.TAG_NAME, "video"))
|
||||||
@ -1195,6 +1233,7 @@ class DouyinPlayVVScraper:
|
|||||||
'Network.responseReceived' in log['method']
|
'Network.responseReceived' in log['method']
|
||||||
and 'response' in log['params']
|
and 'response' in log['params']
|
||||||
and log['params']['response']
|
and log['params']['response']
|
||||||
|
and log['params']['response']
|
||||||
and 'url' in log['params']['response']
|
and 'url' in log['params']['response']
|
||||||
and '/web/api/v2/aweme/iteminfo' in log['params']['response']['url']
|
and '/web/api/v2/aweme/iteminfo' in log['params']['response']['url']
|
||||||
):
|
):
|
||||||
@ -1235,6 +1274,11 @@ class DouyinPlayVVScraper:
|
|||||||
logging.info(f'定时器模式:跳过 get_collection_videos 函数')
|
logging.info(f'定时器模式:跳过 get_collection_videos 函数')
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# 定时器模式下跳过此函数
|
||||||
|
if os.environ.get('TIMER_MODE') == '1':
|
||||||
|
logging.info(f'定时器模式:跳过 get_collection_videos 函数')
|
||||||
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 检查缓存文件
|
# 检查缓存文件
|
||||||
cache_dir = os.path.join(os.path.dirname(__file__), 'episode_video_ids')
|
cache_dir = os.path.join(os.path.dirname(__file__), 'episode_video_ids')
|
||||||
@ -1378,11 +1422,675 @@ class DouyinPlayVVScraper:
|
|||||||
return [video['video_id'] for video in cached_videos]
|
return [video['video_id'] for video in cached_videos]
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def get_video_details(self, video_id: str, max_comments: int = 20) -> dict:
|
def _simulate_comment_scrolling(self, video_id: str, max_scroll_attempts: int = 10, scroll_delay: float = 2.0) -> list:
|
||||||
|
"""
|
||||||
|
模拟用户异步滑动机制,向上滑动加载更多评论
|
||||||
|
Args:
|
||||||
|
video_id: 视频ID
|
||||||
|
max_scroll_attempts: 最大滑动尝试次数,默认10次
|
||||||
|
scroll_delay: 每次滑动后的延迟时间(秒),默认2秒
|
||||||
|
Returns:
|
||||||
|
list: 收集到的所有评论数据
|
||||||
|
"""
|
||||||
|
all_comments = []
|
||||||
|
collected_comment_ids = set()
|
||||||
|
|
||||||
|
try:
|
||||||
|
logging.info(f'开始为视频 {video_id} 执行评论滑动加载机制')
|
||||||
|
|
||||||
|
# 等待页面加载完成
|
||||||
|
time.sleep(3)
|
||||||
|
|
||||||
|
# 定位评论区域
|
||||||
|
self._scroll_to_comment_section()
|
||||||
|
|
||||||
|
# 点击评论区域以触发网络请求
|
||||||
|
self._click_comment_area()
|
||||||
|
|
||||||
|
# 使用线程池实现异步滑动和监控
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
import threading
|
||||||
|
|
||||||
|
# 创建共享状态对象,用于任务间通信
|
||||||
|
shared_state = {
|
||||||
|
'scroll_completed': False,
|
||||||
|
'lock': threading.Lock()
|
||||||
|
}
|
||||||
|
|
||||||
|
with ThreadPoolExecutor(max_workers=2) as executor:
|
||||||
|
# 提交滑动任务
|
||||||
|
scroll_future = executor.submit(self._async_scroll_task_with_state, max_scroll_attempts, scroll_delay, shared_state)
|
||||||
|
|
||||||
|
# 同时提交监控任务 - 监控任务会检测滑动任务状态
|
||||||
|
monitor_future = executor.submit(self._async_monitor_task_with_state, video_id, collected_comment_ids, shared_state, 3600)
|
||||||
|
|
||||||
|
# 等待两个任务完成
|
||||||
|
scroll_result = scroll_future.result()
|
||||||
|
monitor_comments = monitor_future.result()
|
||||||
|
|
||||||
|
all_comments.extend(monitor_comments)
|
||||||
|
|
||||||
|
logging.info(f'评论滑动加载完成,共收集到 {len(all_comments)} 条评论')
|
||||||
|
|
||||||
|
# 保存评论到文件
|
||||||
|
if all_comments:
|
||||||
|
self.save_comments_to_file(all_comments, video_id)
|
||||||
|
|
||||||
|
return all_comments
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.error(f'评论滑动加载机制执行失败: {e}')
|
||||||
|
return all_comments
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _async_scroll_task_with_state(self, max_attempts: int, scroll_delay: float, shared_state: dict):
|
||||||
|
"""带状态的异步滑动任务 - 无限滑动直到检测到"暂时没有更多评论"文本"""
|
||||||
|
try:
|
||||||
|
consecutive_no_progress = 0 # 连续无进展次数
|
||||||
|
attempt = 0
|
||||||
|
|
||||||
|
logging.info('开始无限滑动,直到检测到"暂时没有更多评论"')
|
||||||
|
|
||||||
|
while True: # 无限循环,直到检测到底部文本
|
||||||
|
attempt += 1
|
||||||
|
logging.info(f'第 {attempt} 次向上滑动')
|
||||||
|
|
||||||
|
# 记录滑动前的位置
|
||||||
|
current_position = self.driver.execute_script("return window.pageYOffset;")
|
||||||
|
|
||||||
|
# 执行向上滑动(加载更多评论)
|
||||||
|
self._execute_upward_scroll(attempt)
|
||||||
|
|
||||||
|
# 等待新内容加载
|
||||||
|
time.sleep(scroll_delay)
|
||||||
|
|
||||||
|
# 优先检查是否到达底部(检测到"暂时没有更多评论"文本)
|
||||||
|
if self._check_comment_section_bottom():
|
||||||
|
logging.info('检测到"暂时没有更多评论",停止滑动')
|
||||||
|
break
|
||||||
|
|
||||||
|
# 检查滑动是否有效果
|
||||||
|
new_position = self.driver.execute_script("return window.pageYOffset;")
|
||||||
|
if abs(new_position - current_position) < 50: # 滑动距离太小
|
||||||
|
consecutive_no_progress += 1
|
||||||
|
logging.debug(f'滑动进展较小,连续无进展次数: {consecutive_no_progress}')
|
||||||
|
|
||||||
|
# 如果连续多次无进展,增加滑动力度
|
||||||
|
if consecutive_no_progress >= 5:
|
||||||
|
logging.info('连续多次滑动无进展,增加滑动力度')
|
||||||
|
self._execute_force_scroll()
|
||||||
|
consecutive_no_progress = 0 # 重置计数器
|
||||||
|
time.sleep(scroll_delay * 2) # 增加等待时间
|
||||||
|
|
||||||
|
# 再次检查是否到达底部
|
||||||
|
if self._check_comment_section_bottom():
|
||||||
|
logging.info('强制滑动后检测到底部,停止滑动')
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
consecutive_no_progress = 0
|
||||||
|
|
||||||
|
# 每50次滑动输出一次进度信息
|
||||||
|
if attempt % 50 == 0:
|
||||||
|
logging.info(f'已完成 {attempt} 次滑动,继续寻找"暂时没有更多评论"文本')
|
||||||
|
|
||||||
|
# 安全机制:如果滑动次数过多,暂停一下
|
||||||
|
if attempt % 200 == 0:
|
||||||
|
logging.info(f'已滑动 {attempt} 次,暂停5秒以避免过度请求')
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
|
# 滑动任务完成,通知监控任务
|
||||||
|
with shared_state['lock']:
|
||||||
|
shared_state['scroll_completed'] = True
|
||||||
|
logging.info('滑动任务已完成,通知监控任务结束')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f'滑动任务出错: {e}')
|
||||||
|
# 即使出错也要通知监控任务结束
|
||||||
|
with shared_state['lock']:
|
||||||
|
shared_state['scroll_completed'] = True
|
||||||
|
|
||||||
|
def _execute_force_scroll(self):
|
||||||
|
"""执行强制滑动,用于突破可能的滑动阻塞"""
|
||||||
|
try:
|
||||||
|
logging.info('执行强制滑动以突破阻塞')
|
||||||
|
|
||||||
|
# 执行多重强制滑动策略
|
||||||
|
self.driver.execute_script("""
|
||||||
|
// 1. 多次大幅度滑动
|
||||||
|
for (let i = 0; i < 5; i++) {
|
||||||
|
window.scrollBy(0, 1000);
|
||||||
|
document.documentElement.scrollTop += 1000;
|
||||||
|
document.body.scrollTop += 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. 滑动到页面最底部
|
||||||
|
window.scrollTo(0, document.body.scrollHeight);
|
||||||
|
|
||||||
|
// 3. 强制滚动所有容器
|
||||||
|
const containers = document.querySelectorAll('[data-e2e="comment-list"], .comment-list, [class*="comment"], [class*="scroll"]');
|
||||||
|
containers.forEach(container => {
|
||||||
|
if (container.scrollTop !== undefined) {
|
||||||
|
container.scrollTop = container.scrollHeight;
|
||||||
|
container.dispatchEvent(new Event('scroll', { bubbles: true }));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// 4. 触发所有滚动相关事件
|
||||||
|
['scroll', 'wheel', 'touchmove', 'resize', 'load'].forEach(eventType => {
|
||||||
|
window.dispatchEvent(new Event(eventType, { bubbles: true }));
|
||||||
|
document.dispatchEvent(new Event(eventType, { bubbles: true }));
|
||||||
|
});
|
||||||
|
|
||||||
|
// 5. 模拟用户交互
|
||||||
|
document.body.click();
|
||||||
|
|
||||||
|
console.log('执行强制滑动完成');
|
||||||
|
""")
|
||||||
|
|
||||||
|
time.sleep(3) # 增加等待时间
|
||||||
|
|
||||||
|
# 再次滑动到底部确保效果
|
||||||
|
self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
logging.debug('强制滑动操作完成')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f'执行强制滑动失败: {e}')
|
||||||
|
|
||||||
|
def _execute_upward_scroll(self, attempt: int):
|
||||||
|
"""执行向上滑动操作 - 使用强力滑动策略确保有效触发懒加载"""
|
||||||
|
try:
|
||||||
|
# 记录滑动前状态
|
||||||
|
before_state = self.driver.execute_script("""
|
||||||
|
return {
|
||||||
|
scrollTop: window.pageYOffset,
|
||||||
|
commentCount: document.querySelectorAll('[data-e2e="comment-item"], [class*="comment"], .comment-item').length,
|
||||||
|
pageHeight: document.documentElement.scrollHeight
|
||||||
|
};
|
||||||
|
""")
|
||||||
|
|
||||||
|
logging.debug(f'滑动前状态: 位置={before_state["scrollTop"]}px, 评论数={before_state["commentCount"]}条')
|
||||||
|
|
||||||
|
# 计算滑动距离,递增以确保效果
|
||||||
|
scroll_distance = 800 + (attempt * 300)
|
||||||
|
|
||||||
|
# 执行强力滚动 - 参考111.py的实现
|
||||||
|
self.driver.execute_script(f"""
|
||||||
|
// 1. 强制滚动页面
|
||||||
|
window.scrollBy(0, {scroll_distance});
|
||||||
|
document.documentElement.scrollTop += {scroll_distance};
|
||||||
|
document.body.scrollTop += {scroll_distance};
|
||||||
|
|
||||||
|
// 2. 滚动到页面底部(触发懒加载)
|
||||||
|
window.scrollTo(0, document.body.scrollHeight);
|
||||||
|
|
||||||
|
// 3. 查找并滚动所有可能的评论容器
|
||||||
|
const containers = document.querySelectorAll('[data-e2e="comment-list"], .comment-list, [class*="comment"], [class*="scroll"], [role="main"]');
|
||||||
|
containers.forEach(container => {{
|
||||||
|
if (container.scrollTop !== undefined) {{
|
||||||
|
container.scrollTop = container.scrollHeight;
|
||||||
|
container.dispatchEvent(new Event('scroll', {{ bubbles: true }}));
|
||||||
|
}}
|
||||||
|
}});
|
||||||
|
|
||||||
|
// 4. 触发所有相关事件
|
||||||
|
['scroll', 'wheel', 'touchmove', 'resize'].forEach(eventType => {{
|
||||||
|
window.dispatchEvent(new Event(eventType, {{ bubbles: true }}));
|
||||||
|
document.dispatchEvent(new Event(eventType, {{ bubbles: true }}));
|
||||||
|
}});
|
||||||
|
|
||||||
|
// 5. 模拟用户交互
|
||||||
|
document.body.click();
|
||||||
|
|
||||||
|
console.log('执行强力滚动:', {scroll_distance}, 'px');
|
||||||
|
""")
|
||||||
|
|
||||||
|
time.sleep(2) # 等待页面响应
|
||||||
|
|
||||||
|
# 尝试点击加载更多按钮(如果存在)
|
||||||
|
try:
|
||||||
|
button_clicked = self.driver.execute_script("""
|
||||||
|
const selectors = [
|
||||||
|
'[data-e2e="comment-load-more"]',
|
||||||
|
'[class*="load-more"]',
|
||||||
|
'[class*="more-comment"]',
|
||||||
|
'button[class*="load"]',
|
||||||
|
'div[class*="load"]'
|
||||||
|
];
|
||||||
|
|
||||||
|
for (let selector of selectors) {
|
||||||
|
const buttons = document.querySelectorAll(selector);
|
||||||
|
for (let button of buttons) {
|
||||||
|
if (button.offsetParent !== null && !button.disabled) {
|
||||||
|
button.click();
|
||||||
|
console.log('点击了加载更多按钮:', selector);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
""")
|
||||||
|
|
||||||
|
if button_clicked:
|
||||||
|
logging.debug('成功点击了加载更多按钮')
|
||||||
|
time.sleep(1) # 等待按钮响应
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug(f'点击加载更多按钮失败: {e}')
|
||||||
|
|
||||||
|
# 每隔几次使用真实手势滑动
|
||||||
|
if attempt % 3 == 0:
|
||||||
|
self._simulate_real_swipe()
|
||||||
|
|
||||||
|
logging.debug(f'执行强力滑动,距离: {scroll_distance}px')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f'执行滑动操作失败: {e}')
|
||||||
|
|
||||||
|
def _simulate_real_swipe(self):
|
||||||
|
"""模拟真实向上滑动手势 - 手指从下往上移动"""
|
||||||
|
try:
|
||||||
|
|
||||||
|
window_size = self.driver.get_window_size()
|
||||||
|
width = window_size['width']
|
||||||
|
height = window_size['height']
|
||||||
|
|
||||||
|
# 向上滑动手势:手指从屏幕下方往上方移动
|
||||||
|
start_x = width // 2 + random.randint(-20, 20) # 增加随机性
|
||||||
|
start_y = height * 4 // 5 # 从更靠下的位置开始(4/5处)
|
||||||
|
end_y = height // 5 # 到更靠上的位置结束(1/5处)
|
||||||
|
|
||||||
|
# 使用ActionChains模拟真实向上滑动手势
|
||||||
|
actions = ActionChains(self.driver)
|
||||||
|
actions.w3c_actions.pointer_action\
|
||||||
|
.move_to_location(start_x, start_y)\
|
||||||
|
.pointer_down()\
|
||||||
|
.pause(0.1)\
|
||||||
|
.move_to_location(start_x, end_y)\
|
||||||
|
.pause(0.1)\
|
||||||
|
.pointer_up()
|
||||||
|
actions.perform()
|
||||||
|
|
||||||
|
logging.debug(f'执行真实向上滑动手势: 从({start_x}, {start_y})到({start_x}, {end_y})')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug(f'真实手势滑动失败: {e}')
|
||||||
|
|
||||||
|
def _async_monitor_task(self, video_id: str, collected_comment_ids: set, timeout: float) -> list:
|
||||||
|
"""异步监控任务"""
|
||||||
|
all_comments = []
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
while time.time() - start_time < timeout:
|
||||||
|
try:
|
||||||
|
# 从网络日志获取新评论
|
||||||
|
new_comments = self._extract_comments_from_network_logs(video_id)
|
||||||
|
|
||||||
|
# 去重并添加新评论
|
||||||
|
for comment in new_comments:
|
||||||
|
comment_id = f"{comment.get('text', '')}_{comment.get('user_name', '')}"
|
||||||
|
if comment_id not in collected_comment_ids:
|
||||||
|
collected_comment_ids.add(comment_id)
|
||||||
|
all_comments.append(comment)
|
||||||
|
|
||||||
|
if new_comments:
|
||||||
|
logging.info(f'监控到 {len(new_comments)} 条新评论,总计 {len(all_comments)} 条')
|
||||||
|
|
||||||
|
# 短暂等待后继续监控
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f'监控任务出错: {e}')
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
return all_comments
|
||||||
|
|
||||||
|
def _async_monitor_task_with_state(self, video_id: str, collected_comment_ids: set, shared_state: dict, timeout: float) -> list:
|
||||||
|
"""带状态的异步监控任务 - 监控评论并检测滑动任务状态"""
|
||||||
|
all_comments = []
|
||||||
|
start_time = time.time()
|
||||||
|
last_comment_count = 0
|
||||||
|
no_new_comments_count = 0
|
||||||
|
|
||||||
|
logging.info('开始监控评论,将持续到滑动任务完成')
|
||||||
|
|
||||||
|
while time.time() - start_time < timeout:
|
||||||
|
try:
|
||||||
|
# 检查滑动任务是否完成
|
||||||
|
with shared_state['lock']:
|
||||||
|
scroll_completed = shared_state['scroll_completed']
|
||||||
|
|
||||||
|
if scroll_completed:
|
||||||
|
logging.info('检测到滑动任务已完成,监控任务即将结束')
|
||||||
|
# 滑动完成后再监控5秒,确保收集到最后的评论
|
||||||
|
final_start = time.time()
|
||||||
|
while time.time() - final_start < 5:
|
||||||
|
try:
|
||||||
|
new_comments = self._extract_comments_from_network_logs(video_id)
|
||||||
|
for comment in new_comments:
|
||||||
|
comment_id = f"{comment.get('text', '')}_{comment.get('user_name', '')}"
|
||||||
|
if comment_id not in collected_comment_ids:
|
||||||
|
collected_comment_ids.add(comment_id)
|
||||||
|
all_comments.append(comment)
|
||||||
|
time.sleep(0.5)
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f'最终监控阶段出错: {e}')
|
||||||
|
break
|
||||||
|
|
||||||
|
# 从网络日志获取新评论
|
||||||
|
new_comments = self._extract_comments_from_network_logs(video_id)
|
||||||
|
|
||||||
|
# 去重并添加新评论
|
||||||
|
for comment in new_comments:
|
||||||
|
comment_id = f"{comment.get('text', '')}_{comment.get('user_name', '')}"
|
||||||
|
if comment_id not in collected_comment_ids:
|
||||||
|
collected_comment_ids.add(comment_id)
|
||||||
|
all_comments.append(comment)
|
||||||
|
|
||||||
|
# 检查是否有新评论
|
||||||
|
current_comment_count = len(all_comments)
|
||||||
|
if current_comment_count > last_comment_count:
|
||||||
|
logging.info(f'监控到 {current_comment_count - last_comment_count} 条新评论,总计 {current_comment_count} 条')
|
||||||
|
last_comment_count = current_comment_count
|
||||||
|
no_new_comments_count = 0
|
||||||
|
else:
|
||||||
|
no_new_comments_count += 1
|
||||||
|
# 每30秒输出一次状态
|
||||||
|
if no_new_comments_count % 30 == 0:
|
||||||
|
logging.info(f'监控中...当前总计 {current_comment_count} 条评论,等待滑动任务完成')
|
||||||
|
|
||||||
|
# 短暂等待后继续监控
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f'监控任务出错: {e}')
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
logging.info(f'监控任务结束,共收集到 {len(all_comments)} 条评论')
|
||||||
|
return all_comments
|
||||||
|
|
||||||
|
def _scroll_to_comment_section(self):
    """Scroll the page so that the comment section is in view.

    Tries a short list of CSS selectors in order and scrolls the first
    element matched by the first selector that yields any element into the
    centre of the viewport. If no selector matches (or every attempt
    raises), falls back to scrolling to the bottom of the document.

    Any error is logged as a warning; nothing is raised and nothing is
    returned.
    """
    candidate_selectors = (
        '[data-e2e="comment-list"]',
        '[class*="comment-list"]',
        '[class*="comment-container"]',
    )
    scroll_into_view_js = "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});"

    try:
        for css in candidate_selectors:
            try:
                matches = self.driver.find_elements("css selector", css)
                if not matches:
                    continue
                self.driver.execute_script(scroll_into_view_js, matches[0])
                # Pause after triggering the smooth scroll before reporting success.
                time.sleep(2)
                logging.info(f'成功定位到评论区域: {css}')
                return
            except Exception:
                # A failing selector is not fatal: move on to the next one.
                continue

        # Fallback: no selector worked, so scroll to the bottom of the page.
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        logging.info('使用备用方案:滚动到页面底部')

    except Exception as e:
        logging.warning(f'定位评论区域失败: {e}')
|
||||||
|
|
||||||
|
def _click_comment_area(self):
    """Click inside the comment area so the page issues its comment requests.

    Walks a list of CSS selectors in order; for each selector, clicks the
    first element that is both displayed and enabled (after scrolling it
    into view) and then stops. When no element could be clicked, clicks
    whatever element sits at the centre of the browser window via
    JavaScript instead.

    Failures are logged (debug for individual attempts, warning for an
    unexpected error in the method as a whole); nothing is raised and
    nothing is returned.
    """
    try:
        # Generic comment-area selectors first, then Douyin-specific ones.
        selectors = (
            '[data-e2e="comment-list"]',
            '[class*="comment"]',
            '[class*="Comment"]',
            '.comment-list',
            '.comment-container',
            '[data-e2e="comment-item"]',
            '[class*="comment-item"]',
            'div[class*="comment"]',
            'div[data-e2e="comment-list"]',
            'div[class*="CommentList"]',
            'div[class*="comment-list"]',
        )

        clicked = False
        for selector in selectors:
            try:
                found = self.driver.find_elements("css selector", selector)
                if found:
                    for element in found:
                        try:
                            if not (element.is_displayed() and element.is_enabled()):
                                continue

                            # Make sure the element is in the viewport before clicking.
                            self.driver.execute_script("arguments[0].scrollIntoView(true);", element)
                            time.sleep(0.5)

                            element.click()
                            logging.info(f'成功点击评论区域: {selector}')
                            clicked = True
                            time.sleep(1)  # give the network request time to fire
                            break
                        except Exception as e:
                            logging.debug(f'点击元素失败: {e}')
            except Exception as e:
                logging.debug(f'使用选择器 {selector} 查找评论区域失败: {e}')
                continue

            if clicked:
                break

        if clicked:
            return

        # Nothing matched: click the element at the centre of the window.
        try:
            size = self.driver.get_window_size()
            center_x = size['width'] // 2
            center_y = size['height'] // 2

            self.driver.execute_script(f"""
                var element = document.elementFromPoint({center_x}, {center_y});
                if (element) {{
                    element.click();
                }}
            """)
            logging.info('点击页面中部区域以触发评论加载')
            time.sleep(1)
        except Exception as e:
            logging.debug(f'点击页面中部失败: {e}')

    except Exception as e:
        logging.warning(f'点击评论区域失败: {e}')
|
||||||
|
|
||||||
|
def _check_comment_section_bottom(self) -> bool:
|
||||||
|
"""
|
||||||
|
检测是否已经到达评论区底部
|
||||||
|
只有检测到"暂时没有更多评论"文本时才停止滑动,确保无限滑动直到真正到达底部
|
||||||
|
Returns:
|
||||||
|
bool: True表示已到达底部,False表示还可以继续加载
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# 目标文本:只有检测到这个文本才认为到达底部
|
||||||
|
target_text = "暂时没有更多评论"
|
||||||
|
|
||||||
|
logging.debug(f'正在检测评论区底部标识文本: "{target_text}"')
|
||||||
|
|
||||||
|
# 方法1: 使用XPath检测包含文本的元素
|
||||||
|
xpath_selectors = [
|
||||||
|
f"//*[contains(text(), '{target_text}')]",
|
||||||
|
f"//div[contains(text(), '{target_text}')]",
|
||||||
|
f"//span[contains(text(), '{target_text}')]",
|
||||||
|
f"//p[contains(text(), '{target_text}')]",
|
||||||
|
f"//*[text()='{target_text}']"
|
||||||
|
]
|
||||||
|
|
||||||
|
for xpath in xpath_selectors:
|
||||||
|
try:
|
||||||
|
elements = self.driver.find_elements("xpath", xpath)
|
||||||
|
if elements:
|
||||||
|
# 检查元素是否可见
|
||||||
|
for element in elements:
|
||||||
|
try:
|
||||||
|
if element.is_displayed():
|
||||||
|
logging.info(f'检测到评论区底部标识文本: "{target_text}" (通过XPath: {xpath})')
|
||||||
|
return True
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug(f'XPath检测失败 {xpath}: {e}')
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 方法2: 使用JavaScript在页面中搜索文本
|
||||||
|
try:
|
||||||
|
js_result = self.driver.execute_script(f"""
|
||||||
|
// 搜索页面中所有包含目标文本的元素
|
||||||
|
var targetText = '{target_text}';
|
||||||
|
var walker = document.createTreeWalker(
|
||||||
|
document.body,
|
||||||
|
NodeFilter.SHOW_TEXT,
|
||||||
|
null,
|
||||||
|
false
|
||||||
|
);
|
||||||
|
|
||||||
|
var node;
|
||||||
|
while (node = walker.nextNode()) {{
|
||||||
|
if (node.textContent.includes(targetText)) {{
|
||||||
|
var element = node.parentElement;
|
||||||
|
if (element && element.offsetParent !== null) {{
|
||||||
|
return {{
|
||||||
|
found: true,
|
||||||
|
text: node.textContent.trim(),
|
||||||
|
tagName: element.tagName,
|
||||||
|
className: element.className
|
||||||
|
}};
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
return {{found: false}};
|
||||||
|
""")
|
||||||
|
|
||||||
|
if js_result and js_result.get('found'):
|
||||||
|
logging.info(f'通过JavaScript检测到评论区底部标识文本: "{target_text}"')
|
||||||
|
logging.debug(f'元素信息: 标签={js_result.get("tagName")}, 类名={js_result.get("className")}, 文本="{js_result.get("text")}"')
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug(f'JavaScript文本检测失败: {e}')
|
||||||
|
|
||||||
|
# 方法3: 检查页面源码中是否包含完整的目标文本
|
||||||
|
try:
|
||||||
|
page_source = self.driver.page_source
|
||||||
|
if target_text in page_source:
|
||||||
|
# 进一步验证:使用正则表达式确保是完整的文本匹配
|
||||||
|
pattern = re.escape(target_text)
|
||||||
|
if re.search(pattern, page_source):
|
||||||
|
logging.info(f'在页面源码中检测到完整的底部标识文本: "{target_text}"')
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug(f'页面源码检测失败: {e}')
|
||||||
|
|
||||||
|
# 检查页面滚动位置(仅用于调试信息)
|
||||||
|
try:
|
||||||
|
current_position = self.driver.execute_script("return window.pageYOffset;")
|
||||||
|
page_height = self.driver.execute_script("return document.body.scrollHeight;")
|
||||||
|
window_height = self.driver.execute_script("return window.innerHeight;")
|
||||||
|
distance_to_bottom = page_height - (current_position + window_height)
|
||||||
|
|
||||||
|
logging.debug(f'滚动状态: 当前位置={current_position}, 页面高度={page_height}, 窗口高度={window_height}, 距离底部={distance_to_bottom}px')
|
||||||
|
|
||||||
|
# 即使滚动到底部,也不停止滑动,除非检测到目标文本
|
||||||
|
if distance_to_bottom <= 10:
|
||||||
|
logging.debug(f'已滚动到页面底部,但未检测到"{target_text}"文本,继续滑动')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug(f'检查滚动位置失败: {e}')
|
||||||
|
|
||||||
|
# 只有检测到"暂时没有更多评论"文本才返回True,否则继续滑动
|
||||||
|
logging.debug(f'未检测到"{target_text}"文本,继续滑动')
|
||||||
|
return False
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f'检测评论区底部失败: {e}')
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _extract_comments_from_network_logs(self, video_id: str) -> list:
|
||||||
|
"""
|
||||||
|
从网络日志中提取评论数据
|
||||||
|
Args:
|
||||||
|
video_id: 视频ID
|
||||||
|
Returns:
|
||||||
|
list: 评论数据列表
|
||||||
|
"""
|
||||||
|
comments = []
|
||||||
|
try:
|
||||||
|
# 获取网络请求日志
|
||||||
|
logs = self.driver.get_log('performance')
|
||||||
|
|
||||||
|
for entry in logs:
|
||||||
|
try:
|
||||||
|
log = json.loads(entry['message'])['message']
|
||||||
|
if (
|
||||||
|
'Network.responseReceived' in log['method']
|
||||||
|
and 'response' in log['params']
|
||||||
|
and log['params']['response']
|
||||||
|
and log['params']['response'].get('url')
|
||||||
|
):
|
||||||
|
url = log['params']['response']['url']
|
||||||
|
|
||||||
|
# 检查是否是评论API
|
||||||
|
if '/aweme/v1/web/comment/list/' in url and video_id in url:
|
||||||
|
try:
|
||||||
|
# 获取响应体
|
||||||
|
response_body = self.driver.execute_cdp_cmd(
|
||||||
|
'Network.getResponseBody',
|
||||||
|
{'requestId': log['params']['requestId']}
|
||||||
|
)
|
||||||
|
|
||||||
|
if response_body and 'body' in response_body:
|
||||||
|
data = json.loads(response_body['body'])
|
||||||
|
api_comments = data.get('comments', [])
|
||||||
|
|
||||||
|
for comment in api_comments:
|
||||||
|
comment_info = {
|
||||||
|
'text': comment.get('text', ''),
|
||||||
|
'user_name': comment.get('user', {}).get('nickname', ''),
|
||||||
|
'digg_count': int(comment.get('digg_count', 0)),
|
||||||
|
'create_time': comment.get('create_time', 0)
|
||||||
|
}
|
||||||
|
comments.append(comment_info)
|
||||||
|
|
||||||
|
# 记录API URL信息,用于调试
|
||||||
|
if api_comments:
|
||||||
|
logging.debug(f'从API获取到 {len(api_comments)} 条评论: {url}')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug(f'解析评论API响应失败: {e}')
|
||||||
|
continue
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
continue
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning(f'提取网络日志评论数据失败: {e}')
|
||||||
|
|
||||||
|
return comments
|
||||||
|
|
||||||
|
def get_video_details(self, video_id: str, max_comments: int = 100) -> dict:
|
||||||
"""获取单个视频的详细互动数据
|
"""获取单个视频的详细互动数据
|
||||||
Args:
|
Args:
|
||||||
video_id: 视频ID
|
video_id: 视频ID
|
||||||
max_comments: 最大评论数量,默认20条
|
max_comments: 最大评论数量,默认100条
|
||||||
Returns:
|
Returns:
|
||||||
dict: 包含点赞数、收藏数、转发数、评论内容的字典
|
dict: 包含点赞数、收藏数、转发数、评论内容的字典
|
||||||
"""
|
"""
|
||||||
@ -1426,11 +2134,12 @@ class DouyinPlayVVScraper:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f'等待视频元素超时: {e}')
|
logging.warning(f'等待视频元素超时: {e}')
|
||||||
|
|
||||||
# 获取网络请求日志
|
# 首先获取页面加载时的网络请求日志(关键修复)
|
||||||
logs = self.driver.get_log('performance')
|
logging.info(f'获取页面加载时的网络日志以捕获视频详情API')
|
||||||
|
initial_logs = self.driver.get_log('performance')
|
||||||
|
|
||||||
# 解析网络日志获取视频详细数据
|
# 解析初始网络日志获取视频详细数据cc
|
||||||
for entry in logs:
|
for entry in initial_logs:
|
||||||
try:
|
try:
|
||||||
log = json.loads(entry['message'])['message']
|
log = json.loads(entry['message'])['message']
|
||||||
if (
|
if (
|
||||||
@ -1466,14 +2175,42 @@ class DouyinPlayVVScraper:
|
|||||||
video_details['shares_formatted'] = self.format_interaction_count(video_details['shares'])
|
video_details['shares_formatted'] = self.format_interaction_count(video_details['shares'])
|
||||||
video_details['favorites_formatted'] = self.format_interaction_count(video_details['favorites'])
|
video_details['favorites_formatted'] = self.format_interaction_count(video_details['favorites'])
|
||||||
|
|
||||||
logging.info(f'视频 {video_id} 互动数据: 点赞={video_details["likes_formatted"]}, 分享={video_details["shares_formatted"]}, 收藏={video_details["favorites_formatted"]}')
|
logging.info(f'从初始网络日志获取视频 {video_id} 互动数据: 点赞={video_details["likes_formatted"]}, 分享={video_details["shares_formatted"]}, 收藏={video_details["favorites_formatted"]}')
|
||||||
|
break
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f'解析视频详情API响应失败: {e}')
|
logging.warning(f'解析初始视频详情API响应失败: {e}')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 检查是否是评论API
|
except Exception as e:
|
||||||
elif '/aweme/v1/web/comment/list/' in url and video_id in url:
|
continue
|
||||||
|
|
||||||
|
# 启动滑动机制加载更多评论
|
||||||
|
logging.info(f'开始为视频 {video_id} 启动滑动机制加载评论')
|
||||||
|
scrolled_comments = self._simulate_comment_scrolling(video_id, max_scroll_attempts=15, scroll_delay=2.0)
|
||||||
|
|
||||||
|
# 如果滑动机制获取到评论,直接使用
|
||||||
|
if scrolled_comments:
|
||||||
|
video_details['comments'] = scrolled_comments[:max_comments]
|
||||||
|
logging.info(f'滑动机制成功获取 {len(video_details["comments"])} 条评论')
|
||||||
|
|
||||||
|
# 获取滑动后的网络请求日志(用于评论数据)
|
||||||
|
logs = self.driver.get_log('performance')
|
||||||
|
|
||||||
|
# 解析滑动后的网络日志获取评论数据(作为滑动机制的补充)
|
||||||
|
for entry in logs:
|
||||||
|
try:
|
||||||
|
log = json.loads(entry['message'])['message']
|
||||||
|
if (
|
||||||
|
'Network.responseReceived' in log['method']
|
||||||
|
and 'response' in log['params']
|
||||||
|
and log['params']['response']
|
||||||
|
and log['params']['response'].get('url')
|
||||||
|
):
|
||||||
|
url = log['params']['response']['url']
|
||||||
|
|
||||||
|
# 只处理评论API(视频详情API已在初始阶段处理)
|
||||||
|
if '/aweme/v1/web/comment/list/' in url and video_id in url and not video_details['comments']:
|
||||||
try:
|
try:
|
||||||
# 获取响应体
|
# 获取响应体
|
||||||
response_body = self.driver.execute_cdp_cmd(
|
response_body = self.driver.execute_cdp_cmd(
|
||||||
@ -1485,16 +2222,19 @@ class DouyinPlayVVScraper:
|
|||||||
data = json.loads(response_body['body'])
|
data = json.loads(response_body['body'])
|
||||||
comments = data.get('comments', [])
|
comments = data.get('comments', [])
|
||||||
|
|
||||||
for comment in comments[:max_comments]:
|
# 只有在滑动机制没有获取到评论时才使用这个方法
|
||||||
comment_info = {
|
if not video_details['comments']:
|
||||||
'text': comment.get('text', ''),
|
for comment in comments[:max_comments]:
|
||||||
'user_name': comment.get('user', {}).get('nickname', ''),
|
comment_info = {
|
||||||
'digg_count': int(comment.get('digg_count', 0)),
|
'text': comment.get('text', ''),
|
||||||
'create_time': comment.get('create_time', 0)
|
'user_name': comment.get('user', {}).get('nickname', ''),
|
||||||
}
|
'digg_count': int(comment.get('digg_count', 0)),
|
||||||
video_details['comments'].append(comment_info)
|
'create_time': comment.get('create_time', 0)
|
||||||
|
}
|
||||||
|
video_details['comments'].append(comment_info)
|
||||||
|
|
||||||
logging.info(f'视频 {video_id} 获取到 {len(video_details["comments"])} 条评论')
|
logging.info(f'备用方案获取到 {len(comments)} 条评论')
|
||||||
|
logging.info(f'评论API URL: {url}')
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f'解析评论API响应失败: {e}')
|
logging.warning(f'解析评论API响应失败: {e}')
|
||||||
@ -1675,12 +2415,12 @@ class DouyinPlayVVScraper:
|
|||||||
|
|
||||||
return video_details
|
return video_details
|
||||||
|
|
||||||
def get_collection_video_details(self, episode_video_ids: list, mix_name: str = '', max_comments_per_video: int = 10) -> list:
|
def get_collection_video_details(self, episode_video_ids: list, mix_name: str = '', max_comments_per_video: int = 100) -> list:
|
||||||
"""获取合集中所有视频的详细互动数据
|
"""获取合集中所有视频的详细互动数据
|
||||||
Args:
|
Args:
|
||||||
episode_video_ids: 视频ID列表
|
episode_video_ids: 视频ID列表
|
||||||
mix_name: 合集名称,用于日志
|
mix_name: 合集名称,用于日志
|
||||||
max_comments_per_video: 每个视频最大评论数量,默认10条
|
max_comments_per_video: 每个视频最大评论数量,默认100条
|
||||||
Returns:
|
Returns:
|
||||||
list: 包含每个视频详细数据的列表
|
list: 包含每个视频详细数据的列表
|
||||||
"""
|
"""
|
||||||
@ -1722,6 +2462,7 @@ class DouyinPlayVVScraper:
|
|||||||
|
|
||||||
# 添加延迟避免请求过快
|
# 添加延迟避免请求过快
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
# exit(0)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_msg = f'获取视频 {video_id} 详细数据时出错: {e}'
|
error_msg = f'获取视频 {video_id} 详细数据时出错: {e}'
|
||||||
@ -1772,7 +2513,6 @@ class DouyinPlayVVScraper:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import argparse
|
|
||||||
parser = argparse.ArgumentParser(description='Selenium+CDP 抖音play_vv抓取器')
|
parser = argparse.ArgumentParser(description='Selenium+CDP 抖音play_vv抓取器')
|
||||||
parser.add_argument('--url', default='https://www.douyin.com/user/self?showTab=favorite_collection&showSubTab=compilation', help='收藏合集列表页面URL')
|
parser.add_argument('--url', default='https://www.douyin.com/user/self?showTab=favorite_collection&showSubTab=compilation', help='收藏合集列表页面URL')
|
||||||
parser.add_argument('--auto', action='store_true', help='自动继续,跳过回车等待')
|
parser.add_argument('--auto', action='store_true', help='自动继续,跳过回车等待')
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -343,7 +343,7 @@ class TOSChunkUploader:
|
|||||||
self.bucket_name = tos_client.bucket_name
|
self.bucket_name = tos_client.bucket_name
|
||||||
self.self_domain = tos_client.self_domain
|
self.self_domain = tos_client.self_domain
|
||||||
|
|
||||||
def init_multipart_upload(self, object_key: str, content_type: Optional[str] = None) -> str | None:
|
def init_multipart_upload(self, object_key: str, content_type: Optional[str] = None) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
初始化分片上传
|
初始化分片上传
|
||||||
|
|
||||||
|
|||||||
@ -6,6 +6,12 @@
|
|||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.0/font/bootstrap-icons.css">
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.0/font/bootstrap-icons.css">
|
||||||
<title>Vite App</title>
|
<title>Vite App</title>
|
||||||
|
<style>
|
||||||
|
body {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div id="app"></div>
|
<div id="app"></div>
|
||||||
|
|||||||
@ -9,11 +9,49 @@ const loading = ref(false)
|
|||||||
const selectedDate = ref('')
|
const selectedDate = ref('')
|
||||||
const currentPage = ref(1)
|
const currentPage = ref(1)
|
||||||
const totalPages = ref(1)
|
const totalPages = ref(1)
|
||||||
|
const updateTime = ref('') // 添加更新时间字段
|
||||||
|
const showDatePicker = ref(false) // 控制日期选择器显示
|
||||||
|
const dateOptions = ref([]) // 日期选项列表
|
||||||
|
|
||||||
// 初始化日期为今天
|
// 初始化日期为今天
|
||||||
const initDate = () => {
|
const initDate = () => {
|
||||||
const today = new Date()
|
const today = new Date()
|
||||||
selectedDate.value = today.toISOString().split('T')[0]
|
selectedDate.value = today.toISOString().split('T')[0]
|
||||||
|
generateDateOptions()
|
||||||
|
}
|
||||||
|
|
||||||
|
// 生成日期选项(今天和往前7天)
|
||||||
|
const generateDateOptions = () => {
|
||||||
|
const options = []
|
||||||
|
const today = new Date()
|
||||||
|
|
||||||
|
for (let i = 0; i < 8; i++) {
|
||||||
|
const date = new Date(today)
|
||||||
|
date.setDate(today.getDate() - i)
|
||||||
|
|
||||||
|
const value = date.toISOString().split('T')[0]
|
||||||
|
const weekdays = ['周日', '周一', '周二', '周三', '周四', '周五', '周六']
|
||||||
|
const weekday = weekdays[date.getDay()]
|
||||||
|
|
||||||
|
let label = ''
|
||||||
|
if (i === 0) {
|
||||||
|
label = '今天'
|
||||||
|
} else if (i === 1) {
|
||||||
|
label = '昨天'
|
||||||
|
} else {
|
||||||
|
label = `${i}天前`
|
||||||
|
}
|
||||||
|
|
||||||
|
const display = `${date.getMonth() + 1}月${date.getDate()}日 ${weekday}`
|
||||||
|
|
||||||
|
options.push({
|
||||||
|
value,
|
||||||
|
label,
|
||||||
|
display
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
dateOptions.value = options
|
||||||
}
|
}
|
||||||
|
|
||||||
// 获取排行榜数据
|
// 获取排行榜数据
|
||||||
@ -33,6 +71,8 @@ const fetchRankingData = async () => {
|
|||||||
if (response.data.success) {
|
if (response.data.success) {
|
||||||
rankingData.value = response.data.data
|
rankingData.value = response.data.data
|
||||||
totalPages.value = response.data.pagination.pages
|
totalPages.value = response.data.pagination.pages
|
||||||
|
// 获取后端返回的更新时间
|
||||||
|
updateTime.value = response.data.update_time || ''
|
||||||
} else {
|
} else {
|
||||||
console.error('获取数据失败:', response.data.message)
|
console.error('获取数据失败:', response.data.message)
|
||||||
rankingData.value = []
|
rankingData.value = []
|
||||||
@ -76,7 +116,7 @@ const formatGrowth = (item) => {
|
|||||||
const changeRate = timelineData.play_vv_change_rate || 0
|
const changeRate = timelineData.play_vv_change_rate || 0
|
||||||
|
|
||||||
if (change > 0) {
|
if (change > 0) {
|
||||||
return `+${formatPlayCount(change)} (${changeRate.toFixed(1)}%)`
|
return `${formatPlayCount(change)}`
|
||||||
}
|
}
|
||||||
return '暂无数据'
|
return '暂无数据'
|
||||||
}
|
}
|
||||||
@ -89,12 +129,85 @@ const switchTab = (tab) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 获取图片源地址
|
||||||
|
// Pick the poster URL for a ranking item: the primary cover if set,
// otherwise the first backup URL, otherwise the bundled placeholder.
const getImageSrc = (item) => {
  const { cover_image_url: primary, cover_backup_urls: backups } = item

  if (primary) {
    return primary
  }

  const hasBackup = backups && backups.length > 0
  return hasBackup ? backups[0] : '/placeholder-poster.svg'
}
|
||||||
|
|
||||||
|
// 处理图片加载错误
|
||||||
|
// Image `error` handler: step through the item's cover URLs (primary first,
// then the backups in order) and finally fall back to the placeholder.
//
// event: the error event; event.target is the <img> whose src is advanced.
// item:  ranking item providing title, cover_image_url and cover_backup_urls.
const handleImageError = (event, item) => {
  const placeholder = '/placeholder-poster.svg'
  const img = event.target
  const failedSrc = String(img.src || '')
  console.log('图片加载失败:', img.src, '视频:', item.title)

  // The placeholder itself failed to load. Assigning it again would only
  // raise another error event and loop forever, so stop here.
  if (failedSrc === placeholder || failedSrc.endsWith(placeholder)) {
    return
  }

  // Ordered candidates without duplicates or empty entries. De-duplicating
  // matters when the backup list repeats the primary URL: otherwise the same
  // failing URL would be assigned again and again.
  const candidates = [...new Set(
    [item.cover_image_url, ...(item.cover_backup_urls || [])].filter(Boolean)
  )]

  const currentIndex = candidates.indexOf(failedSrc)
  // An unknown current src (index -1) goes straight to the placeholder
  // instead of restarting from the first candidate.
  const nextUrl = currentIndex >= 0 ? candidates[currentIndex + 1] : undefined

  if (nextUrl) {
    const leavingPrimary = currentIndex === 0 && candidates[0] === item.cover_image_url
    console.log(leavingPrimary ? '尝试备用链接:' : '尝试下一个备用链接:', nextUrl)
    img.src = nextUrl
    return
  }

  // Every URL failed: show the placeholder.
  console.log('使用占位符图片')
  img.src = placeholder
}
|
||||||
|
|
||||||
// 日期改变处理
|
// 日期改变处理
|
||||||
const onDateChange = () => {
|
const onDateChange = () => {
|
||||||
currentPage.value = 1
|
currentPage.value = 1
|
||||||
fetchRankingData()
|
fetchRankingData()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 格式化显示日期
|
||||||
|
// Format the selected date for display as YYYY-MM-DD.
// Returns '选择日期' when no date is selected.
const formatDisplayDate = (dateStr) => {
  if (!dateStr) return '选择日期'

  // A date-only ISO string is already in display format. Returning it as-is
  // avoids `new Date('YYYY-MM-DD')`, which is parsed as UTC midnight and
  // would show the previous day with local getters in time zones behind UTC.
  if (/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) return dateStr

  // Any other input: format with local date parts, as before.
  const date = new Date(dateStr)
  const pad = (part) => part.toString().padStart(2, '0')
  return `${date.getFullYear()}-${pad(date.getMonth() + 1)}-${pad(date.getDate())}`
}
|
||||||
|
|
||||||
|
// 切换日期选择器显示状态
|
||||||
|
// Flip the date-picker popup between shown and hidden.
const toggleDatePicker = () => {
  const isOpen = showDatePicker.value
  showDatePicker.value = !isOpen
}
|
||||||
|
|
||||||
|
// 关闭日期选择器
|
||||||
|
// Hide the date-picker popup.
const closeDatePicker = () => {
  showDatePicker.value = false
}
|
||||||
|
|
||||||
|
// 选择日期
|
||||||
|
// Apply the date chosen in the popup: store it, hide the popup, then run
// the date-change handler.
const selectDate = (dateValue) => {
  selectedDate.value = dateValue
  showDatePicker.value = false
  onDateChange()
}
|
||||||
|
|
||||||
// 页面加载时初始化
|
// 页面加载时初始化
|
||||||
onMounted(() => {
|
onMounted(() => {
|
||||||
initDate()
|
initDate()
|
||||||
@ -111,26 +224,44 @@ onMounted(() => {
|
|||||||
<!-- 标题 -->
|
<!-- 标题 -->
|
||||||
<div class="header">
|
<div class="header">
|
||||||
<div class="title-container">
|
<div class="title-container">
|
||||||
<span class="lightning-icon">⚡</span>
|
<i class="bi bi-stars lightning-icon"></i>
|
||||||
<h1 class="title">热播总榜</h1>
|
<h1 class="title">抖音AI短剧榜</h1>
|
||||||
<span class="lightning-icon">⚡</span>
|
<i class="bi bi-stars lightning-icon"></i>
|
||||||
</div>
|
</div>
|
||||||
<div class="update-time">
|
<div class="update-time">
|
||||||
基于实时热度排行 {{ getCurrentTime() }}更新
|
基于实时热度排行 {{ updateTime || getCurrentTime() }}更新
|
||||||
<span class="refresh-icon">🔄</span>
|
<span class="refresh-icon">🔄</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- 日期选择 -->
|
<!-- 自定义日期选择器 -->
|
||||||
<div class="date-selector">
|
<div class="custom-date-selector">
|
||||||
<label for="date-input">选择日期:</label>
|
<div class="date-display" @click="toggleDatePicker">
|
||||||
<input
|
<span class="date-text">{{ formatDisplayDate(selectedDate) }}<i class="bi bi-chevron-compact-right"></i></span>
|
||||||
id="date-input"
|
|
||||||
type="date"
|
</div>
|
||||||
v-model="selectedDate"
|
|
||||||
@change="onDateChange"
|
<!-- 日期选择弹窗 -->
|
||||||
class="date-input"
|
<div v-if="showDatePicker" class="date-picker-overlay" @click="closeDatePicker">
|
||||||
/>
|
<div class="date-picker-popup" @click.stop>
|
||||||
|
<div class="date-picker-header">
|
||||||
|
<h3>选择日期</h3>
|
||||||
|
<button class="close-btn" @click="closeDatePicker">×</button>
|
||||||
|
</div>
|
||||||
|
<div class="date-list">
|
||||||
|
<div
|
||||||
|
v-for="date in dateOptions"
|
||||||
|
:key="date.value"
|
||||||
|
class="date-option"
|
||||||
|
:class="{ active: selectedDate === date.value }"
|
||||||
|
@click="selectDate(date.value)"
|
||||||
|
>
|
||||||
|
<span class="date-label">{{ date.label }}</span>
|
||||||
|
<span class="date-value">{{ date.display }}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- 加载状态 -->
|
<!-- 加载状态 -->
|
||||||
@ -154,9 +285,9 @@ onMounted(() => {
|
|||||||
<!-- 海报 -->
|
<!-- 海报 -->
|
||||||
<div class="poster">
|
<div class="poster">
|
||||||
<img
|
<img
|
||||||
:src="item.cover_image_url || '/placeholder-poster.svg'"
|
:src="getImageSrc(item)"
|
||||||
:alt="item.title || item.mix_name"
|
:alt="item.title || item.mix_name"
|
||||||
@error="$event.target.src='/placeholder-poster.svg'"
|
@error="handleImageError($event, item)"
|
||||||
class="poster-img"
|
class="poster-img"
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
@ -172,15 +303,11 @@ onMounted(() => {
|
|||||||
<span class="play-value">{{ formatPlayCount(item.play_vv) }}</span>
|
<span class="play-value">{{ formatPlayCount(item.play_vv) }}</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- 简介(省略显示) -->
|
|
||||||
<div class="description">
|
|
||||||
{{ item.summary || item.title || item.mix_name || '暂无简介' }}
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- 增长数据 -->
|
<!-- 增长数据 -->
|
||||||
<div class="growth-data">
|
<div class="growth-data">
|
||||||
<span class="growth-icon">🔥</span>
|
<i class="bi bi-fire"></i>
|
||||||
<span class="growth-number">{{ formatGrowth(item) }}</span>
|
<span class="growth-number">{{ formatGrowth(item) }}</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -234,7 +361,6 @@ onMounted(() => {
|
|||||||
|
|
||||||
.app {
|
.app {
|
||||||
min-height: 100vh;
|
min-height: 100vh;
|
||||||
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
|
|
||||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||||
padding-bottom: 80px; /* 为底部导航留出空间 */
|
padding-bottom: 80px; /* 为底部导航留出空间 */
|
||||||
color: white;
|
color: white;
|
||||||
@ -251,7 +377,6 @@ onMounted(() => {
|
|||||||
/* 标题区域 */
|
/* 标题区域 */
|
||||||
.header {
|
.header {
|
||||||
text-align: center;
|
text-align: center;
|
||||||
margin-bottom: 20px;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.title-container {
|
.title-container {
|
||||||
@ -268,11 +393,10 @@ onMounted(() => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.title {
|
.title {
|
||||||
color: white;
|
color: #555;
|
||||||
font-size: 24px;
|
font-size: 24px;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
margin: 0;
|
margin: 0;
|
||||||
text-shadow: 0 2px 4px rgba(0,0,0,0.3);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.update-time {
|
.update-time {
|
||||||
@ -289,35 +413,141 @@ onMounted(() => {
|
|||||||
color: #4CAF50;
|
color: #4CAF50;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 日期选择器 */
|
/* 自定义日期选择器 */
|
||||||
.date-selector {
|
.custom-date-selector {
|
||||||
background: rgba(255, 255, 255, 0.9);
|
padding: 16px 0 8px;
|
||||||
padding: 15px;
|
|
||||||
border-radius: 12px;
|
|
||||||
margin-bottom: 20px;
|
|
||||||
display: flex;
|
|
||||||
align-items: center;
|
|
||||||
gap: 10px;
|
|
||||||
box-shadow: 0 4px 12px rgba(0,0,0,0.1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.date-selector label {
|
.date-display {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: flex-end;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.date-text {
|
||||||
|
font-size: 12px;
|
||||||
|
font-weight: 500;
|
||||||
|
color: #999;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 日期选择弹窗 */
|
||||||
|
.date-picker-overlay {
|
||||||
|
position: fixed;
|
||||||
|
top: 0;
|
||||||
|
left: 0;
|
||||||
|
right: 0;
|
||||||
|
bottom: 0;
|
||||||
|
background: rgba(0, 0, 0, 0.5);
|
||||||
|
display: flex;
|
||||||
|
align-items: flex-end;
|
||||||
|
z-index: 1000;
|
||||||
|
animation: fadeIn 0.3s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.date-picker-popup {
|
||||||
|
width: 100%;
|
||||||
|
max-height: 70vh;
|
||||||
|
background: white;
|
||||||
|
border-radius: 20px 20px 0 0;
|
||||||
|
animation: slideUp 0.3s ease;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.date-picker-header {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
padding: 20px;
|
||||||
|
border-bottom: 1px solid #eee;
|
||||||
|
background: #f8f9fa;
|
||||||
|
}
|
||||||
|
|
||||||
|
.date-picker-header h3 {
|
||||||
|
margin: 0;
|
||||||
|
font-size: 18px;
|
||||||
|
font-weight: 600;
|
||||||
|
color: #333;
|
||||||
|
}
|
||||||
|
|
||||||
|
.close-btn {
|
||||||
|
background: none;
|
||||||
|
border: none;
|
||||||
|
font-size: 24px;
|
||||||
|
color: #666;
|
||||||
|
cursor: pointer;
|
||||||
|
padding: 0;
|
||||||
|
width: 30px;
|
||||||
|
height: 30px;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
border-radius: 50%;
|
||||||
|
transition: all 0.2s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.close-btn:hover {
|
||||||
|
background: #e9ecef;
|
||||||
|
color: #333;
|
||||||
|
}
|
||||||
|
|
||||||
|
.date-list {
|
||||||
|
max-height: 400px;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.date-option {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: space-between;
|
||||||
|
padding: 16px 20px;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.2s ease;
|
||||||
|
border-bottom: 1px solid #f0f0f0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.date-option:hover {
|
||||||
|
background: #f8f9fa;
|
||||||
|
}
|
||||||
|
|
||||||
|
.date-option.active {
|
||||||
|
background: #e3f2fd;
|
||||||
|
border-left: 4px solid #2196f3;
|
||||||
|
}
|
||||||
|
|
||||||
|
.date-option.active .date-label {
|
||||||
|
color: #2196f3;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.date-label {
|
||||||
|
font-size: 16px;
|
||||||
font-weight: 500;
|
font-weight: 500;
|
||||||
color: #333;
|
color: #333;
|
||||||
}
|
}
|
||||||
|
|
||||||
.date-input {
|
.date-value {
|
||||||
flex: 1;
|
font-size: 14px;
|
||||||
padding: 8px 12px;
|
color: #666;
|
||||||
border: 2px solid #e1e5e9;
|
|
||||||
border-radius: 8px;
|
|
||||||
font-size: 16px;
|
|
||||||
background: white;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.date-input:focus {
|
/* 动画效果 */
|
||||||
outline: none;
|
@keyframes fadeIn {
|
||||||
border-color: #667eea;
|
from {
|
||||||
|
opacity: 0;
|
||||||
|
}
|
||||||
|
to {
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@keyframes slideUp {
|
||||||
|
from {
|
||||||
|
transform: translateY(100%);
|
||||||
|
}
|
||||||
|
to {
|
||||||
|
transform: translateY(0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 加载状态 */
|
/* 加载状态 */
|
||||||
@ -350,35 +580,44 @@ onMounted(() => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.ranking-item {
|
.ranking-item {
|
||||||
background: rgba(255, 255, 255, 0.95);
|
|
||||||
border-radius: 16px;
|
border-radius: 16px;
|
||||||
padding: 15px;
|
|
||||||
display: flex;
|
display: flex;
|
||||||
align-items: flex-start;
|
align-items: flex-start;
|
||||||
gap: 15px;
|
gap: 15px;
|
||||||
box-shadow: 0 4px 20px rgba(0,0,0,0.1);
|
padding: 4px;
|
||||||
transition: transform 0.2s ease, box-shadow 0.2s ease;
|
|
||||||
}
|
|
||||||
|
|
||||||
.ranking-item:hover {
|
|
||||||
transform: translateY(-2px);
|
|
||||||
box-shadow: 0 8px 25px rgba(0,0,0,0.15);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 排名数字 */
|
/* 排名数字 */
|
||||||
.rank-number {
|
.rank-number {
|
||||||
background: linear-gradient(135deg, #ff6b6b, #ee5a24);
|
color: #333;
|
||||||
color: white;
|
width: 16px;
|
||||||
width: 32px;
|
height: 80px;
|
||||||
height: 32px;
|
|
||||||
border-radius: 50%;
|
|
||||||
display: flex;
|
display: flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
font-size: 14px;
|
font-size: 16px;
|
||||||
flex-shrink: 0;
|
flex-shrink: 0;
|
||||||
box-shadow: 0 2px 8px rgba(255,107,107,0.3);
|
}
|
||||||
|
|
||||||
|
/* 前三名特殊样式 */
|
||||||
|
.rank-first {
|
||||||
|
color: #ffd700;
|
||||||
|
font-size: 24px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.rank-second {
|
||||||
|
color: #afe3f6;
|
||||||
|
font-size: 24px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.rank-third {
|
||||||
|
color: #cd7f32;
|
||||||
|
font-size: 24px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.rank-normal {
|
||||||
|
color: #666;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 海报 */
|
/* 海报 */
|
||||||
@ -387,8 +626,7 @@ onMounted(() => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.poster-img {
|
.poster-img {
|
||||||
width: 60px;
|
width: 72px;
|
||||||
height: 80px;
|
|
||||||
object-fit: cover;
|
object-fit: cover;
|
||||||
border-radius: 8px;
|
border-radius: 8px;
|
||||||
box-shadow: 0 2px 8px rgba(0,0,0,0.15);
|
box-shadow: 0 2px 8px rgba(0,0,0,0.15);
|
||||||
@ -403,11 +641,14 @@ onMounted(() => {
|
|||||||
.drama-name {
|
.drama-name {
|
||||||
font-size: 16px;
|
font-size: 16px;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
color: #2c3e50;
|
color: #555;
|
||||||
margin: 0 0 8px 0;
|
margin: 0 0 8px 0;
|
||||||
line-height: 1.3;
|
line-height: 1.3;
|
||||||
}
|
}
|
||||||
|
.growth-data {
|
||||||
|
color: #e74c3c;
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
.growth-info, .play-count {
|
.growth-info, .play-count {
|
||||||
display: flex;
|
display: flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
@ -430,16 +671,6 @@ onMounted(() => {
|
|||||||
font-weight: 600;
|
font-weight: 600;
|
||||||
}
|
}
|
||||||
|
|
||||||
.description {
|
|
||||||
color: #7f8c8d;
|
|
||||||
font-size: 12px;
|
|
||||||
line-height: 1.4;
|
|
||||||
overflow: hidden;
|
|
||||||
text-overflow: ellipsis;
|
|
||||||
white-space: nowrap;
|
|
||||||
margin-top: 8px;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 空状态 */
|
/* 空状态 */
|
||||||
.empty-state {
|
.empty-state {
|
||||||
text-align: center;
|
text-align: center;
|
||||||
@ -499,61 +730,4 @@ onMounted(() => {
|
|||||||
font-weight: 500;
|
font-weight: 500;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 移动端适配 */
|
|
||||||
@media (max-width: 768px) {
|
|
||||||
.main-content {
|
|
||||||
padding: 15px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.title {
|
|
||||||
font-size: 20px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.ranking-item {
|
|
||||||
padding: 12px;
|
|
||||||
gap: 12px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.poster-img {
|
|
||||||
width: 50px;
|
|
||||||
height: 67px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.drama-name {
|
|
||||||
font-size: 15px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.date-selector {
|
|
||||||
padding: 12px;
|
|
||||||
flex-direction: column;
|
|
||||||
align-items: stretch;
|
|
||||||
gap: 8px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.date-input {
|
|
||||||
width: 100%;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@media (max-width: 480px) {
|
|
||||||
.main-content {
|
|
||||||
padding: 10px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.ranking-item {
|
|
||||||
padding: 10px;
|
|
||||||
gap: 10px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.poster-img {
|
|
||||||
width: 45px;
|
|
||||||
height: 60px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.rank-number {
|
|
||||||
width: 28px;
|
|
||||||
height: 28px;
|
|
||||||
font-size: 12px;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
</style>
|
</style>
|
||||||
|
|||||||
@ -2,13 +2,11 @@ import { fileURLToPath, URL } from 'node:url'
|
|||||||
|
|
||||||
import { defineConfig } from 'vite'
|
import { defineConfig } from 'vite'
|
||||||
import vue from '@vitejs/plugin-vue'
|
import vue from '@vitejs/plugin-vue'
|
||||||
import vueDevTools from 'vite-plugin-vue-devtools'
|
|
||||||
|
|
||||||
// https://vite.dev/config/
|
// https://vite.dev/config/
|
||||||
export default defineConfig({
|
export default defineConfig({
|
||||||
plugins: [
|
plugins: [
|
||||||
vue(),
|
vue(),
|
||||||
vueDevTools(),
|
|
||||||
],
|
],
|
||||||
resolve: {
|
resolve: {
|
||||||
alias: {
|
alias: {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user