143 lines
4.4 KiB
Python
143 lines
4.4 KiB
Python
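#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Douyin play-count auto-scrape scheduler - cross-platform version

Features:
- Runs the Douyin play-count scraping task automatically once a day, at the
  time configured in config.SCHEDULER_TIME
- Supports Windows, macOS and Linux
- Automatically saves the data to MongoDB
"""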
|
||
|
||
import schedule
|
||
import time
|
||
import sys
|
||
import os
|
||
import logging
|
||
from pathlib import Path
|
||
from datetime import datetime
|
||
import config
|
||
|
||
# 添加项目路径到 Python 路径
|
||
sys.path.append(os.path.join(os.path.dirname(__file__), 'handlers', 'Rankings'))
|
||
from rank_data_scraper import DouyinPlayVVScraper
|
||
|
||
# 配置日志的函数
|
||
def setup_logging():
    """Configure root logging to write to a log file and to the console.

    Creates the ``handlers/Rankings/logs`` directory next to this script if
    it does not exist yet, then calls ``logging.basicConfig`` with level INFO,
    a ``time - level - message`` format, a UTF-8 ``scheduler.log`` file
    handler and a default ``StreamHandler``.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    log_dir = os.path.join(base_dir, 'handlers', 'Rankings', 'logs')
    # The directory must exist before the file handler opens its file.
    os.makedirs(log_dir, exist_ok=True)

    log_file = os.path.join(log_dir, 'scheduler.log')
    file_handler = logging.FileHandler(log_file, encoding='utf-8')
    console_handler = logging.StreamHandler()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[file_handler, console_handler],
    )
|
||
|
||
class DouyinAutoScheduler:
    """Run the Douyin play-count scraper on a daily schedule or on demand.

    The daily run time is read from ``config.SCHEDULER_TIME``. Jobs are
    registered with the ``schedule`` library and executed by the blocking
    polling loop in :meth:`start_scheduler`.
    """

    def __init__(self):
        # True while the polling loop in start_scheduler() should keep going.
        self.is_running = False
        # Wall-clock minute (epoch seconds // 60) of the last status report.
        # None means "nothing reported yet"; start_scheduler() seeds it.
        self._last_status_minute = None

    def run_douyin_scraper(self):
        """Run one scraping pass.

        Any exception raised while creating or running the scraper is logged
        together with its traceback and then swallowed, so a failed run does
        not terminate the scheduler loop.
        """
        try:
            logging.info("🚀 开始执行抖音播放量抓取任务...")

            # Set the environment variable to ensure automatic mode
            # (presumably read by the scraper; not visible from this file).
            os.environ['AUTO_CONTINUE'] = '1'

            # Create and run a DouyinPlayVVScraper instance directly.
            scraper = DouyinPlayVVScraper(
                start_url="https://www.douyin.com/user/self?showTab=favorite_collection&showSubTab=compilation",
                auto_continue=True,
                duration_s=60
            )

            logging.info("📁 开始执行抓取任务...")
            scraper.run()

            logging.info("✅ 抖音播放量抓取任务执行成功")

        except Exception as e:
            logging.error(f"💥 执行任务时发生异常: {e}")
            import traceback
            logging.error(f"详细错误信息: {traceback.format_exc()}")

    def setup_schedule(self):
        """Register the daily scraping job at the configured time."""
        # The execution time comes from the config module.
        scheduler_time = config.SCHEDULER_TIME
        schedule.every().day.at(scheduler_time).do(self.run_douyin_scraper)

        logging.info(f"⏰ 定时器已设置:每晚{scheduler_time}执行抖音播放量抓取")

    def show_next_run(self):
        """Log the next run time of the first registered job, if any."""
        jobs = schedule.get_jobs()
        if jobs:
            next_run = jobs[0].next_run
            logging.info(f"⏰ 下次执行时间: {next_run}")

    def run_once(self):
        """Run the scraping task once, immediately."""
        logging.info("🔧 立即执行模式...")
        self.run_douyin_scraper()

    def run_test(self):
        """Test mode: run the scraping task once, immediately."""
        logging.info("🧪 测试模式 - 立即执行抖音播放量抓取任务...")
        self.run_douyin_scraper()

    def _status_due(self, now):
        """Return True the first time it is called within a new wall-clock minute.

        ``now`` is a timestamp in seconds since the epoch. Comparing whole
        minutes (rather than testing ``second % 60 == 0``) means a report is
        neither lost when the loop happens to skip second 0 nor repeated
        within the same minute.
        """
        minute = int(now) // 60
        if minute == self._last_status_minute:
            return False
        self._last_status_minute = minute
        return True

    def start_scheduler(self):
        """Start the blocking polling loop.

        Runs pending jobs about once per second and logs the next run time
        once per minute, until ``is_running`` is cleared or the user presses
        Ctrl+C. ``is_running`` is reset to False whenever the loop exits.
        """
        self.is_running = True
        logging.info("🚀 抖音播放量自动抓取定时器已启动")
        logging.info(f"⏰ 执行时间:每天{config.SCHEDULER_TIME}执行抖音播放量抓取")
        logging.info("📁 目标脚本:rank_data_scraper.py")
        logging.info("💾 数据保存:MongoDB")
        logging.info("⏹️ 按 Ctrl+C 停止定时器")

        # Seed with the current minute so the first status line appears at
        # the next minute boundary rather than immediately.
        self._last_status_minute = int(time.time()) // 60

        try:
            while self.is_running:
                schedule.run_pending()
                time.sleep(1)

                # Show the status once per minute. A long-running job or
                # sleep drift can skip the exact ":00" second, so compare
                # minutes instead of testing for that single second.
                if self._status_due(time.time()):
                    self.show_next_run()

        except KeyboardInterrupt:
            logging.info("\n⏹️ 定时器已停止")
        finally:
            self.is_running = False
|
||
|
||
def main():
    """Command-line entry point.

    Parses ``--test`` and ``--once``, configures logging, and then either
    runs the scraping task a single time (test / once modes) or registers
    the daily job and enters the scheduler loop.
    """
    import argparse

    cli = argparse.ArgumentParser(description='抖音播放量自动抓取定时器')
    cli.add_argument('--test', action='store_true', help='测试模式 - 立即执行一次')
    cli.add_argument('--once', action='store_true', help='立即执行一次并退出')
    options = cli.parse_args()

    # Logging must be configured before the scheduler emits anything.
    setup_logging()

    auto_scheduler = DouyinAutoScheduler()

    # --test takes precedence over --once when both are given.
    if options.test:
        auto_scheduler.run_test()
        return
    if options.once:
        auto_scheduler.run_once()
        return

    # Default mode: register the daily job and block in the polling loop.
    auto_scheduler.setup_schedule()
    auto_scheduler.start_scheduler()
|
||
|
||
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()