架构优化

2025-10-17 16:09:17 +08:00 · 2025-10-17 16:09:17 +08:00 · f51278742c
commit f51278742c
parent 8d4369ecef
15 changed files with 319 additions and 1240 deletions
--- a/scripts/douyin_auto_scheduler.py
+++ b/scripts/douyin_auto_scheduler.py
@ -24,8 +24,7 @@ def setup_logging():
    # 确保logs目录存在
    import os
    script_dir = os.path.dirname(os.path.abspath(__file__))
-    project_root = os.path.dirname(script_dir)
-    logs_dir = os.path.join(project_root, 'logs')
+    logs_dir = os.path.join(script_dir, 'handlers', 'Rankings', 'logs')
    os.makedirs(logs_dir, exist_ok=True)

    logging.basicConfig(
@ -49,8 +48,8 @@ class DouyinAutoScheduler:
            # 设置环境变量，确保自动模式
            os.environ['AUTO_CONTINUE'] = '1'

-            # 构建脚本路径 - 现在在同一目录下
-            script_path = Path(__file__).parent / 'douyin_selenium_cdp_play_vv.py'
+            # 构建脚本路径 - 指向Rankings目录中的脚本
+            script_path = Path(__file__).parent / 'handlers' / 'Rankings' / 'rank_data_scraper.py'

            if not script_path.exists():
                logging.error(f"❌ 脚本文件不存在: {script_path}")
@ -109,7 +108,7 @@ class DouyinAutoScheduler:
        self.is_running = True
        logging.info("🚀 抖音播放量自动抓取定时器已启动")
        logging.info("⏰ 执行时间：每天上午9:35")
-        logging.info("📁 目标脚本：douyin_selenium_cdp_play_vv.py")
+        logging.info("📁 目标脚本：rank_data_scraper.py")
        logging.info("💾 数据保存：MongoDB")
        logging.info("⏹️  按 Ctrl+C 停止定时器")

--- a/app.py
+++ b/app.py
@ -0,0 +1,93 @@
+from flask import Flask
+from flask_cors import CORS
+import logging
+import os
+
+app = Flask(__name__)
+CORS(app)  # 允许跨域访问
+
+# 配置日志
+# 确保logs目录存在
+logs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'handlers', 'Rankings', 'logs')
+os.makedirs(logs_dir, exist_ok=True)
+
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler(os.path.join(logs_dir, 'app.log'), encoding='utf-8'),
+        logging.StreamHandler()
+    ]
+)
+
+# 导入路由
+from routers.rank_api_routes import api
+
+# 注册路由
+@app.route('/')
+def index():
+    """API首页"""
+    from flask import jsonify
+    return jsonify({
+        "name": "抖音播放量数据API服务",
+        "version": "2.0",
+        "description": "主程序服务 - 整合小程序API功能",
+        "endpoints": {
+            "/api/videos": "获取视频列表 (支持分页和排序)",
+            "/api/top": "获取热门视频榜单",
+            "/api/search": "搜索视频",
+            "/api/detail": "获取视频详情",
+            "/api/stats": "获取统计信息",
+            "/api/health": "健康检查"
+        },
+        "features": [
+            "分页支持",
+            "多种排序方式",
+            "搜索功能",
+            "详情查看",
+            "统计分析",
+            "小程序优化"
+        ]
+    })
+
+# 注册小程序API路由
+@app.route('/api/videos')
+def get_videos():
+    return api.get_videos()
+
+@app.route('/api/top')
+def get_top():
+    return api.get_top()
+
+@app.route('/api/search')
+def search():
+    return api.search()
+
+@app.route('/api/detail')
+def get_detail():
+    return api.get_detail()
+
+@app.route('/api/stats')
+def get_stats():
+    return api.get_stats()
+
+@app.route('/api/health')
+def health_check():
+    return api.health_check()
+
+if __name__ == '__main__':
+    print("启动主程序服务...")
+    print("服务地址: http://localhost:5000")
+    print("API接口列表:")
+    print("   - GET /             显示API信息")
+    print("   - GET /api/videos?page=1&limit=20&sort=playcount  获取视频列表（总播放量排序）")
+    print("   - GET /api/videos?page=1&limit=20&sort=growth     获取视频列表（增长排序，默认昨天到今天的差值）")
+    print("   - GET /api/videos?page=1&limit=20&sort=growth&start_date=2025-10-16&end_date=2025-10-17  获取视频列表（自定义日期范围增长排序）")
+    print("   - GET /api/top?limit=10                           获取热门榜单")
+    print("   - GET /api/search?q=关键词&page=1&limit=10         搜索视频")
+    print("   - GET /api/detail?id=视频ID                       获取视频详情")
+    print("   - GET /api/stats                                  获取统计信息")
+    print("   - GET /api/health                                 健康检查")
+    print("专为小程序优化：分页、搜索、详情、统计、增长排序、自定义日期范围")
+
+    app.run(host='0.0.0.0', port=5000, debug=True)
--- a/config.py
+++ b/config.py
@ -0,0 +1,16 @@
+import os
+import importlib
+
+# 数据库配置
+MONGO_URI = "mongodb://localhost:27017"
+MONGO_DB_NAME = "Rankings"
+
+# 应用配置
+APP_ENV = os.getenv('APP_ENV', 'development')
+DEBUG = APP_ENV == 'development'
+
+# 日志配置
+LOG_LEVEL = 'INFO'
+LOG_DIR = 'logs'
+
+print(f"Successfully loaded configuration for environment: {APP_ENV}")
--- a/database.py
+++ b/database.py
@ -0,0 +1,19 @@
+from pymongo import MongoClient
+import config
+# from mongo_listeners import all_event_listeners # 导入监听器（暂时注释掉，因为文件不存在）
+
+MONGO_URI = config.MONGO_URI
+DB_NAME = config.MONGO_DB_NAME
+
+# 创建MongoDB客户端连接
+try:
+    # 实例化MongoClient（事件监听器暂未启用，对应导入已在上方注释掉）
+    client = MongoClient(MONGO_URI, serverSelectionTimeoutMS=5000) # 设置5秒超时
+    db = client[DB_NAME]
+    # 主动检查连接状态
+    client.admin.command('ping')
+    success_message = f"\033[92m成功连接到MongoDB: {DB_NAME}\033[0m"
+    print(success_message)
+except Exception as e:
+    error_message = f"\033[91m数据库连接失败: {MONGO_URI}，请检查MongoDB服务是否已启动。\033[0m"
+    print(error_message)
--- a/docs/API接口文档.md
+++ b/docs/API接口文档.md
@ -1,289 +0,0 @@
-# 小程序抖音播放量数据API文档
-
-## 概述
-
-抖音短剧播放量数据API，专为小程序优化，支持分页、搜索、增长分析等功能。
-
-**服务地址**: `http://localhost:5001`
-**启动命令**: `cd backend && python miniprogram_api_server.py`
-
-## 接口列表
-
-### 1. 健康检查
-`GET /api/health`
- 检查服务器和数据库状态
-
-### 2. 视频列表
-`GET /api/videos`
-
-**参数**:
- `page`: 页码（默认1）
- `limit`: 每页数量（默认20，最大50）
- `sort`: 排序方式
-  - `playcount`: 按总播放量排序
-  - `growth`: 按增长排序
-  - `time`: 按时间排序
- `start_date`: 开始日期（增长排序用，格式：2025-10-16）
- `end_date`: 结束日期（增长排序用，格式：2025-10-17）
-
-**示例**:
-```
-# 总播放量排序
-/api/videos?page=1&limit=10&sort=playcount
-
-# 增长排序（昨天到今天）
-/api/videos?page=1&limit=10&sort=growth
-
-# 自定义日期增长排序
-/api/videos?page=1&limit=10&sort=growth&start_date=2025-10-16&end_date=2025-10-17
-```
-
-### 3. 热门榜单
-`GET /api/top`
- `limit`: 返回数量（默认10，最大50）
-
-### 4. 搜索视频
-`GET /api/search`
- `q`: 搜索关键词
- `page`: 页码
- `limit`: 每页数量
-
-### 5. 视频详情
-`GET /api/detail`
- `id`: 视频ID
-
-## 数据字段说明
-
-### 视频数据字段
- `_id`: 视频唯一ID
- `mix_name`: 短剧名称
- `playcount`: 播放量文本（如"2.1亿"）
- `play_vv`: 播放量数值
- `video_url`: 抖音合集链接
- `rank`: 排名
- `batch_time`: 数据采集时间
- `aweme_ids`: 视频ID数组
- `cover_image_url`: 封面图片
- `cover_backup_urls`: 备用封面图片
- `request_id`: 请求ID
-
-### 增长排序特有字段
- `growth`: 播放量增长值
- `start_date`: 开始日期
- `end_date`: 结束日期
-
-## 响应格式
-
-所有接口返回格式：
-```json
-{
-  "success": true/false,
-  "data": [...],
-  "message": "错误信息（仅当success为false时）"
-}
-```
-
-视频列表接口额外包含：
- `pagination`: 分页信息
- `sort_by`: 排序方式
- `date_range`: 日期范围（仅增长排序）
- `update_time`: 更新时间
-
-## 📱 小程序集成示例
-
-### 微信小程序示例
-
-```javascript
-// 获取视频列表
-wx.request({
-  url: 'http://localhost:5001/api/videos',
-  data: {
-    page: 1,
-    limit: 10,
-    sort: 'playcount'
-  },
-  success: function(res) {
-    if (res.data.success) {
-      const videos = res.data.data;
-      videos.forEach(video => {
-        console.log(`${video.mix_name}: ${video.playcount}`);
-        console.log(`封面: ${video.cover_image_url}`);
-        console.log(`链接: ${video.video_url}`);
-        console.log(`视频数量: ${video.aweme_ids.length}`);
-      });
-      console.log('分页信息:', res.data.pagination);
-    }
-  }
-});
-
-// 搜索视频
-wx.request({
-  url: 'http://localhost:5001/api/search',
-  data: {
-    q: '奶团',
-    page: 1,
-    limit: 5
-  },
-  success: function(res) {
-    if (res.data.success) {
-      const results = res.data.data;
-      results.forEach(video => {
-        console.log(`找到: ${video.mix_name}`);
-        console.log(`播放量: ${video.playcount}`);
-        console.log(`数值播放量: ${video.play_vv}`);
-      });
-    }
-  }
-});
-
-// 获取热门榜单
-wx.request({
-  url: 'http://localhost:5001/api/top',
-  data: {
-    limit: 10
-  },
-  success: function(res) {
-    if (res.data.success) {
-      console.log('热门榜单:', res.data.data);
-    }
-  }
-});
-```
-
-### uni-app示例
-
-```javascript
-// 封装API请求
-const API_BASE = 'http://localhost:5001';
-
-// 获取视频列表
-export function getVideoList(page = 1, limit = 20, sort = 'playcount') {
-  return uni.request({
-    url: `${API_BASE}/api/videos`,
-    data: { page, limit, sort }
-  });
-}
-
-// 搜索视频
-export function searchVideos(keyword, page = 1, limit = 10) {
-  return uni.request({
-    url: `${API_BASE}/api/search`,
-    data: { q: keyword, page, limit }
-  });
-}
-
-// 获取视频详情
-export function getVideoDetail(id) {
-  return uni.request({
-    url: `${API_BASE}/api/detail`,
-    data: { id }
-  });
-}
-
-// 使用示例
-getVideoList(1, 10).then(([err, res]) => {
-  if (!err && res.data.success) {
-    console.log('视频列表:', res.data.data);
-  }
-});
-```
-
-## 🎯 数据字段说明
-
-### 视频合集字段
- `_id`: 合集唯一标识符（MongoDB ObjectId）
- `mix_name`: 合集名称
- `playcount`: 播放量文本（如"2.1亿"）
- `play_vv`: 播放量数值
- `video_url`: 合集链接
- `rank`: 排名
- `batch_time`: 批次时间
- `aweme_ids`: 视频ID数组
- `cover_image_url`: 封面图片URL
- `cover_backup_urls`: 备用封面图片URL数组
- `request_id`: 请求ID
-
-### 分页信息字段
- `page`: 当前页码
- `limit`: 每页数量
- `total`: 总记录数
- `pages`: 总页数
- `has_next`: 是否有下一页
- `has_prev`: 是否有上一页
-
-## 🔧 技术特性
-
-### 1. 小程序优化
- **轻量级响应**: 精简数据结构，减少传输量
- **分页支持**: 避免一次性加载大量数据
- **搜索功能**: 支持关键词模糊搜索
- **错误处理**: 统一的错误响应格式
-
-### 2. 性能优化
- **数据缓存**: MongoDB查询优化
- **分页限制**: 防止过大的数据请求
- **连接池**: 数据库连接复用
- **日志记录**: 完整的请求日志
-
-### 3. 安全特性
- **参数验证**: 输入参数安全检查
- **CORS支持**: 跨域请求支持
- **错误隐藏**: 不暴露内部错误信息
-
-## 📊 测试结果
-
-最新测试结果（100%通过率）：
- ✅ API首页: 正常
- ✅ 健康检查: 数据库连接正常，35条记录
- ✅ 视频列表: 分页功能正常
- ✅ 热门榜单: 排序功能正常
- ✅ 搜索功能: 关键词搜索正常
- ✅ 视频详情: 详情获取正常
- ✅ 统计信息: 数据统计正常
-
-## 🚀 部署建议
-
-### 开发环境
-```bash
-# 启动API服务器
-python scripts/miniprogram_api_server.py
-
-# 运行测试
-python scripts/test_miniprogram_api.py
-```
-
-### 生产环境
-```bash
-# 使用Gunicorn部署
-pip install gunicorn
-gunicorn -w 4 -b 0.0.0.0:5001 scripts.miniprogram_api_server:app
-
-# 使用Nginx反向代理
-# 配置SSL证书支持HTTPS
-```
-
-## 📝 更新日志
-
-### v2.0 (2025-10-16)
- 🎉 全新的小程序优化API
- ✨ 添加分页和搜索功能
- 🔧 优化数据结构和响应格式
- 📊 增加统计信息接口
- 🧪 完整的测试覆盖
-
-### 与v1.0的主要区别
- **更好的分页**: 支持灵活的分页参数
- **搜索功能**: 关键词模糊搜索
- **详情接口**: 单独的视频详情查看
- **统计分析**: 数据统计和分类
- **小程序优化**: 专为小程序设计的数据格式
-
-## 🤝 技术支持
-
-如有问题，请检查：
-1. MongoDB服务是否正常运行
-2. API服务器是否启动成功
-3. 网络连接是否正常
-4. 参数格式是否正确
-
-测试工具会自动生成详细的测试报告，保存在 `api_test_report.json` 文件中。
--- a/docs/README.md
+++ b/docs/README.md
@ -1,224 +0,0 @@
-# 抖音合集播放量数据抓取系统
-
-这是一个完整的抖音合集播放量数据抓取和分析系统，包含自动化抓取、定时任务、数据存储和API服务。
-
-## 🎯 系统概述
-
-本系统通过Selenium + Chrome DevTools Protocol技术，自动化抓取抖音收藏合集的真实播放量数据，并提供以下功能：
-
- **自动化数据抓取**: 每天定时自动抓取抖音收藏合集的播放量数据
- **持久化存储**: 数据自动保存到MongoDB数据库
- **RESTful API**: 为小程序提供数据接口服务
- **多维度分析**: 支持总播放量、增长排序、搜索等功能
-
-## 📁 项目结构
-
-```
-rank_backend/
-├── scripts/                    # 核心脚本目录
-│   ├── douyin_selenium_cdp_play_vv.py     # 主抓取脚本
-│   ├── douyin_auto_scheduler.py           # 定时任务调度器
-│   ├── miniprogram_api_server.py          # 小程序API服务器
-│   ├── query_mongodb_data.py              # 数据库查询工具
-│   ├── view_latest_data.py                # 最新数据查看工具
-│   ├── check_mongodb.py                   # MongoDB连接检查
-│   └── mongodb_quick_view.py              # 数据库快速查看
-├── docs/                       # 文档目录
-│   ├── README.md                          # 项目说明文档
-│   └── API接口文档.md                     # API接口文档
-├── config/                    # 配置文件目录
-├── drivers/                   # Chrome驱动目录
-├── data/                      # 数据文件目录
-├── logs/                      # 日志文件目录
-├── requirements.txt           # Python依赖包
-├── CHROME_PROFILE_SETUP.md    # Chrome配置设置指南
-└── .gitignore                 # Git忽略文件
-```
-
-## 🚀 快速开始
-
-### 1. 环境准备
-
-#### 安装Python依赖
-```bash
-pip install -r requirements.txt
-```
-
-#### 安装MongoDB
- 下载并安装MongoDB Community Server
- 启动MongoDB服务（默认端口27017）
-
-#### Chrome浏览器
- 确保已安装Chrome浏览器
- 系统会自动检测并使用合适的ChromeDriver
-
-### 2. Chrome配置文件
-```bash
-# 首次运行需要手动登录
-python scripts/douyin_selenium_cdp_play_vv.py
-自动创建
-```
-### 3. ChromeDriver说明
-
-**ChromeDriver已包含在仓库中** (`drivers/chromedriver.exe`)，无需额外下载。系统会自动检测并使用该驱动。
-
-如果遇到驱动版本不匹配问题，可以：
-1. 删除 `drivers/chromedriver.exe`
-2. 重新运行脚本，系统会自动下载合适版本的ChromeDriver
-
-### 3. 运行系统
-
-#### 方式一：手动运行抓取脚本
-```bash
-python scripts/douyin_selenium_cdp_play_vv.py --auto --duration 60
-```
-
-#### 方式二：启动定时任务（推荐）
-```bash
-python scripts/douyin_auto_scheduler.py
-```
-
-#### 方式三：启动API服务器
-```bash
-python scripts/miniprogram_api_server.py
-```
-
-## ⚙️ 核心功能
-
-### 1. 数据抓取模块
- **技术栈**: Selenium + Chrome DevTools Protocol
- **数据源**: 抖音收藏合集页面
- **提取字段**:
-  - 合集名称 (mix_name)
-  - 真实播放量 (play_vv)
-  - 合集链接 (video_url)
-  - 合集ID (mix_id)
-  - 视频ID列表 (aweme_ids)
-  - 封面图片 (cover_image_url)
-
-### 2. 定时任务模块
- **执行时间**: 每天上午9:35自动执行
- **日志记录**: 完整的执行日志
- **错误处理**: 自动重试和异常处理
-
-### 3. API服务模块
- **端口**: 5001
- **跨域支持**: 支持小程序调用
- **接口功能**:
-  - 视频列表查询（分页、排序）
-  - 热门榜单
-  - 搜索功能
-  - 视频详情
-  - 统计分析
-
-## 📊 API接口
-
-### 基础接口
-
-| 接口 | 方法 | 描述 | 参数 |
-|------|------|------|------|
-| `/api/videos` | GET | 获取视频列表 | `page`, `limit`, `sort` |
-| `/api/top` | GET | 热门榜单 | `limit` |
-| `/api/search` | GET | 搜索视频 | `q`, `page`, `limit` |
-| `/api/detail` | GET | 视频详情 | `id` |
-| `/api/stats` | GET | 统计信息 | - |
-| `/api/health` | GET | 健康检查 | - |
-
-### 排序方式
- `playcount`: 按总播放量排序（默认）
- `growth`: 按增长量排序
- `time`: 按时间排序
-
-### 增长排序参数
-```
-/api/videos?sort=growth&start_date=2025-10-16&end_date=2025-10-17
-```
-
-## 🔧 配置说明
-
-### 环境变量
-```bash
-# MongoDB配置
-MONGO_HOST=localhost
-MONGO_PORT=27017
-MONGO_DB=douyin_data
-MONGO_COLLECTION=play_vv_records
-
-# ChromeDriver配置
-OVERRIDE_CHROMEDRIVER=/path/to/chromedriver
-
-# 自动模式
-AUTO_CONTINUE=1
-```
-
-### 定时任务配置
-在 `scripts/douyin_auto_scheduler.py` 中修改执行时间：
-```python
-schedule.every().day.at("09:35").do(self.run_douyin_scraper)
-```
-
-## 📈 数据格式
-
-### MongoDB文档结构
-```json
-{
-  "_id": ObjectId,
-  "batch_time": "2025-10-17T09:35:10",
-  "mix_name": "合集名称",
-  "video_url": "https://www.douyin.com/collection/xxx",
-  "playcount": "1.2亿",
-  "play_vv": 120000000,
-  "request_id": "请求ID",
-  "rank": 1,
-  "aweme_ids": ["视频ID1", "视频ID2"],
-  "cover_image_url": "封面图片URL",
-  "cover_backup_urls": ["备用图片URL"]
-}
-```
-
-## 🛠️ 工具脚本
-
-### 数据库查询
-```bash
-python scripts/query_mongodb_data.py
-```
-
-### 查看最新数据
-```bash
-python scripts/view_latest_data.py
-```
-
-### 检查MongoDB连接
-```bash
-python scripts/check_mongodb.py
-```
-
-## ⚠️ 注意事项
-
-### 法律合规
- 请确保使用符合抖音服务条款和相关法律法规
- 数据仅供学习和研究使用，请勿用于商业用途
- 避免过于频繁的请求，以免触发反爬虫机制
-
-### 技术限制
- Chrome配置文件需要手动设置登录状态
- 抖音页面结构变化可能导致抓取失败
- 需要稳定的网络环境
-
-### 故障排除
-1. **ChromeDriver问题**: 确保Chrome浏览器版本与ChromeDriver匹配
-2. **登录状态丢失**: 重新运行手动登录流程
-3. **MongoDB连接失败**: 检查MongoDB服务是否启动
-
-## 📝 版本历史
-
- **v2.0**: 新增小程序API服务、增长排序功能
- **v1.0**: 基础抓取功能和定时任务
-
-## 🤝 贡献
-
-欢迎提交Issue和Pull Request来改进这个项目。
-
-## 📄 许可证
-
-本项目仅供学习和研究使用。使用者需要遵守相关法律法规和平台服务条款，作者不承担任何法律责任。
--- a/handlers/Rankings/docs/README.md
+++ b/handlers/Rankings/docs/README.md
@ -0,0 +1,102 @@
+# 排名系统（Rankings）说明大纲
+
+## 1. 项目概览
+- 提供抖音收藏合集真实播放量数据采集与API服务
+- 抓取脚本写入 MongoDB；API 按播放量与增长榜返回数据
+
+## 2. 目录速览（关键）
+- `handlers/Rankings/rank_data_scraper.py` 数据抓取脚本（Selenium+CDP）
+- `routers/rank_api_routes.py` 小程序 API 数据访问/逻辑模块（由 `app.py` 调用，不独立运行）
+- `app.py` 主服务入口（Flask应用，注册所有 API 路由）
+- `Timer_worker.py` 定时任务，每日自动运行抓取
+
+### 项目结构（简版）
+```
+项目根/
+├── app.py                  # 主服务入口（5000）
+├── Timer_worker.py         # 定时抓取任务
+├── config.py               # 全局配置
+├── database.py             # 数据库封装
+├── routers/
+│   └── rank_api_routes.py  # 小程序API逻辑模块
+├── handlers/
+│   └── Rankings/
+│       ├── rank_data_scraper.py         # 抓取脚本（Selenium+CDP）
+│       ├── config/
+│       │   └── chrome_profile/
+│       │       └── douyin_persistent/   # 持久化Chrome用户目录（登录态）
+│       ├── data/                        # 数据导出/缓存（可选）
+│       ├── docs/                        # 使用说明与文档
+│       ├── drivers/                     # 浏览器驱动等
+│       └── logs/                        # 运行日志
+└── 项目启动说明.md
+```
+- 核心数据表：`Rankings/Rankings_list`
+- 日志示例：`handlers/Rankings/logs/douyin_scraper.log`
+
+## 3. 服务与端口
+- 单一服务：`app.py`（默认端口 `5000`，包含小程序 API 路由）
+
+## 4. 一键启动
+- 启动主服务：
+  ```bash
+  python app.py
+  ```
+- 启动定时任务（每日 9:35 自动抓取）：
+  ```bash
+  python Timer_worker.py
+  ```
+
+## 5. 使用步骤（首次登录与日常）
+- 安装依赖：
+  ```bash
+  pip install -r handlers/Rankings/docs/requirements.txt
+  ```
+- 第一次使用（登录抖音）：
+  - 运行抓取脚本：`python handlers/Rankings/rank_data_scraper.py`
+  - 弹出 Chrome 后，完成抖音登录（扫码/账号均可）。
+  - 登录完成后，回到终端提示界面按回车继续抓取。
+  - 后续运行会复用已登录的浏览器配置，免重复登录。
+
+- 日常流程：
+  - 抓取：`python handlers/Rankings/rank_data_scraper.py`
+  - 服务：`python app.py`（端口 `5000`）
+  - 定时：`python Timer_worker.py`（每日 9:35 自动执行）
+  
+- 验证数据：
+  - MongoDB：数据库 `Rankings`，集合 `Rankings_list`
+  - API 检查：
+    - `http://localhost:5000/api/health`
+    - `http://localhost:5000/api/videos?page=1&limit=20&sort=playcount`
+    - 增长榜：`http://localhost:5000/api/videos?sort=growth&page=1&limit=20`
+
+## 6. 数据抓取流程（简版）
+- 复用已登录的 Chrome 配置，滚动/刷新触发请求
+- 通过 CDP 捕获响应，解析 `play_vv` 与 SSR 数据
+- 按合集聚合视频，写入 MongoDB 指定集合
+
+## 7. 数据库与集合
+- 数据库：`Rankings`
+- 集合：`Rankings_list`
+- 连接：`mongodb://localhost:27017/`（可通过环境变量覆盖）
+
+## 8. API 功能摘要
+- 视频列表（分页、按播放量/时间排序，仅当日最新数据）
+- 增长榜（按指定日期区间对比增长量，分页返回）
+
+## 9. 配置项（环境变量）
+- `MONGO_HOST` 默认 `localhost`
+- `MONGO_PORT` 默认 `27017`
+- `MONGO_DB` 默认 `Rankings`
+- `MONGO_COLLECTION` 默认 `Rankings_list`
+
+## 10. 快速排错
+- MongoDB 连接失败：抓取脚本将仅保存本地文件日志
+- Chrome 配置：`handlers/Rankings/config/chrome_profile/`
+- 日志位置：`handlers/Rankings/logs/`
+
+## 11. 你需要知道的
+- 当前架构下没有独立的 `5001` 端口；`routers/rank_api_routes.py` 提供逻辑模块，由 `app.py` 注册路由并统一对外服务（`5000`）。
+- 抓取脚本与 API 使用同一集合，数据结构一致
+- 小程序 API 专注返回易用字段（封面、播放量、时间、链接）
+- 可直接在现有数据上新增排序或过滤，保持接口向后兼容
--- a/handlers/Rankings/docs/requirements.txt
+++ b/handlers/Rankings/docs/requirements.txt
--- a/handlers/Rankings/drivers/chromedriver.exe
+++ b/handlers/Rankings/drivers/chromedriver.exe
--- a/handlers/Rankings/rank_data_scraper.py
+++ b/handlers/Rankings/rank_data_scraper.py
@ -39,8 +39,7 @@ from pymongo.errors import ConnectionFailure
 # 确保logs目录存在
 import os
 script_dir = os.path.dirname(os.path.abspath(__file__))
-project_root = os.path.dirname(script_dir)
-logs_dir = os.path.join(project_root, 'logs')
+logs_dir = os.path.join(script_dir, 'logs')
 os.makedirs(logs_dir, exist_ok=True)

 logging.basicConfig(
@ -75,8 +74,8 @@ class DouyinPlayVVScraper:
            # MongoDB连接配置
            mongo_host = os.environ.get('MONGO_HOST', 'localhost')
            mongo_port = int(os.environ.get('MONGO_PORT', 27017))
-            mongo_db = os.environ.get('MONGO_DB', 'douyin_data')
-            mongo_collection = os.environ.get('MONGO_COLLECTION', 'play_vv_records')
+            mongo_db = os.environ.get('MONGO_DB', 'Rankings')
+            mongo_collection = os.environ.get('MONGO_COLLECTION', 'Rankings_list')
            
            # 创建MongoDB连接
            self.mongo_client = MongoClient(mongo_host, mongo_port, serverSelectionTimeoutMS=5000)
@ -105,7 +104,8 @@ class DouyinPlayVVScraper:
    def _cleanup_old_profiles(self):
        """清理超过一天的旧临时Chrome配置文件"""
        try:
-            profile_base_dir = os.path.abspath(os.path.join('.', 'config', 'chrome_profile'))
+            script_dir = os.path.dirname(os.path.abspath(__file__))
+            profile_base_dir = os.path.join(script_dir, 'config', 'chrome_profile')
            if not os.path.exists(profile_base_dir):
                return
                
@ -135,7 +135,8 @@ class DouyinPlayVVScraper:
            import psutil
            
            # 获取当前配置文件路径
-            profile_dir = os.path.abspath(os.path.join('.', 'config', 'chrome_profile', 'douyin_persistent'))
+            script_dir = os.path.dirname(os.path.abspath(__file__))
+            profile_dir = os.path.join(script_dir, 'config', 'chrome_profile', 'douyin_persistent')
            
            # 查找使用该配置文件的Chrome进程
            killed_processes = []
@ -190,7 +191,8 @@ class DouyinPlayVVScraper:
        chrome_options.add_argument('--start-maximized')
        chrome_options.add_argument('--lang=zh-CN')
        # 使用固定的Chrome配置文件目录以保持登录状态
-        profile_dir = os.path.abspath(os.path.join('.', 'config', 'chrome_profile', 'douyin_persistent'))
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        profile_dir = os.path.join(script_dir, 'config', 'chrome_profile', 'douyin_persistent')
        os.makedirs(profile_dir, exist_ok=True)
        chrome_options.add_argument(f'--user-data-dir={profile_dir}')
        logging.info(f'使用持久化Chrome配置文件: {profile_dir}')
@ -931,8 +933,14 @@ class DouyinPlayVVScraper:

    def save_results(self):
        ts = datetime.now().strftime('%Y%m%d_%H%M%S')
-        json_file = f'douyin_cdp_play_vv_{ts}.json'
-        txt_file = f'douyin_cdp_play_vv_{ts}.txt'
+        
+        # 创建data文件夹
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        data_dir = os.path.join(script_dir, 'data')
+        os.makedirs(data_dir, exist_ok=True)
+        
+        json_file = os.path.join(data_dir, f'douyin_cdp_play_vv_{ts}.json')
+        txt_file = os.path.join(data_dir, f'douyin_cdp_play_vv_{ts}.txt')

        # 保存到JSON文件
        with open(json_file, 'w', encoding='utf-8') as f:
--- a/scripts/miniprogram_api_server.py
+++ b/scripts/miniprogram_api_server.py
@ -5,34 +5,11 @@
 优化的数据格式和接口设计，专为小程序使用
 """

-from flask import Flask, jsonify, request
-from flask_cors import CORS
 from pymongo import MongoClient
 from datetime import datetime, timedelta
 import logging
-import os
 import re

-# 配置日志
-# 确保logs目录存在
-import os
-script_dir = os.path.dirname(os.path.abspath(__file__))
-project_root = os.path.dirname(script_dir)
-logs_dir = os.path.join(project_root, 'logs')
-os.makedirs(logs_dir, exist_ok=True)
-
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.FileHandler(os.path.join(logs_dir, 'miniprogram_api.log'), encoding='utf-8'),
-        logging.StreamHandler()
-    ]
-)
-
-app = Flask(__name__)
-CORS(app)  # 允许跨域访问，支持小程序调用
-
 class MiniprogramAPI:
    def __init__(self):
        self.client = None
@ -47,8 +24,8 @@ class MiniprogramAPI:
            # 测试连接
            self.client.admin.command('ping')
            # 使用数据库与集合
-            self.db = self.client['douyin_data']
-            self.collection = self.db['play_vv_records']
+            self.db = self.client['Rankings']
+            self.collection = self.db['Rankings_list']
            logging.info("MongoDB连接成功")
            return True
        except Exception as e:
@ -465,136 +442,70 @@ class MiniprogramAPI:
            logging.error(f"获取统计信息失败: {e}")
            return {"success": False, "message": f"获取统计失败: {str(e)}"}

+    def get_videos(self):
+        """获取视频列表 - 兼容app.py调用"""
+        from flask import request
+        
+        page = int(request.args.get('page', 1))
+        limit = int(request.args.get('limit', 20))
+        sort_by = request.args.get('sort', 'playcount')
+        
+        if sort_by == 'growth':
+            start_date = request.args.get('start_date')
+            end_date = request.args.get('end_date')
+            return self.get_growth_videos(page, limit, start_date, end_date)
+        else:
+            return self.get_video_list(page, limit, sort_by)
+    
+    def get_top(self):
+        """获取热门榜单 - 兼容app.py调用"""
+        from flask import request
+        limit = int(request.args.get('limit', 10))
+        return self.get_top_videos(limit)
+    
+    def search(self):
+        """搜索视频 - 兼容app.py调用"""
+        from flask import request
+        keyword = request.args.get('q', '')
+        page = int(request.args.get('page', 1))
+        limit = int(request.args.get('limit', 10))
+        return self.search_videos(keyword, page, limit)
+    
+    def get_detail(self):
+        """获取视频详情 - 兼容app.py调用"""
+        from flask import request
+        video_id = request.args.get('id', '')
+        return self.get_video_detail(video_id)
+    
+    def get_stats(self):
+        """获取统计信息 - 兼容app.py调用"""
+        return self.get_statistics()
+    
+    def health_check(self):
+        """健康检查 - 兼容app.py调用"""
+        try:
+            # 检查数据库连接
+            if not self.client:
+                return {"success": False, "message": "数据库未连接"}
+            
+            # 测试数据库连接
+            self.client.admin.command('ping')
+            
+            # 获取数据统计
+            total_count = self.collection.count_documents({})
+            
+            return {
+                "success": True,
+                "message": "服务正常",
+                "data": {
+                    "database": "连接正常",
+                    "total_records": total_count,
+                    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                }
+            }
+        except Exception as e:
+            logging.error(f"健康检查失败: {e}")
+            return {"success": False, "message": f"服务异常: {str(e)}"}
+
 # 创建API实例
-api = MiniprogramAPI()
-
-# API路由定义
-@app.route('/')
-def index():
-    """API首页"""
-    return jsonify({
-        "name": "小程序抖音播放量数据API",
-        "version": "2.0",
-        "description": "专为小程序优化的抖音播放量数据接口",
-        "endpoints": {
-            "/api/videos": "获取视频列表 (支持分页和排序)",
-            "/api/top": "获取热门视频榜单",
-            "/api/search": "搜索视频",
-            "/api/detail": "获取视频详情",
-            "/api/stats": "获取统计信息",
-            "/api/health": "健康检查"
-        },
-        "features": [
-            "分页支持",
-            "多种排序方式",
-            "搜索功能",
-            "详情查看",
-            "统计分析",
-            "小程序优化"
-        ]
-    })
-
-@app.route('/api/videos')
-def get_videos():
-    """获取视频列表"""
-    page = request.args.get('page', 1, type=int)
-    limit = request.args.get('limit', 20, type=int)
-    sort_by = request.args.get('sort', 'playcount')  # playcount, time, 或 growth
-    start_date = request.args.get('start_date', None)
-    end_date = request.args.get('end_date', None)
-
-    # 限制参数范围
-    page = max(1, page)
-    limit = min(50, max(1, limit))  # 限制每页最多50条
-
-    if sort_by == "growth":
-        # 增长排序需要特殊处理，支持日期参数
-        result = api.get_growth_videos(page, limit, start_date, end_date)
-    else:
-        result = api.get_video_list(page, limit, sort_by)
-
-    return jsonify(result)
-
-@app.route('/api/top')
-def get_top():
-    """获取热门视频榜单"""
-    limit = request.args.get('limit', 10, type=int)
-    limit = min(50, max(1, limit))  # 限制最多50条
-    
-    result = api.get_top_videos(limit)
-    return jsonify(result)
-
-@app.route('/api/search')
-def search():
-    """搜索视频"""
-    keyword = request.args.get('q', '').strip()
-    page = request.args.get('page', 1, type=int)
-    limit = request.args.get('limit', 10, type=int)
-    
-    # 限制参数范围
-    page = max(1, page)
-    limit = min(30, max(1, limit))  # 搜索结果限制每页最多30条
-    
-    result = api.search_videos(keyword, page, limit)
-    return jsonify(result)
-
-@app.route('/api/detail')
-def get_detail():
-    """获取视频详情"""
-    video_id = request.args.get('id', '').strip()
-    
-    if not video_id:
-        return jsonify({"success": False, "message": "请提供视频ID"})
-    
-    result = api.get_video_detail(video_id)
-    return jsonify(result)
-
-@app.route('/api/stats')
-def get_stats():
-    """获取统计信息"""
-    result = api.get_statistics()
-    return jsonify(result)
-
-@app.route('/api/health')
-def health_check():
-    """健康检查"""
-    try:
-        # 检查MongoDB连接
-        api.client.admin.command('ping')
-        
-        # 获取基本信息
-        total_count = api.collection.count_documents({})
-        
-        return jsonify({
-            "success": True,
-            "status": "healthy",
-            "mongodb": "connected",
-            "total_records": total_count,
-            "server_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-            "api_version": "2.0"
-        })
-        
-    except Exception as e:
-        return jsonify({
-            "success": False,
-            "status": "unhealthy",
-            "mongodb": "disconnected",
-            "error": str(e),
-            "server_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-        })
-
-if __name__ == '__main__':
-    print("启动小程序专用抖音播放量API服务器...")
-    print("API地址: http://localhost:5001")
-    print("小程序API接口列表:")
-    print("   - GET /api/videos?page=1&limit=20&sort=playcount  获取视频列表（总播放量排序）")
-    print("   - GET /api/videos?page=1&limit=20&sort=growth     获取视频列表（增长排序，默认昨天到今天的差值）")
-    print("   - GET /api/videos?page=1&limit=20&sort=growth&start_date=2025-10-16&end_date=2025-10-17  获取视频列表（自定义日期范围增长排序）")
-    print("   - GET /api/top?limit=10                           获取热门榜单")
-    print("   - GET /api/search?q=关键词&page=1&limit=10         搜索视频")
-    print("   - GET /api/detail?id=视频ID                       获取视频详情")
-    print("   - GET /api/stats                                  获取统计信息")
-    print("   - GET /api/health                                 健康检查")
-    print("专为小程序优化：分页、搜索、详情、统计、增长排序、自定义日期范围")
-
-    app.run(host='0.0.0.0', port=5001, debug=True)
+api = MiniprogramAPI()
--- a/scripts/check_mongodb.py
+++ b/scripts/check_mongodb.py
@ -1,65 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-检查MongoDB数据保存状态
-"""
-
-from pymongo import MongoClient
-from datetime import datetime
-import sys
-
-def check_mongodb():
-    """检查MongoDB连接和数据"""
-    try:
-        # 使用与主脚本相同的连接参数
-        client = MongoClient('localhost', 27017, serverSelectionTimeoutMS=5000)
-
-        # 测试连接
-        client.admin.command('ping')
-        print("MongoDB连接成功")
-
-        # 检查数据库和集合
-        db = client['douyin_data']
-        collection = db['play_vv_records']
-
-        total_count = collection.count_documents({})
-        print(f"总记录数: {total_count}")
-
-        # 检查今天的数据
-        today_start = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
-        today_count = collection.count_documents({'batch_time': {'$gte': today_start}})
-        print(f"今天的数据记录数: {today_count}")
-
-        # 显示最新5条记录（按时间倒序排列）
-        print("\n最新5条记录（按时间倒序排列）:")
-        print("-" * 60)
-        for doc in collection.find().sort('batch_time', -1).limit(5):
-            print(f"合集名称: {doc.get('mix_name', '未知')}")
-            print(f"播放量: {doc.get('play_vv', 0):,} ({doc.get('playcount', '')})")
-            print(f"合集链接: {doc.get('video_url', '')}")
-            print(f"保存时间: {doc.get('batch_time', '')}")
-            print(f"视频ID数: {len(doc.get('aweme_ids', []))}")
-            print(f"封面图片: {'有' if doc.get('cover_image_url') else '无'}")
-            print("-" * 60)
-
-        # 显示字段结构
-        if total_count > 0:
-            sample = collection.find_one()
-            print(f"\n文档字段结构:")
-            for key in sample.keys():
-                print(f"  - {key}: {type(sample[key]).__name__}")
-
-    except Exception as e:
-        print(f"检查MongoDB时出错: {e}")
-        return False
-
-    return True
-
-if __name__ == '__main__':
-    print("=== MongoDB数据检查 ===")
-    success = check_mongodb()
-    if success:
-        print("\n检查完成")
-    else:
-        print("\n检查失败")
-        sys.exit(1)
--- a/scripts/mongodb_quick_view.py
+++ b/scripts/mongodb_quick_view.py
@ -1,294 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-MongoDB数据库快速查看工具
-一次性显示数据库结构、统计信息和最新数据
-"""
-
-import pymongo
-from pymongo import MongoClient
-from datetime import datetime
-import json
-from collections import defaultdict
-
-def connect_mongodb(connection_string='mongodb://localhost:27017/'):
-    """Connect to MongoDB and verify the server answers a ping.
-
-    Args:
-        connection_string: MongoDB URI to connect to.
-
-    Returns:
-        The connected MongoClient, or None when the connection or the ping
-        failed (the error is printed, not raised).
-    """
-    client = None
-    try:
-        client = MongoClient(connection_string, serverSelectionTimeoutMS=5000)
-        client.admin.command('ping')
-        print(f"✅ 成功连接到MongoDB: {connection_string}")
-        return client
-    except Exception as e:
-        print(f"❌ 连接MongoDB失败: {e}")
-        # Do not leave a half-open client behind when the ping fails.
-        if client is not None:
-            client.close()
-        return None
-
-def analyze_document_schema(document):
-    """Describe the type and an example value for each field of a document.
-
-    Args:
-        document: Mapping of field names to values. Falsy input (None or an
-            empty dict) yields an empty result.
-
-    Returns:
-        dict: ``{field: info}`` where every ``info`` has ``'type'`` and
-        ``'example'`` keys; arrays additionally carry ``'length'`` and
-        objects ``'keys'``.
-    """
-    if not document:
-        return {}
-
-    schema = {}
-    for key, value in document.items():
-        if key == '_id':
-            schema[key] = {'type': 'ObjectId', 'example': str(value)}
-        elif isinstance(value, str):
-            # Long strings are cut to 50 characters plus an ellipsis.
-            schema[key] = {'type': 'string', 'example': value[:50] + '...' if len(value) > 50 else value}
-        elif isinstance(value, bool):
-            # bool is a subclass of int, so it has to be tested before int;
-            # otherwise this branch is unreachable and flags show as integers.
-            schema[key] = {'type': 'boolean', 'example': value}
-        elif isinstance(value, int):
-            schema[key] = {'type': 'integer', 'example': value}
-        elif isinstance(value, float):
-            schema[key] = {'type': 'float', 'example': value}
-        elif isinstance(value, datetime):
-            schema[key] = {'type': 'datetime', 'example': value.strftime('%Y-%m-%d %H:%M:%S')}
-        elif isinstance(value, list):
-            # Show at most three items; mark longer lists with '...'.
-            schema[key] = {
-                'type': 'array',
-                'length': len(value),
-                'example': value[:3] if len(value) <= 3 else value[:3] + ['...']
-            }
-        elif isinstance(value, dict):
-            # Up to five key names and the first two key/value pairs.
-            schema[key] = {
-                'type': 'object',
-                'keys': list(value.keys())[:5],
-                'example': {k: v for k, v in list(value.items())[:2]}
-            }
-        else:
-            schema[key] = {'type': type(value).__name__, 'example': str(value)[:50]}
-
-    return schema
-
-def display_database_info(client):
-    """Print every non-system database with its collections and field layout.
-
-    For each collection the document count is printed and, when the
-    collection is non-empty, the schema of one sample document as reported
-    by analyze_document_schema(). Errors are printed, not raised.
-
-    Args:
-        client: Connected client exposing list_database_names() and item
-            access by database name.
-    """
-    banner = "="*80
-    print("\n" + banner)
-    print("📊 MongoDB 数据库结构分析")
-    print(banner)
-
-    try:
-        for db_name in client.list_database_names():
-            # Skip MongoDB's built-in databases.
-            if db_name in ('admin', 'local', 'config'):
-                continue
-
-            database = client[db_name]
-            coll_names = database.list_collection_names()
-
-            print(f"\n🗄️  数据库: {db_name}")
-            print(f"   集合数量: {len(coll_names)}")
-
-            for coll_name in coll_names:
-                coll = database[coll_name]
-                doc_total = coll.count_documents({})
-
-                print(f"\n   📁 集合: {coll_name}")
-                print(f"      文档数量: {doc_total:,}")
-
-                if doc_total <= 0:
-                    print(f"      ⚠️  集合为空")
-                    continue
-
-                # One sample document stands in for the whole collection.
-                layout = analyze_document_schema(coll.find_one())
-                if not layout:
-                    continue
-
-                print(f"      📋 字段结构:")
-                for field_name, info in layout.items():
-                    print(f"         • {field_name}: {info['type']}")
-                    if 'example' not in info:
-                        continue
-                    sample = info['example']
-                    if isinstance(sample, str) and len(sample) > 100:
-                        sample = sample[:100] + "..."
-                    print(f"           示例: {sample}")
-
-    except Exception as e:
-        print(f"❌ 获取数据库信息失败: {e}")
-
-def _format_timestamp(value, fmt='%Y-%m-%d %H:%M:%S'):
-    """Format a datetime with *fmt*; fall back to plain text for anything else."""
-    if isinstance(value, datetime):
-        return value.strftime(fmt)
-    return '未知' if value is None else str(value)
-
-
-def display_statistics(client, db_name='douyin_data', collection_name='play_vv_records'):
-    """Print summary statistics for one collection.
-
-    Prints the document count, the time range of the first time field that
-    exists, sum/avg/max/min of the first numeric play-count field that
-    exists, and the five documents with the highest ``play_vv``. Errors are
-    printed, not raised.
-
-    Args:
-        client: Connected client supporting item access by database name.
-        db_name: Database to inspect.
-        collection_name: Collection to inspect.
-    """
-    try:
-        db = client[db_name]
-        collection = db[collection_name]
-
-        print(f"\n📊 统计信息 ({db_name}.{collection_name})")
-        print("-" * 50)
-
-        # Basic count.
-        total_count = collection.count_documents({})
-        print(f"📈 总文档数: {total_count:,}")
-
-        if total_count == 0:
-            print("⚠️  集合为空，无法显示统计信息")
-            return
-
-        # Time range: only the first candidate field present is reported.
-        time_fields = ['batch_time', 'created_at', 'timestamp']
-        for field in time_fields:
-            if collection.find_one({field: {'$exists': True}}):
-                pipeline = [
-                    {'$group': {
-                        '_id': None,
-                        'min_time': {'$min': f'${field}'},
-                        'max_time': {'$max': f'${field}'}
-                    }}
-                ]
-                result = list(collection.aggregate(pipeline))
-                if result:
-                    min_time = result[0]['min_time']
-                    max_time = result[0]['max_time']
-                    print(f"📅 时间范围 ({field}):")
-                    # Values that are not datetimes are shown as text instead
-                    # of raising and aborting the remaining statistics.
-                    print(f"   最早: {_format_timestamp(min_time)}")
-                    print(f"   最新: {_format_timestamp(max_time)}")
-                break
-
-        # Play-count statistics: only the first numeric candidate field is used.
-        playcount_fields = ['play_vv', 'playcount', 'play_count', 'views']
-        for field in playcount_fields:
-            if collection.find_one({field: {'$exists': True, '$type': 'number'}}):
-                pipeline = [
-                    {'$group': {
-                        '_id': None,
-                        'total_plays': {'$sum': f'${field}'},
-                        'avg_plays': {'$avg': f'${field}'},
-                        'max_plays': {'$max': f'${field}'},
-                        'min_plays': {'$min': f'${field}'}
-                    }}
-                ]
-                result = list(collection.aggregate(pipeline))
-                if result:
-                    stats = result[0]
-                    print(f"🎬 播放量统计 ({field}):")
-                    print(f"   总播放量: {stats['total_plays']:,}")
-                    print(f"   平均播放量: {stats['avg_plays']:,.0f}")
-                    print(f"   最高播放量: {stats['max_plays']:,}")
-                    print(f"   最低播放量: {stats['min_plays']:,}")
-                break
-
-        # Top five documents by play_vv.
-        if collection.find_one({'mix_name': {'$exists': True}}):
-            print(f"\n🔥 热门内容 (按播放量排序):")
-            pipeline = [
-                {'$match': {'play_vv': {'$exists': True, '$type': 'number'}}},
-                {'$sort': {'play_vv': -1}},
-                {'$limit': 5},
-                {'$project': {'mix_name': 1, 'play_vv': 1, 'batch_time': 1}}
-            ]
-            top_content = list(collection.aggregate(pipeline))
-            for i, content in enumerate(top_content, 1):
-                name = content.get('mix_name', '未知')
-                plays = content.get('play_vv', 0)
-                # A missing batch_time is reported as unknown rather than
-                # being silently replaced by the current time.
-                time_str = _format_timestamp(content.get('batch_time'), '%m-%d %H:%M')
-                print(f"   {i}. {name}: {plays:,} ({time_str})")
-
-    except Exception as e:
-        print(f"❌ 获取统计信息失败: {e}")
-
-def display_recent_data(client, db_name='douyin_data', collection_name='play_vv_records', limit=3):
-    """Print the most recent documents of a collection.
-
-    Documents are sorted descending by the first of ``batch_time``,
-    ``created_at``, ``timestamp`` or ``_id`` that exists on some document;
-    when none exists the natural order is used. Errors are printed, not
-    raised.
-
-    Args:
-        client: Connected client supporting item access by database name.
-        db_name: Database to read from.
-        collection_name: Collection to read from.
-        limit: Maximum number of documents to print.
-    """
-    try:
-        collection = client[db_name][collection_name]
-
-        print(f"\n📈 最近 {limit} 条数据 ({db_name}.{collection_name})")
-        print("-" * 80)
-
-        # Pick the first candidate field that at least one document carries.
-        candidates = ('batch_time', 'created_at', 'timestamp', '_id')
-        sort_field = next(
-            (name for name in candidates
-             if collection.find_one({name: {'$exists': True}})),
-            None,
-        )
-
-        cursor = collection.find()
-        if sort_field:
-            cursor = cursor.sort(sort_field, -1)
-        recent_docs = list(cursor.limit(limit))
-
-        if not recent_docs:
-            print("⚠️  没有找到数据")
-            return
-
-        for position, record in enumerate(recent_docs, 1):
-            print(f"\n📄 记录 {position}:")
-            display_document(record)
-
-    except Exception as e:
-        print(f"❌ 获取最近数据失败: {e}")
-
-def display_document(doc, indent=2):
-    """Pretty-print one document, one field per line.
-
-    Strings are cut at 100 characters, list items and dict values at 50.
-    Lists and dicts with more than three entries show only the first two
-    followed by a count of the rest.
-
-    Args:
-        doc: Mapping of field names to values.
-        indent: Number of spaces placed before every printed line.
-    """
-    pad = " " * indent
-    nested = f"{pad}   "
-    counters = ('playcount', 'play_count', 'views', 'play_vv')
-
-    def clip(text, width):
-        """Cut text to width characters, marking the cut with '...'."""
-        return text[:width] + "..." if len(text) > width else text
-
-    for key, value in doc.items():
-        if key == '_id':
-            print(f"{pad}🆔 {key}: {value}")
-            continue
-        if isinstance(value, datetime):
-            print(f"{pad}📅 {key}: {value.strftime('%Y-%m-%d %H:%M:%S')}")
-            continue
-        if isinstance(value, str):
-            print(f"{pad}📝 {key}: {clip(value, 100)}")
-            continue
-        if isinstance(value, (int, float)):
-            # Play counters get thousands separators; other numbers do not.
-            if key in counters:
-                print(f"{pad}📊 {key}: {value:,}")
-            else:
-                print(f"{pad}🔢 {key}: {value}")
-            continue
-        if isinstance(value, list):
-            print(f"{pad}📋 {key}: [{len(value)} 项]")
-            truncated = len(value) > 3
-            for item in (value[:2] if truncated else value):
-                print(f"{nested}- {clip(str(item), 50)}")
-            if truncated:
-                print(f"{nested}... 还有 {len(value)-2} 项")
-            continue
-        if isinstance(value, dict):
-            print(f"{pad}📦 {key}: {{对象}}")
-            truncated = len(value) > 3
-            pairs = list(value.items())
-            for k, v in (pairs[:2] if truncated else pairs):
-                print(f"{nested}{k}: {clip(str(v), 50)}")
-            if truncated:
-                print(f"{nested}... 还有 {len(value)-2} 个字段")
-            continue
-        print(f"{pad}❓ {key}: {value}")
-
-def main():
-    """Connect to MongoDB and print structure, statistics and recent data."""
-    print("🚀 MongoDB 数据库快速查看工具")
-    started = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-    print(f"⏰ 查看时间: {started}")
-
-    client = connect_mongodb()
-    if not client:
-        # connect_mongodb() already reported the failure.
-        return
-
-    try:
-        # Structure first, then statistics, then the newest documents.
-        for report in (display_database_info, display_statistics, display_recent_data):
-            report(client)
-
-        print(f"\n{'='*80}")
-        print("✅ 数据库查看完成!")
-        print("💡 提示: 运行 'python scripts/mongodb_viewer.py' 可以使用交互式查看器")
-        print("🔄 提示: 重新运行此脚本可以查看最新数据")
-
-    except KeyboardInterrupt:
-        print("\n👋 程序被用户中断")
-    finally:
-        if client:
-            client.close()
-
-# Run the viewer only when executed as a script, not when imported.
-if __name__ == '__main__':
-    main()
--- a/scripts/query_mongodb_data.py
+++ b/scripts/query_mongodb_data.py
@ -1,142 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-查询MongoDB中的抖音播放量数据
-"""
-
-from pymongo import MongoClient
-from pymongo.errors import ConnectionFailure
-from datetime import datetime
-
-def connect_mongodb():
-    """Connect to the local MongoDB and return the play-count collection.
-
-    Returns:
-        tuple: ``(client, collection)`` on success, ``(None, None)`` when
-        the connection or ping failed (the error is printed, not raised).
-    """
-    client = None
-    try:
-        client = MongoClient('mongodb://localhost:27017/', serverSelectionTimeoutMS=5000)
-        client.admin.command('ping')
-        # NOTE(review): this database name contains two em dashes, while the
-        # other scripts in this change use 'douyin_data' - confirm whether
-        # this is intentional before relying on it.
-        db = client['douyin——data']
-        collection = db['playcounts']
-        print("MongoDB连接成功")
-        return client, collection
-    except ConnectionFailure:
-        print("MongoDB连接失败，请确保MongoDB服务已启动")
-    except Exception as e:
-        print(f"MongoDB连接出错: {e}")
-
-    # Failure path: do not leave a half-open client behind.
-    if client is not None:
-        client.close()
-    return None, None
-
-def query_latest_batches(collection, limit=5):
-    """Print the most recent batches and the records of each batch.
-
-    Batches are grouped by ``batch_id`` and ordered by ``batch_time``
-    descending. Within a batch, records are ordered by ``rank`` when the
-    first record has one, otherwise by ``playcount_number`` descending when
-    the first record has that field. Errors are printed, not raised.
-
-    Args:
-        collection: Collection to aggregate and query.
-        limit: Maximum number of batches to show.
-    """
-    try:
-        recent = list(collection.aggregate([
-            {"$group": {
-                "_id": "$batch_id",
-                "batch_time": {"$first": "$batch_time"},
-                "count": {"$sum": 1}
-            }},
-            {"$sort": {"batch_time": -1}},
-            {"$limit": limit}
-        ]))
-
-        if not recent:
-            print("暂无数据")
-            return
-
-        print(f"\n===== 最近 {len(recent)} 个批次 =====")
-        for batch in recent:
-            stamp = batch['batch_time'].strftime("%Y-%m-%d %H:%M:%S")
-            print(f"批次ID: {batch['_id']}, 时间: {stamp}, 数据条数: {batch['count']}")
-
-            rows = list(collection.find(
-                {"batch_id": batch['_id']},
-                {"name": 1, "playcount": 1, "rank": 1, "playcount_number": 1, "_id": 0}
-            ))
-
-            # The first record decides which ordering applies to the batch.
-            if rows:
-                head = rows[0]
-                if 'rank' in head:
-                    rows.sort(key=lambda row: row.get('rank', 999))
-                elif 'playcount_number' in head:
-                    rows.sort(key=lambda row: row.get('playcount_number', 0), reverse=True)
-
-            for position, row in enumerate(rows, 1):
-                prefix = f"[第{row.get('rank', position)}名] " if 'rank' in row else ""
-                print(f"  {prefix}{row['name']}")
-                print(f"    播放量: {row['playcount']}")
-            print()
-
-    except Exception as e:
-        print(f"查询数据失败: {e}")
-
-def query_by_name(collection, name_keyword):
-    """Print every record whose name contains the given keyword.
-
-    The match is a case-insensitive substring search; results are ordered
-    by ``batch_time`` descending. Errors are printed, not raised.
-
-    Args:
-        collection: Collection to search.
-        name_keyword: Literal text to look for in the ``name`` field.
-    """
-    import re
-
-    try:
-        # Escape the keyword so characters such as '(' or '*' are matched
-        # literally instead of being interpreted as regex syntax.
-        query = {"name": {"$regex": re.escape(name_keyword), "$options": "i"}}
-        results = list(collection.find(query).sort("batch_time", -1))
-
-        if not results:
-            print(f"未找到包含'{name_keyword}'的剧本")
-            return
-
-        print(f"\n===== 包含'{name_keyword}'的剧本 =====")
-        for result in results:
-            batch_time = result['batch_time'].strftime("%Y-%m-%d %H:%M:%S")
-            print(f"剧本: {result['name']}")
-            print(f"播放量: {result['playcount']}")
-            print(f"抓取时间: {batch_time}")
-            print(f"批次ID: {result['batch_id']}")
-            print("-" * 30)
-
-    except Exception as e:
-        print(f"查询失败: {e}")
-
-def main():
-    """Run the interactive query menu until the user quits."""
-    print("抖音播放量数据查询工具")
-    print("=" * 40)
-
-    client, collection = connect_mongodb()
-    if collection is None:
-        return
-
-    menu = (
-        "\n请选择操作:",
-        "1. 查看最近的批次数据",
-        "2. 根据剧本名称搜索",
-        "3. 退出",
-    )
-
-    try:
-        while True:
-            for line in menu:
-                print(line)
-
-            choice = input("请输入选项 (1-3): ").strip()
-
-            if choice == '3':
-                break
-
-            if choice == '1':
-                raw = input("显示最近几个批次? (默认5): ").strip()
-                # Blank or non-numeric input falls back to five batches.
-                try:
-                    count = int(raw) if raw else 5
-                except ValueError:
-                    count = 5
-                query_latest_batches(collection, count)
-            elif choice == '2':
-                keyword = input("请输入剧本名称关键词: ").strip()
-                if keyword:
-                    query_by_name(collection, keyword)
-                else:
-                    print("关键词不能为空")
-            else:
-                print("无效选项，请重新选择")
-
-    except KeyboardInterrupt:
-        print("\n用户中断操作")
-    finally:
-        if client:
-            client.close()
-        print("已断开MongoDB连接")
-
-# Start the interactive tool only when executed as a script.
-if __name__ == '__main__':
-    main()
--- a/scripts/view_latest_data.py
+++ b/scripts/view_latest_data.py
@ -1,55 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-查看MongoDB最新数据 - 始终按时间倒序排列
-"""
-
-from pymongo import MongoClient
-from datetime import datetime
-
-def view_latest_data(limit=20):
-    """Print the newest play-count records followed by summary counts.
-
-    Reads ``douyin_data.play_vv_records`` on localhost:27017, ordered by
-    ``batch_time`` descending. Errors are printed, not raised.
-
-    Args:
-        limit: Maximum number of records to print.
-    """
-    client = None
-    try:
-        client = MongoClient('localhost', 27017)
-        db = client['douyin_data']
-        collection = db['play_vv_records']
-
-        print("=== 抖音播放量最新数据 ===")
-        print(f"显示最新 {limit} 条记录（按时间倒序排列）")
-        print("=" * 80)
-
-        # Newest records first.
-        latest_docs = list(collection.find().sort('batch_time', -1).limit(limit))
-
-        if not latest_docs:
-            print("没有找到数据")
-            return
-
-        for i, doc in enumerate(latest_docs, 1):
-            print(f"\n记录 #{i}")
-            print("-" * 50)
-            print(f"合集名称: {doc.get('mix_name', '未知')}")
-            print(f"播放量: {doc.get('play_vv', 0):,} ({doc.get('playcount', '')})")
-            print(f"合集链接: {doc.get('video_url', '')}")
-            print(f"保存时间: {doc.get('batch_time', '')}")
-            print(f"视频ID数: {len(doc.get('aweme_ids', []))}")
-            print(f"封面图片: {'有' if doc.get('cover_image_url') else '无'}")
-
-        # Summary: overall count and records since local midnight today.
-        total_count = collection.count_documents({})
-        today_start = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
-        today_count = collection.count_documents({'batch_time': {'$gte': today_start}})
-
-        print(f"\n" + "=" * 80)
-        print(f"统计信息:")
-        print(f"- 总记录数: {total_count}")
-        print(f"- 今天记录数: {today_count}")
-        print(f"- 最新记录时间: {latest_docs[0].get('batch_time')}")
-
-    except Exception as e:
-        print(f"查看数据时出错: {e}")
-    finally:
-        # The client was previously never closed, including on the early
-        # "no data" return; release it on every path.
-        if client is not None:
-            client.close()
-
-if __name__ == '__main__':
-    import sys
-    # Optional first argument: number of records to show (default 20).
-    cli_args = sys.argv[1:]
-    limit = int(cli_args[0]) if cli_args else 20
-    view_latest_data(limit)