rank_backend/scripts/view_latest_data.py
Qyir 53160420d1 Initial commit: Douyin play count tracking system
Features:
- Douyin play count scraper using Selenium + Chrome DevTools Protocol
- Automated scheduler for daily data collection
- MongoDB data storage
- Mini-program API server
- Data analysis and visualization tools

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-17 10:48:52 +08:00

55 lines
2.0 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
查看MongoDB最新数据 - 始终按时间倒序排列
"""
from pymongo import MongoClient
from datetime import datetime
def _format_play_count(value):
    """Return *value* with thousands separators when numeric, else as plain text."""
    # The ',' format option only works on numbers; a string or None stored in
    # the document would otherwise raise and abort the whole listing.
    if isinstance(value, (int, float)):
        return f"{value:,}"
    return str(value)


def _print_record(index, doc):
    """Print one play-count document as a labelled, human-readable record."""
    # `or []` also covers a key that is present but stored as None.
    aweme_ids = doc.get('aweme_ids') or []
    print(f"\n记录 #{index}")
    print("-" * 50)
    print(f"合集名称: {doc.get('mix_name', '未知')}")
    print(f"播放量: {_format_play_count(doc.get('play_vv', 0))} ({doc.get('playcount', '')})")
    print(f"合集链接: {doc.get('video_url', '')}")
    print(f"保存时间: {doc.get('batch_time', '')}")
    print(f"视频ID数: {len(aweme_ids)}")
    # Both branches used to be empty strings, so this line never said anything.
    print(f"封面图片: {'有' if doc.get('cover_image_url') else '无'}")


def view_latest_data(limit=20):
    """Print the newest play-count records followed by summary statistics.

    Connects to MongoDB at localhost:27017, reads the ``play_vv_records``
    collection of the ``douyin_data`` database, and prints up to *limit*
    documents ordered by ``batch_time`` descending. Afterwards it prints the
    total document count, the count of documents whose ``batch_time`` is at or
    after today's local midnight, and the ``batch_time`` of the newest record.

    Args:
        limit: Maximum number of records to display; passed straight to the
            cursor's ``limit``.

    Returns:
        None. Output goes to stdout. Any exception is reported as a printed
        message instead of being raised.
    """
    client = None
    try:
        client = MongoClient('localhost', 27017)
        collection = client['douyin_data']['play_vv_records']

        print("=== 抖音播放量最新数据 ===")
        print(f"显示最新 {limit} 条记录(按时间倒序排列)")
        print("=" * 80)

        # Newest first: sort on batch_time descending, then cap at `limit`.
        latest_docs = list(collection.find().sort('batch_time', -1).limit(limit))
        if not latest_docs:
            print("没有找到数据")
            return

        for index, doc in enumerate(latest_docs, 1):
            _print_record(index, doc)

        # Summary statistics.
        total_count = collection.count_documents({})
        # NOTE(review): this is a naive local-time midnight; it assumes
        # batch_time is stored as a naive local datetime -- confirm against
        # the writer of this collection.
        today_start = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        today_count = collection.count_documents({'batch_time': {'$gte': today_start}})

        print("\n" + "=" * 80)
        print("统计信息:")
        print(f"- 总记录数: {total_count}")
        print(f"- 今天记录数: {today_count}")
        print(f"- 最新记录时间: {latest_docs[0].get('batch_time')}")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and return rather
        # than crash with a traceback.
        print(f"查看数据时出错: {e}")
    finally:
        # Release the connection on every path (early return, success, error).
        if client is not None:
            client.close()
if __name__ == '__main__':
    import sys

    # Optional first command-line argument: number of records to show.
    # Falls back to 20 when no argument is given.
    if len(sys.argv) > 1:
        try:
            limit = int(sys.argv[1])
        except ValueError:
            # A non-integer argument used to end in a raw traceback; exit with
            # a readable message (written to stderr, exit status 1) instead.
            sys.exit(f"参数错误: 记录数必须是整数, 收到 {sys.argv[1]!r}")
    else:
        limit = 20
    view_latest_data(limit)