Features: - Douyin play count scraper using Selenium + Chrome DevTools Protocol - Automated scheduler for daily data collection - MongoDB data storage - Mini-program API server - Data analysis and visualization tools 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
65 lines
2.1 KiB
Python
65 lines
2.1 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Check the state of the data saved in MongoDB.
"""
|
||
from pymongo import MongoClient
|
||
from datetime import datetime
|
||
import sys
|
||
|
||
def check_mongodb() -> bool:
    """Check the MongoDB connection and print a summary of the stored data.

    Connects to the MongoDB server at ``localhost:27017``, pings it, and then
    prints to stdout, for the ``play_vv_records`` collection of the
    ``douyin_data`` database:

    * the total number of documents,
    * the number of documents whose ``batch_time`` is at or after local
      midnight today,
    * the five documents with the greatest ``batch_time``, and
    * the field names and Python value types of one sample document.

    Returns:
        True if every step succeeded; False if any exception was raised
        (the error is printed instead of being propagated).
    """
    client = None
    try:
        # Use the same connection parameters as the main script.
        client = MongoClient('localhost', 27017, serverSelectionTimeoutMS=5000)

        # Test the connection; raises if no server can be selected in time.
        client.admin.command('ping')
        print("MongoDB连接成功")

        # Inspect the database and collection.
        db = client['douyin_data']
        collection = db['play_vv_records']

        total_count = collection.count_documents({})
        print(f"总记录数: {total_count}")

        # Count today's records (naive local time, starting at midnight).
        today_start = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        today_count = collection.count_documents({'batch_time': {'$gte': today_start}})
        print(f"今天的数据记录数: {today_count}")

        # Show the 5 most recent records (sorted by batch_time, descending).
        print("\n最新5条记录(按时间倒序排列):")
        print("-" * 60)
        for doc in collection.find().sort('batch_time', -1).limit(5):
            print(f"合集名称: {doc.get('mix_name', '未知')}")
            print(f"播放量: {doc.get('play_vv', 0):,} ({doc.get('playcount', '')})")
            print(f"合集链接: {doc.get('video_url', '')}")
            print(f"保存时间: {doc.get('batch_time', '')}")
            print(f"视频ID数: {len(doc.get('aweme_ids', []))}")
            print(f"封面图片: {'有' if doc.get('cover_image_url') else '无'}")
            print("-" * 60)

        # Show the field structure of one sample document. find_one() can
        # return None if the collection was emptied after the count above,
        # so guard against that instead of assuming a document exists.
        sample = collection.find_one() if total_count > 0 else None
        if sample is not None:
            print("\n文档字段结构:")
            for key, value in sample.items():
                print(f" - {key}: {type(value).__name__}")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and signal failure.
        print(f"检查MongoDB时出错: {e}")
        return False
    finally:
        # Always release the client's sockets and background monitor threads,
        # on both the success path and the error path.
        if client is not None:
            client.close()

    return True
|
||
|
||
if __name__ == '__main__':
    # Script entry point: run the check and exit with status 1 on failure.
    print("=== MongoDB数据检查 ===")
    if not check_mongodb():
        print("\n检查失败")
        sys.exit(1)
    print("\n检查完成")