#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Query Douyin play-count data stored in MongoDB.

Interactive command-line tool that lists the most recent scrape batches
and searches stored records by script (drama) name.
"""
import re
from datetime import datetime

from pymongo import MongoClient
from pymongo.errors import ConnectionFailure

MONGO_URI = 'mongodb://localhost:27017/'
# NOTE(review): the database name contains two em-dash characters ("——"),
# not an underscore. It is kept exactly as found; confirm it matches the
# name used by whatever writes this data before "fixing" it.
DB_NAME = 'douyin——data'
COLLECTION_NAME = 'playcounts'
DEFAULT_BATCH_LIMIT = 5


def _format_batch_time(value):
    """Render a batch timestamp; fall back to ``str()`` for non-datetimes."""
    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%d %H:%M:%S")
    return str(value)


def connect_mongodb(uri=MONGO_URI, db_name=DB_NAME,
                    collection_name=COLLECTION_NAME):
    """Connect to MongoDB and return ``(client, collection)``.

    Args:
        uri: MongoDB connection URI.
        db_name: Name of the database to open.
        collection_name: Name of the collection inside ``db_name``.

    Returns:
        A ``(client, collection)`` tuple on success, or ``(None, None)`` if
        the connection could not be established (a message is printed).
    """
    client = None
    try:
        client = MongoClient(uri, serverSelectionTimeoutMS=5000)
        # MongoClient connects lazily; ping to force a round trip so that
        # connection problems surface here.
        client.admin.command('ping')
        collection = client[db_name][collection_name]
        print("MongoDB连接成功")
        return client, collection
    except ConnectionFailure:
        print("MongoDB连接失败,请确保MongoDB服务已启动")
    except Exception as e:
        print(f"MongoDB连接出错: {e}")

    # Failure path: release the half-open client instead of leaking it.
    if client is not None:
        client.close()
    return None, None


def query_latest_batches(collection, limit=DEFAULT_BATCH_LIMIT):
    """Print the most recent batches and the records in each of them.

    Args:
        collection: Collection to query.
        limit: Maximum number of batches to show.

    Any exception raised while querying or printing is caught and reported
    on stdout.
    """
    try:
        # Group records by batch, then keep the newest batches by time.
        pipeline = [
            {"$group": {
                "_id": "$batch_id",
                "batch_time": {"$first": "$batch_time"},
                "count": {"$sum": 1},
            }},
            {"$sort": {"batch_time": -1}},
            {"$limit": limit},
        ]
        batches = list(collection.aggregate(pipeline))

        if not batches:
            print("暂无数据")
            return

        print(f"\n===== 最近 {len(batches)} 个批次 =====")
        for batch in batches:
            batch_time = _format_batch_time(batch.get('batch_time'))
            print(f"批次ID: {batch['_id']}, 时间: {batch_time}, 数据条数: {batch['count']}")

            # Fetch the records that belong to this batch.
            batch_data = list(collection.find(
                {"batch_id": batch['_id']},
                {"name": 1, "playcount": 1, "rank": 1,
                 "playcount_number": 1, "_id": 0},
            ))

            # Order by rank when the first record has one; otherwise by
            # numeric play count, highest first.
            if batch_data and 'rank' in batch_data[0]:
                batch_data.sort(key=lambda x: x.get('rank', 999))
            elif batch_data and 'playcount_number' in batch_data[0]:
                batch_data.sort(key=lambda x: x.get('playcount_number', 0),
                                reverse=True)

            for item in batch_data:
                rank_info = f"[第{item['rank']}名] " if 'rank' in item else ""
                print(f" {rank_info}{item['name']}")
                print(f" 播放量: {item['playcount']}")
            # Blank line separates one batch from the next.
            print()
    except Exception as e:
        print(f"查询数据失败: {e}")


def query_by_name(collection, name_keyword):
    """Print every record whose name contains ``name_keyword``.

    The match is a case-insensitive substring match; results are ordered
    newest first by ``batch_time``.

    Args:
        collection: Collection to query.
        name_keyword: Text to look for inside the ``name`` field.
    """
    try:
        # Escape the keyword so characters such as "(" or "*" are matched
        # literally instead of being interpreted as regex syntax.
        query = {"name": {"$regex": re.escape(name_keyword), "$options": "i"}}
        results = list(collection.find(query).sort("batch_time", -1))

        if not results:
            print(f"未找到包含'{name_keyword}'的剧本")
            return

        print(f"\n===== 包含'{name_keyword}'的剧本 =====")
        for result in results:
            batch_time = _format_batch_time(result.get('batch_time'))
            print(f"剧本: {result['name']}")
            print(f"播放量: {result['playcount']}")
            print(f"抓取时间: {batch_time}")
            print(f"批次ID: {result['batch_id']}")
            print("-" * 30)
    except Exception as e:
        print(f"查询失败: {e}")


def _parse_limit(raw, default=DEFAULT_BATCH_LIMIT):
    """Convert user input to a positive batch count, else return ``default``."""
    try:
        value = int(raw) if raw else default
    except ValueError:
        return default
    # The aggregation $limit stage only accepts positive values.
    return value if value >= 1 else default


def main():
    """Run the interactive menu loop until the user chooses to exit."""
    print("抖音播放量数据查询工具")
    print("=" * 40)

    client, collection = connect_mongodb()
    if collection is None:
        return

    try:
        while True:
            print("\n请选择操作:")
            print("1. 查看最近的批次数据")
            print("2. 根据剧本名称搜索")
            print("3. 退出")

            choice = input("请输入选项 (1-3): ").strip()

            if choice == '1':
                limit = _parse_limit(input("显示最近几个批次? (默认5): ").strip())
                query_latest_batches(collection, limit)
            elif choice == '2':
                keyword = input("请输入剧本名称关键词: ").strip()
                if keyword:
                    query_by_name(collection, keyword)
                else:
                    print("关键词不能为空")
            elif choice == '3':
                break
            else:
                print("无效选项,请重新选择")
    except (KeyboardInterrupt, EOFError):
        # EOFError: stdin was closed (e.g. piped input ran out); treat it
        # the same as Ctrl-C so the connection is still closed cleanly.
        print("\n用户中断操作")
    finally:
        if client is not None:
            client.close()
            print("已断开MongoDB连接")


if __name__ == '__main__':
    main()