
实战指南Python自动化获取B站数据全流程【免费下载链接】bilibili-api哔哩哔哩常用API调用。支持视频、番剧、用户、频道、音频等功能。原仓库地址https://github.com/MoyuScript/bilibili-api项目地址: https://gitcode.com/gh_mirrors/bi/bilibili-apiBilibili API Python库是开发者获取哔哩哔哩平台数据的强大工具支持视频、用户、直播、动态、专栏等多种数据类型。本文将从零开始带你掌握使用bilibili-api进行数据采集、分析和应用的全流程涵盖从环境配置到高级应用的最佳实践。项目定位与核心价值bilibili-api是一个功能全面的Python库专门用于调用B站的各种API接口。它不仅支持官方提供的标准接口还集成了许多实用功能如BV号与AV号互转、弹幕下载与转换、专栏内容爬取等。该库采用异步设计支持多种HTTP客户端能够有效应对B站的反爬虫机制是进行B站数据分析、内容监控、自动化操作的理想选择。环境配置与快速开始安装与依赖管理首先克隆项目仓库到本地git clone https://gitcode.com/gh_mirrors/bi/bilibili-api cd bilibili-api安装核心库和依赖pip install bilibili-api-pythonbilibili-api支持多种HTTP客户端你可以根据需求选择安装# 安装aiohttp推荐 pip install aiohttp # 或安装httpx pip install httpx # 或安装curl_cffi支持浏览器指纹伪装 pip install curl_cffi基础使用示例下面是一个获取视频信息的简单示例import asyncio from bilibili_api import Video async def get_video_info(): # 创建Video对象传入BV号 video Video(bvidBV1xx4y1z7R9) # 获取视频详细信息 info await video.get_info() # 输出关键信息 print(f标题{info[title]}) print(f播放量{info[stat][view]}) print(f点赞数{info[stat][like]}) print(f投币数{info[stat][coin]}) print(f收藏数{info[stat][favorite]}) return info # 运行异步函数 if __name__ __main__: asyncio.run(get_video_info())核心功能深度解析视频数据采集与分析视频是B站最核心的内容形式bilibili-api提供了丰富的视频操作接口from bilibili_api import Video, Credential import asyncio async def comprehensive_video_analysis(): # 如果需要登录操作创建凭证 credential Credential( sessdata你的sessdata, bili_jct你的bili_jct, buvid3你的buvid3 ) # 初始化视频对象 video Video(bvidBV1xx4y1z7R9, credentialcredential) # 获取视频基本信息 basic_info await video.get_info() # 获取视频分P信息 pages await video.get_pages() # 获取视频播放地址 play_url await video.get_download_url() # 获取视频弹幕XML格式 danmaku_xml await video.get_danmaku_xml() # 获取视频统计信息 stat await video.get_stat() # 获取相关推荐视频 related await video.get_related() return { basic_info: basic_info, pages: pages, play_url: play_url, danmaku: danmaku_xml, statistics: stat, related_videos: related }用户数据获取与处理用户数据分析是B站运营的重要环节通过以下代码可以获取用户详细信息from bilibili_api import User async def analyze_user_profile(uid: int): user User(uiduid) 
# 获取用户基本信息 user_info await user.get_user_info() # 获取用户投稿视频 videos await user.get_videos(ps30) # ps参数控制每页数量 # 获取用户动态 dynamics await user.get_dynamics() # 获取用户关注列表 followings await user.get_followings() # 获取用户粉丝列表 followers await user.get_followers() # 获取用户专栏文章 articles await user.get_articles() return { profile: user_info, video_count: len(videos[list]), dynamic_count: len(dynamics[cards]), following_count: len(followings[list]), follower_count: len(followers[list]), article_count: len(articles[articles]) }弹幕数据处理与可视化弹幕是B站特色功能bilibili-api提供了强大的弹幕处理能力from bilibili_api import Video from bilibili_api.utils.danmaku2ass import Danmaku2ASS import json async def process_danmaku_data(bvid: str): video Video(bvidbvid) # 获取弹幕XML数据 danmaku_xml await video.get_danmaku_xml() # 解析弹幕为结构化数据 danmaku_list [] for dm in danmaku_xml: danmaku_list.append({ time: dm.time, type: dm.type, size: dm.size, color: dm.color, text: dm.text, user_hash: dm.user_hash }) # 转换为ASS字幕格式 ass_content Danmaku2ASS( danmaku_list, 1920, 1080, # 视频分辨率 Microsoft YaHei, # 字体 36, # 字体大小 0.8, # 透明度 10, # 弹幕停留时间 5 # 弹幕速度 ) # 保存为文件 with open(f{bvid}_danmaku.ass, w, encodingutf-8) as f: f.write(ass_content) # 弹幕词频分析 word_freq {} for dm in danmaku_list: text dm[text] if text in word_freq: word_freq[text] 1 else: word_freq[text] 1 # 排序并获取高频弹幕 top_danmaku sorted(word_freq.items(), keylambda x: x[1], reverseTrue)[:10] return { total_danmaku: len(danmaku_list), top_danmaku: top_danmaku, ass_file: f{bvid}_danmaku.ass }图B站动态投票功能的前端实现代码展示了API如何与B站前端组件交互高级应用场景批量数据采集与存储对于需要大规模数据采集的场景可以使用以下批量处理方案import asyncio import aiohttp import pandas as pd from bilibili_api import Video, User from typing import List class BilibiliBatchCollector: def __init__(self, max_concurrent5): self.semaphore asyncio.Semaphore(max_concurrent) async def collect_video_batch(self, bvid_list: List[str]): 批量收集视频数据 tasks [] for bvid in bvid_list: task asyncio.create_task(self._get_video_data(bvid)) tasks.append(task) results await asyncio.gather(*tasks, 
return_exceptionsTrue) return self._process_results(results) async def _get_video_data(self, bvid: str): 获取单个视频数据 async with self.semaphore: try: video Video(bvidbvid) info await video.get_info() stat await video.get_stat() return { bvid: bvid, title: info.get(title), view: stat.get(view), like: stat.get(like), coin: stat.get(coin), favorite: stat.get(favorite), share: stat.get(share), duration: info.get(duration), pubdate: info.get(pubdate), owner: info.get(owner, {}).get(name) } except Exception as e: print(f获取视频 {bvid} 数据失败: {e}) return None def _process_results(self, results): 处理结果并保存为CSV valid_results [r for r in results if r is not None] df pd.DataFrame(valid_results) # 保存到CSV df.to_csv(bilibili_videos.csv, indexFalse, encodingutf-8-sig) # 数据分析 stats { total_videos: len(valid_results), total_views: df[view].sum(), avg_likes: df[like].mean(), avg_duration: df[duration].mean() } return df, stats实时监控与告警系统构建B站内容实时监控系统import asyncio import time from datetime import datetime from bilibili_api import Video, Live, Dynamic import smtplib from email.mime.text import MIMEText class BilibiliMonitor: def __init__(self, config): self.config config self.monitored_items [] async def monitor_video_stats(self, bvid: str, interval: int 300): 监控视频数据变化 video Video(bvidbvid) last_stats None while True: try: current_stats await video.get_stat() if last_stats: # 检测数据变化 changes self._detect_changes(last_stats, current_stats) if changes: await self._send_alert(bvid, changes) last_stats current_stats await asyncio.sleep(interval) except Exception as e: print(f监控视频 {bvid} 出错: {e}) await asyncio.sleep(60) # 出错后等待1分钟 def _detect_changes(self, old_stats, new_stats): 检测统计数据变化 changes {} metrics [view, like, coin, favorite, share, reply] for metric in metrics: old_value old_stats.get(metric, 0) new_value new_stats.get(metric, 0) if new_value old_value: changes[metric] { old: old_value, new: new_value, increase: new_value - old_value } return changes async def _send_alert(self, bvid: str, 
changes: dict): 发送告警通知 timestamp datetime.now().strftime(%Y-%m-%d %H:%M:%S) subject fB站视频数据变化告警 - {bvid} # 构建告警内容 content f时间: {timestamp}\n content f视频: {bvid}\n\n content 数据变化详情:\n for metric, data in changes.items(): content f{metric}: {data[old]} → {data[new]} ({data[increase]})\n # 发送邮件通知 self._send_email(subject, content) # 也可以集成其他通知方式如钉钉、企业微信等 print(f告警已发送: {subject})数据分析与可视化将采集的数据进行可视化分析import matplotlib.pyplot as plt import seaborn as sns import pandas as pd from wordcloud import WordCloud import jieba from collections import Counter class BilibiliDataVisualizer: def __init__(self, data): self.data data def plot_video_metrics(self): 绘制视频指标对比图 fig, axes plt.subplots(2, 2, figsize(12, 10)) # 播放量分布 axes[0, 0].hist(self.data[view], bins20, alpha0.7, colorskyblue) axes[0, 0].set_title(播放量分布) axes[0, 0].set_xlabel(播放量) axes[0, 0].set_ylabel(视频数量) # 点赞投币收藏关系 axes[0, 1].scatter(self.data[like], self.data[coin], alpha0.5, cself.data[favorite], cmapviridis) axes[0, 1].set_title(点赞-投币-收藏关系) axes[0, 1].set_xlabel(点赞数) axes[0, 1].set_ylabel(投币数) # 发布时间分布 publish_hours pd.to_datetime(self.data[pubdate], units).dt.hour axes[1, 0].hist(publish_hours, bins24, alpha0.7, colorlightgreen) axes[1, 0].set_title(发布时间分布) axes[1, 0].set_xlabel(发布时间小时) axes[1, 0].set_ylabel(视频数量) # 互动率分析 self.data[interaction_rate] (self.data[like] self.data[coin] self.data[favorite]) / self.data[view] axes[1, 1].boxplot(self.data[interaction_rate]) axes[1, 1].set_title(视频互动率分布) axes[1, 1].set_ylabel(互动率) plt.tight_layout() plt.savefig(video_metrics_analysis.png, dpi300, bbox_inchestight) plt.show() def generate_danmaku_wordcloud(self, danmaku_texts): 生成弹幕词云 # 分词处理 all_text .join(danmaku_texts) words jieba.lcut(all_text) # 统计词频 word_freq Counter(words) # 过滤停用词和单个字符 stop_words {的, 了, 在, 是, 我, 有, 和, 就, 不, 人, 都, 一, 一个, 上, 也, 很, 到, 说, 要, 去, 你, 会, 着, 没有, 看, 好, 自己, 这} filtered_freq {word: freq for word, freq in word_freq.items() if word not in stop_words and len(word) 1} # 生成词云 wc WordCloud( 
font_pathsimhei.ttf, width800, height600, background_colorwhite, max_words200 ).generate_from_frequencies(filtered_freq) plt.figure(figsize(10, 8)) plt.imshow(wc, interpolationbilinear) plt.axis(off) plt.title(弹幕词云分析, fontsize16) plt.savefig(danmaku_wordcloud.png, dpi300, bbox_inchestight) plt.show()图Bilibili API项目Logo展示了Python与B站API的完美结合最佳实践与优化建议性能优化策略连接池管理合理配置HTTP客户端连接池大小请求限流避免触发B站的反爬虫机制缓存策略对不常变的数据进行本地缓存异步并发充分利用asyncio的并发能力import asyncio import aiohttp from aiohttp import ClientSession, TCPConnector from bilibili_api import request_settings class OptimizedBilibiliClient: def __init__(self, max_connections10): self.max_connections max_connections self.session None async def __aenter__(self): # 配置连接池 connector TCPConnector( limitself.max_connections, limit_per_host5, ttl_dns_cache300 ) self.session ClientSession(connectorconnector) # 配置bilibili-api使用自定义session request_settings.set(session, self.session) return self async def __aexit__(self, exc_type, exc_val, exc_tb): if self.session: await self.session.close() async def batch_fetch(self, tasks, batch_size5): 批量获取数据控制并发 results [] for i in range(0, len(tasks), batch_size): batch tasks[i:ibatch_size] batch_results await asyncio.gather(*batch, return_exceptionsTrue) results.extend(batch_results) # 控制请求频率 await asyncio.sleep(1) return results错误处理与重试机制import asyncio import random from typing import Callable, Any from functools import wraps def retry_with_backoff( max_retries: int 3, base_delay: float 1.0, max_delay: float 10.0 ): 指数退避重试装饰器 def decorator(func: Callable): wraps(func) async def wrapper(*args, **kwargs): last_exception None for attempt in range(max_retries 1): try: return await func(*args, **kwargs) except Exception as e: last_exception e if attempt max_retries: break # 计算退避时间 delay min( base_delay * (2 ** attempt) random.uniform(0, 0.1), max_delay ) print(f请求失败{delay:.2f}秒后重试 ({attempt1}/{max_retries})) await asyncio.sleep(delay) raise last_exception return wrapper return decorator class 
RobustBilibiliAPI: retry_with_backoff(max_retries3, base_delay2.0) async def get_video_info_safe(self, bvid: str): 带重试机制的视频信息获取 from bilibili_api import Video video Video(bvidbvid) return await video.get_info() retry_with_backoff(max_retries2, base_delay1.0) async def get_user_info_safe(self, uid: int): 带重试机制的用户信息获取 from bilibili_api import User user User(uiduid) return await user.get_user_info()数据存储与备份方案import sqlite3 import json from datetime import datetime from typing import Dict, Any class BilibiliDataStorage: def __init__(self, db_pathbilibili_data.db): self.db_path db_path self._init_database() def _init_database(self): 初始化数据库表结构 conn sqlite3.connect(self.db_path) cursor conn.cursor() # 视频数据表 cursor.execute( CREATE TABLE IF NOT EXISTS videos ( bvid TEXT PRIMARY KEY, title TEXT, owner_uid INTEGER, owner_name TEXT, view_count INTEGER, like_count INTEGER, coin_count INTEGER, favorite_count INTEGER, duration INTEGER, pubdate INTEGER, tags TEXT, description TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) ) # 用户数据表 cursor.execute( CREATE TABLE IF NOT EXISTS users ( uid INTEGER PRIMARY KEY, name TEXT, sex TEXT, face TEXT, sign TEXT, level INTEGER, vip_status INTEGER, vip_type INTEGER, follower_count INTEGER, following_count INTEGER, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) ) # 统计数据历史表 cursor.execute( CREATE TABLE IF NOT EXISTS video_stats_history ( id INTEGER PRIMARY KEY AUTOINCREMENT, bvid TEXT, view_count INTEGER, like_count INTEGER, coin_count INTEGER, favorite_count INTEGER, share_count INTEGER, recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (bvid) REFERENCES videos (bvid) ) ) conn.commit() conn.close() def save_video_data(self, video_data: Dict[str, Any]): 保存视频数据 conn sqlite3.connect(self.db_path) cursor conn.cursor() try: cursor.execute( INSERT OR REPLACE INTO videos (bvid, title, owner_uid, owner_name, view_count, like_count, 
coin_count, favorite_count, duration, pubdate, tags, description) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) , ( video_data.get(bvid), video_data.get(title), video_data.get(owner, {}).get(mid), video_data.get(owner, {}).get(name), video_data.get(stat, {}).get(view), video_data.get(stat, {}).get(like), video_data.get(stat, {}).get(coin), video_data.get(stat, {}).get(favorite), video_data.get(duration), video_data.get(pubdate), json.dumps(video_data.get(tags, []), ensure_asciiFalse), video_data.get(desc) )) # 保存统计数据历史 cursor.execute( INSERT INTO video_stats_history (bvid, view_count, like_count, coin_count, favorite_count, share_count) VALUES (?, ?, ?, ?, ?, ?) , ( video_data.get(bvid), video_data.get(stat, {}).get(view), video_data.get(stat, {}).get(like), video_data.get(stat, {}).get(coin), video_data.get(stat, {}).get(favorite), video_data.get(stat, {}).get(share) )) conn.commit() except Exception as e: print(f保存视频数据失败: {e}) conn.rollback() finally: conn.close()常见问题与解决方案1. 认证与登录问题问题如何获取有效的Credential凭证解决方案通过浏览器开发者工具获取cookies使用bilibili-api提供的登录工具定期刷新cookies避免过期from bilibili_api import Credential import asyncio async def refresh_credential(): # 从环境变量或配置文件读取凭证 credential Credential( sessdatayour_sessdata, bili_jctyour_bili_jct, buvid3your_buvid3 ) # 验证凭证有效性 try: from bilibili_api import user test_user user.User(credentialcredential) info await test_user.get_user_info() print(f凭证有效用户: {info[name]}) return credential except Exception as e: print(f凭证无效: {e}) # 触发重新登录流程 return await relogin()2. 
请求频率限制

**问题**：遇到 `412 Precondition Failed` 错误。

**解决方案**：

- 添加请求延迟
- 使用代理IP
- 实现指数退避重试

```python
import asyncio
import random
from bilibili_api import request_settings


class RateLimitedClient:
    def __init__(self, requests_per_second=2):
        self.requests_per_second = requests_per_second
        self.last_request_time = 0

    async def rate_limited_request(self, coro):
        """带速率限制的请求"""
        current_time = asyncio.get_event_loop().time()
        time_since_last = current_time - self.last_request_time
        if time_since_last < 1.0 / self.requests_per_second:
            wait_time = (1.0 / self.requests_per_second) - time_since_last
            await asyncio.sleep(wait_time + random.uniform(0, 0.1))
        self.last_request_time = asyncio.get_event_loop().time()
        return await coro

    def set_proxy(self, proxy_url):
        """设置代理"""
        request_settings.set_proxy(proxy_url)
```

### 3. 数据解析错误

**问题**：API返回数据结构变化导致解析失败。

**解决方案**：

- 添加数据验证和异常处理
- 使用类型提示和默认值
- 实现版本兼容性检查

```python
from typing import Dict, Any, Optional
from dataclasses import dataclass


@dataclass
class VideoInfo:
    bvid: str
    title: str
    view: int = 0
    like: int = 0
    coin: int = 0
    favorite: int = 0

    @classmethod
    def from_api_response(cls, data: Dict[str, Any]) -> Optional["VideoInfo"]:
        """从API响应创建VideoInfo对象"""
        try:
            return cls(
                bvid=data.get("bvid", ""),
                title=data.get("title", ""),
                view=data.get("stat", {}).get("view", 0),
                like=data.get("stat", {}).get("like", 0),
                coin=data.get("stat", {}).get("coin", 0),
                favorite=data.get("stat", {}).get("favorite", 0)
            )
        except (KeyError, TypeError, AttributeError) as e:
            print(f"解析视频数据失败: {e}")
            return None
```

## 进一步学习路径

### 推荐学习资源

- **官方文档**：仔细阅读bilibili-api的官方文档，了解所有可用接口
- **示例代码**：参考docs/examples目录中的各种使用示例
- **源码学习**：深入阅读bilibili_api目录下的源代码，理解实现原理
- **社区交流**：参与GitHub Issues讨论，了解常见问题和解决方案

### 进阶项目实践

- **B站数据分析平台**：构建完整的数据采集、存储、分析和可视化系统
- **内容监控工具**：实现特定UP主或话题的实时监控和告警
- **自动化运营工具**：开发自动发布、互动、数据分析的一体化工具
- **学术研究应用**：利用B站数据进行社会科学、传播学等领域的学术研究

### 性能调优建议

- **数据库优化**：根据数据量选择合适的数据库（SQLite/MySQL/PostgreSQL）
- **缓存策略**：使用Redis等缓存热点数据，减少API调用
- **分布式采集**：对于大规模数据采集，考虑使用分布式架构
- **监控告警**：建立完善的监控体系，及时发现和解决问题

通过本文的指导，你已经掌握了使用bilibili-api进行B站数据获取的核心技能。记住，技术工具的使用应当遵守平台规则和法律法规，合理合法地进行数据采集和分析。随着你对bilibili-api的深入使用，你将能够解锁更多高级功能，构建出更加强大和实用的B站数据处理应用。

【免费下载链接】bilibili-api：哔哩哔哩常用API调用。支持视频、番剧、用户、频道、音频等功能。原仓库地址：https://github.com/MoyuScript/bilibili-api 项目地址: 
https://gitcode.com/gh_mirrors/bi/bilibili-api

创作声明:本文部分内容由AI辅助生成(AIGC),仅供参考