实战演练:利用京东API一键抓取商品详情

发布时间:2026/5/20 1:24:09

实战演练:利用京东API一键抓取商品详情

# 京东商品详情抓取实战

## 目录

- [准备工作](https://yiyan.baidu.com/chat/NDk2NDIyNDM5Njo1MTk5OTIwMjMz#%E5%87%86%E5%A4%87%E5%B7%A5%E4%BD%9C)
- [方案一](https://yiyan.baidu.com/chat/NDk2NDIyNDM5Njo1MTk5OTIwMjMz#%E6%96%B9%E6%A1%88%E4%B8%80)
- [方案二](https://yiyan.baidu.com/chat/NDk2NDIyNDM5Njo1MTk5OTIwMjMz#%E6%96%B9%E6%A1%88%E4%BA%8C)
- [完整代码](https://yiyan.baidu.com/chat/NDk2NDIyNDM5Njo1MTk5OTIwMjMz#%E5%AE%8C%E6%95%B4%E4%BB%A3%E7%A0%81)
- [数据解析](https://yiyan.baidu.com/chat/NDk2NDIyNDM5Njo1MTk5OTIwMjMz#%E6%95%B0%E6%8D%AE%E8%A7%A3%E6%9E%90)

## 准备工作

### 1️⃣ 申请京东联盟API权限

```bash
# 注册地址:https://union.jd.com/
# 步骤
# 1. 注册账号 → 2. 创建应用 → 3. 获取 AppKey 和 AppSecret
```

### 2️⃣ 安装依赖

```bash
# hashlib、time、json 属于 Python 标准库,无需(也无法)通过 pip 安装
pip install requests
# 方案二另需:pip install beautifulsoup4
```

## ⭐ 方案一:使用京东联盟API(推荐 ✅)

### 完整代码实现

```python
import requests
import hashlib
import time
import json
from urllib.parse import quote


class JDApiClient:
    """京东联盟API客户端"""

    def __init__(self, app_key, app_secret):
        self.app_key = app_key
        self.app_secret = app_secret
        self.access_token = None
        self.token_expire = 0

    def _get_timestamp(self):
        """获取时间戳"""
        return str(int(time.time() * 1000))

    def _get_sign(self, params):
        """生成签名"""
        sorted_params = sorted(params.items())
        sign_str = self.app_secret
        for k, v in sorted_params:
            sign_str += f"{k}{v}"
        sign_str += self.app_secret
        return hashlib.md5(sign_str.encode()).hexdigest().upper()

    def get_access_token(self):
        """获取访问令牌"""
        if self.access_token and time.time() < self.token_expire:
            return self.access_token

        url = "https://api.jd.com/token"
        params = {
            "grant_type": "client_credentials",
            "app_key": self.app_key,
            "app_secret": self.app_secret,
            "timestamp": self._get_timestamp(),
            "sign_method": "md5",
        }
        params["sign"] = self._get_sign(params)

        response = requests.get(url, params=params)
        data = response.json()

        if "access_token" in data:
            self.access_token = data["access_token"]
            self.token_expire = time.time() + data["expires_in"] - 300
            return self.access_token
        else:
            raise Exception(f"获取Token失败: {data}")

    def get_goods_detail(self, sku_id):
        """
        获取商品详情
        :param sku_id: 商品SKU ID(京东商品ID)
        """
        url = "https://api.jd.com/routerjson"
        params = {
            "method": "jingdong.union.open.goods.query",
            "app_key": self.app_key,
            "access_token": self.get_access_token(),
            "timestamp": self._get_timestamp(),
            "format": "json",
            "v": 2.0,
            "sign_method": "md5",
            "skuIds": sku_id,
        }
        params["sign"] = self._get_sign(params)

        response = requests.get(url, params=params)
        return response.json()

    def get_goods_promotion(self, sku_id):
        """获取商品促销信息(价格、优惠券等)"""
        url = "https://api.jd.com/routerjson"
        params = {
            "method": "jd.union.open.goods.promotion.get",
            "app_key": self.app_key,
            "access_token": self.get_access_token(),
            "timestamp": self._get_timestamp(),
            "format": "json",
            "v": 2.0,
            "sign_method": "md5",
            "skuId": sku_id,
        }
        params["sign"] = self._get_sign(params)

        response = requests.get(url, params=params)
        return response.json()


# 使用示例
if __name__ == "__main__":
    # 替换为你的 AppKey 和 AppSecret
    APP_KEY = "your_app_key_here"
    APP_SECRET = "your_app_secret_here"

    client = JDApiClient(APP_KEY, APP_SECRET)

    # 示例:抓取 iPhone 15 的商品详情
    # SKU ID 可以从京东商品URL中获取:item.jd.com/100038004356.html -> 100038004356
    sku_id = 100038004356

    print("正在获取商品详情...")
    detail = client.get_goods_detail(sku_id)
    print(json.dumps(detail, ensure_ascii=False, indent=2))

    print("\n正在获取促销信息...")
    promo = client.get_goods_promotion(sku_id)
    print(json.dumps(promo, ensure_ascii=False, indent=2))
```

## 方案二:直接爬取(无需API Key)

```python
import requests
from bs4 import BeautifulSoup
import json
import re


class JDCrawler:
    """京东商品爬虫(无需认证)"""

    def __init__(self):
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Accept": "application/json, text/plain, */*",
            "Referer": "https://www.jd.com/",
        }

    def get_product_json(self, item_id):
        """
        从商品页面提取JSON数据
        :param item_id: 商品ID(从URL获取)
        """
        url = f"https://item.jd.com/{item_id}.html"
        response = requests.get(url, headers=self.headers)
        soup = BeautifulSoup(response.text, "html.parser")

        # 查找页面中的JSON数据
        scripts = soup.find_all("script", type="text/javascript")
        for script in scripts:
            text = script.string
            if text and "window.__INITIAL_STATE__" in text:
                # 提取JSON部分
                json_str = re.search(r"window\.__INITIAL_STATE__=({.*?});", text, re.DOTALL)
                if json_str:
                    return json.loads(json_str.group(1))
        return None

    def get_product_api(self, item_id):
        """使用京东内部API(更稳定)"""
        url = f"https://p.3.cn/prices/mgets?skuIds=J_{item_id}"
        response = requests.get(url, headers=self.headers)
        return response.json()


# 使用示例
crawler = JDCrawler()

# 抓取 iPhone 15
item_id = 100038004356

print("方法1: 页面解析")
data = crawler.get_product_json(item_id)
if data:
    print(f"商品名: {data['pcData']['productInfo']['name']}")
    print(f"价格: ¥{data['pcData']['productInfo']['price']}")

print("\n方法2: 价格API")
price_data = crawler.get_product_api(item_id)
print(json.dumps(price_data, ensure_ascii=False, indent=2))
```

## 完整实战:批量抓取 + 数据存储

```python
import requests
import json
import csv
import re  # 下方 re.search 需要此导入
from datetime import datetime
import time


class JDBatchCrawler:
    """批量商品抓取工具"""

    def __init__(self):
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

    def get_product_info(self, item_id):
        """获取单个商品信息"""
        try:
            # 使用价格API
            url = f"https://p.3.cn/prices/mgets?skuIds=J_{item_id}"
            resp = requests.get(url, headers=self.headers, timeout=10)
            price_data = resp.json()

            # 获取商品详情
            detail_url = f"https://item.jd.com/{item_id}.html"
            detail_resp = requests.get(detail_url, headers=self.headers, timeout=10)

            # 提取关键信息
            name_match = re.search(r'<h1 class="name">(.*?)</h1>', detail_resp.text)
            shop_match = re.search(r'class="name".*?>(.*?)</a>', detail_resp.text)

            product = {
                "item_id": item_id,
                "name": name_match.group(1) if name_match else "N/A",
                "shop": shop_match.group(1) if shop_match else "N/A",
                "price": price_data[0].get("p", "N/A") if price_data else "N/A",
                "crawl_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            }
            return product
        except Exception as e:
            print(f"❌ 抓取失败 {item_id}: {e}")
            return None

    def batch_crawl(self, item_ids, output_file="products.csv"):
        """批量抓取"""
        results = []
        for i, item_id in enumerate(item_ids, 1):
            print(f"[{i}/{len(item_ids)}] 正在抓取: {item_id}")
            product = self.get_product_info(item_id)
            if product:
                results.append(product)
            time.sleep(1)  # 避免请求过快

        # 保存到CSV
        if results:
            with open(output_file, "w", newline="", encoding="utf-8-sig") as f:
                writer = csv.DictWriter(f, fieldnames=results[0].keys())
                writer.writeheader()
                writer.writerows(results)
            print(f"\n✅ 成功抓取 {len(results)} 个商品,已保存到 {output_file}")
        return results


# 运行示例
if __name__ == "__main__":
    # 商品ID列表
    item_ids = [
        100038004356,  # iPhone 15
        100012043978,  # MacBook Pro
        100026789012,  # AirPods Pro
    ]

    crawler = JDBatchCrawler()
    crawler.batch_crawl(item_ids)
```

## 数据解析示例

```python
import json

# 模拟API返回的数据
api_response = {
    "code": 0,
    "msg": "成功",
    "data": {
        "goodsInfo": {
            "skuId": 100038004356,
            "spuId": 100038004356,
            "name": "Apple iPhone 15 (A3092) 128GB 蓝色",
            "shopId": 1000001,
            "categoryId": 9987,
            "brandId": 1320,
            "price": 5999.00,
            "jdPrice": 5999.00,
            "promotionPrice": 5499.00,
            "couponPrice": 5299.00,
            "commissionRate": 1.5,
            "commission": 89.99,
        }
    },
}

# 解析数据
data = api_response["data"]["goodsInfo"]
print(f"商品名称: {data['name']}")
print(f"京东价: ¥{data['jdPrice']}")
print(f"促销价: ¥{data['promotionPrice']}")
print(f"优惠券价: ¥{data['couponPrice']}")
print(f"佣金比例: {data['commissionRate']}%")
print(f"预估佣金: ¥{data['commission']}")
```

## ⚠️ 注意事项

| 事项 | 说明 |
| --- | --- |
| 频率限制 | API每秒最多10次请求,爬虫建议间隔1-2秒 |
| 反爬机制 | 添加随机User-Agent,使用代理IP |
| 合规使用 | 仅用于学习研究,遵守robots.txt |
| Token有效期 | Access Token通常2小时过期,需刷新 |

## 快速开始模板

```python
# 一键复制即可运行(替换APP_KEY和APP_SECRET)
from jd_api import JDApiClient

client = JDApiClient("your_app_key", "your_app_secret")
data = client.get_goods_detail(100038004356)
print(data)
```

需要我详细讲解某个部分吗?比如:

- Token刷新机制
- 数据存储到数据库
- 自动化定时抓取

相关新闻