爬取 NameSilo 的域名价格
前言
最近想要替换掉这个域名,当初选择这个域名也是因为在当时看是最便宜的,结果在我再次进入 NameSilo 的时候发现需要点很多次才能获取全部的列表,并且有时网络不佳还只能重新获取。
最后我就想要写一个爬虫来在我想要的时候自己去爬取 NameSilo 目前域名的折扣价和原价。
实现
先确定好想要购买的域名名称
# Base name (without TLD) of the domain to look up
name = "liushawangluo"
然后再伪装一个请求头,因为目前我们没有任何的 cookie ,所以我们暂时就先创建一个默认头
# Start with a browser-like User-Agent only; no cookies have been collected
# yet, so this minimal header set is used for the first request.
headerUserAgent = {
    "User-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/106.0.0.0 Safari/537.36"
    )
}
根据查看 F12 的抓包记录,发现有 3 个重要的 cookie 需要抓取,通过查看发现第一个 cookie 为 cfbm
# Fetch the search page for the chosen name; the response is used only for
# the cookie it sets (the one the article refers to as "cfbm").
urlCfbm = f"https://www.namesilo.com/domain/search-domains?query={name}"
requestsCfbmData = requests.get(urlCfbm, headers=headerUserAgent)
# The cookie is delivered in the response headers, so read the raw
# Set-Cookie value from there.
cookieCfbm = requestsCfbmData.headers.get("Set-Cookie")
然后我们将我们刚刚获得的 cfbm 添加进请求头中
# Same User-Agent as the initial headers, plus the cookie captured from the
# search page.
headerCfbm = {**headerUserAgent, "cookie": cookieCfbm}
接着我们抓取 PHPSESSID
# Request the cart list endpoint; its response sets the session cookie
# (PHPSESSID, according to the capture described in the article).
urlPhpSessId = "https://www.namesilo.com/cart/api/list"
requestsPhpSessIdData = requests.get(url=urlPhpSessId, headers=headerCfbm)
# The cookie is delivered in the response headers, not in the body.
cookiePhpSessIdData = requestsPhpSessIdData.headers.get('Set-Cookie')
# Keep only the leading "name=value" pair. partition() returns the whole
# string when there is no ";", whereas slicing up to find(";") would use -1
# as the end index and silently drop the last character.
cookiePhpSessId = cookiePhpSessIdData.partition(";")[0]
将获取到的 PHPSESSID 添加进请求头中
# Headers carrying both cookies collected so far, joined with ";".
headerCfbmPhpSessId = {
    "User-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/106.0.0.0 Safari/537.36"
    ),
    "cookie": ";".join((cookieCfbm, cookiePhpSessId)),
}
然后抓取 csrf
# Request the token endpoint; its response sets the csrf cookie.
urlCsrf = "https://www.namesilo.com/public/api/token"
requestsCsrfData = requests.get(url=urlCsrf, headers=headerCfbmPhpSessId)
# The csrf cookie is delivered in the response headers.
cookieCsrfData = requestsCsrfData.headers.get('Set-Cookie')
# Keep only the leading "name=value" pair. partition() returns the whole
# string when there is no ";", whereas slicing up to find(";") would use -1
# as the end index and silently drop the last character.
cookieCsrf = cookieCsrfData.partition(";")[0]
以上我们将 cookie 全部获取完毕,接下来获取 xsrfToken
# Fetch the xsrfToken: it is returned in the JSON body of the token
# endpoint, under the "data" object.
urlXsrfToken = "https://www.namesilo.com/public/api/token"
responseXsrfToken = requests.get(urlXsrfToken, headers=headerCfbmPhpSessId)
requestsXsrfTokenData = responseXsrfToken.json().get("data")
xsrfToken = requestsXsrfTokenData.get("xsrfToken")
将 csrf 和 xsrfToken 加入请求头
# Final header set: all three cookies joined with ";", the browser
# User-Agent, and the xsrfToken sent as the x-csrf-token header.
headerCfbmPhpSessIdCsrfXCsrfToken = {
    "cookie": ";".join((cookieCfbm, cookiePhpSessId, cookieCsrf)),
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/106.0.0.0 Safari/537.36"
    ),
    "x-csrf-token": xsrfToken,
}
抓取所有可购买的域名后缀,保存到 tldsData
# Download the list of purchasable TLDs; the entries live under the "data"
# key of the JSON response.
urlTlds = "https://www.namesilo.com/public/api/tlds"
requestsTldsData = requests.get(
    urlTlds,
    headers=headerCfbmPhpSessIdCsrfXCsrfToken,
)
tldsData = requestsTldsData.json().get("data")
然后将域名和域名后缀拼接
# Collect every TLD from the downloaded entries, then build the full domain
# for each one as "<name>.<tld>". Both lists share the same order.
tlds = [entry.get('tld') for entry in tldsData]
domains = [name + "." + tld for tld in tlds]
然后我们将域名分成 8 个一组分别请求来获取,避免太多而被识别到
# Query the domains in batches of 8 so that a single request does not carry
# too many names. One loop handles every case: slicing yields a shorter final
# batch when len(domains) is not a multiple of 8, so no placeholder entries or
# TLDs left over from the previous batch are ever sent.
batchSize = 8
urlCheckId = "https://www.namesilo.com/public/api/domains/bulk-check"
# Running counter printed in front of every result line.
m = 0
for batchStart in range(0, len(domains), batchSize):
    # Domains and their TLDs for this batch (parallel lists, same order).
    dataDomain = domains[batchStart:batchStart + batchSize]
    dataTlds = tlds[batchStart:batchStart + batchSize]
    # Form fields sent with the bulk-check request.
    data = {
        "domains[]": dataDomain,
        "tlds[]": dataTlds
    }
    # Wait a random 2-8 seconds before each batch to avoid an IP ban.
    delayTime = random.randint(2, 8)
    time.sleep(delayTime)
    # Each bulk check first returns a checkId identifying this query.
    requestsCheckId = requests.post(url=urlCheckId, headers=headerCfbmPhpSessIdCsrfXCsrfToken, data=data)
    dataCheckId = requestsCheckId.json().get('data')
    checkId = dataCheckId.get('checkId')
    # Fetch the domains and prices for that checkId.
    urlDomainPrice = "https://www.namesilo.com/public/api/domains/results/" + checkId
    requestsDomainPrice = requests.get(url=urlDomainPrice, headers=headerCfbmPhpSessIdCsrfXCsrfToken, data=data)
    dataDomainPrice = requestsDomainPrice.json().get('data')
    listDomainPrice = dataDomainPrice.get('domains')
    # Print every returned entry in order. Iterating the list directly avoids
    # the former [l - 1] indexing (which started at the last element and never
    # reached index 7) and does not assume exactly 8 results per batch.
    for domainInfo in listDomainPrice:
        domain = domainInfo.get('domain')
        regularPrice = domainInfo.get('regularPrice')
        currentPrice = domainInfo.get('currentPrice')
        renewalPrice = domainInfo.get('renewalPrice')
        # NOTE(review): '\000\000' emits two NUL characters between fields;
        # kept unchanged to preserve the original output format.
        print(str(m) + "域名:" + domain +
              '\000\000' + "正常价格:" + str(regularPrice) +
              '\000\000' + "优惠价格:" + str(currentPrice) +
              '\000\000' + "续费价格:" + str(renewalPrice))
        m = m + 1