爬取各个站点域名的价格并制表
前言
之前我写过一篇关于爬取 NameSilo 域名价格的博客。不过买东西总要货比三家,所以我又找了目前几家比较大的域名商,并写了一个爬取各家域名和价格的脚本,包括以下域名商:
- NameSilo
- NameCheap
- DreamHost
- Dynadot
NameSilo
这个商家没什么好说的了,直接使用上次的代码就行
import requests
import random
import time
# Domain label to look up (no TLD attached yet).
name = "jianhuangshi"

# Browser-like User-Agent string sent with every NameSilo request.
userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
headerUserAgent = {
    "User-agent": userAgent
}

# Step 1 - "cfbm" cookie: load the search page and keep the whole
# Set-Cookie response header; it is forwarded as-is on later requests.
urlCfbm = "https://www.namesilo.com/domain/search-domains?query=" + name
responseCfbm = requests.get(url=urlCfbm, headers=headerUserAgent)
cookieCfbm = responseCfbm.headers.get('Set-Cookie')

headerCfbm = {
    "User-agent": userAgent,
    "cookie": cookieCfbm
}

# Step 2 - PHPSESSID: it also arrives in a Set-Cookie response header.
# Only the part before the first ";" is kept (the rest is cookie attributes).
urlPhpSessId = "https://www.namesilo.com/cart/api/list"
responsePhpSessId = requests.get(url=urlPhpSessId, headers=headerCfbm)
rawPhpSessId = responsePhpSessId.headers.get('Set-Cookie')
cookiePhpSessId = rawPhpSessId[:rawPhpSessId.find(";")]

headerCfbmPhpSessId = {
    "User-agent": userAgent,
    "cookie": cookieCfbm + ";" + cookiePhpSessId
}

# Step 3 - csrf cookie: taken from the Set-Cookie header of the token
# endpoint, again cut at the first ";".
urlCsrf = "https://www.namesilo.com/public/api/token"
responseCsrf = requests.get(url=urlCsrf, headers=headerCfbmPhpSessId)
rawCsrf = responseCsrf.headers.get('Set-Cookie')
cookieCsrf = rawCsrf[:rawCsrf.find(";")]

# Step 4 - xsrfToken: a second call to the same endpoint, this time reading
# the "xsrfToken" field from the "data" object of the JSON body.
urlXsrfToken = "https://www.namesilo.com/public/api/token"
tokenData = requests.get(url=urlXsrfToken, headers=headerCfbmPhpSessId).json().get('data')
xsrfToken = tokenData.get('xsrfToken')

# Final header set (all cookies plus the x-csrf-token) used by every
# request that follows.
headerCfbmPhpSessIdCsrfXCsrfToken = {
    "cookie": cookieCfbm + ";" + cookiePhpSessId + ";" + cookieCsrf,
    "user-agent": userAgent,
    "x-csrf-token": xsrfToken
}

# Step 5 - fetch every TLD that can be purchased.
urlTlds = "https://www.namesilo.com/public/api/tlds"
responseTlds = requests.get(url=urlTlds, headers=headerCfbmPhpSessIdCsrfXCsrfToken)
tldsData = responseTlds.json().get('data')

# Parallel lists: tlds[n] is the suffix, domains[n] is name + "." + suffix.
tlds = [entry.get('tld') for entry in tldsData]
domains = [name + "." + suffix for suffix in tlds]
# Query NameSilo in groups of 8 domains and append the prices to Price.csv.
#
# Uses names built earlier in this script: the parallel lists `domains` and
# `tlds`, and the request headers `headerCfbmPhpSessIdCsrfXCsrfToken`.
batchSize = 8
urlCheckId = "https://www.namesilo.com/public/api/domains/bulk-check"
m = 0  # running row number, only used in the console output

# One UTF-8 handle serves the header and every row, so everything reaches
# the file in the order it is written.
with open('Price.csv', 'a', encoding='UTF-8') as f:
    f.write("商家" + "," + "域名" + "," + "正常价格" + "," + "优惠价格" + "," + "续费价格" + '\n')
    # A single loop covers both the "multiple of 8" and the "remainder" case:
    # slicing simply yields a shorter final batch.
    for k in range(0, len(domains), batchSize):
        dataDomain = domains[k:k + batchSize]
        dataTlds = tlds[k:k + batchSize]
        # NOTE(review): the last batch is sent with fewer than 8 entries
        # instead of being padded with placeholder values - assumes the
        # endpoint accepts short batches; confirm.
        data = {
            "domains[]": dataDomain,
            "tlds[]": dataTlds
        }
        # Wait a random 2-8 seconds between batches to avoid an IP ban.
        time.sleep(random.randint(2, 8))
        # Each bulk check first returns a checkId identifying this query.
        requestsCheckId = requests.post(url=urlCheckId, headers=headerCfbmPhpSessIdCsrfXCsrfToken, data=data)
        checkId = requestsCheckId.json().get('data').get('checkId')
        # The results (domains with their prices) are fetched by checkId.
        urlDomainPrice = "https://www.namesilo.com/public/api/domains/results/" + checkId
        requestsDomainPrice = requests.get(url=urlDomainPrice, headers=headerCfbmPhpSessIdCsrfXCsrfToken, data=data)
        listDomainPrice = requestsDomainPrice.json().get('data').get('domains')
        # At most one batch worth of results is consumed per request.
        for entry in listDomainPrice[:batchSize]:
            domain = entry.get('domain')
            regularPrice = entry.get('regularPrice')
            currentPrice = entry.get('currentPrice')
            renewalPrice = entry.get('renewalPrice')
            f.write("NameSilo" + "," + domain + "," + str(regularPrice) + "," + str(currentPrice) + "," + str(renewalPrice) + '\n')
            print(str(m) + "域名:" + domain +
                  '\000\000' + "正常价格:" + str(regularPrice) +
                  '\000\000' + "优惠价格:" + str(currentPrice) +
                  '\000\000' + "续费价格:" + str(renewalPrice))
            m = m + 1
NameCheap
import requests
# 导入 requests 包
# Browser-like request header.
headerUserAgent = {
    "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
}
# NameCheap serves its TLD list and the prices in the same JSON document,
# so one request for the TLD list is enough.
url = "https://d1dijnkjnmzy2z.cloudfront.net/tlds.json"
requestsDomain = requests.get(url=url, headers=headerUserAgent)
# Domain label to price.
host = "jianhuangshi"

# Open the CSV once for the whole run instead of once per row.
with open('Price.csv', 'a', encoding='UTF-8') as f:
    # The decoded JSON body is iterated directly; each item is read through
    # its 'Name' and 'Pricing' keys. j is a 1-based row number for the console.
    for j, i in enumerate(requestsDomain.json(), start=1):
        pricing = i.get('Pricing')
        domain = host + "." + str(i.get('Name'))
        regularPrice = str(pricing.get('Regular'))
        currentPrice = str(pricing.get('Price'))
        renewalPrice = str(pricing.get('Renewal'))
        # Show the domain and its prices on the console.
        print(str(j) + "域名:" + domain + '\000\000'
              + "正常价格" + regularPrice + '\000\000'
              + "优惠价格" + currentPrice + '\000\000'
              + "续费价格" + renewalPrice)
        # Append the same data as one CSV row.
        f.write("NameCheap" + "," +
                domain + "," +
                regularPrice + "," +
                currentPrice + "," +
                renewalPrice + '\n')
DreamHost
import requests
import re
# 导入 requests 和 re 包
# Browser-like request header.
headerUserAgent = {
    "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
}
# Endpoint that returns the TLD list, and the domain label to price.
url = "https://marketing-api.dreamhost.com/ajax.cgi?callback=jQuery&cmd=domreg-tld_list&_="
host = "jianhuangshi"
# The response is handled as plain text and picked apart with patterns.
requestsTextDomain = requests.get(url=url, headers=headerUserAgent).text
# reDomain captures everything between the first "{" and the last "}";
# reAllDomain then pulls each "{...}" record out of that text.
reDomain = re.compile(r'[{](.*)[}]', re.S)
reAllDomain = re.compile(r'[{](.*?)[}]', re.S)
reDomainPrice = re.compile(r'(.*)[,](.*)', re.S)  # defined but not used below


def _fieldValue(record, key, skip, terminator):
    """Return the raw text of one field inside a record string.

    record: text of a single "{...}" record (braces already stripped).
    key: field name; matched as a whole word so that "price" cannot match
        inside "renew_price".
    skip: number of characters from the first character of the key to the
        first character of the value (assumes the fixed key/value layout the
        offsets were written for - TODO confirm against the live response).
    terminator: character that ends the value.

    Returns "" when the key is absent. When the terminator is not found (the
    field is the last one in the record) the value runs to the end of the
    record instead of losing its final character.
    """
    found = re.search(r'\b' + re.escape(key) + r'\b', record)
    if found is None:
        return ""
    start = found.start() + skip
    end = record.find(terminator, start)
    if end == -1:
        end = len(record)
    return record[start:end]


# The records sit inside several levels of braces, so extract in two passes.
requestsTextDomain = re.findall(reDomain, requestsTextDomain)
requestsTextDomain = re.findall(reAllDomain, requestsTextDomain[0])
domainPrice = []
# Open the CSV once for the whole run instead of once per row.
with open('Price.csv', 'a', encoding='UTF-8') as f:
    for i in requestsTextDomain:
        # The TLD value ends at the next double quote.
        tld = _fieldValue(i, "tld", 6, '"')
        # Prices end at the next comma.
        currentPrice = _fieldValue(i, "price", 7, ",")
        renewalPrice = _fieldValue(i, "renew_price", 13, ",")
        # NOTE(review): the regular price is read from "renew_price", exactly
        # like the renewal price - confirm this is intended.
        regularPrice = renewalPrice
        # Append the domain and its prices as one CSV row.
        f.write("DreamHost" + "," +
                host + "." + tld + "," +
                str(regularPrice) + "," +
                str(currentPrice) + "," +
                str(renewalPrice) + '\n')
Dynadot
import requests
import parsel
import random
import time
#导入包
# Domain label to price.
host = "jianhuangshi"
# Browser-like request header.
headerUserAgent = {
    "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
}
# Search endpoint; the result pages are requested with POST.
url = "https://www.dynadot.com/domain/search.html"

# Open the CSV once for the whole run instead of once per row.
with open('Price.csv', 'a', encoding='UTF-8') as f:
    # Stay below 26 pages: from page 26 on the site returns repeated results
    # rather than a 404, so there is no natural stop signal.
    for a in range(26):
        # The first request carries only the domain; later ones add the
        # page index "i".
        data = {
            "domain": host
        }
        if a != 0:
            data["i"] = a
        # Random 3-8 second pause, otherwise the human-verification check
        # is triggered.
        time.sleep(random.randint(3, 8))
        requestsDomainPriceText = requests.post(url=url, headers=headerUserAgent, data=data).text
        # Wrap the HTML in a Selector so it can be queried with CSS.
        selectorRequestsText = parsel.Selector(requestsDomainPriceText)
        # TLD of every result row on this page.
        tld = selectorRequestsText.css('#tab-result > div::attr(row-tld)').getall()
        listRegular = []
        listPrice = []
        # nth-child is 1-based, hence start=1.
        for position, suffix in enumerate(tld, start=1):
            rowSelector = '#tab-result > div:nth-child(' + str(position) + ')'
            # Text of the row's "taken" button; "Taken" means unavailable.
            purchase = selectorRequestsText.css(
                rowSelector + ' > div.s-btn-taken.s-btn.s-btn-text::text').get()
            if purchase != "Taken":
                # `price` holds the renewal-price text and `regular` the
                # current-price text.
                price = str(selectorRequestsText.css(
                    rowSelector + ' > div.price-wrap > span.s-renewal-price::text').get())
                regular = str(selectorRequestsText.css(
                    rowSelector + ' > div.price-wrap > span.s-current-price::text').get())
                # Keep only what follows the "$" sign.
                regular = regular[regular.find('$') + 1:]
                price = price[price.find('$') + 1:]
            else:
                # Unavailable TLDs are recorded with a price of 0.
                regular = str(0)
                price = str(0)
            listRegular.append(regular)
            listPrice.append(price)
            # One CSV row per TLD; the price columns are written in the
            # order price, regular, price.
            f.write("dynadot" + "," +
                    host + "." + suffix + "," +
                    price + "," +
                    regular + "," +
                    price + '\n')
        # Page summary on the console.
        print(tld)
        print(listRegular)
        print(listPrice)
        # Number of pages processed so far.
        print(a + 1)