02-爬取 NameSilo 的域名价格







爬取 NameSilo 的域名价格

前言


最近想要替换掉这个域名。当初选择这个域名是因为它在当时看来是最便宜的,结果再次进入 NameSilo 的时候,发现需要点击很多次才能获取全部的价格列表,而且有时网络不佳还只能重新获取。

最后我就想要写一个爬虫来在我想要的时候自己去爬取 NameSilo 目前域名的折扣价和原价。

实现


先确定好想要购买的域名名称

# Domain label (without any TLD) to look up; it is used as the search query
# and later joined with every available TLD.
name = "liushawangluo"

然后再伪装一个请求头,因为目前我们没有任何的 cookie ,所以我们暂时就先创建一个默认头

# Baseline request headers: only a browser-like User-Agent, because no
# cookie has been collected yet at this point.
headerUserAgent = {
    "User-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/106.0.0.0 Safari/537.36"
    ),
}

根据 F12 的抓包记录,发现有 3 个重要的 cookie 需要抓取,其中第一个 cookie 是 cfbm

# Fetch the cfbm cookie.
# The script calls requests.* everywhere but never imported the module, so
# the first use would fail with NameError; import it here at first use.
import requests

# Build the search URL for the chosen name.
urlCfbm = "https://www.namesilo.com/domain/search-domains?query=" + name
# Send the request carrying only the User-Agent header.
requestsCfbmData = requests.get(url=urlCfbm, headers=headerUserAgent)
# The cookie is delivered in the *response* headers, hence .headers on the
# response object; the raw Set-Cookie value is kept as-is.
cookieCfbm = requestsCfbmData.headers.get('Set-Cookie')

然后我们将我们刚刚获得的 cfbm 添加进请求头中

# Request headers for the next step: the browser User-Agent plus the cfbm
# cookie obtained above.
headerCfbm = {**headerUserAgent, "cookie": cookieCfbm}

接着我们抓取 PHPSESSID

# Fetch PHPSESSID
urlPhpSessId = "https://www.namesilo.com/cart/api/list"
requestsPhpSessIdData = requests.get(url=urlPhpSessId, headers=headerCfbm)
# PHPSESSID is likewise delivered in the response's Set-Cookie header.
cookiePhpSessIdData = requestsPhpSessIdData.headers.get('Set-Cookie')
if cookiePhpSessIdData is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError("No Set-Cookie header returned by " + urlPhpSessId)
# Keep only the leading "name=value" pair. split() returns the whole string
# when there is no ";", whereas slicing up to find(";") == -1 would silently
# drop the last character of the cookie.
cookiePhpSessId = cookiePhpSessIdData.split(";", 1)[0]

将获取到的 PHPSESSID 添加进请求头中

# Request headers carrying the User-Agent and both cookies collected so far
# (cfbm and PHPSESSID), joined with ";".
headerCfbmPhpSessId = {
    **headerUserAgent,
    "cookie": ";".join((cookieCfbm, cookiePhpSessId)),
}

然后抓取 csrf

# Fetch the csrf cookie
urlCsrf = "https://www.namesilo.com/public/api/token"
requestsCsrfData = requests.get(url=urlCsrf, headers=headerCfbmPhpSessId)
# The csrf cookie is read from the response's Set-Cookie header.
cookieCsrfData = requestsCsrfData.headers.get('Set-Cookie')
if cookieCsrfData is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError("No Set-Cookie header returned by " + urlCsrf)
# Keep only the leading "name=value" pair. split() returns the whole string
# when there is no ";", whereas slicing up to find(";") == -1 would silently
# drop the last character of the cookie.
cookieCsrf = cookieCsrfData.split(";", 1)[0]

以上我们将 cookie 全部获取完毕,接下来获取 xsrfToken

# Fetch the xsrfToken
urlXsrfToken = "https://www.namesilo.com/public/api/token"
responseXsrfToken = requests.get(url=urlXsrfToken, headers=headerCfbmPhpSessId)
# The token is carried in the JSON body, under the "data" object.
# NOTE(review): this is a second request to the same endpoint that supplied
# the csrf cookie — confirm the token from this response still pairs with
# that earlier cookie.
requestsXsrfTokenData = responseXsrfToken.json().get('data')
xsrfToken = requestsXsrfTokenData.get('xsrfToken')

将 csrf 和 xsrfToken 加入请求头

# Final request headers: all three cookies joined with ";", the browser
# User-Agent, and the xsrfToken sent as x-csrf-token.
headerCfbmPhpSessIdCsrfXCsrfToken = {
    "cookie": ";".join((cookieCfbm, cookiePhpSessId, cookieCsrf)),
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/106.0.0.0 Safari/537.36"
    ),
    "x-csrf-token": xsrfToken,
}

抓取所有可购买的域名后缀,保存到 tldsData

# Fetch every purchasable domain suffix (TLD)
urlTlds = "https://www.namesilo.com/public/api/tlds"
requestsTldsData = requests.get(
    urlTlds,
    headers=headerCfbmPhpSessIdCsrfXCsrfToken,
)
# The TLD entries are under the "data" key of the JSON body.
tldsData = requestsTldsData.json().get('data')

然后将域名和域名后缀拼接

# Collect each entry's "tld" value, then join the chosen name with every
# suffix; tlds[n] and domains[n] always refer to the same suffix.
tlds = [entry.get('tld') for entry in tldsData]
domains = [name + "." + suffix for suffix in tlds]

然后我们将域名分成 8 个一组分别请求来获取,避免太多而被识别到

# Query prices for all candidate domains in batches of 8 per request.
#
# Fixes over the previous version:
#   * iterated over a float / an int (`for i in len(domains) / 8`,
#     `for j in 8`), which raises TypeError;
#   * `k<=len(domains)` let k reach len(domains) and raised IndexError on
#     the last batch;
#   * short batches were padded with the "a" placeholder and the TLD list
#     was shared between batches, so stale values were sent;
#   * results were read as listDomainPrice[l - 1] for l in range(8), which
#     starts at index -1 (rotated output) and assumes exactly 8 results;
#   * fields were separated with '\000\000', i.e. NUL characters; two
#     spaces are printed instead;
#   * random / time (and requests) were never imported.
import random
import time

import requests

batchSize = 8
urlCheckId = "https://www.namesilo.com/public/api/domains/bulk-check"
m = 0  # running index printed in front of every result line
for k in range(0, len(domains), batchSize):
    # Slicing yields a shorter final batch instead of padded placeholders,
    # and keeps domains and TLDs aligned.
    dataDomain = domains[k:k + batchSize]
    dataTlds = tlds[k:k + batchSize]
    # Domains and suffixes are sent as the form body.
    data = {
        "domains[]": dataDomain,
        "tlds[]": dataTlds
    }
    # Wait a random 2-8 seconds before each batch to avoid an IP ban.
    delayTime = random.randint(2, 8)
    time.sleep(delayTime)
    # Each bulk check first returns its own checkId.
    requestsCheckId = requests.post(url=urlCheckId, headers=headerCfbmPhpSessIdCsrfXCsrfToken, data=data)
    dataCheckId = requestsCheckId.json().get('data')
    checkId = dataCheckId.get('checkId')
    # Fetch the domains and their prices for that checkId.
    urlDomainPrice = "https://www.namesilo.com/public/api/domains/results/" + checkId
    requestsDomainPrice = requests.get(url=urlDomainPrice, headers=headerCfbmPhpSessIdCsrfXCsrfToken, data=data)
    dataDomainPrice = requestsDomainPrice.json().get('data')
    listDomainPrice = dataDomainPrice.get('domains')
    # Print every returned entry in the order received.
    for entry in listDomainPrice:
        domain = entry.get('domain')
        regularPrice = entry.get('regularPrice')
        currentPrice = entry.get('currentPrice')
        renewalPrice = entry.get('renewalPrice')
        print(str(m) + "域名:" + str(domain) +
              "  " + "正常价格:" + str(regularPrice) +
              "  " + "优惠价格:" + str(currentPrice) +
              "  " + "续费价格:" + str(renewalPrice))
        m = m + 1


暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇