03-爬取各个站点域名的价格并制表

2022-10-29 20:43

|

264

|

0

|

爬虫

470 字

|

18 分钟

爬取各个站点域名的价格并制表

前言

之前我写过一篇关于爬取 NameSilo 域名价格的博客。不过买东西总要货比三家，所以我又找了目前几家比较大的域名商，并且写了爬取各家域名和价格的脚本，包括以下几个域名商：

NameSilo
NameCheap
DreamHost
Dynadot

NameSilo

这个商家没什么好说的了，直接使用上次的代码就行

import requests
import random
import time

# Domain label (without the TLD) whose prices are looked up
name = "jianhuangshi"

# Browser-like User-Agent string, defined once and reused by every header set
userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
headerUserAgent = {
    "User-agent": userAgent
}

# Step 1: request the search page. The cookie the author calls "cfbm" is
# delivered in the Set-Cookie response header and is forwarded as received.
urlCfbm = "https://www.namesilo.com/domain/search-domains?query=" + name
requestsCfbmData = requests.get(url=urlCfbm, headers=headerUserAgent)
cookieCfbm = requestsCfbmData.headers.get('Set-Cookie')

headerCfbm = {
    "User-agent": userAgent,
    "cookie": cookieCfbm
}

# Step 2: request the cart API to obtain the PHPSESSID cookie, which is also
# delivered through the Set-Cookie response header.
urlPhpSessId = "https://www.namesilo.com/cart/api/list"
requestsPhpSessIdData = requests.get(url=urlPhpSessId, headers=headerCfbm)
cookiePhpSessIdData = requestsPhpSessIdData.headers.get('Set-Cookie')
# Keep only the leading "name=value" pair. split() also copes with a header
# that carries no ";" at all, where find()-based slicing would silently drop
# the last character.
cookiePhpSessId = cookiePhpSessIdData.split(";", 1)[0]

headerCfbmPhpSessId = {
    "User-agent": userAgent,
    "cookie": cookieCfbm + ";" + cookiePhpSessId
}

# Step 3: request the token endpoint once. The csrf cookie (Set-Cookie header)
# and the xsrfToken (JSON body) are read from the SAME response so the pair is
# guaranteed to belong together; two separate requests could return two
# unrelated values.
urlCsrf = "https://www.namesilo.com/public/api/token"
requestsCsrfData = requests.get(url=urlCsrf, headers=headerCfbmPhpSessId)
cookieCsrfData = requestsCsrfData.headers.get('Set-Cookie')
cookieCsrf = cookieCsrfData.split(";", 1)[0]
xsrfToken = requestsCsrfData.json().get('data').get('xsrfToken')

# Final header set: all three cookies plus the token in x-csrf-token
headerCfbmPhpSessIdCsrfXCsrfToken = {
    "cookie": cookieCfbm + ";" + cookiePhpSessId + ";" + cookieCsrf,
    "user-agent": userAgent,
    "x-csrf-token": xsrfToken
}

# Step 4: fetch every TLD offered for purchase
urlTlds = "https://www.namesilo.com/public/api/tlds"
requestsTldsData = requests.get(url=urlTlds, headers=headerCfbmPhpSessIdCsrfXCsrfToken)
tldsData = requestsTldsData.json().get('data')

# tlds[n] and domains[n] describe the same entry: the bare suffix and the
# full "<name>.<tld>" domain built from it.
tlds = [entry.get('tld') for entry in tldsData]
domains = [name + "." + tld for tld in tlds]

# Query prices in batches of 8 domains and append them to Price.csv.
#
# A single loop handles every list length. The previous version had a
# separate branch for lengths divisible by 8 that could never run (it iterated
# over a float and an int, and wrote to an already-closed file), and its
# fallback branch padded the last short batch with placeholder "a" domains
# and TLDs left over from the previous batch.
batchSize = 8
urlCheckId = "https://www.namesilo.com/public/api/domains/bulk-check"

# Running row number used only in the console output
m = 0

# Open the CSV once, always as UTF-8, so the Chinese header and the data rows
# share one encoding. Append mode is kept because the other vendor scripts
# add their rows to the same file.
with open('Price.csv', 'a', encoding='UTF-8') as f:
    f.write("商家" + "," + "域名" + "," + "正常价格" + "," + "优惠价格" + "," + "续费价格" + '\n')

    for k in range(0, len(domains), batchSize):
        # Request body: the full domains of this batch and their matching TLDs
        data = {
            "domains[]": domains[k:k + batchSize],
            "tlds[]": tlds[k:k + batchSize]
        }

        # Random 2-8 second pause before each batch to avoid an IP ban
        delayTime = random.randint(2, 8)
        time.sleep(delayTime)

        # Each bulk check first returns a checkId identifying this query ...
        requestsCheckId = requests.post(url=urlCheckId, headers=headerCfbmPhpSessIdCsrfXCsrfToken, data=data)
        checkId = requestsCheckId.json().get('data').get('checkId')

        # ... which is then used to fetch the domains and prices
        urlDomainPrice = "https://www.namesilo.com/public/api/domains/results/" + checkId
        requestsDomainPrice = requests.get(url=urlDomainPrice, headers=headerCfbmPhpSessIdCsrfXCsrfToken, data=data)
        listDomainPrice = requestsDomainPrice.json().get('data').get('domains')

        # The response may hold fewer entries than were requested; never read
        # more than one batch worth of results.
        for item in listDomainPrice[:batchSize]:
            domain = item.get('domain')
            regularPrice = item.get('regularPrice')
            currentPrice = item.get('currentPrice')
            renewalPrice = item.get('renewalPrice')
            f.write("NameSilo" + "," + domain + "," + str(regularPrice) + "," + str(currentPrice) + "," + str(renewalPrice) + '\n')
            print(str(m) + "域名：" + domain +
                  '\000\000' + "正常价格：" + str(regularPrice) +
                  '\000\000' + "优惠价格：" + str(currentPrice) +
                  '\000\000' + "续费价格：" + str(renewalPrice))
            m = m + 1

NameCheap

import requests
# 导入 requests 包

# Request headers: present a desktop-browser User-Agent
headerUserAgent = {
    "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
}

# NameCheap serves the TLD list and the prices in one JSON document, so a
# single request is enough.
url = "https://d1dijnkjnmzy2z.cloudfront.net/tlds.json"
requestsDomain = requests.get(url=url, headers=headerUserAgent)

# Domain label (without the TLD) to price
host = "jianhuangshi"

# Open the CSV once instead of once per row, with the same explicit UTF-8
# encoding that is used when the header row is written. Append mode is kept
# because every vendor script adds rows to the same file.
with open('Price.csv', 'a', encoding='UTF-8') as f:
    # j is the 1-based row number shown in the console output
    for j, i in enumerate(requestsDomain.json(), start=1):
        # Look each value up once and reuse it for both outputs
        pricing = i.get('Pricing')
        domain = host + "." + str(i.get('Name'))
        regularPrice = str(pricing.get('Regular'))
        currentPrice = str(pricing.get('Price'))
        renewalPrice = str(pricing.get('Renewal'))

        # Console output of the domain and its prices
        print(str(j) + "域名：" + domain + '\000\000'
              + "正常价格" + regularPrice + '\000\000'
              + "优惠价格" + currentPrice + '\000\000'
              + "续费价格" + renewalPrice)

        # CSV row: vendor, domain, regular price, current price, renewal price
        f.write("NameCheap" + "," +
                domain + "," +
                regularPrice + "," +
                currentPrice + "," +
                renewalPrice + '\n'
                )

DreamHost

import requests
import re
# 导入 requests 和 re 包

# Request headers: present a desktop-browser User-Agent
headerUserAgent = {
    "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
}

# Endpoint returning the TLD list, and the domain label (without TLD) to price
url = "https://marketing-api.dreamhost.com/ajax.cgi?callback=jQuery&cmd=domreg-tld_list&_="
host = "jianhuangshi"

# Raw response body as text
requestsTextDomain = requests.get(url=url, headers=headerUserAgent).text

# The records sit inside nested braces: reDomain grabs everything between the
# first "{" and the last "}", reAllDomain then yields the content of each
# inner "{...}" record.
reDomain = re.compile(r'[{](.*)[}]', re.S)
reAllDomain = re.compile(r'[{](.*?)[}]', re.S)

outerText = re.findall(reDomain, requestsTextDomain)
domainEntries = re.findall(reAllDomain, outerText[0])


def _extractField(entry, key):
    """Return the raw text following '"key":' in entry, up to the next comma.

    The lookup is anchored on the quoted key, so asking for "price" cannot
    match inside "renew_price". When the field is the last one in the entry
    (no trailing comma) the rest of the entry is returned. An absent key
    yields an empty string. Surrounding quotes, if any, are left in place.
    """
    marker = '"' + key + '":'
    start = entry.find(marker)
    if start == -1:
        return ""
    start += len(marker)
    end = entry.find(",", start)
    if end == -1:
        end = len(entry)
    return entry[start:end]


# Open the CSV once, as UTF-8, in append mode (shared with the other vendor
# scripts).
with open('Price.csv', 'a', encoding='UTF-8') as f:
    for i in domainEntries:
        # The TLD is stored as a quoted string; drop the quotes
        tld = _extractField(i, "tld").strip('"')

        # NOTE(review): the regular price is read from "renew_price", exactly
        # like the renewal price. This mirrors the original script; confirm
        # whether the response offers a dedicated regular-price field.
        regularPrice = _extractField(i, "renew_price")
        currentPrice = _extractField(i, "price")
        renewalPrice = _extractField(i, "renew_price")

        # CSV row: vendor, domain, regular price, current price, renewal price
        f.write("DreamHost" + "," +
                host + "." + tld + "," +
                str(regularPrice) + "," +
                str(currentPrice) + "," +
                str(renewalPrice) + '\n'
                )

Dynadot

import requests
import parsel
import random
import time
#导入包

# Domain label (without the TLD) to price
host = "jianhuangshi"

# Request headers: present a desktop-browser User-Agent
headerUserAgent = {
    "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
}

# Form body of the first search request
data = {
    "domain": host
}

# Search endpoint
url = "https://www.dynadot.com/domain/search.html"

# Open the CSV once, as UTF-8, in append mode (shared with the other vendor
# scripts) instead of reopening it for every row.
with open('Price.csv', 'a', encoding='UTF-8') as f:
    # Per the original author's note, indexes from 26 on return repeated
    # results rather than a 404, so exactly 26 requests are made.
    for a in range(26):
        # Every request after the first also sends the index as "i"
        if a != 0:
            data = {
                "domain": host,
                "i": a
            }

        # Random 3-8 second pause; without it the site triggers a
        # human-verification check.
        delayTime = random.randint(3, 8)
        time.sleep(delayTime)

        requestsDomainPriceText = requests.post(url=url, headers=headerUserAgent, data=data).text
        # Wrap the HTML in a Selector so CSS queries can be run on it
        selectorRequestsText = parsel.Selector(requestsDomainPriceText)

        # TLD of every result row, read from the row-tld attribute
        tld = selectorRequestsText.css('#tab-result > div::attr(row-tld)').getall()

        # Prices collected for this response; only used for console output
        listRegular = []
        listPrice = []

        for position, suffix in enumerate(tld, start=1):
            # CSS prefix selecting the result row at this 1-based position
            rowSelector = '#tab-result > div:nth-child(' + str(position) + ')'

            # Text of the row's "taken" button, or None when there is none
            purchase = selectorRequestsText.css(
                rowSelector + ' > div.s-btn-taken.s-btn.s-btn-text::text').get()
            available = purchase != "Taken"

            if available:
                renewalPrice = selectorRequestsText.css(
                    rowSelector + ' > div.price-wrap > span.s-renewal-price::text').get()
                currentPrice = selectorRequestsText.css(
                    rowSelector + ' > div.price-wrap > span.s-current-price::text').get()

                # str() turns a missing element (None) into the text "None";
                # the slice then keeps only what follows the "$" sign.
                currentPrice = str(currentPrice)
                renewalPrice = str(renewalPrice)
                currentPrice = currentPrice[currentPrice.find('$') + 1:]
                renewalPrice = renewalPrice[renewalPrice.find('$') + 1:]
            else:
                # Domains that cannot be bought are recorded with price 0
                currentPrice = str(0)
                renewalPrice = str(0)

            # listRegular receives the current price and listPrice the renewal
            # price, matching the original script.
            listRegular.append(currentPrice)
            listPrice.append(renewalPrice)

            # CSV row: vendor, domain, regular price, current price, renewal
            # price. The regular-price column is filled with the renewal
            # price, as in the original script.
            f.write("dynadot" + "," +
                    host + "." + suffix + "," +
                    renewalPrice + "," +
                    currentPrice + "," +
                    renewalPrice + '\n')

            # Progress output, printed for purchasable domains only
            if available:
                print(tld)
                print(listRegular)
                print(listPrice)

        # Number of requests completed so far
        print(a + 1)

暂无评论

发送评论编辑评论

Markdown

|´・ω・)ノ

ヾ(≧∇≦*)ゝ

(☆ω☆)

（╯‵□′）╯︵┴─┴

￣﹃￣

(/ω＼)

∠( ᐛ 」∠)＿

(๑•̀ㅁ•́ฅ)

→_→

୧(๑•̀⌄•́๑)૭

٩(ˊᗜˋ*)و

(ノ°ο°)ノ

(´இ皿இ｀)

⌇●﹏●⌇

(ฅ´ω`ฅ)

(╯°A°)╯︵○○○

φ(￣∇￣o)

ヾ(´･･｀｡)ノ"

( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃

(ó﹏ò｡)

Σ(っ °Д °;)っ

( ,,´･ω･)ﾉ"(´っω･｀｡)

╮(╯▽╰)╭

o(*////▽////*)q

＞﹏＜

( ๑´•ω•) "(ㆆᴗㆆ)

颜文字

Emoji

小恐龙

花!