python3+selenium+browsermobproxy百度营销页面

from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from browsermobproxy import Server
from urllib.parse import urlparse
import urllib
import json
import time
import io
import sys
import requests

# Re-wrap stdout in a UTF-8 text layer; the script prints Chinese status messages.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='utf-8')

# Account credentials typed into the login form further down.
# Left blank here; fill them in before running.
username = ''
passwd = ''

# Start the BrowserMob Proxy server from its local install and create a proxy
# on it. The browser is later pointed at this proxy so its network traffic
# can be read back as a HAR.
BMPserver = Server(r'C:/Program Files (x86)/browsermob-proxy-2.1.4/bin/browsermob-proxy.bat')
BMPserver.start()
BMPproxy = BMPserver.create_proxy()

# url  - page loaded first; the login form on it is filled in below.
# url2 - page opened in a new window once the login step has run.
url = "https://cas.baidu.com/?tpl=www2&fromu=http%3A%2F%2Fwww2.baidu.com%2Fcommon%2Fappinit.ajax"
url2 = "https://fengchao.baidu.com/fc/managecenter/dashboard/overview/user/34276332"

# Build the Chrome configuration and launch the browser.
options = ChromeOptions()

# Path of the Chrome executable to launch.
options.binary_location = r'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe'

# Command-line switches, applied in the order listed.
chrome_arguments = [
    # '--headless',  # no visible UI; per the original note, Linux without a display fails to start without it
    '--disable-gpu',  # original note: Google's documentation suggests this to avoid a bug
    '--no-sandbox',
    '--disable-dev-shm-usage',
    'log-level=3',
    '--disable-blink-features=AutomationControlled',
    '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
    '--window-size=1920,1050',
    '--hide-scrollbars',  # hide scrollbars, for some special pages
    '--ignore-certificate-errors',
    # 'blink-settings=imagesEnabled=false',  # skip image loading for speed
    # '--disable-javascript',  # disable JavaScript
    # Route all browser traffic through the BrowserMob proxy.
    f'--proxy-server={BMPproxy.proxy}',
]
for chrome_argument in chrome_arguments:
    options.add_argument(chrome_argument)

# Experimental options meant to keep Chrome out of its automation/test mode,
# plus a preference that sets the notification content setting to 2
# (original note: block pop-ups).
experimental_options = (
    ('excludeSwitches', ['enable-automation']),
    ('useAutomationExtension', False),
    ('prefs', {'profile.default_content_setting_values': {'notifications': 2}}),
)
for option_name, option_value in experimental_options:
    options.add_experimental_option(option_name, option_value)

driver = webdriver.Chrome(options=options)

# Maximise the browser window.
driver.maximize_window()

# Start a new HAR capture that records both headers and response bodies.
har_options = {'captureContent': True, 'captureHeaders': True}
BMPproxy.new_har("html_list", options=har_options)

# First visit, so cookies can be attached to the loaded page.
driver.get(url)

# Add the session cookies. The values are blank placeholders; copy the real
# ones from a logged-in browser session.
for cookie_name in ("CASSSID", "uc_login_unique", "uc_recom_mark"):
    driver.add_cookie({"name": cookie_name, "value": ""})

# Reload the page now that the cookies are in place.
driver.get(url)

# Element lookups from here on wait up to 10 seconds.
driver.implicitly_wait(10)

# Type the account name and the password, pausing one second after each field
# before the form is submitted.
login_fields = (
    ("uc-common-account", username),
    ("ucsl-password-edit", passwd),
)
for field_id, field_text in login_fields:
    driver.find_element(By.ID, field_id).send_keys(field_text)
    time.sleep(1)

# Submit the form. (The button can also be located with the XPath
# '//*[@class="submit uc-submit-form"]'.)
driver.find_element(By.ID, "submit-form").click()

try:
    # A captcha dialog may pop up. Per the original author's testing, closing
    # it and submitting again logs in without the captcha reappearing.
    time.sleep(1)
    vcode_close = driver.find_element(
        By.XPATH, '//*[@class="uc-token-header-icon uc-token-cancel-btn"]'
    )
    if vcode_close.is_displayed():
        print('关闭验证码,重新登陆')
        vcode_close.click()
        time.sleep(1)
        driver.find_element(By.ID, "submit-form").click()
except Exception as e:
    # Best effort: report the problem (e.g. no captcha dialog found) and carry on.
    print(e)

# Fixed 30-second pause before the page is inspected for the logged-in user.
time.sleep(30)

# Read the account name shown in the navigation profile element.
user_div = driver.find_element(
    By.XPATH, '//*[@class="one-ui-pro-nav-profile-content-name"]'
)
username_text = user_div.get_attribute('innerText')

# The login counts as successful when the displayed name equals the account
# name that was typed in.
if username_text != username:
    print('登陆失败')
else:
    print('登陆成功', driver.title)

# Remember the handle of the window the login was done in.
mainWindow = driver.current_window_handle

# Open the second page in a new browser window via JavaScript.
js = f"window.open('{url2}');"
driver.execute_script(js)

# The newest window is the last handle in the list; switch to it.
new_window = driver.window_handles[-1]
driver.switch_to.window(new_window)
print("切换窗口", driver.title)

# Give the page a fixed 20 seconds to finish loading. Alternatives left
# disabled by the original author: driver.implicitly_wait(10) or
# BMPproxy.wait_for_traffic_to_stop(1, 60).
time.sleep(20)

# Dump the browser cookies and build a list of "name=value;" fragments from them.
cookies = driver.get_cookies()
print(cookies)
cookie = [f"{c['name']!s}={c['value']!s};" for c in cookies]

# print(cookie)

# Read the network traffic recorded by the proxy as a HAR dictionary.
json_data = BMPproxy.har

# Request id of the last getBasicInfo call found in the capture. It is
# initialised up front so it is always defined, and stays empty when no such
# request was recorded.
reqid = ''
for ent in json_data['log']['entries']:
    # A dedicated name is used here so the module-level `url` (the login
    # page address) is not overwritten by captured request URLs.
    request_url = ent['request']['url']

    # Only the getBasicInfo AJAX call is of interest, e.g.
    # https://fengchao.baidu.com/hairuo/request.ajax?path=puppet/GET/BasicInfoFunction/getBasicInfo&reqid=4b534c48-3fd9-4319-3423-166339713801
    is_basic_info_call = (
        'https://fengchao.baidu.com/hairuo/request.ajax' in request_url
        and 'getBasicInfo&reqid' in request_url
    )
    if not is_basic_info_call:
        continue

    print("请求地址:" + str(request_url))

    # parse_qs splits the raw query string and percent-decodes each value.
    # This finds `reqid` wherever it sits among the parameters, and values
    # containing '=' or an encoded '&' are not truncated.
    query_params = urllib.parse.parse_qs(
        urlparse(request_url).query, keep_blank_values=True
    )
    reqid = query_params.get('reqid', [''])[-1]

# Shut down the browser and the proxy.
# The browser is closed first, while the proxy it is routed through is still
# running. The proxy's port is then released, and finally the proxy server
# process is stopped. The nested try/finally keeps a failure in one step from
# skipping the remaining cleanup. quit() closes every window, so a separate
# close() call is not needed.
# NOTE(review): the original author observed that the BrowserMob port can stay
# occupied by a leftover process even after the server is stopped. Closing the
# proxy client releases the port on the server side but may not cure that;
# confirm on the target machine.
try:
    driver.quit()
finally:
    try:
        BMPproxy.close()
    finally:
        BMPserver.stop()


标签:暂无标签
版权属于:瑶玲OvO 所有,转载请注明文章来源。

本文链接: https://www.linmy.cn/index.php/python/39.html

赞 (0)

评论区

发表评论

字数
0
/200
4+11=?

暂无评论,要不来一发?

回到顶部