大家好,又见面了,我是你们的朋友全栈君。
今日内容:
1.selenium剩余用法
2.selenium万能登录破解
3.破解极验滑动验证码
from selenium import webdriverimport time driver = webdriver. Chr ome (r' D: Bai duNetdi skDownl oad( chr omedriver_ win32 chr omedriver. exe') try: driver. get(' https://dig. chouti. com/')driver. implicitly_ wait(10)time. sleep(5) 1ogin_ btn = driver. find_ element_ by_ id( 1ogin_ btn')login_ btn. click() .time. sleep(2) phone = driver. find_ element_ by_ class_ name(' 1ogin- -phone )phone. send_ .keys pwd = driver. find_ element_ .by_ class_ name ( pwd-passwor d-input' )pwd. send_ keys( kermi t46709394' ) 1ogin_ submit = driver. find_ ,element_ _by_ ,class. _name(' btn-large )login_ submit. click() . time. sleep (20)
#捕获异常并打印
except Exception as e: print(e) finally: driver. close() from selenium import webdriver . driver = webdriver. Chr ome (r' D: Bai duNetdi skDownl oad( chr omedriver_ win32 chr omedriver. exe') F try: driver. get(' https://doc. scrapy. org/en/1atest/_ static/selectors-sample1. htm1' )driver. implicitly_ wait(5) #根据xpath语法查找元素 #/从根节点开始找第一个 htm1 = driver. find_ element_ _by, _xpath( /html ) # htm1 = driver. find_ .element_ by_ xpath('/head') # HR#print(html. tag_ name) div = driver. find_ element_ by_ _xpath(' //div' )print (div. tag_ name) div = driver. find_ element_ by_ xpath(' //div[@id=" images"]' )print (div. tag_ name)print (div. text) #找到第一个a节点 a = driver. find_ element_ _by_ xpath(' //a' )print(a. tag_ name) #找到所有a节点 a _s = driver. find elements_ by_ xpath(' //a' )print(a_ s) #找到第一个a节点的href属性 # get_ attribute:获取节点中某个属性 a = driver. find_ element_ by_ _xpath(' //a' ). get_ attribute(' href' )print (a) finally: driver. close()
'''
点击、清除操作
'''
try: driver. implicitly_ wait(10 ) driver. get(' https://www. jd. com/' ) input_ _tag = driver. find_ element_ by_ id(' key' )input_ .tag. send_ keys(' EA ) input_ .tag. send_ _keys (Keys. ENTER)time. sleep (2) input_ tag = driver. find_ element_ by_ id(' key' )input_ tag. clear() input_ .tag. send_ keys(' 5 #Èi )# #HI#R#HÆH# button = driver. find_ _element_ _by_ class_ _name (' button )button. click()e time. sleep(10) finally: driver. close()
from selenium import webdr ivere import time driver = webdr iver. Chrome (r' D: Bai duNetdiskDownloadchr omedr iver. _win32chr try: dr iver. implicitly_ _wait(10) driver. get(' https: //www. zhihu. com/explore' )print (driver. get_ cookies 0) e time. sleep(10) finally: dr iver. c1ose()
# Demo: drag the "draggable" element onto the "droppable" element in small
# steps instead of a single instantaneous move.
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# Horizontal pixels moved per step while the mouse button is held down.
STEP_PX = 2

driver = webdriver.Chrome()
try:
    # Navigation lives inside the try block so the browser is shut down even
    # when loading the page fails.
    driver.implicitly_wait(10)
    driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')

    # Switch into the frame whose id is "iframeResult" before locating the
    # two elements.
    driver.switch_to.frame('iframeResult')

    draggable = driver.find_element(By.ID, 'draggable')  # source element
    droppable = driver.find_element(By.ID, 'droppable')  # target element

    # A single drag_and_drop(draggable, droppable) action would jump the
    # source onto the target instantly ("robot" style). The stepwise drag
    # below imitates a human instead.
    source = draggable.location['x']
    target = droppable.location['x']
    print(source, target)
    distance = target - source
    print(distance)

    # Every action chain has to be executed with perform().
    # Press and hold the source element.
    ActionChains(driver).click_and_hold(draggable).perform()

    moved = 0
    while moved < distance:
        # Shift the held element a little further to the right.
        ActionChains(driver).move_by_offset(xoffset=STEP_PX, yoffset=0).perform()
        moved += STEP_PX

    # Let go of the mouse button.
    ActionChains(driver).release().perform()

    time.sleep(10)
finally:
    # quit() ends the whole driver session; close() only closes the window.
    driver.quit()
# Demo: browser history navigation (back / forward).
import time

from selenium import webdriver

driver = webdriver.Chrome()
try:
    driver.implicitly_wait(10)

    # Visit three pages in a row so there is history to move through.
    # The first URL originally carried a stray "[图片]" prefix (an image
    # placeholder pasted into the literal), which made it an invalid URL.
    driver.get('https://www.jd.com/')
    driver.get('https://www.baidu.com/')
    driver.get('https://www.cnblogs.com/')
    time.sleep(2)

    # Go back one page.
    driver.back()
    time.sleep(1)

    # Go forward one page.
    driver.forward()
    time.sleep(1)

    driver.back()
    time.sleep(10)
finally:
    # quit() ends the whole driver session; close() only closes the window.
    driver.quit()
# Demo: start Chrome with an existing user-data directory and add a cookie.
#
# Preparation:
#   1. In the file explorer, enable showing hidden files.
#   2. Go to C:\Users\administortra\AppData\Local\Google\Chrome\User Data
#      and delete the Default folder.
#   3. Reopen the browser and log in to the Baidu account; a new Default
#      cache folder is created at that point.
import time

from selenium import webdriver
from selenium.webdriver import ChromeOptions

# Options object that carries the command-line arguments for Chrome.
options = ChromeOptions()

# Location of the Chrome user-data directory (where the cookies are stored).
profile_directory = r'--user-data-dir=C:\Users\administortra\AppData\Local\Google\Chrome\User Data'

# Register the user-data directory as a Chrome argument.
options.add_argument(profile_directory)

# Pass the options through the `options` keyword; the older `chrome_options`
# keyword is deprecated and no longer accepted by recent Selenium releases.
driver = webdriver.Chrome(options=options)
try:
    driver.implicitly_wait(10)
    driver.get('https://www.baidu.com/')

    # Add the user's cookie. The dict keys must be the lowercase strings
    # "name" and "value"; the value below is a placeholder for the real
    # BDUSS session string.
    driver.add_cookie({"name": "BDUSS", "value": "用户session字符串"})

    # Reload the page.
    driver.refresh()

    time.sleep(10)
finally:
    # quit() ends the whole driver session; close() only closes the window.
    driver.quit()
爬取京东商品信息:
请求url:
https://www.jd.com/
提取商品信息:
1.商品详情页
2.商品名称
3.商品价格
4.评价人数
5.商品商家
from selenium import webdriver from selenium. webdriver. common. keys import Keysimport time driver = webdriver. Chrome() try: driver. implicitly_ wait (10)# I、往京东主页发送请求 driver. get(' https://www. jd. com/') # 2、输入商品名称,并回车搜索 input_ .tag = driver. find element_ by_ id(' key' )input_ tag. send keys (’macbook' )input_ tag. send_ keys (Keys. ENTER) # 3、查找所有商品div # good div = driver. find element_ by_ id(J goodsList')good_ list = driver. find_ elements_ by_ class_ name(' gl-item' )
#根据属性选择器查找#商品链接 good_ url = good.find element_ by_ css_ selector .p-img a' ). get_ attribute(' href' ) #商品名称 good_ name = good. find_ element_ by_ CSS_ selector ( '. p-name em' ). text. replace("\n", "--") #商品价格 good_ price = good. find_ element_ by_ class_ name ( p-price' ). text. replace("\n",”:") #评价人数 good_ .commit = good. find element_ by_ class_ name ( p-commit' ). text. replace("\n",””) #商品商家 good_ from = good. find_ element_ by_ class_ name ( ’J_im_ icon' ). text. replace("\n",”") 激活indowse
good_ content = f' ' '
商品链接:{good_url}
商品名称:{good_name}
商品价格:{good_price}
评价人数:{good_commit}
商品商家:{good_from}
print (good_ .content)
with open(' jd. txt', ' a, encoding=' utf-8' ) as f:
e f. write (good_ content)e time. s1eep(10)
finally:
driver. close()
#递归调用函数
get_ good(driver)ng time. sleep(10)166 finally: driver. close0169170 lif _- name_ ._ == '__ _main_. .' :172 good_ name = input('靖輪入爬取商品信息:’). stripO174 driver = webdriver. Chr ome O176 driver. implicitly_ wait(10)
#往京东主页发送请求
input_ tag = driver. find_ .element_ .by_ id(' key')input_ tag. send_ keys (good_ name)input_ tag. send_ keys (Keys. ENTER)time. sleep(2) get_ good(driver)
今日作业:
1.总结课堂知识点,写博客
2.爬取京东商品信息
3.滑动验证(提高题)
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


def _save_current_page(driver, out_file):
    """Print every product card on the current page and write it to out_file."""
    for good in driver.find_elements(By.CLASS_NAME, 'gl-item'):
        # Product link
        good_url = good.find_element(
            By.CSS_SELECTOR, '.p-img a').get_attribute('href')

        # Product name
        good_name = good.find_element(
            By.CSS_SELECTOR, '.p-name em').text.replace("\n", "--")

        # Product price
        good_price = good.find_element(
            By.CLASS_NAME, 'p-price').text.replace("\n", ":")

        # Number of reviews
        good_commit = good.find_element(
            By.CLASS_NAME, 'p-commit').text.replace("\n", " ")

        good_content = f'''
        商品链接: {good_url}
        商品名称: {good_name}
        商品价格: {good_price}
        评价人数: {good_commit}
        \n
        '''
        print(good_content)
        out_file.write(good_content)


def get_good(driver):
    """Scrape every result page reachable from the current search page.

    For each page: scroll down via JavaScript, wait for the content to load,
    print each product and append it to ``jd.txt``, then click the "next
    page" link. The browser session is always shut down before returning,
    whether the crawl finishes or fails.

    Args:
        driver: a Selenium WebDriver already showing a search result page.
    """
    try:
        with open('jd.txt', 'a', encoding='utf-8') as out_file:
            # Iterate instead of recursing: one stack frame per result page
            # would pile up, and a `finally` at every level would try to
            # close the same browser repeatedly while unwinding.
            while True:
                # Scroll via JavaScript so the remaining products get loaded.
                driver.execute_script('window.scrollTo(0,5000);')
                # Give the page time to load the data.
                time.sleep(2)

                _save_current_page(driver, out_file)

                try:
                    next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
                except NoSuchElementException:
                    # No "next page" link: the last page has been processed.
                    break
                # NOTE(review): assumes the last page marks the link with a
                # "disabled" class; confirm against the live page. When the
                # class is absent this check changes nothing.
                if 'disabled' in (next_tag.get_attribute('class') or ''):
                    break

                next_tag.click()
                time.sleep(2)

        time.sleep(10)
    finally:
        # quit() ends the whole driver session; close() only closes the window.
        driver.quit()


if __name__ == '__main__':
    good_name = input('请输入爬取商品信息:').strip()

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(10)

        # 1. Open the JD home page.
        driver.get('https://www.jd.com/')

        # 2. Type the product name and press Enter to search.
        input_tag = driver.find_element(By.ID, 'key')
        input_tag.send_keys(good_name)
        input_tag.send_keys(Keys.ENTER)
        time.sleep(2)
    except Exception:
        # get_good() has not taken over yet, so shut the browser down here.
        driver.quit()
        raise

    get_good(driver)
#滑动
# Cracking the Geetest slider captcha.
#
# cnblogs login URL:
#   https://account.cnblogs.com/signin?returnUrl=https%3A%2F%2Fwww.cnblogs.com%2F
#
# Plan:
#   1. Enter the user name and password and click "log in".
#   2. When the slider captcha pops up, capture the picture with the gap and
#      the complete picture.
#   3. Compare the two pictures pixel by pixel to get the slide distance.
#   4. Build a movement track that imitates a human.
#   5. Perform the slide.
# Only steps 1 and 2 are implemented below.
import random
import time

from PIL import Image  # pip3 install pillow
from selenium import webdriver  # drives the browser
from selenium.webdriver import ActionChains  # used to drag the slider image
from selenium.webdriver.common.by import By


def cut_image(driver):
    """Screenshot the page and crop it down to the captcha picture.

    Saves the whole-page screenshot as ``snap.png``, crops it to the bounding
    box of the element with class ``geetest_canvas_img``, displays the crop
    with ``Image.show()`` and returns it.

    Args:
        driver: a Selenium WebDriver showing the captcha.

    Returns:
        The cropped PIL image.
    """
    # Screenshot of the whole page, stored as 'snap.png'.
    driver.save_screenshot('snap.png')

    # The captcha picture element.
    image = driver.find_element(By.CLASS_NAME, 'geetest_canvas_img')
    print(image.location)
    print(image.size)

    # Left / top / right / bottom edges of the captcha picture.
    # NOTE(review): assumes the screenshot uses the same pixel scale as the
    # element coordinates; confirm on scaled (HiDPI) displays.
    left = image.location['x']
    top = image.location['y']
    right = left + image.size['width']
    bottom = top + image.size['height']
    print(left, top, right, bottom)

    image_obj = Image.open('snap.png')

    # crop() takes the rectangle as a (left, upper, right, lower) tuple.
    img = image_obj.crop((left, top, right, bottom))

    # Display the cropped picture.
    img.show()
    return img


def _capture_with_fullbg_display(driver, display):
    """Set the full-background canvas' CSS display value, then crop the captcha."""
    time.sleep(2)
    # Change the DOM: set the `display` style of the complete-picture canvas.
    driver.execute_script(
        'document.getElementsByClassName("geetest_canvas_fullbg")[0]'
        '.style.display = arguments[0];',
        display,
    )
    return cut_image(driver)


def get_image1(driver):
    """Return the complete picture (full-background canvas set to "block")."""
    return _capture_with_fullbg_display(driver, 'block')


def get_image2(driver):
    """Return the picture with the gap (full-background canvas set to "none")."""
    return _capture_with_fullbg_display(driver, 'none')


def main():
    """Log in to cnblogs and capture both captcha pictures."""
    driver = webdriver.Chrome(r'D:\BaiduNetdiskDownload\chromedriver_win32\chromedriver.exe')
    try:
        driver.implicitly_wait(10)
        driver.get('https://account.cnblogs.com/signin?returnUrl=https%3A%2F%2Fwww.cnblogs.com%2F')

        # 1. Enter the user name and password, then click "log in".
        # NOTE(review): the account name and password are hard-coded in the
        # source; move them out of the file before sharing it.
        user_input = driver.find_element(By.ID, 'LoginName')
        user_input.send_keys('_tank_')
        time.sleep(0.2)

        pwd_input = driver.find_element(By.ID, 'Password')
        pwd_input.send_keys('k46709394.')
        time.sleep(2)

        login_submit = driver.find_element(By.ID, 'submitBtn')
        login_submit.click()

        # 2. Capture the complete picture.
        image1 = get_image1(driver)

        # 3. Capture the picture with the gap.
        image2 = get_image2(driver)

        time.sleep(100)
    finally:
        # Shut the browser down even when a step above fails.
        driver.quit()


if __name__ == '__main__':
    main()
转载于:https://www.cnblogs.com/shaoxianwei/p/11045071.html
发布者:全栈程序员-用户IM,转载请注明出处:https://javaforall.cn/127076.html原文链接:https://javaforall.cn
【正版授权,激活自己账号】: Jetbrains全家桶Ide使用,1年售后保障,每天仅需1毛
【官方授权 正版激活】: 官方授权 正版激活 支持Jetbrains家族下所有IDE 使用个人JB账号...