百木园-与人分享,
就是让自己快乐。

抖音网页版高清视频抓取教程(基于 Selenium)

废话不多说,直接上代码

from selenium import webdriver
from selenium.webdriver import ChromeOptions
import time
import re
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import uuid
import os
import requests


# Module-level browser setup: builds a Chrome instance shared by the
# functions below through the global name ``browser``.
option = ChromeOptions()
# Present a regular desktop Chrome user agent.
option.add_argument(
    'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36"'
)
# Keep the site from detecting that an automation tool is driving the browser.
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
# Registered to run on every new document: makes ``navigator.webdriver``
# evaluate to ``undefined``.
browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
    'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
})

browser.maximize_window()  # maximize the browser window

def douyincrawler(keyword):
    """Search the Douyin web site for *keyword* and save every video found.

    Opens the video search page for ``keyword`` in the module-level
    ``browser``, clicks the login button and waits 15 seconds so the user can
    scan the QR code with the Douyin mobile app, scrolls down five times to
    load more results, then opens each result's detail page, extracts the
    video URL from the page source and passes it to ``savevideo``.

    Failures on an individual video are printed and skipped; the detail tab
    is always closed and focus returned to the search results, so one bad
    video cannot derail the remaining ones.

    Args:
        keyword: Search term; it is inserted into the search URL as-is.
    """
    url = 'https://www.douyin.com/search/' + keyword + '?publish_time=0&sort_type=0&source=switch_tab&type=video'
    browser.get(url)
    # Click "log in"; the user then scans the QR code with the Douyin app.
    browser.find_element(By.XPATH, '//*[@id="qdblhsHs"]/button').click()
    time.sleep(15)  # time allowed for scanning the QR code

    # Scroll down repeatedly so that more results get loaded.
    for _ in range(5):
        time.sleep(5)
        browser.execute_script("var q=document.documentElement.scrollTop=10000")
        if '服务出现异常' in browser.page_source:
            # The page reports a service error: reload it.
            browser.refresh()
        if '服务异常,重新' in browser.page_source:
            # The page offers a retry control: click it to load the results.
            browser.find_element(
                By.XPATH, '//*[@id="dark"]/div[2]/div/div[3]/div[2]/div/div/span'
            ).click()

    # Collect the detail-page links of every result on the page.
    detail_links = browser.find_elements(
        By.XPATH, '//*[@id="dark"]/div[2]/div/div[3]/div[2]/ul/li/div/div/a[1]'
    )
    print('共计侦查到{}个视频数据'.format(len(detail_links)))

    main_window = browser.current_window_handle
    for link in detail_links:
        try:
            # Click through JavaScript: works even when the element is
            # present but a regular click is not possible.
            browser.execute_script("arguments[0].click();", link)
            handles = browser.window_handles
            browser.switch_to.window(handles[1])  # the newly opened detail tab
            # Explicitly wait for the <video> tag to appear.
            WebDriverWait(browser, 10).until(EC.presence_of_element_located((
                By.XPATH, '//*[@id="root"]/div/div[2]/div/div/div[1]/div[1]/div[2]/div/div[1]/div/div[2]/div[2]/xg-video-container/video'
            )))
            match = re.search(r'<source class="" src="(.*?)"', browser.page_source)
            if match is None:
                raise ValueError('no <source> video URL found on the detail page')
            savevideo('https:' + match.group(1))
        except Exception as e:
            # Best effort: report the problem and move on to the next video.
            print(e)
        finally:
            # Always close whatever extra tab was opened and go back to the
            # results page; otherwise a single failure would leave the
            # driver on the wrong window for all remaining links.
            try:
                for handle in browser.window_handles:
                    if handle != main_window:
                        browser.switch_to.window(handle)
                        browser.close()
                browser.switch_to.window(main_window)
            except Exception as e:
                print(e)



def savevideo(video_url, video_dir=r'C:\Users\lvye\Desktop\dou_yin\video', timeout=30):
    """Download *video_url* and store it as a uniquely named ``.mp4`` file.

    The file name is a random UUID, so repeated downloads never overwrite
    each other. The response is streamed to disk in chunks rather than being
    held in memory as a whole.

    Args:
        video_url: Absolute URL of the video file.
        video_dir: Directory the file is written to; created when missing.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status (nothing is written in the latter case).
        OSError: If the directory or the file cannot be created or written.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.90 Safari/537.36",
    }
    os.makedirs(video_dir, exist_ok=True)
    video_full_path = os.path.join(video_dir, str(uuid.uuid4()) + '.mp4')
    with requests.get(url=video_url, headers=headers, stream=True, timeout=timeout) as response:
        # Fail before opening the file so an error page is not saved as a video.
        response.raise_for_status()
        with open(video_full_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    print('已下载:{}'.format(video_url))




# Script entry point: crawl the search results for one sample keyword.
if __name__ == '__main__':
    douyincrawler('街拍美女')

成果展示:

 

注:该代码仅供技术交流与学习,请勿用于任何违法犯罪活动。

 


来源:https://www.cnblogs.com/lvye001/p/16054931.html
本站部分图文来源于网络,如有侵权请联系删除。

未经允许不得转载:百木园 » 抖音网页版高清视频抓取教程selenium

相关推荐

  • 暂无文章