爬取图片实例
•selenium+win32爬取图片
# Python学习交流Q群:903971231
"""Crawl wallpapers from netbian.com and preview them in a small Qt window.

The crawler drives a visible Chrome window with Selenium and saves each
wallpaper through Chrome's native "save image" dialog, which is operated
with synthetic Win32 mouse/keyboard events.  The script therefore only
works on Windows, with the browser window in the foreground.
"""
import os
import threading
import time
from ctypes import windll

import requests
import win32api
import win32clipboard
import win32con
from PySide2 import QtWidgets
from requests_html import HTMLSession, HTML
from PySide2.QtGui import QPixmap, QColor, QStandardItemModel, QStandardItem
from PySide2.QtCore import QFile, Qt, QDateTime, QDate, QTime, QTimer, QStringListModel, QModelIndex
from PySide2.QtCore import QObject, Signal, Slot
from PySide2.QtUiTools import QUiLoader
from PySide2.QtWidgets import QApplication, QTreeView, QTreeWidget, QHeaderView, QTreeWidgetItem, QWidget
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Directory (relative to the working directory) the wallpapers are saved to.
IMAGE_DIR = '图片'
# Local cache of the first listing page, so restarts do not re-fetch it.
PAGE_CACHE = '爬取图片.html'
BASE_URL = 'http://www.netbian.com'
# Seconds each picture stays on screen in the slideshow.
SLIDE_INTERVAL = 3


def _list_images():
    """Return the sorted file names directly inside IMAGE_DIR ([] if missing)."""
    if not os.path.isdir(IMAGE_DIR):
        return []
    return sorted(
        name for name in os.listdir(IMAGE_DIR)
        if os.path.isfile(os.path.join(IMAGE_DIR, name))
    )


def _tap_key(vk_code):
    """Press and release one virtual key."""
    win32api.keybd_event(vk_code, 0, 0, 0)
    win32api.keybd_event(vk_code, 0, win32con.KEYEVENTF_KEYUP, 0)


def _click(x, y, down_flag, up_flag, hold=0.0):
    """Move the cursor to screen position (x, y) and click there.

    ``down_flag``/``up_flag`` are the MOUSEEVENTF_* constants for the button;
    ``hold`` is the pause in seconds between press and release.
    """
    windll.user32.SetCursorPos(x, y)
    win32api.mouse_event(down_flag, 0, 0, 0)
    if hold:
        time.sleep(hold)
    win32api.mouse_event(up_flag, 0, 0, 0)


class Test(QObject):
    """Main window controller: crawls wallpapers and shows them as a slideshow.

    Widgets are only touched from the GUI thread.  The worker threads
    (slideshow and crawler) report through the two private signals below,
    whose slots run in the thread this object lives in.
    """

    # Screen position that is right-clicked to open the image context menu.
    # NOTE(review): depends on screen resolution and window placement.
    CONTEXT_CLICK_POS = (500, 700)
    # Screen position of the file-name box in the save dialog (same caveat).
    FILENAME_BOX_POS = (274, 449)

    _show_picture = Signal(str)  # file name inside IMAGE_DIR to display
    _log_line = Signal(str)      # message to append to the list widget

    def __init__(self):
        """Load the UI, start the browser, fetch page one, start the slideshow.

        Raises:
            IOError: if ``UI.ui`` cannot be opened.
        """
        super(Test, self).__init__()
        ui_file = QFile('UI.ui')
        if not ui_file.open(QFile.ReadOnly):
            raise IOError('cannot open UI.ui: ' + ui_file.errorString())
        try:
            # The device must still be open while the loader reads it.
            self.ui = QUiLoader().load(ui_file)
        finally:
            ui_file.close()

        self.ui.B_start.clicked.connect(self.start)
        self.ui.B_left.clicked.connect(lambda: self.change_index('left'))
        self.ui.B_right.clicked.connect(lambda: self.change_index('right'))
        self._show_picture.connect(self._set_picture)
        self._log_line.connect(self._append_log)

        # File names currently available in IMAGE_DIR.
        self.img_list = []
        # Scale the pixmap to the label size.
        self.ui.label.setScaledContents(True)
        # Index into img_list of the picture being displayed.
        self.index = 0
        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36",
        }
        self.text = ''
        self._crawler = None

        # A headless browser is not used: the download is performed by
        # clicking in the real browser window with synthetic mouse events.
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 30)
        self.session = requests.Session()
        self.pull()
        self.start_show_pic()

    @Slot(str)
    def _set_picture(self, file_name):
        """Display ``file_name`` (relative to IMAGE_DIR) in the label."""
        self.ui.label.setPixmap(QPixmap('图片/' + file_name))

    @Slot(str)
    def _append_log(self, message):
        """Append ``message`` to the list widget and select the new row."""
        self.ui.listWidget.addItem(message)
        self.ui.listWidget.setCurrentRow(self.ui.listWidget.count() - 1)

    def change_index(self, button):
        """Step to the previous ('left') or next (anything else) picture.

        The index wraps around at both ends; nothing happens while no
        pictures are available.
        """
        if not self.img_list:
            return
        step = -1 if button == 'left' else 1
        self.index = (self.index + step) % len(self.img_list)
        self._set_picture(self.img_list[self.index])

    def start_show_pic(self):
        """Start the slideshow loop in a daemon thread."""
        worker = threading.Thread(target=self.show_pic, daemon=True)
        worker.start()

    def show_pic(self):
        """Slideshow loop: rescan IMAGE_DIR and show the next picture forever.

        Runs in a worker thread, so the picture is handed to the GUI thread
        through ``_show_picture`` instead of touching the label directly.
        """
        while True:
            images = _list_images()
            self.img_list = images
            if images:
                # Wrap here: the index may point past the end after the
                # increment below or after files were removed.
                current = self.index % len(images)
                self.index = current
                self._show_picture.emit(images[current])
            time.sleep(SLIDE_INTERVAL)
            if images:
                self.index += 1

    def start(self):
        """Start crawling in a daemon thread unless a crawl is already running."""
        if self._crawler is not None and self._crawler.is_alive():
            # One browser and one mouse cannot serve two crawlers at once.
            return
        self._crawler = threading.Thread(target=self.get_img, daemon=True)
        self._crawler.start()

    def pull(self):
        """Load the first listing page into ``self.text``.

        Reads the cached copy if it exists; otherwise fetches the page with
        the browser and writes the cache.  Enables the start button afterwards.
        """
        if os.path.exists('爬取图片.html'):
            with open('爬取图片.html', 'r', encoding='utf8') as f:
                self.text = f.read()
        else:
            self.browser.get('http://www.netbian.com/')
            self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.list')))
            self.text = self.browser.page_source
            with open('爬取图片.html', 'w', encoding='utf8') as f:
                f.write(self.text)
            print(self.text)
        self.ui.B_start.setEnabled(True)

    def get_img(self):
        """Download every wallpaper linked from ``self.text``, page after page.

        Links starting with ``/desk`` are wallpaper detail pages; a link
        starting with ``/index`` leads to the next listing page.  Pages are
        processed in a loop rather than recursively, so a long crawl cannot
        exhaust the recursion limit.  Links following the ``/index`` link on
        the same page are not visited.
        """
        os.makedirs(IMAGE_DIR, exist_ok=True)
        while True:
            listing = BeautifulSoup(self.text, 'lxml')
            next_href = None
            for anchor in listing.select('.list ul li a'):
                href = anchor.get('href', '')
                if href.startswith('/desk'):
                    self._download_wallpaper(BASE_URL + href)
                elif href.startswith('/index'):
                    next_href = href
                    break
            if next_href is None:
                return

            url = BASE_URL + next_href
            print(url)
            self.browser.get(url)
            self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.list')))
            self.text = self.browser.page_source
            self._log_line.emit('下一页')

    def _download_wallpaper(self, page_url):
        """Open one wallpaper detail page and save its image unless present."""
        self.browser.get(page_url)
        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.pic')))
        soup = BeautifulSoup(self.browser.page_source, 'lxml')
        img = soup.select_one('#main > div.endpage > div > p > a > img')
        if img is None or not img.get('title'):
            # Page layout differs from what the selector expects; skip it.
            return
        title = img['title']
        path = os.path.join(os.getcwd(), '图片', title + '.jpg')

        # Close a second window if one was opened, then return to the first.
        if len(self.browser.window_handles) > 1:
            self.browser.switch_to.window(self.browser.window_handles[1])
            self.browser.close()
            self.browser.switch_to.window(self.browser.window_handles[0])
        time.sleep(1)

        if os.path.exists(path):
            self._log_line.emit(title + '.jpg 已存在,不下载')
            return
        self._save_via_context_menu(path)
        self._log_line.emit(title + '.jpg 下载完成')

    def _save_via_context_menu(self, path):
        """Save the image under the cursor to ``path`` via the native dialog."""
        _click(*self.CONTEXT_CLICK_POS,
               win32con.MOUSEEVENTF_RIGHTDOWN, win32con.MOUSEEVENTF_RIGHTUP,
               hold=0.05)
        time.sleep(1)
        # NOTE(review): 'V' is presumably the context-menu accelerator for
        # "save image as" in the browser's locale — confirm.
        _tap_key(ord('V'))

        # Put the target path on the clipboard.  CF_UNICODETEXT keeps the
        # non-ASCII directory name intact regardless of the ANSI code page.
        win32clipboard.OpenClipboard()
        try:
            win32clipboard.EmptyClipboard()
            win32clipboard.SetClipboardText(path, win32con.CF_UNICODETEXT)
        finally:
            win32clipboard.CloseClipboard()

        # Focus the file-name box and paste the path with Ctrl+V.
        _click(*self.FILENAME_BOX_POS,
               win32con.MOUSEEVENTF_LEFTDOWN, win32con.MOUSEEVENTF_LEFTUP)
        time.sleep(1)
        win32api.keybd_event(win32con.VK_CONTROL, 0, 0, 0)
        _tap_key(ord('V'))
        win32api.keybd_event(win32con.VK_CONTROL, 0, win32con.KEYEVENTF_KEYUP, 0)
        time.sleep(3)
        # Confirm the dialog with Enter and give the save time to finish.
        _tap_key(win32con.VK_RETURN)
        time.sleep(2)


if __name__ == '__main__':
    app = QApplication([])
    # Use the Fusion style.
    app.setStyle('Fusion')
    window = Test()
    window.ui.show()
    try:
        app.exec_()
    finally:
        # Shut down Chrome and its driver process together with the app.
        window.browser.quit()
最后
今天的分享到这里就完了,祝大家五一快乐鸭!!!
来源:https://www.cnblogs.com/123456feng/p/16210585.html
本站部分图文来源于网络,如有侵权请联系删除。