任務:爬取異步加載的數據,並將圖片下載保存到本地文件夾中
動態數據:Network-XHR-Response(鏈接、圖片鏈接),在Request中尋找URL
異步加載網站:新浪微博評論、豆瓣電影
注意:要知道每一次加載的元素個數(這個網站是12個)、本地文件夾路徑、文件夾權限
動態網站的參數可以在Network中查到!
# Scrape the asynchronously loaded product listing on knewone.com and save
# each product's cover image into a local folder.
import os
import time
import urllib.request

import requests
from bs4 import BeautifulSoup

url = 'https://knewone.com/discover?page='
data = {}
# folder_path = (r'C:/Users/Jing/Desktop/a4')  # alternative target folder
folder_path = ('D://data//imgs//')  # folder the downloaded images are written to


def get_gage(url, data=None):
    """Fetch one listing page, print each product and download its cover image.

    Args:
        url: Full URL of the listing page to fetch.
        data: When anything other than None is passed, the page is still
            fetched and parsed, but nothing is printed or downloaded.
    """
    # A timeout keeps a stalled connection from hanging the crawl forever.
    wb_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    imgs = soup.select('a.cover-inner > img')
    titles = soup.select('section.content > h4.title > a')
    links = soup.select('section.content > h4 > a')

    if data is not None:
        return

    # urlretrieve does not create missing directories.
    os.makedirs(folder_path, exist_ok=True)

    for img, title, link in zip(imgs, titles, links):
        record = {
            'img': img.get('src'),
            'title': title.get('title'),
            'link': link.get('href'),
        }
        print(record)
        item = record['img']
        print(item)
        if not item:
            # <img> tag without a src attribute: nothing to download.
            continue
        # A fixed slice of the image URL is used as the local file name.
        file_name = item[-21:-16]
        if not file_name:
            # URL too short for the slice; an empty name would make the
            # download target the directory itself.
            continue
        urllib.request.urlretrieve(item, folder_path + file_name)


def get_more_gages(start, end):
    """Crawl listing pages numbered from ``start`` up to, but not including, ``end``.

    Sleeps two seconds after each page.
    """
    for one in range(start, end):
        get_gage(url + str(one))
        time.sleep(2)


if __name__ == '__main__':
    get_more_gages(1, 3)  # one batch is 12 images
新聞熱點
疑難解答