def CreateImages(images, name):
    """Download every image tag in *images* into directory *name*.

    Parameters:
        images: iterable of parsed tags, each exposing attrs["data-src"],
                a URL whose text after the last '=' gives the file type
                (WeChat's wx_fmt query parameter — TODO confirm).
        name:   target directory, created if missing.
    """
    # Create the target directory once, not once per image.
    os.makedirs(name, exist_ok=True)
    for i, image in enumerate(images, start=1):
        url = image.attrs["data-src"]
        # File extension is encoded after the final '=' of the URL.
        filename = "test_" + str(i) + "." + url.split('=')[-1]
        response = req.get(url)
        with open(name + '/' + filename, 'wb') as f:
            f.write(response.content)
        # Report only after the bytes are actually on disk
        # (the original announced success before downloading).
        print("finish!!!, now you get " + str(i) + " images")
# Entry point for this version of the script: fetch the image tags and
# download them under a user-supplied directory name.
# NOTE(review): GetURL is not defined anywhere in the visible source —
# presumably it lives in a part of the file not shown here; confirm.
images = GetURL()
CreateImages(images, name=input("please input its name:"))
def GetName(bsObj):
    """Derive a short folder/file prefix from the article title.

    Looks up the <h1 class="rich_media_title"> tag and keeps the first
    three characters after the marker '【二次元壁纸分享】', prefixed
    with 'image-'.

    Returns the prefix string, or None when the title tag (or its text)
    is missing — callers can skip pages that yield no name.
    """
    title = bsObj.find("h1", {"class": "rich_media_title"})
    # Guard: pages without the expected <h1> would otherwise raise
    # AttributeError on title.string.
    if title is None or title.string is None:
        return None
    return "image-" + title.string.split('【二次元壁纸分享】')[-1][:3]
def GetLinks(bsObj):
    """Collect the unique, non-empty 'data-link' URLs from the album list.

    Finds every <li> carrying WeChat's album-item class combination and
    returns their 'data-link' values, de-duplicated while preserving
    first-seen order.
    """
    links = bsObj.findAll(
        "li",
        {"class": "album__list-item js_album_item js_wx_tap_highlight wx_tap_cell"},
    )
    link_list = []
    for link in links:
        # .get() tolerates items that lack the attribute entirely
        # (the original dict indexing would raise KeyError).
        url = link.attrs.get('data-link')
        if url is not None and url not in link_list:
            link_list.append(url)
    return link_list
def Download(images, name):
    """Download every image tag in *images* into directory *name*.

    Files are named '<name>-<index>.<type>', where <type> is the text
    after the last '=' of the image URL (WeChat's wx_fmt parameter —
    TODO confirm).
    """
    # Hoisted out of the loop: the directory only needs creating once.
    os.makedirs(name, exist_ok=True)
    for i, image in enumerate(images, start=1):
        url = image.attrs["data-src"]
        filename = name + "-" + str(i) + "." + url.split('=')[-1]
        response = req.get(url)
        with open(name + '/' + filename, 'wb') as f:
            f.write(response.content)
        # Report after the file is written, not before the request.
        print("finish!!!, now you get " + str(i) + " images")
from urllib.request import urlopen from bs4 import BeautifulSoup import requests as req import os
# Global running total of images downloaded across all articles; shared
# by DownloadApart/DownloadTogether via `global n`.
n = 0
def GetNextURL(target):
    """Fetch the article at *target*, parse it, announce its name, and
    return the parsed BeautifulSoup document.

    Propagates whatever urlopen raises on a network failure.
    """
    # Context manager closes the HTTP response (the original leaked it).
    with urlopen(target) as html:
        bsObj = BeautifulSoup(html, "html.parser")
    name = GetName(bsObj)
    print("The next one is " + name)
    return bsObj
def GetName(bsObj):
    """Derive a short folder/file prefix from the article title.

    Reads the <h1 class="rich_media_title"> tag and keeps the first
    three characters after the marker '【二次元壁纸分享】', prefixed
    with 'image-'.

    Returns the prefix string, or None when the title tag (or its text)
    is missing, so callers can skip unnamed pages.
    """
    title = bsObj.find("h1", {"class": "rich_media_title"})
    # Guard: a page without the expected <h1> would otherwise raise
    # AttributeError on title.string.
    if title is None or title.string is None:
        return None
    return "image-" + title.string.split('【二次元壁纸分享】')[-1][:3]
def GetLinks(bsObj):
    """Return the unique, non-empty 'data-link' URLs of the album items.

    Matches every <li> with WeChat's album-item class combination;
    duplicates are dropped, first-seen order is kept.
    """
    links = bsObj.findAll(
        "li",
        {"class": "album__list-item js_album_item js_wx_tap_highlight wx_tap_cell"},
    )
    link_list = []
    for link in links:
        # .get() avoids the KeyError the original raised on items
        # without a 'data-link' attribute.
        url = link.attrs.get('data-link')
        if url is not None and url not in link_list:
            link_list.append(url)
    return link_list
def DownloadApart(images, name):
    """Download *images* into their own directory *name* ('apart' mode:
    one folder per article).

    Files are named '<name>-<local index>.<type>'. The module-level
    counter ``n`` is incremented once per image and reported as the
    overall running total.
    """
    global n
    # Hoisted: create the article's directory once, not per image.
    os.makedirs(name, exist_ok=True)
    for i, image in enumerate(images, start=1):
        n += 1
        url = image.attrs["data-src"]
        # Extension comes from the text after the URL's final '='.
        filename = name + "-" + str(i) + "." + url.split('=')[-1]
        response = req.get(url)
        with open(name + '/' + filename, 'wb') as f:
            f.write(response.content)
        # Report after the file is actually written.
        print("finish!!!, now you get " + str(n) + " images")
def DownloadTogether(images, name):
    """Download *images* into the current working directory ('together'
    mode: all articles share one folder).

    Files are named '<name>-<local index>.<type>'. The module-level
    counter ``n`` tracks and reports the overall running total.
    """
    global n
    for i, image in enumerate(images, start=1):
        n += 1
        url = image.attrs["data-src"]
        # Extension comes from the text after the URL's final '='.
        filename = name + "-" + str(i) + "." + url.split('=')[-1]
        response = req.get(url)
        with open(filename, 'wb') as f:
            f.write(response.content)
        # Report after the file is actually written.
        print("finish!!!, now you get " + str(n) + " images")
# Ask the user which layout to use. The branch that dispatches on
# `choice` ('t' vs 'a') is not visible in this portion of the source —
# presumably it selects DownloadTogether or DownloadApart; confirm.
print("Do you want to Together or Apart?")
choice = input("Please input 't' for Together 'a' for Apart\n")
# Main crawl loop for this version of the script: walk every album link,
# fetch each article, and extract its image tags and name.
# NOTE(review): GetAllURL and Getimage are not defined in the visible
# source — confirm they exist in the full file.
link_list = GetAllURL()
for link in link_list:
    bsObj = GetNextURL(link)
    images = Getimage(bsObj)
    name = GetName(bsObj)
def GetLinks(self, bsObj):
    """Return the unique, non-empty 'data-link' URLs of the album items.

    Also stores the raw tag list on ``self.links`` and the de-duplicated
    URL list on ``self.link_list``, preserving the original method's
    side effects on the instance.
    """
    self.links = bsObj.findAll(
        "li",
        {"class": "album__list-item js_album_item js_wx_tap_highlight wx_tap_cell"},
    )
    self.link_list = []
    for link in self.links:
        # .get() avoids the KeyError the original raised on items
        # without a 'data-link' attribute.
        url = link.attrs.get('data-link')
        if url is not None and url not in self.link_list:
            self.link_list.append(url)
    return self.link_list
def DownloadTogether(self, images, name):
    """Download *images* into the fixed folder D:\\PythonProject\\Images.

    Files are named '<name>-<index>.<type>'. Increments ``self.n`` (the
    overall total, reported in the progress line) and sets ``self.i``
    to the per-article index, as the original did.
    """
    # Make sure the hard-coded target folder exists — consistent with
    # the os.makedirs pattern the other download helpers in this file use.
    target_dir = "D:\\PythonProject\\Images"
    os.makedirs(target_dir, exist_ok=True)
    self.i = 0
    for image in images:
        self.i += 1
        self.n += 1
        # Loop temporaries kept local; the original stored them as
        # instance attributes (self.response/self.num/...) by accident.
        url = image.attrs["data-src"]
        filename = name + "-" + str(self.i) + "." + url.split('=')[-1]
        response = req.get(url)
        print("finish!!!, now you get " + str(self.n) + " => " + filename)
        self.path = os.path.join(target_dir, filename)
        with open(self.path, 'wb') as f:
            f.write(response.content)
def Start(self):
    """Crawl every album link until finished or Stop() was requested.

    For each link: fetch the article, extract its images and name, and
    download them when a name was found (GetName may yield no name).
    """
    self.link_list = self.GetAllURL()
    for link in self.link_list:
        # Cooperative cancellation: Stop() sets self.stop = 1 and this
        # flag is polled before each article.
        if self.stop == 1:
            print("OK quit....")
            return None
        self.bsObj = self.GetNextURL(link)
        self.images = self.Getimage(self.bsObj)
        self.name = self.GetName(self.bsObj)
        # Identity test is the idiomatic None check (was `!= None`).
        if self.name is not None:
            self.DownloadTogether(self.images, self.name)
def Stop(self):
    """Request that Start() abort before processing its next link.

    Start() polls ``self.stop`` at the top of every loop iteration, so
    the cancellation takes effect after the current article finishes.
    """
    self.stop = 1
# Script entry point for the class-based version: build a Crawler and run it.
# NOTE(review): the Crawler class definition is not visible in this portion
# of the source — confirm it exists in the full file.
if __name__ == "__main__":
    crawler = Crawler()
    crawler.Start()