Jiepai icon indicating copy to clipboard operation
Jiepai copied to clipboard

以前抓的都是小图,我改了下代码,抓大图

Open wardseptember opened this issue 7 years ago • 4 comments

import os
from multiprocessing.pool import Pool
import requests
from urllib.parse import urlencode
from hashlib import md5
from requests import codes


def get_page(offset):
    """Fetch one page of Toutiao search results for the keyword '街拍'.

    Args:
        offset: Value sent as the ``offset`` query parameter.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, the
        status code is not 200, or the body is not valid JSON.
    """
    params = {
        'offset': offset,
        'format': 'json',
        'keyword': '街拍',
        'autoload': 'true',
        'count': '20',
        'cur_tab': '1',
        'from': 'search_tab',
        'pd': 'synthesis',
    }
    url = 'http://www.toutiao.com/search_content/?' + urlencode(params)
    try:
        # Without a timeout a stalled connection would block this worker
        # indefinitely.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        # RequestException is the base class of ConnectionError, so the
        # original failure mode is still covered, plus timeouts and others.
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # The server answered 200 but the body was not JSON.
        return None


def get_images(json):
    """Yield one record per image found in a search-result payload.

    Args:
        json: Decoded search response (a dict), or ``None`` when the page
            could not be fetched.

    Yields:
        Dicts of the form ``{'image': <url>, 'title': <title>}``, one for
        each entry of every item's ``image_list`` that carries a URL.
    """
    # A failed page fetch yields None; treat it as "no images" instead of
    # raising AttributeError on None.get.
    if not json:
        return
    for item in json.get('data') or []:
        title = item.get('title')
        for image in item.get('image_list') or []:
            url = image.get('url')
            if not url:
                # An entry without a URL cannot be downloaded.
                continue
            yield {
                'image': url,
                'title': title,
            }


def save_image(item):
    """Download the large variant of one image into ``img/<title>/``.

    Args:
        item: Mapping with an ``'image'`` URL and a ``'title'``.

    The file is named after the MD5 hex digest of its content with a
    ``.jpg`` suffix, so an identical image is written only once per
    directory. Progress and failures are reported with ``print``; nothing
    is returned.
    """
    image_url = item.get('image')
    if not image_url:
        print('Failed to Save Image,item %s' % item)
        return

    # The title comes from the remote API: replace path separators, control
    # characters and characters that are illegal in file names so it cannot
    # escape the img directory or make directory creation fail.
    title = item.get('title') or 'untitled'
    safe_title = ''.join(
        '_' if ch in '\\/:*?"<>|' or ord(ch) < 32 else ch for ch in title
    ).strip(' .') or 'untitled'
    img_path = 'img' + os.path.sep + safe_title
    # exist_ok avoids the exists()/makedirs() race between pool workers that
    # try to create the same directory at the same time.
    os.makedirs(img_path, exist_ok=True)

    # Replacing 'list' with 'large' in the URL selects the full-size image.
    large_url = image_url.replace('list', 'large')
    if not large_url.startswith(('http://', 'https://')):
        # Scheme-less URLs (e.g. '//host/path') need one prepended.
        large_url = 'https:' + large_url

    try:
        resp = requests.get(large_url, timeout=10)
    except requests.RequestException:
        # Base class of ConnectionError; also covers timeouts.
        print('Failed to Save Image,item %s' % item)
        return
    if codes.ok != resp.status_code:
        print('Failed to Save Image,item %s' % item)
        return

    file_path = img_path + os.path.sep + '{file_name}.{file_suffix}'.format(
        file_name=md5(resp.content).hexdigest(),
        file_suffix='jpg')
    if os.path.exists(file_path):
        print('Already Downloaded', file_path)
        return
    with open(file_path, 'wb') as f:
        f.write(resp.content)
    print('Downloaded image path is %s' % file_path)

def main(offset):
    """Fetch the result page at *offset* and download every image on it.

    Args:
        offset: Search-result offset passed through to ``get_page``.
    """
    page = get_page(offset)
    if not page:
        # get_page returns None on failure; report it instead of crashing
        # or finishing silently with no output.
        print('No data for offset %s' % offset)
        return
    for item in get_images(page):
        print(item)
        save_image(item)


# Inclusive range of page groups to crawl; each group index is multiplied
# by 20 to obtain the offset handed to main().
GROUP_START = 1
GROUP_END = 20

if __name__ == '__main__':
    # Offsets GROUP_START*20, (GROUP_START+1)*20, ..., GROUP_END*20.
    offsets = list(range(GROUP_START * 20, (GROUP_END + 1) * 20, 20))
    workers = Pool()
    # map() blocks until every offset has been processed by a worker.
    workers.map(main, offsets)
    workers.close()
    workers.join()

关键是这两处:`'image': image.get('url')` 和 `resp = requests.get('https:'+item.get('image').replace('list','large'))`。这是在上次这个代码的基础上修改的:https://github.com/Python3WebSpider/Jiepai/issues/5

wardseptember avatar Jan 22 '19 12:01 wardseptember

哥们,可否把第句代码的注释给添加上去啊

binjingwang avatar Apr 11 '19 08:04 binjingwang

@binjingwang

resp = requests.get('https:'+item.get('image').replace('list','large'))

就是这句,把list换成large就行了,抓取到的就是大图。

wardseptember avatar Apr 12 '19 14:04 wardseptember

大佬为什么 我直接复制你的运行 没结果 没报错 没有图片

Sherlocklcl avatar Apr 27 '19 09:04 Sherlocklcl

大佬为什么 我直接复制你的运行 没结果 没报错 没有图片

我已经修改了这个bug 我的github https://github.com/wvdon/Jiepai

wvdon avatar May 06 '19 13:05 wvdon