• Python并发请求之requests_futures模块使用


    # -*- coding: utf-8 -*-
    # @Time : 2019-12-09 10:00
    # @Author : cxa
    # @File : demo.py
    # @Software: PyCharm
    from requests_futures.sessions import FuturesSession
    from concurrent.futures import as_completed
    from lxml import html
    import time
    
    # Pages requested concurrently by get_req().
    url = [
        "http://www.baidu.com",
        "http://www.163.com",
        "http://www.google.com",
        "http://www.cnblogs.com/c-x-a",
    ]
    
    
    def get_node(source, x=".//head/title//text()"):
        """Parse *source* as HTML and return the result of XPath query *x*.

        The default expression selects the text nodes under ``head/title``.
        """
        return html.fromstring(source).xpath(x)
    
    
    def response_hook(resp, *args, **kwargs):
        """Response hook that attaches convenience attributes to *resp*.

        After the hook has run:

        * ``resp.encoding`` is set from ``resp.apparent_encoding``;
        * ``resp.data`` holds ``resp.text`` (decoded with that encoding);
        * ``resp.code`` holds ``resp.status_code``;
        * ``resp.elapsed`` holds the request duration as a number of seconds.

        Extra positional/keyword arguments passed by the hook dispatcher are
        accepted and ignored.  Nothing is returned, so the response object is
        modified in place.
        """
        resp.encoding = resp.apparent_encoding
        resp.data = resp.text
        resp.code = resp.status_code
        # A timer started inside the hook can only measure the hook itself.
        # requests attaches the real request duration to ``elapsed`` (as a
        # timedelta) before response hooks run, so keep that measurement and
        # just convert it to seconds instead of overwriting it.
        measured = resp.elapsed
        if hasattr(measured, "total_seconds"):
            resp.elapsed = measured.total_seconds()
    
    
    def get_req():
        """Fetch every address in ``url`` concurrently and print a short report.

        For each completed request the status code, the page title and the
        elapsed time are printed, in completion order.  A request whose future
        raises is reported and skipped so that one unreachable host does not
        discard the results of the others; a page without a title prints an
        empty title instead of raising ``IndexError``.
        """
        with FuturesSession(max_workers=4) as session:
            # Map each future back to its address so failures can be attributed.
            futures = {
                session.get(i, hooks={"response": response_hook}): i for i in url
            }
            for future in as_completed(futures):
                try:
                    resp = future.result()
                except Exception as exc:
                    # Per-request boundary: report the failure and carry on.
                    print("请求失败", futures[future], exc)
                    print("=" * 30)
                    continue
                print("状态码", resp.code)
                titles = get_node(resp.data)
                print("标题", titles[0] if titles else "")
                print("耗时", resp.elapsed)
                print("=" * 30)
    
    # Run the demo only when this file is executed as a script.
    if __name__ == "__main__":
        get_req()
    
    

    修改之后:把计时起点(start)从响应钩子内部移到 request() 调用处,使“耗时”覆盖整个请求过程,而不只是钩子本身的执行时间。

    from requests_futures.sessions import FuturesSession
    from concurrent.futures import as_completed
    from lxml import html
    import time
    
    # Addresses that get_req() requests through the session.
    url = [
        "http://www.baidu.com",
        "http://www.163.com",
        "http://www.google.com",
        "http://www.cnblogs.com/c-x-a",
    ]
    
    
    class MySession(FuturesSession):
        """FuturesSession that prepends a timing/convenience hook to every request.

        Every response produced through this session gets three extra
        attributes: ``data`` (``resp.text``), ``code`` (``resp.status_code``)
        and ``elapsed`` (seconds since ``request()`` was called, as a float).
        """

        def request(self, method, url, hooks=None, *args, **kwargs):
            """Issue a request with the built-in response hook installed first.

            :param method: HTTP method, forwarded unchanged.
            :param url: target URL, forwarded unchanged.
            :param hooks: optional hooks mapping.  Its ``'response'`` entry may
                be a single callable or a list/tuple of callables; they run
                after the built-in hook.  Neither the mapping nor the sequence
                is modified.
            :return: the value returned by the parent class's ``request``.
            """
            # The timer starts when the request is issued, so the measurement
            # is not limited to the hook body.  NOTE(review): presumably this
            # also includes time spent waiting for a free worker -- confirm.
            start = time.time()

            def response_hook(resp, *args, **kwargs):
                resp.encoding = resp.apparent_encoding
                resp.data = resp.text
                resp.code = resp.status_code
                resp.elapsed = time.time() - start

            # Build a fresh mapping and list: inserting into the caller's own
            # list would stack another hook on every call that reuses the same
            # hooks dict, and a tuple has no ``insert`` at all.
            hooks = dict(hooks) if hooks else {}
            existing = hooks.get('response')
            if existing is None:
                chain = [response_hook]
            elif isinstance(existing, (list, tuple)):
                chain = [response_hook] + list(existing)
            else:
                chain = [response_hook, existing]
            hooks['response'] = chain

            return super(MySession, self).request(method, url, hooks=hooks, *args, **kwargs)
    
    
    def get_node(source, x=".//head/title//text()"):
        """Return what XPath expression *x* selects in the HTML text *source*.

        By default the expression targets the text nodes under ``head/title``.
        """
        document = html.fromstring(source)
        return document.xpath(x)
    
    
    def get_req():
        """Fetch every address in ``url`` through ``MySession`` and print a report.

        For each completed request the status code, the page title and the
        elapsed time are printed, in completion order.  A request whose future
        raises is reported and skipped so that one unreachable host does not
        discard the results of the others; a page without a title prints an
        empty title instead of raising ``IndexError``.
        """
        with MySession(max_workers=4) as session:
            # Map each future back to its address so failures can be attributed.
            futures = {session.get(i): i for i in url}
            for future in as_completed(futures):
                try:
                    resp = future.result()
                except Exception as exc:
                    # Per-request boundary: report the failure and carry on.
                    print("请求失败", futures[future], exc)
                    print("=" * 30)
                    continue
                print("状态码", resp.code)
                titles = get_node(resp.data)
                print("标题", titles[0] if titles else "")
                print("耗时", resp.elapsed)
                print("=" * 30)
    
    
    # Entry point: start the concurrent fetch when run directly.
    if __name__ == "__main__":
        get_req()
    
  • 相关阅读:
  • 原文地址:https://www.cnblogs.com/c-x-a/p/12009565.html
  • 最新文章
  • 热门文章
一二三 - 开发者的网上家园