Skip to content

Commit

Permalink
Work as expected with a warning on request meta copy (#116)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio authored Feb 13, 2024
1 parent e43f5a2 commit 677b936
Show file tree
Hide file tree
Showing 2 changed files with 168 additions and 34 deletions.
23 changes: 23 additions & 0 deletions scrapy_zyte_smartproxy/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import warnings
from base64 import urlsafe_b64decode
from collections import defaultdict
try:
from urllib.request import _parse_proxy
except ImportError:
from urllib2 import _parse_proxy

from six.moves.urllib.parse import urlparse, urlunparse
from w3lib.http import basic_auth_header
Expand All @@ -17,6 +21,11 @@
logger = logging.getLogger(__name__)


def _remove_auth(auth_proxy_url):
proxy_type, user, password, hostport = _parse_proxy(auth_proxy_url)
return urlunparse((proxy_type, hostport, "", "", "", ""))


class ZyteSmartProxyMiddleware(object):

url = 'http://proxy.zyte.com:8011'
Expand Down Expand Up @@ -108,6 +117,7 @@ def open_spider(self, spider):
return

self._auth_url = self._make_auth_url(spider)
self._authless_url = _remove_auth(self._auth_url)

logger.info(
"Using Zyte Smart Proxy Manager at %s (apikey: %s)" % (
Expand Down Expand Up @@ -214,6 +224,19 @@ def process_request(self, request, spider):
if self._is_enabled_for_request(request):
if 'proxy' not in request.meta:
request.meta['proxy'] = self._auth_url
elif (
request.meta['proxy'] == self._authless_url
and b"Proxy-Authorization" not in request.headers
):
logger.warning(
"The value of the 'proxy' meta key of request {request} "
"has no API key. You seem to have copied the value of "
"the 'proxy' request meta key from a response or from a "
"different request. Copying request meta keys set by "
"middlewares from one request to another is a bad "
"practice that can cause issues.".format(request=request)
)
request.meta['proxy'] = self._auth_url
targets_zyte_api = self._targets_zyte_api(request)
self._set_zyte_smartproxy_default_headers(request)
request.meta['download_timeout'] = self.download_timeout
Expand Down
Loading

0 comments on commit 677b936

Please sign in to comment.