Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make from_parts an LRU to increase the chance we can preserve the internal cache #1434

Merged
merged 4 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/1434.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved cache performance for operations that produce a new :class:`~yarl.URL` object -- by :user:`bdraco`.
bdraco marked this conversation as resolved.
Show resolved Hide resolved
103 changes: 38 additions & 65 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,19 @@ def pre_encoded_url(url_str: str) -> "URL":
return self


@lru_cache
def from_parts(scheme: str, netloc: str, path: str, query: str, fragment: str) -> "URL":
    """Create a new URL from parts.

    The five components are stored on the instance exactly as given; no
    encoding or validation happens here.  The instance is allocated with
    ``object.__new__`` rather than by calling ``URL(...)``.

    Calls are memoized by ``lru_cache``: repeating a call with equal
    arguments can return the previously built instance, together with
    whatever has already been stored in its ``_cache`` dict.
    """
    url = object.__new__(URL)
    url._scheme, url._netloc, url._path, url._query, url._fragment = (
        scheme,
        netloc,
        path,
        query,
        fragment,
    )
    # Every freshly built URL starts with an empty per-instance cache.
    url._cache = {}
    return url


@rewrite_module
class URL:
# Don't derive from str
Expand Down Expand Up @@ -308,10 +321,7 @@ def __new__(
if type(val) is SplitResult:
if not encoded:
raise ValueError("Cannot apply decoding to SplitResult")
self = object.__new__(URL)
self._scheme, self._netloc, self._path, self._query, self._fragment = val
self._cache = {}
return self
return from_parts(*val)
if isinstance(val, str):
return pre_encoded_url(str(val)) if encoded else encode_url(str(val))
if val is UNDEFINED:
Expand Down Expand Up @@ -421,20 +431,6 @@ def build(
url._cache = {}
return url

@classmethod
def _from_parts(
    cls, scheme: str, netloc: str, path: str, query: str, fragment: str
) -> "URL":
    """Create a new URL from parts.

    Allocates an instance of ``cls`` with ``object.__new__`` and stores the
    five components on it exactly as given; no encoding or validation
    happens here.  The instance starts with an empty ``_cache`` dict.
    """
    url = object.__new__(cls)
    url._scheme, url._netloc, url._path, url._query, url._fragment = (
        scheme,
        netloc,
        path,
        query,
        fragment,
    )
    url._cache = {}
    return url

def __init_subclass__(cls):
raise TypeError(f"Inheriting a class {cls!r} from URL is forbidden")

Expand Down Expand Up @@ -587,7 +583,7 @@ def _origin(self) -> "URL":
netloc = make_netloc(None, None, encoded_host, self.explicit_port)
elif not self._path and not self._query and not self._fragment:
return self
return self._from_parts(scheme, netloc, "", "", "")
return from_parts(scheme, netloc, "", "", "")

def relative(self) -> "URL":
"""Return a relative part of the URL.
Expand All @@ -597,7 +593,7 @@ def relative(self) -> "URL":
"""
if not self._netloc:
raise ValueError("URL should be absolute")
return self._from_parts("", "", self._path, self._query, self._fragment)
return from_parts("", "", self._path, self._query, self._fragment)

@cached_property
def absolute(self) -> bool:
Expand Down Expand Up @@ -916,12 +912,10 @@ def parent(self) -> "URL":
path = self._path
if not path or path == "/":
if self._fragment or self._query:
return self._from_parts(self._scheme, self._netloc, path, "", "")
return from_parts(self._scheme, self._netloc, path, "", "")
return self
parts = path.split("/")
return self._from_parts(
self._scheme, self._netloc, "/".join(parts[:-1]), "", ""
)
return from_parts(self._scheme, self._netloc, "/".join(parts[:-1]), "", "")

@cached_property
def raw_name(self) -> str:
Expand Down Expand Up @@ -998,13 +992,13 @@ def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL":

parsed.reverse()
if not netloc or not needs_normalize:
return self._from_parts(self._scheme, netloc, "/".join(parsed), "", "")
return from_parts(self._scheme, netloc, "/".join(parsed), "", "")

path = "/".join(normalize_path_segments(parsed))
# If normalizing the path segments removed the leading slash, add it back.
if path and path[0] != "/":
path = f"/{path}"
return self._from_parts(self._scheme, netloc, path, "", "")
return from_parts(self._scheme, netloc, path, "", "")

def with_scheme(self, scheme: str) -> "URL":
"""Return a new URL with scheme replaced."""
Expand All @@ -1019,9 +1013,7 @@ def with_scheme(self, scheme: str) -> "URL":
f"relative URLs for the {lower_scheme} scheme"
)
raise ValueError(msg)
return self._from_parts(
lower_scheme, netloc, self._path, self._query, self._fragment
)
return from_parts(lower_scheme, netloc, self._path, self._query, self._fragment)

def with_user(self, user: Union[str, None]) -> "URL":
"""Return a new URL with user replaced.
Expand All @@ -1043,9 +1035,7 @@ def with_user(self, user: Union[str, None]) -> "URL":
raise ValueError("user replacement is not allowed for relative URLs")
encoded_host = self.host_subcomponent or ""
netloc = make_netloc(user, password, encoded_host, self.explicit_port)
return self._from_parts(
self._scheme, netloc, self._path, self._query, self._fragment
)
return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)

def with_password(self, password: Union[str, None]) -> "URL":
"""Return a new URL with password replaced.
Expand All @@ -1067,9 +1057,7 @@ def with_password(self, password: Union[str, None]) -> "URL":
encoded_host = self.host_subcomponent or ""
port = self.explicit_port
netloc = make_netloc(self.raw_user, password, encoded_host, port)
return self._from_parts(
self._scheme, netloc, self._path, self._query, self._fragment
)
return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)

def with_host(self, host: str) -> "URL":
"""Return a new URL with host replaced.
Expand All @@ -1090,9 +1078,7 @@ def with_host(self, host: str) -> "URL":
encoded_host = _encode_host(host, validate_host=True) if host else ""
port = self.explicit_port
netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port)
return self._from_parts(
self._scheme, netloc, self._path, self._query, self._fragment
)
return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)

def with_port(self, port: Union[int, None]) -> "URL":
"""Return a new URL with port replaced.
Expand All @@ -1110,9 +1096,7 @@ def with_port(self, port: Union[int, None]) -> "URL":
raise ValueError("port replacement is not allowed for relative URLs")
encoded_host = self.host_subcomponent or ""
netloc = make_netloc(self.raw_user, self.raw_password, encoded_host, port)
return self._from_parts(
self._scheme, netloc, self._path, self._query, self._fragment
)
return from_parts(self._scheme, netloc, self._path, self._query, self._fragment)

def with_path(
self,
Expand All @@ -1132,7 +1116,7 @@ def with_path(
path = f"/{path}"
query = self._query if keep_query else ""
fragment = self._fragment if keep_fragment else ""
return self._from_parts(self._scheme, netloc, path, query, fragment)
return from_parts(self._scheme, netloc, path, query, fragment)

@overload
def with_query(self, query: Query) -> "URL": ...
Expand All @@ -1155,9 +1139,7 @@ def with_query(self, *args: Any, **kwargs: Any) -> "URL":
"""
# N.B. doesn't cleanup query/fragment
query = get_str_query(*args, **kwargs) or ""
return self._from_parts(
self._scheme, self._netloc, self._path, query, self._fragment
)
return from_parts(self._scheme, self._netloc, self._path, query, self._fragment)

@overload
def extend_query(self, query: Query) -> "URL": ...
Expand All @@ -1183,9 +1165,7 @@ def extend_query(self, *args: Any, **kwargs: Any) -> "URL":
query += new_query if query[-1] == "&" else f"&{new_query}"
else:
query = new_query
return self._from_parts(
self._scheme, self._netloc, self._path, query, self._fragment
)
return from_parts(self._scheme, self._netloc, self._path, query, self._fragment)

@overload
def update_query(self, query: Query) -> "URL": ...
Expand Down Expand Up @@ -1242,9 +1222,7 @@ def update_query(self, *args: Any, **kwargs: Any) -> "URL":
"Invalid query type: only str, mapping or "
"sequence of (key, value) pairs is allowed"
)
return self._from_parts(
self._scheme, self._netloc, self._path, query, self._fragment
)
return from_parts(self._scheme, self._netloc, self._path, query, self._fragment)

def without_query_params(self, *query_params: str) -> "URL":
"""Remove some keys from query part and return new URL."""
Expand Down Expand Up @@ -1276,7 +1254,7 @@ def with_fragment(self, fragment: Union[str, None]) -> "URL":
raw_fragment = FRAGMENT_QUOTER(fragment)
if self._fragment == raw_fragment:
return self
return self._from_parts(
return from_parts(
self._scheme, self._netloc, self._path, self._query, raw_fragment
)

Expand Down Expand Up @@ -1316,7 +1294,7 @@ def with_name(

query = self._query if keep_query else ""
fragment = self._fragment if keep_fragment else ""
return self._from_parts(self._scheme, netloc, "/".join(parts), query, fragment)
return from_parts(self._scheme, netloc, "/".join(parts), query, fragment)

def with_suffix(
self,
Expand Down Expand Up @@ -1364,9 +1342,7 @@ def join(self, url: "URL") -> "URL":

# scheme is in uses_authority as uses_authority is a superset of uses_relative
if (join_netloc := url._netloc) and scheme in USES_AUTHORITY:
return self._from_parts(
scheme, join_netloc, url._path, url._query, url._fragment
)
return from_parts(scheme, join_netloc, url._path, url._query, url._fragment)

orig_path = self._path
if join_path := url._path:
Expand All @@ -1389,16 +1365,13 @@ def join(self, url: "URL") -> "URL":
else:
path = orig_path

new_url = object.__new__(URL)
new_url._scheme = scheme
new_url._netloc = self._netloc
new_url._path = path
new_url._query = url._query if join_path or url._query else self._query
new_url._fragment = (
url._fragment if join_path or url._fragment else self._fragment
return from_parts(
scheme,
self._netloc,
path,
url._query if join_path or url._query else self._query,
url._fragment if join_path or url._fragment else self._fragment,
)
new_url._cache = {}
return new_url

def joinpath(self, *other: str, encoded: bool = False) -> "URL":
"""Return a new URL with the elements in other appended to the path."""
Expand Down
Loading