Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added rtd preview action #157

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/pull-request-links.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: readthedocs/actions
on:
pull_request_target:
types:
- opened

permissions:
pull-requests: write

jobs:
pull-request-links:
runs-on: ubuntu-latest
steps:
- uses: readthedocs/actions/preview@v1
with:
project-slug: "cleanvision"
1 change: 1 addition & 0 deletions docs/source/cleanvision/issue_managers/index.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
Issue Managers
==============
Contains modules for managing data issues of a particular type in Imagelab.

.. automodule:: cleanvision.issue_managers
:autosummary:
Expand Down
10 changes: 5 additions & 5 deletions examples/custom_issue_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ class CustomIssueManager(IssueManager):

def __init__(self) -> None:
super().__init__()
self.params = self.get_default_params()
self.params = self._get_default_params()

def get_default_params(self) -> Dict[str, Any]:
def _get_default_params(self) -> Dict[str, Any]:
return {"threshold": 0.4}

def update_params(self, params: Dict[str, Any]) -> None:
self.params = self.get_default_params()
def _update_params(self, params: Dict[str, Any]) -> None:
self.params = self._get_default_params()
non_none_params = {k: v for k, v in params.items() if v is not None}
self.params = {**self.params, **non_none_params}

Expand Down Expand Up @@ -65,7 +65,7 @@ def find_issues(
assert imagelab_info is not None
assert dataset is not None

self.update_params(params)
self._update_params(params)

raw_scores = []
for idx in tqdm(dataset.index):
Expand Down
31 changes: 27 additions & 4 deletions src/cleanvision/issue_managers/duplicate_issue_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,23 @@ def compute_hash_wrapper(args: Dict[str, Any]) -> Dict[str, Union[str, int]]:

@register_issue_manager(DUPLICATE)
class DuplicateIssueManager(IssueManager):
"""Checks for exact and near duplicates in images."""

issue_name: str = DUPLICATE
visualization: str = "image_sets"

def __init__(self) -> None:
super().__init__()
self.issue_types: List[str] = []
self.params = self.get_default_params()
self.params = self._get_default_params()

def get_default_params(self) -> Dict[str, Any]:
def _get_default_params(self) -> Dict[str, Any]:
return {
IssueType.EXACT_DUPLICATES.value: {"hash_type": "md5"},
IssueType.NEAR_DUPLICATES.value: {"hash_type": "phash", "hash_size": 8},
}

def update_params(self, params: Dict[str, Any]) -> None:
def _update_params(self, params: Dict[str, Any]) -> None:
for issue_type in self.params:
non_none_params = {
k: v for k, v in params.get(issue_type, {}).items() if v is not None
Expand Down Expand Up @@ -102,13 +104,34 @@ def find_issues(
n_jobs: Optional[int] = None,
**kwargs: Any,
) -> None:
"""Finds exact and near duplicates in the images.

Parameters
----------
params: Dict[str, Any], optional
Dict of custom hyperparameters for checking duplicate issues. Default value is empty.
dataset: Dataset
Dataset object on which to run the duplicate checks
imagelab_info: Dict[str, Any]
imagelab.info dict containing computations for reuse
n_jobs: int
Number of processing threads used by multiprocessing.
The default, None, uses the number of cores on your CPU (physical cores if you have the psutil package installed, otherwise logical cores).
Set this to 1 to disable parallel processing (if it's causing issues). Windows users may see a speed-up with n_jobs=1.
For :py:class:`TorchDataset` this is set to 1.
kwargs: Any

Returns
-------

"""
super().find_issues(**kwargs)
assert params is not None
assert imagelab_info is not None
assert dataset is not None

self.issue_types = list(params.keys())
self.update_params(params)
self._update_params(params)

to_compute = self._get_issue_types_to_compute(self.issue_types, imagelab_info)
issue_type_hash_mapping: Dict[str, Any] = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ class ImagePropertyIssueManager(IssueManager):
def __init__(self) -> None:
super().__init__()
self.issue_types: List[str] = []
self.params = self.get_default_params()
self.params = self._get_default_params()
self.image_properties = self._get_image_properties()

def get_default_params(self) -> Dict[str, Any]:
def _get_default_params(self) -> Dict[str, Any]:
return {
IssueType.DARK.value: {"threshold": 0.37},
IssueType.LIGHT.value: {"threshold": 0.05},
Expand All @@ -70,7 +70,7 @@ def get_default_params(self) -> Dict[str, Any]:
IssueType.GRAYSCALE.value: {},
}

def update_params(self, params: Dict[str, Any]) -> None:
def _update_params(self, params: Dict[str, Any]) -> None:
for issue_type in self.params:
non_none_params = {
k: v for k, v in params.get(issue_type, {}).items() if v is not None
Expand Down Expand Up @@ -132,7 +132,7 @@ def find_issues(
additional_set = self._get_additional_to_compute_set(self.issue_types)
self.issue_types = self.issue_types + additional_set

self.update_params(params)
self._update_params(params)

agg_computations = pd.DataFrame(index=dataset.index)
agg_computations = self._add_prev_computations(agg_computations, imagelab_info)
Expand Down
4 changes: 2 additions & 2 deletions src/cleanvision/utils/base_issue_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ def find_issues(self, **kwargs: Any) -> None:
return

@abstractmethod
def get_default_params(self) -> Dict[str, Any]:
def _get_default_params(self) -> Dict[str, Any]:
"""Returns default params to be used by the issue_manager"""
raise NotImplementedError

@abstractmethod
def update_params(self, params: Dict[str, Any]) -> None:
def _update_params(self, params: Dict[str, Any]) -> None:
"""Sets params for an issue manager. Default params will be overridden by user provided params"""
raise NotImplementedError

Expand Down
2 changes: 1 addition & 1 deletion tests/test_duplicate_issue_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_set_params(self, params, expected_params, issue_manager):
1. If no params are specified for an issue_type, default params are used
2. If params are specified, those specific params are updated, for the remaining ones default values are used
"""
issue_manager.update_params(params)
issue_manager._update_params(params)
assert issue_manager.params == expected_params

@pytest.mark.parametrize(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_image_property_issue_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_set_params(self, params, expected_params, issue_manager):
issue_manager: instance of ImagePropertyIssueManager

"""
issue_manager.update_params(params)
issue_manager._update_params(params)
assert issue_manager.params == expected_params

@pytest.mark.parametrize(
Expand Down