From 175c469d535f2c902b7546cda7adc3bf5ba84978 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Fri, 1 Nov 2024 17:52:39 +0000 Subject: [PATCH 1/7] adds in_zephir_for_long_enough method to Item --- aim/digifeeds/item.py | 23 +++++++++++++++++++++++ tests/digifeeds/test_item.py | 26 ++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/aim/digifeeds/item.py b/aim/digifeeds/item.py index b961970..4ded915 100644 --- a/aim/digifeeds/item.py +++ b/aim/digifeeds/item.py @@ -1,3 +1,6 @@ +from datetime import datetime, timedelta + + class Item: """A Digifeeds Item @@ -34,3 +37,23 @@ def barcode(self) -> str: str: The barcode. """ return self.data["barcode"] + + @property + def in_zephir_for_long_enough(self) -> bool: + waiting_period = 14 # days + in_zephir_status = next( + ( + status + for status in self.data["statuses"] + if status["name"] == "in_zephir" + ), + None, + ) + if in_zephir_status is None: + return False + + created_at = datetime.fromisoformat(in_zephir_status["created_at"]) + if created_at < (datetime.now() - timedelta(days=waiting_period)): + return True + else: + return False diff --git a/tests/digifeeds/test_item.py b/tests/digifeeds/test_item.py index c392401..1975f99 100644 --- a/tests/digifeeds/test_item.py +++ b/tests/digifeeds/test_item.py @@ -1,5 +1,6 @@ import pytest import json +from datetime import datetime, timedelta from aim.digifeeds.item import Item @@ -18,3 +19,28 @@ def test_has_status_is_true(item_data): def test_has_status_is_false(item_data): result = Item(item_data).has_status("in_zephir") assert result is False + + +def test_in_zephir_for_long_enough_is_true(item_data): + item_data["statuses"][0]["name"] = "in_zephir" + over_two_weeks_ago = datetime.now() - timedelta(days=15) + item_data["statuses"][0]["created_at"] = over_two_weeks_ago.isoformat( + timespec="seconds" + ) + result = Item(item_data).in_zephir_for_long_enough + assert result is True + + +def test_in_zephir_for_long_enough_is_false(item_data): + item_data["statuses"][0]["name"] = "in_zephir" + less_than_two_weeks_ago = datetime.now() - timedelta(days=13) + item_data["statuses"][0]["created_at"] = less_than_two_weeks_ago.isoformat( + timespec="seconds" + ) + result = Item(item_data).in_zephir_for_long_enough + assert result is False + + +def test_in_zephir_for_long_enough_when_not_in_zephir(item_data): + result = Item(item_data).in_zephir_for_long_enough + assert result is False From 7a922fe23d0e1964245d299faeadb81c646c7ddb Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Wed, 6 Nov 2024 19:06:24 +0000 Subject: [PATCH 2/7] adds move_to_pickup function using rclone --- .config/rclone/rclone.conf.example | 16 ++++++ .gitignore | 4 +- Dockerfile | 37 ++++++------ aim/digifeeds/move_to_pickup.py | 35 ++++++++++++ aim/services.py | 18 ++++++ compose.yml | 17 ++---- docs/api/aim.digifeeds.check_zephir.rst | 7 +++ docs/api/aim.digifeeds.move_to_pickup.rst | 7 +++ docs/api/aim.digifeeds.rst | 2 + init.sh | 20 ++++--- poetry.lock | 16 +++++- pyproject.toml | 1 + tests/digifeeds/test_move_to_pickup.py | 69 +++++++++++++++++++++++ 13 files changed, 208 insertions(+), 41 deletions(-) create mode 100644 .config/rclone/rclone.conf.example create mode 100644 aim/digifeeds/move_to_pickup.py create mode 100644 docs/api/aim.digifeeds.check_zephir.rst create mode 100644 docs/api/aim.digifeeds.move_to_pickup.rst create mode 100644 tests/digifeeds/test_move_to_pickup.py diff --git a/.config/rclone/rclone.conf.example b/.config/rclone/rclone.conf.example new file mode 100644 index 0000000..9275310 
--- /dev/null +++ b/.config/rclone/rclone.conf.example @@ -0,0 +1,16 @@ +[digifeeds_gdrive] +type = drive +client_id = YOUR_CLIENT_ID +scope = drive +service_account_file = /conf/digifeeds_gdrive_credentials.json +root_folder_id = YOUR_ROOT_FOLDER_ID + +[digifeeds_s3] +type = s3 +provider = AWS +access_key_id = YOUR_ACCESS_KEY +secret_access_key = YOUR_SECRET_ACCESS_KEY + +[digifeeds_bucket] +type = alias +remote = digifeeds_s3:YOUR_BUCKET_NAME \ No newline at end of file diff --git a/.gitignore b/.gitignore index 61b6a30..660825f 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,6 @@ htmlcov/ requirements.txt docs/_build -bin/digifeeds/*.config \ No newline at end of file +bin/digifeeds/*.config +.config/rclone/rclone.conf +.config/rclone/*.json diff --git a/Dockerfile b/Dockerfile index 7385197..2811a57 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,6 +29,7 @@ RUN apt-get update -yqq && apt-get install -yqq --no-install-recommends \ build-essential \ pkg-config \ default-mysql-client \ + rclone\ vim-tiny # Set the working directory to /app @@ -46,17 +47,17 @@ RUN pip install poetry==${POETRY_VERSION} # Use this page as a reference for python and poetry environment variables: https://docs.python.org/3/using/cmdline.html#envvar-PYTHONUNBUFFERED # Ensure the stdout and stderr streams are sent straight to terminal, then you can see the output of your application ENV PYTHONUNBUFFERED=1\ - # Avoid the generation of .pyc files during package install - # Disable pip's cache, then reduce the size of the image - PIP_NO_CACHE_DIR=off \ - # Save runtime because it is not look for updating pip version - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - # Disable poetry interaction - POETRY_NO_INTERACTION=1 \ - POETRY_VIRTUALENVS_CREATE=1 \ - POETRY_VIRTUALENVS_IN_PROJECT=1 \ - POETRY_CACHE_DIR=/tmp/poetry_cache + # Avoid the generation of .pyc files during package install + # Disable pip's cache, then reduce the size of the image + PIP_NO_CACHE_DIR=off \ + # Save runtime because it is not look for updating pip version + PIP_DISABLE_PIP_VERSION_CHECK=on \ + PIP_DEFAULT_TIMEOUT=100 \ + # Disable poetry interaction + POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_CREATE=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_CACHE_DIR=/tmp/poetry_cache FROM poetry AS build # Just copy the files needed to install the dependencies @@ -68,13 +69,13 @@ RUN poetry export --without dev -f requirements.txt --output requirements.txt # We want poetry on in development FROM poetry AS development RUN apt-get update -yqq && apt-get install -yqq --no-install-recommends \ - git \ - bats \ - bats-assert \ - bats-file\ - wget\ - zip\ - unzip + git \ + bats \ + bats-assert \ + bats-file\ + wget\ + zip\ + unzip RUN wget -P /opt/ https://github.com/boschresearch/shellmock/releases/download/0.9.1/shellmock.bash && \ chown ${UID}:${GID} /opt/shellmock.bash diff --git a/aim/digifeeds/move_to_pickup.py b/aim/digifeeds/move_to_pickup.py new file mode 100644 index 0000000..64dddf3 --- /dev/null +++ b/aim/digifeeds/move_to_pickup.py @@ -0,0 +1,35 @@ +from rclone_python import rclone +from aim.digifeeds.item import Item +from aim.digifeeds.db_client import DBClient +from aim.services import S +from datetime import datetime + +print(rclone.is_installed()) + + +def move_to_pickup(barcode: str): + raw_item = DBClient().get_item(barcode) + if raw_item is None: + raise Exception("Item not found in database") + + item = Item(raw_item) + + if not item.in_zephir_for_long_enough: + return None + + 
DBClient().add_item_status(barcode=barcode, status="copying_start") + rclone.copyto( + f"{S.digifeeds_s3_rclone_remote}:{S.digifeeds_s3_input_path}/{barcode}.zip", + f"{S.digifeeds_gdrive_rclone_remote}:{barcode}.zip", + ) + DBClient().add_item_status(barcode=barcode, status="copying_end") + timestamp = datetime.now().strftime("%F_%H-%M-%S") + rclone.moveto( + f"{S.digifeeds_s3_rclone_remote}:{S.digifeeds_s3_input_path}/{barcode}.zip", + f"{S.digifeeds_s3_rclone_remote}:{S.digifeeds_s3_processed_path}/{timestamp}_{barcode}.zip", + ) + final_raw_item = DBClient().add_item_status( + barcode=barcode, status="pending_deletion" + ) + + return final_raw_item diff --git a/aim/services.py b/aim/services.py index 8cb2e78..5c5d530 100644 --- a/aim/services.py +++ b/aim/services.py @@ -43,6 +43,17 @@ class Services(NamedTuple): #: The zephir item bib api zephir_bib_api_url: str + #: The url in the s3 bucket for processed barcodes + digifeeds_s3_processed_path: str + + #: The name of the rclone remote/bucket alias for the s3 input bucket + digifeeds_s3_rclone_remote: str + + #: The name of the google drive rclone remote where google picks up items + digifeeds_gdrive_rclone_remote: str + + #: The path to the temporary/scratch directory + tmp_dir: str S = Services( @@ -67,4 +78,11 @@ class Services(NamedTuple): digifeeds_s3_input_path=os.getenv("DIGIFEEDS_S3_INPUT_PATH") or "path_to_input_barcodes", zephir_bib_api_url="http://zephir.cdlib.org/api/item", + digifeeds_s3_processed_path=os.getenv("DIGIFEEDS_S3_PROCESSED_PATH") + or "path_to_processed_barcodes", + digifeeds_s3_rclone_remote=os.getenv("DIGIFEEDS_S3_RCLONE_REMOTE") + or "digifeeds_bucket", + digifeeds_gdrive_rclone_remote=os.getenv("DIGIFEEDS_GDRIVE_RCLONE_REMOTE") + or "digifeeds_gdrive", + tmp_dir="/app/tmp", ) diff --git a/compose.yml b/compose.yml index fe0a109..3a74686 100644 --- a/compose.yml +++ b/compose.yml @@ -1,6 +1,6 @@ services: app: - build: + build: context: . target: development dockerfile: Dockerfile @@ -30,7 +30,7 @@ services: - MARIADB_DATABASE=database api: - build: + build: context: . target: development dockerfile: Dockerfile @@ -51,7 +51,7 @@ services: command: "poetry run uvicorn aim.digifeeds.database.main:app --host 0.0.0.0 --reload" docs: - build: + build: context: . target: development dockerfile: Dockerfile @@ -69,16 +69,7 @@ services: - .:/app tty: true stdin_open: true - command: [ - "poetry", "run", "sphinx-autobuild", - "--watch", "aim", - "--port", "8888", - "--host", "0.0.0.0", - "--pre-build", "/app/prebuild.sh", - "docs", "docs/_build/html" - ] - - + command: [ "poetry", "run", "sphinx-autobuild", "--watch", "aim", "--port", "8888", "--host", "0.0.0.0", "--pre-build", "/app/prebuild.sh", "docs", "docs/_build/html" ] volumes: database: diff --git a/docs/api/aim.digifeeds.check_zephir.rst b/docs/api/aim.digifeeds.check_zephir.rst new file mode 100644 index 0000000..5beaa2a --- /dev/null +++ b/docs/api/aim.digifeeds.check_zephir.rst @@ -0,0 +1,7 @@ +aim.digifeeds.check\_zephir module +================================== + +.. automodule:: aim.digifeeds.check_zephir + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/aim.digifeeds.move_to_pickup.rst b/docs/api/aim.digifeeds.move_to_pickup.rst new file mode 100644 index 0000000..8a5ccbd --- /dev/null +++ b/docs/api/aim.digifeeds.move_to_pickup.rst @@ -0,0 +1,7 @@ +aim.digifeeds.move\_to\_pickup module +===================================== + +.. 
automodule:: aim.digifeeds.move_to_pickup
+    :members:
+    :undoc-members:
+    :show-inheritance:

diff --git a/docs/api/aim.digifeeds.rst b/docs/api/aim.digifeeds.rst
index f5efb84..9380920 100644
--- a/docs/api/aim.digifeeds.rst
+++ b/docs/api/aim.digifeeds.rst
@@ -22,6 +22,8 @@ Submodules

    aim.digifeeds.add_to_db
    aim.digifeeds.alma_client
+   aim.digifeeds.check_zephir
    aim.digifeeds.db_client
    aim.digifeeds.item
    aim.digifeeds.list_barcodes_in_bucket
+   aim.digifeeds.move_to_pickup

diff --git a/init.sh b/init.sh
index 69523e8..2e0767a 100755
--- a/init.sh
+++ b/init.sh
@@ -5,11 +5,18 @@ if [ -f ".env" ]; then
 else
     echo "🌎 .env does not exist. Copying .env-example to .env"
     cp env.example .env
-    YOUR_UID=`id -u`
-    YOUR_GID=`id -g`
-    echo "🙂 Setting your UID ($YOUR_UID) and GID ($YOUR_UID) in .env"
-    docker run --rm -v ./.env:/.env alpine echo "$(sed s/YOUR_UID/$YOUR_UID/ .env)" > .env
-    docker run --rm -v ./.env:/.env alpine echo "$(sed s/YOUR_GID/$YOUR_GID/ .env)" > .env
+    YOUR_UID=$(id -u)
+    YOUR_GID=$(id -g)
+    echo "🙂 Setting your UID (${YOUR_UID}) and GID (${YOUR_GID}) in .env"
+    docker run --rm -v ./.env:/.env alpine echo "$(sed s/YOUR_UID/${YOUR_UID}/ .env)" >.env
+    docker run --rm -v ./.env:/.env alpine echo "$(sed s/YOUR_GID/${YOUR_GID}/ .env)" >.env
+fi
+
+if [ -f ".config/rclone/rclone.conf" ]; then
+    echo "📋 .config/rclone/rclone.conf exists. Leaving alone"
+else
+    echo "📋 .config/rclone/rclone.conf does not exist. Copying .config/rclone/rclone.conf.example to .config/rclone/rclone.conf"
+    cp .config/rclone/rclone.conf.example .config/rclone/rclone.conf
 fi

 echo "🚢 Build docker images"
@@ -24,6 +31,3 @@ docker compose run --rm app sh -c "cd aim/digifeeds/database && poetry run alemb

 echo "🗄️ Load statuses"
 docker compose run --rm app sh -c "poetry run python aim/digifeeds/bin/load_statuses.py"
-
-
-

diff --git a/poetry.lock b/poetry.lock
index 703beb1..ad3edf6 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1689,6 +1689,20 @@ files = [
     {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
 ]

+[[package]]
+name = "rclone-python"
+version = "0.1.12"
+description = "A python wrapper for rclone."
+optional = false +python-versions = ">=3.6" +files = [ + {file = "rclone-python-0.1.12.tar.gz", hash = "sha256:92a1d21c5e225574db438908a42522eff97e467f0bfeebd2007c83723a834a8b"}, + {file = "rclone_python-0.1.12-py3-none-any.whl", hash = "sha256:09a2779831684ec49e7a99201f4bf5cc2eff5871ffed48b0925e6b492b27bb54"}, +] + +[package.dependencies] +rich = "*" + [[package]] name = "requests" version = "2.32.3" @@ -2700,4 +2714,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "9f1a0d6ba054bb0e68ce11698f69f120b663045e7ea5a9b37f47cb1e3a7ce0f7" +content-hash = "043a661dfac1a774264112597553529e933c6ac6521d3450ba6b0fbbb5b81231" diff --git a/pyproject.toml b/pyproject.toml index b503eaf..04d3077 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ httpx = "^0.27.2" alembic = "^1.13.2" typer = "^0.12.5" boto3 = "^1.35.28" +rclone-python = "^0.1.12" [tool.poetry.group.dev.dependencies] diff --git a/tests/digifeeds/test_move_to_pickup.py b/tests/digifeeds/test_move_to_pickup.py new file mode 100644 index 0000000..5b69657 --- /dev/null +++ b/tests/digifeeds/test_move_to_pickup.py @@ -0,0 +1,69 @@ +from aim.digifeeds.move_to_pickup import move_to_pickup, rclone, DBClient +import json +import pytest +from datetime import datetime + + +@pytest.fixture +def item_data(): + with open("tests/fixtures/digifeeds/item.json") as f: + output = json.load(f) + return output + + +@pytest.fixture +def item_in_zephir_for_long_enough(item_data): + item_data["statuses"][0]["name"] = "in_zephir" + return item_data + + +@pytest.fixture +def item_in_zephir_too_recent(item_in_zephir_for_long_enough): + item_in_zephir_for_long_enough["statuses"][0]["created_at"] = ( + datetime.now().isoformat(timespec="seconds") + ) + return item_in_zephir_for_long_enough + + +def test_move_to_pickup_success(mocker, item_in_zephir_for_long_enough): + rclone_copyto_mock = mocker.patch.object(rclone, "copyto") + rclone_moveto_mock = mocker.patch.object(rclone, "moveto") + get_item_mock = mocker.patch.object( + DBClient, + "get_item", + return_value=item_in_zephir_for_long_enough, + ) + add_status_mock = mocker.patch.object( + DBClient, + "add_item_status", + return_value=item_in_zephir_for_long_enough, + ) + + result = move_to_pickup(item_in_zephir_for_long_enough["barcode"]) + + get_item_mock.assert_called_once() + rclone_copyto_mock.assert_called_once() + rclone_moveto_mock.assert_called_once() + assert add_status_mock.call_count == 3 + assert result is not None + + +def test_move_to_pickup_no_item(mocker): + get_item_mock = mocker.patch.object(DBClient, "get_item", return_value=None) + with pytest.raises(Exception) as exc_info: + move_to_pickup("some_barcode") + + get_item_mock.assert_called_once() + assert str(exc_info.value) == "Item not found in database" + + +def test_move_to_pickup_item_too_recent(mocker, item_in_zephir_too_recent): + get_item_mock = mocker.patch.object( + DBClient, + "get_item", + return_value=item_in_zephir_too_recent, + ) + result = move_to_pickup(item_in_zephir_too_recent["barcode"]) + + get_item_mock.assert_called_once() + assert result is None From c7c9b952d1fd2dd80ec051259e847566d1207b9b Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Wed, 6 Nov 2024 21:42:42 +0000 Subject: [PATCH 3/7] changed the item_statuses database to have an id --- .../bbb334fc10e7_creating_initial_tables.py | 65 ++++++++++++------- aim/digifeeds/database/models.py | 56 +++++++++------- 2 files changed, 76 insertions(+), 45 deletions(-) diff --git 
a/aim/digifeeds/database/migrations/versions/bbb334fc10e7_creating_initial_tables.py b/aim/digifeeds/database/migrations/versions/bbb334fc10e7_creating_initial_tables.py index 7da929e..3edd00f 100644 --- a/aim/digifeeds/database/migrations/versions/bbb334fc10e7_creating_initial_tables.py +++ b/aim/digifeeds/database/migrations/versions/bbb334fc10e7_creating_initial_tables.py @@ -1,10 +1,11 @@ """Creating initial tables Revision ID: bbb334fc10e7 -Revises: +Revises: Create Date: 2024-09-19 18:37:32.652676 """ + from typing import Sequence, Union from alembic import op @@ -12,7 +13,7 @@ # revision identifiers, used by Alembic. -revision: str = 'bbb334fc10e7' +revision: str = "bbb334fc10e7" down_revision: Union[str, None] = None branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None @@ -20,32 +21,52 @@ def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.create_table('items', - sa.Column('barcode', sa.String(length=256), nullable=False), - sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False), - sa.PrimaryKeyConstraint('barcode'), - sa.UniqueConstraint('barcode') + op.create_table( + "items", + sa.Column("barcode", sa.String(length=256), nullable=False), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.PrimaryKeyConstraint("barcode"), + sa.UniqueConstraint("barcode"), ) - op.create_table('statuses', - sa.Column('id', sa.Integer(), autoincrement=True, nullable=False), - sa.Column('name', sa.String(length=256), nullable=False), - sa.Column('description', sa.String(length=499), nullable=False), - sa.PrimaryKeyConstraint('id') + op.create_table( + "statuses", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("name", sa.String(length=256), nullable=False), + sa.Column("description", sa.String(length=499), nullable=False), + sa.PrimaryKeyConstraint("id"), ) - op.create_table('item_statuses', - sa.Column('item_barcode', sa.String(length=256), nullable=False), - sa.Column('status_id', sa.Integer(), nullable=False), - sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False), - sa.ForeignKeyConstraint(['item_barcode'], ['items.barcode'], ), - sa.ForeignKeyConstraint(['status_id'], ['statuses.id'], ), - sa.PrimaryKeyConstraint('item_barcode', 'status_id') + op.create_table( + "item_statuses", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("item_barcode", sa.String(length=256), nullable=False), + sa.Column("status_id", sa.Integer(), nullable=False), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["item_barcode"], + ["items.barcode"], + ), + sa.ForeignKeyConstraint( + ["status_id"], + ["statuses.id"], + ), + sa.PrimaryKeyConstraint("id"), ) # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table('item_statuses') - op.drop_table('statuses') - op.drop_table('items') + op.drop_table("item_statuses") + op.drop_table("statuses") + op.drop_table("items") # ### end Alembic commands ### diff --git a/aim/digifeeds/database/models.py b/aim/digifeeds/database/models.py index b69850a..74ae6d3 100644 --- a/aim/digifeeds/database/models.py +++ b/aim/digifeeds/database/models.py @@ -1,6 +1,6 @@ """ Digifeeds Models -================ +================ """ from sqlalchemy import String, ForeignKey, DateTime @@ -15,35 +15,35 @@ class Base(DeclarativeBase): class Item(Base): - __tablename__ = 'items' + __tablename__ = "items" - barcode: Mapped[str] = mapped_column( - String(256), unique=True, primary_key=True) + barcode: Mapped[str] = mapped_column(String(256), unique=True, primary_key=True) created_at: Mapped[datetime.datetime] = mapped_column( - DateTime(timezone=True), server_default=func.now()) + DateTime(timezone=True), server_default=func.now() + ) statuses: Mapped[list["ItemStatus"]] = relationship() class Status(Base): - __tablename__ = 'statuses' - id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + __tablename__ = "statuses" + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) name: Mapped[str] = mapped_column(String(256)) - description: Mapped[str] = mapped_column(String(499)) + description: Mapped[str] = mapped_column(String(499)) items: Mapped[list["ItemStatus"]] = relationship() def __repr__(self): - return (f'Status(id={self.id}, name={self.name}, description={self.description})') + return f"Status(id={self.id}, name={self.name}, description={self.description})" class ItemStatus(Base): - __tablename__ = 'item_statuses' - item_barcode: Mapped[int] = mapped_column( - ForeignKey('items.barcode'), primary_key=True) - status_id: Mapped[int] = mapped_column( - ForeignKey('statuses.id'), primary_key=True) + __tablename__ = "item_statuses" + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + item_barcode: Mapped[int] = mapped_column(ForeignKey("items.barcode")) + status_id: Mapped[int] = mapped_column(ForeignKey("statuses.id")) # https://docs.sqlalchemy.org/en/20/core/functions.html#sqlalchemy.sql.functions.now Tell the db to set the date. 
created_at: Mapped[datetime.datetime] = mapped_column(
-        DateTime(timezone=True), server_default=func.now())
+        DateTime(timezone=True), server_default=func.now()
+    )

     item: Mapped["Item"] = relationship(back_populates="statuses")
     status: Mapped["Status"] = relationship(back_populates="items")

@@ -51,19 +51,29 @@ class ItemStatus(Base):
     # proxies
     status_name = association_proxy(target_collection="status", attr="name")
     status_description = association_proxy(
-        target_collection="status", attr="description")
+        target_collection="status", attr="description"
+    )


 def load_statuses(session: Session):
     statuses = [
         {"name": "in_zephir", "description": "Item is in zephir"},
-        {"name": "added_to_digifeeds_set",
-         "description": "Item has been added to the digifeeds set"},
-        {"name": "copying_start",
-         "description": "The process for zipping and copying an item to the pickup location has started"},
-        {"name": "copying_end", "description": "The process for zipping and copying an item to the pickup location has completed successfully"},
-        {"name": "pending_deletion",
-         "description": "The item has been copied to the pickup location and can be deleted upon ingest confirmation"},
+        {
+            "name": "added_to_digifeeds_set",
+            "description": "Item has been added to the digifeeds set",
+        },
+        {
+            "name": "copying_start",
+            "description": "The process for zipping and copying an item to the pickup location has started",
+        },
+        {
+            "name": "copying_end",
+            "description": "The process for zipping and copying an item to the pickup location has completed successfully",
+        },
+        {
+            "name": "pending_deletion",
+            "description": "The item has been copied to the pickup location and can be deleted upon ingest confirmation",
+        },
         {"name": "not_found_in_alma", "description": "Barcode wasn't found in Alma"},
     ]
     objects = []

From ad85243d5109b52afbe9c8cb85bd14b691fb2105 Mon Sep 17 00:00:00 2001
From: Monique Rio
Date: Thu, 7 Nov 2024 19:11:34 +0000
Subject: [PATCH 4/7] adds cli command for move_to_pickup

---
 aim/cli/digifeeds.py        | 21 +++++++++++++++++++++
 tests/cli/test_digifeeds.py | 26 ++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/aim/cli/digifeeds.py b/aim/cli/digifeeds.py
index a4649aa..2bbffdc 100644
--- a/aim/cli/digifeeds.py
+++ b/aim/cli/digifeeds.py
@@ -7,6 +7,7 @@
 from aim.digifeeds.add_to_db import add_to_db as add_to_digifeeds_db
 from aim.digifeeds.list_barcodes_in_bucket import list_barcodes_in_bucket
 from aim.digifeeds.check_zephir import check_zephir as check_zephir_for_barcode
+from aim.digifeeds.move_to_pickup import move_to_pickup as move_volume_to_pickup
 from aim.digifeeds.database import models, main
 import json
 import sys
@@ -78,3 +79,23 @@ def list_barcodes_in_input_bucket():
     """
     List the barcodes currently in the input directory in the S3 bucket.
     """
     json.dump(list_barcodes_in_bucket(), sys.stdout)
+
+
+@app.command()
+def move_to_pickup(
+    barcode: Annotated[
+        str,
+        typer.Argument(help="The barcode of the volume to move to pickup"),
+    ],
+):
+    """
+    Moves the zipped volume from the s3 bucket to the google drive folder for
+    pickup from google. When it's finished, the volume is moved to the processed
+    folder in the bucket and prefixed with the date and time.
+ """ + print(f'Moving barcode "{barcode}" from the s3 bucket to the google drive') + item = move_volume_to_pickup(barcode) + if item is None: + print("Item has not been in zephir long enough") + else: + print("Item has been successfully moved to pickup") diff --git a/tests/cli/test_digifeeds.py b/tests/cli/test_digifeeds.py index 6bf8f0d..4e729cc 100644 --- a/tests/cli/test_digifeeds.py +++ b/tests/cli/test_digifeeds.py @@ -6,6 +6,7 @@ from aim.cli.main import app from aim.services import S from aim.digifeeds.item import Item +import aim.cli.digifeeds as digifeeds_cli runner = CliRunner() @@ -113,3 +114,28 @@ def test_check_zephir_for_item_when_item_is_not_in_zephir(item_data): assert add_item_status.call_count == 0 assert result.exit_code == 0 assert "some_barcode is NOT in Zephir" in result.stdout + + +def test_move_to_pickup_success(mocker, item_data): + item = Item(item_data) + move_volume_to_pickup_mock = mocker.patch.object( + digifeeds_cli, "move_volume_to_pickup", return_value=item + ) + + result = runner.invoke(app, ["digifeeds", "move-to-pickup", "some_barcode"]) + + move_volume_to_pickup_mock.assert_called_once() + assert "Item has been successfully moved to pickup" in result.stdout + assert result.exit_code == 0 + + +def test_move_to_pickup_where_not_in_zephir(mocker): + move_volume_to_pickup_mock = mocker.patch.object( + digifeeds_cli, "move_volume_to_pickup", return_value=None + ) + + result = runner.invoke(app, ["digifeeds", "move-to-pickup", "some_barcode"]) + + move_volume_to_pickup_mock.assert_called_once() + assert "Item has not been in zephir long enough" in result.stdout + assert result.exit_code == 0 From c0e526f639e82161d1b2dba943eb3775294de4b7 Mon Sep 17 00:00:00 2001 From: Monique Rio Date: Thu, 7 Nov 2024 19:25:35 +0000 Subject: [PATCH 5/7] markdown linting and add rclone config setup instructions --- README.md | 99 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index f05b8c2..c248881 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# AIM-py +# AIM-py AIM's python code repository @@ -6,35 +6,43 @@ AIM's python code repository 1. Clone the repo -``` -git clone https://github.com/mlibrary/aim-py.git -cd aim-py -``` + ```bash + git clone https://github.com/mlibrary/aim-py.git + cd aim-py + ``` -2. In the terminal, run the `init.sh` -``` -./init.sh -``` -This will: +2. In the terminal, run the `init.sh` -* set up the initial environment variables file -* build the docker image -* install the python dependencies -* Set up the database for digifeeds + ```bash + ./init.sh + ``` -`./init.sh` can be run more than once. + This will: + * set up the initial environment variables file + * set up the rclone config with the example file + * build the docker image + * install the python dependencies + * Set up the database for digifeeds + + `./init.sh` can be run more than once. 3. Edit `.env` with actual environment variables -4. If using VSCode for editing, the repository is set up for use with dev containers. You will have to rebuild the container in there. +4. Edit `.config/rclone/rclone.conf` with your actual values -5. In the app container, use `poetry shell` to enable the virtual environment. Otherwise use: -``` +5. If using VSCode for editing, the repository is set up for use with dev containers. You will have to rebuild the container in there. + +6. In the app container, use `poetry shell` to enable the virtual environment. 
Otherwise use:
+
+```bash
docker compose run --rm app poetry run YOUR_COMMAND
```
+
## Structure
+
The codebase has the following high-level structure:
-```
+
+```text
.
├── aim/
│   ├── cli/
│   │   ├── __init__.py
│   │   ├── main.py
│   │   └── my_project.py
│   ├── my_project/
│   │   ├── __init__.py
│   │   ├── thing_one.py
│   │   └── thing_two.py
│   └── services.py
├── tests/
│   ├── fixtures/
│   │   └── my_project/
│   │       └── fixture_for_thing_one.json
│   ├── my_project/
│   │   ├── test_thing_one.py
│   │   └── test_thing_two.py
└── conftest.py
```

`aim` is the directory where all of the business logic lives. Every directory and subdirectory within `aim` has an `__init__.py` file so that python imports work properly.

In this example there is an application/product/project called `my_project`. `my_project` has several subdirectories and files with related code.

One is `aim/my_project`. That's where the application code lives. Code within `my_project` can be arranged however makes sense for the project. Further subdirectories within `my_project` are fine if they help make the project code easier to think about and work with.

If the application is a subcommand for the AIM cli then code related to the cli goes in `cli/my_project`.

`aim/services.py` has configuration objects. It provides an object called `S` that has things like environment variable values or database connection strings. Configuration for all projects goes in here. Use `my_project_` as a prefix if there are concerns about name collisions. `S` is a `NamedTuple` so that these objects show up in code completion in IDEs like vscode.

Tests go in `tests/my_project`. Ideally the folder and file structure in `tests/my_project` should mirror `aim/my_project`. This way it's easy to figure out where relevant tests should live. Prefix test files with `test_` so that `pytest` picks up the tests. If there are fixture files for your tests, put them in `fixtures/my_project`. This should make it easier to tell what tests the fixtures are for. As with the code in `aim` every folder in tests except for `fixtures` needs to have an `__init__.py` file.

`tests/conftest.py` has test configuration that's available for all tests for all projects. For now it has code to handle setting up the `digifeeds` database and its API for tests.

## Projects

### Digifeeds

Digifeeds code is in the `aim/digifeeds` folder. The `database` folder has the code for the database and its web API.

#### Database
+
To run database migrations:
-```
+
+```bash
cd aim/digifeeds/database
alembic upgrade heads
```
+
The alembic migrations live in the `aim/digifeeds/database/migrations` folder.

#### Web API for the Database
+
The docker compose `api` service runs the application on port 8000.
Assuming docker compose is up for the `aim-py` repository, in the browser go to:
-http://localhost:8000/docs to work with the API.
+<http://localhost:8000/docs> to work with the API.

#### CLI

The digifeeds CLI is in the file `aim/cli/digifeeds.py`. It has a mix of database operations and application operations.

To use the CLI, on the command line run:
-```
+
+```bash
docker compose run --rm app poetry run aim digifeeds --help
```
+
This will show the commands available for the digifeeds cli application.

## Tests
+
To run tests:
-```
+
+```bash
docker compose run --rm app poetry run pytest
```

### Connecting to the internet is blocked for tests
+
We are using `pytest-socket` to block actual http requests in tests. To mock http requests, use the `responses` library. Don't forget to put the `@responses.activate` decorator above tests that use `responses`.

-Blocking requests occurs because in `pyproject.toml` we've set `pytest` to run with the `--disable-socket` option. The `--allow-unix-socket` option allows connection to our test database.
+Blocking requests occurs because in `pyproject.toml` we've set `pytest` to run with the `--disable-socket` option. The `--allow-unix-socket` option allows connection to our test database.

### Mocking objects
+
`pytest-mock` is included in the project, so the `mocker` fixture is available in all tests.

### Test Coverage
+
`pytest-cov` is used for test coverage information. On every run of `pytest` there's a summary of coverage in the terminal, and an html report in the folder `htmlcov`. This is configured with the following `pytest` options in `pyproject.toml`: `--cov=aim --cov-report=html --cov-report=term:skip-covered`

### Using the `digifeeds` database

-`tests/conftest.py` sets up a couple of `pytest` fixtures for working with the `digifeeds` database.
-One is `db_session` which provides a `sqlalchemy` database session object. You can commit changes in the session and they will only last for the duration of thests.
+`tests/conftest.py` sets up a couple of `pytest` fixtures for working with the `digifeeds` database.

-The other is `client`, which provides a `fastapi` `TestClient` that knows about the `db_session` fixture.
+One is `db_session` which provides a `sqlalchemy` database session object. You can commit changes in the session and they will only last for the duration of the tests.
+
+The other is `client`, which provides a `fastapi` `TestClient` that knows about the `db_session` fixture.

### CLI tests
+
The `typer` `CliRunner` works without special modification. This is a good place to put in some integration tests since this is the entrypoint for using the application. That said, it's ok to mock out things like database calls.

## Documentation

-Documentation lives in the `/docs` directory.
+Documentation lives in the `/docs` directory.

-[Sphinx](https://www.sphinx-doc.org) is used to generate the documentation website.
+[Sphinx](https://www.sphinx-doc.org) is used to generate the documentation website.

The [documentation site](https://mlibrary.github.io/aim-py/) is built with a Github Action on each push to main.

We are using [Google style docstrings](https://google.github.io/styleguide/pyguide.html#s3.8-comments-and-docstrings).
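For a concrete point of reference, a Google-style docstring looks roughly like the sketch below. It is patterned on `Item.has_status`, which the tests in this series exercise; the body shown is illustrative, not the repo's actual implementation:

```python
def has_status(self, status: str) -> bool:
    """Checks whether the item has the given status.

    Args:
        status (str): The status name to look for, e.g. "in_zephir".

    Returns:
        bool: True if the item has the status, False otherwise.
    """
    # The item's raw data carries a list of status dicts, each with a "name".
    return any(s["name"] == status for s in self.data["statuses"])
```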
-In development the documentation should build automatically and be available at http://localhost:8888/
+In development the documentation should build automatically and be available at <http://localhost:8888/>

## Deployment

### Production Docker image
+
The production Docker image of `aim-py` uses `poetry` to generate a `requirements.txt` file of the dependencies necessary to run the application in production. In a separate step that `requirements.txt` file is copied into the container and then installed with `pip`.

That means the project is used differently in production than in development. In development you need to run `poetry shell` to enable the virtual environment. If you have the virtual environment activated you can run commands like `aim --help` because `pyproject.toml` knows about the `aim` cli.

In production, you do not need to enable a virtual environment because all of the dependencies are installed globally in the image. To run the cli you need to run `python -m aim --help` to get the same help menu.

### Github Actions Workflows

From 36d9f77945f4fbb205056bd908f9cff8a05f1fed Mon Sep 17 00:00:00 2001
From: Monique Rio
Date: Fri, 8 Nov 2024 14:54:12 -0500
Subject: [PATCH 6/7] remove print rclone statement

---
 aim/digifeeds/move_to_pickup.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/aim/digifeeds/move_to_pickup.py b/aim/digifeeds/move_to_pickup.py
index 64dddf3..d3a6376 100644
--- a/aim/digifeeds/move_to_pickup.py
+++ b/aim/digifeeds/move_to_pickup.py
@@ -4,8 +4,6 @@
 from aim.services import S
 from datetime import datetime

-print(rclone.is_installed())
-

 def move_to_pickup(barcode: str):
     raw_item = DBClient().get_item(barcode)

From cfb2275a24f1c9b28c681e8a22ea20e8cbb5ff17 Mon Sep 17 00:00:00 2001
From: Monique Rio
Date: Fri, 8 Nov 2024 19:58:50 +0000
Subject: [PATCH 7/7] removed tmp dir because we don't need it

---
 aim/services.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/aim/services.py b/aim/services.py
index 5c5d530..9caea7f 100644
--- a/aim/services.py
+++ b/aim/services.py
@@ -52,9 +52,6 @@ class Services(NamedTuple):
     #: The name of the google drive rclone remote where google picks up items
     digifeeds_gdrive_rclone_remote: str

-    #: The path to the temporary/scratch directory
-    tmp_dir: str
-

 S = Services(
     mysql_database=sa.engine.URL.create(
     digifeeds_gdrive_rclone_remote=os.getenv("DIGIFEEDS_GDRIVE_RCLONE_REMOTE")
     or "digifeeds_gdrive",
-    tmp_dir="/app/tmp",
 )
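Taken together, the series leaves `move_to_pickup` callable from Python as well as from the CLI. A minimal usage sketch, assuming the rclone remotes from `rclone.conf.example` and the `DIGIFEEDS_*` environment variables are configured; the barcode below is a placeholder, not a real item:

```python
from aim.digifeeds.move_to_pickup import move_to_pickup

barcode = "39015012345678"  # placeholder barcode

# Raises if the barcode isn't in the digifeeds database; returns None when the
# item's "in_zephir" status is newer than the 14-day waiting period.
item = move_to_pickup(barcode)
if item is None:
    print(f"{barcode} has not been in zephir long enough")
else:
    # The item now carries copying_start, copying_end, and pending_deletion
    # statuses, and the zip has moved to the processed path in the bucket.
    print(f"{barcode} has been moved to pickup")
```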