Skip to content

Commit

Permalink
refactor: Utilize ELASTIC_VOLUME from .env for dynamic path construction
Browse files Browse the repository at this point in the history
  • Loading branch information
xmnlab committed Jun 11, 2024
1 parent eb8bfd2 commit c64b611
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 7 deletions.
8 changes: 4 additions & 4 deletions .makim.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ groups:
run: |
set -e
index_data_dir="containers/elasticsearch/data"
mkdir -p "${index_data_dir}"
index_data_dir="/opt/services/data/elasticsearch"
download_dir="${index_data_dir}/rxivx/${{ args.server_name }}/downloaded"

download_dir="data/rxivx/${{ args.server_name }}/downloaded"
mkdir -p "${download_dir}"
last_date=$(date -d "yesterday" '+%Y-%m-%d')
current_date=$(date -d "yesterday" '+%Y-%m-%d')
output_filename="${{ args.server_name }}_${last_date}_${current_date}.json"
Expand Down
Empty file removed data/.gitkeep
Empty file.
12 changes: 11 additions & 1 deletion scripts/fetch_rxivx_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,17 @@ activate_path="$(dirname "$(dirname "$conda_path")")/bin/activate"
source "$activate_path" es-journals

# Get the current working directory

path_root=$(pwd)

# Load environment variables (e.g. ELASTIC_VOLUME) from the .env file in the
# current working directory. The path is quoted so that a directory name
# containing spaces or glob characters cannot split or expand the argument.
if [ -f "${path_root}/.env" ]; then
    source "${path_root}/.env"
else
    # Abort early: the download path below is built from values in .env.
    echo ".env file not found"
    exit 1
fi

# Check if the server name was provided
server="$1"

Expand All @@ -26,7 +35,8 @@ if [ -z "$server" ]; then
exit 1
fi

downloaded_path="${path_root}/data/rxivx/${server}/downloaded"
# Use the ELASTIC_VOLUME variable. Fail fast when it is unset or empty:
# `dirname ""` prints ".", which would silently redirect downloads to
# ./rxivx/... relative to the current directory instead of reporting the
# misconfiguration.
if [ -z "${ELASTIC_VOLUME:-}" ]; then
    echo "ELASTIC_VOLUME environment variable is not set."
    exit 1
fi
downloaded_path="$(dirname "$ELASTIC_VOLUME")/rxivx/${server}/downloaded"

# Ensure the directory exists
if [ ! -d "${downloaded_path}" ]; then
Expand Down
11 changes: 9 additions & 2 deletions scripts/index_arxiv_to_es.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,15 @@ def find_rxiv_path(index_name: str) -> Path:
FileNotFoundError
If no files are found for the given pattern.
"""
base_dir = Path(__file__).resolve().parent.parent
pattern = f"data/rxivx/{index_name}/downloaded/{index_name}_*.json"

# Retrieve the base directory from the environment variable
elastic_volume = os.getenv("ELASTIC_VOLUME")
if not elastic_volume:
raise EnvironmentError("ELASTIC_VOLUME environment variable is not set.")

# Construct the path to the downloaded files
base_dir = Path(elastic_volume).parent / "rxivx" / index_name / "downloaded"
pattern = f"{index_name}_*.json"
files = list(base_dir.glob(pattern))

if not files:
Expand Down

0 comments on commit c64b611

Please sign in to comment.