Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bugfix: Update project structure and Enhance specifications and configs settings #13

Merged
merged 8 commits into from
May 22, 2024
Merged
9 changes: 6 additions & 3 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,13 @@ jobs:
environment-file: conda/base.yaml
channels: conda-forge,nodefaults
channel-priority: true
activate-environment: rxiv-rest-api
activate-environment: es-journals
use-mamba: true
miniforge-variant: Mambaforge

- name: Set Environment Variable
run: echo "GITHUB_PAT=${{ secrets.DEVTOOLS_PAT }}" >> $GITHUB_ENV

- name: Setup development environment for Rxivs
run: |
for rxiv_name in biorxiv medrxiv; do
Expand All @@ -59,8 +62,8 @@ jobs:
run: |
sugar ext start --group dev --options -d

- name: Copy and test certificates
run: bash .github/ci/healthcheck.sh es 80 && makim develop.test-certs
# - name: Copy and test certificates
# run: bash .github/ci/healthcheck.sh es 80 && makim develop.test-certs

- name: Fetch and process MedRxiv data
run: |
Expand Down
57 changes: 37 additions & 20 deletions .makim.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: 1.0
groups:
containers:
targets:
tasks:
get-ip:
help: Get the IP from a container-ID
args:
Expand All @@ -13,10 +13,10 @@ groups:
run: |
docker inspect \
-f {% raw -%}"{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}"{%- endraw %} \
{{ args.container_id }}
${{ args.container_id }}

develop:
targets:
tasks:
test-certs:
env-file: .env
help: Generate and copy certs
Expand All @@ -25,7 +25,7 @@ groups:
mkdir -p containers/esconfig/certs
if docker cp es:/usr/share/elasticsearch/config/certs/http_ca.crt ./containers/esconfig/certs/; then
echo "Certificate copied successfully."
curl --cacert ./containers/esconfig/certs/http_ca.crt -u elastic:{{ env.ES_PASSWORD }} https://localhost:9200/
curl --cacert ./containers/esconfig/certs/http_ca.crt -u elastic:${{ env.ES_PASSWORD }} https://localhost:9200/
else
echo "Error: Could not find the certificate in the container."
fi
Expand All @@ -40,7 +40,7 @@ groups:
required: true
shell: bash
run: |
python scripts/es_index_doc_counter.py {{ args.index_name }}
python scripts/es_index_doc_counter.py ${{ args.index_name }}

setup-dev-env:
help: Setup development environment and prepare .env file if it doesn't exist.
Expand All @@ -56,15 +56,15 @@ groups:
index_data_dir="containers/elasticsearch/data"
mkdir -p "${index_data_dir}"

download_dir="data/rxivx/{{ args.server_name }}/downloaded"
download_dir="data/rxivx/${{ args.server_name }}/downloaded"
mkdir -p "${download_dir}"

last_date=$(date -d "yesterday" '+%Y-%m-%d')
current_date=$(date -d "yesterday" '+%Y-%m-%d')
output_filename="{{ args.server_name }}_${last_date}_${current_date}.json"
output_filename="${{ args.server_name }}_${last_date}_${current_date}.json"
touch "${download_dir}/${output_filename}"
ls -la ${download_dir}/${output_filename}
echo "Prepared development environment for server: {{ args.server_name }}"
echo "Prepared development environment for server: ${{ args.server_name }}"

if [ -f ".env" ]; then
echo ".env file already exists. Exiting without modifying environment variables."
Expand All @@ -79,7 +79,7 @@ groups:
fi

scheduler:
targets:
tasks:
download-rxivr:
help: |
Download data from BioRxiv/MedRxiv API within a specified date range.
Expand All @@ -102,29 +102,46 @@ groups:
required: true
shell: Rscript
run: |
library(devtools)

# Check if the package is installed
is_package_installed <- function(pkg) {
pkg %in% rownames(installed.packages())
}

package_name <- "medrxivr"
repo <- "esloch/medrxivr@fix-numeric-type"

if (!is_package_installed(package_name)) {
install_github(repo, dependencies=TRUE)
message("Package installed successfully.")
} else {
message("Package is already installed.")
}

library(medrxivr)
library(jsonlite)

print("Starting download data from: {{ args.server }}")
print("Starting download data from: ${{ args.server }}")

biorxiv_data <- mx_api_content(
server = '{{ args.server }}',
from_date = "{{ args.begin }}",
to_date = "{{ args.end }}"
server = '${{ args.server }}',
from_date = "${{ args.begin }}",
to_date = "${{ args.end }}"
)

json_data <- toJSON(biorxiv_data, pretty = TRUE)

json_file_path <- "{{ args.target }}{{ args.server }}_{{ args.begin }}_{{ args.end }}.json"
json_file_path <- "${{ args.target }}${{ args.server }}_${{ args.begin }}_${{ args.end }}.json"
writeLines(json_data, json_file_path)

cat("Data object stored to:", "{{ args.server }}", "json file \n")
cat("Data object stored to:", "${{ args.server }}", "json file \n")

setup-cron:
help: Setup cron jobs for the develop user
shell: bash
run: |
CRON_PATH="/opt/services/literev-elasticsearch/scripts/cronjobs"
CRON_PATH="/opt/services/es-journals/scripts/cronjobs"

# Check if cronjob file exists
if [ ! -f "$CRON_PATH" ]; then
Expand All @@ -138,7 +155,7 @@ groups:
echo "Cron jobs for LiteRev Elasticsearch have been set up successfully for user develop."

clean:
targets:
tasks:
all:
help: Clean unnecessary temporary files
run: |
Expand All @@ -158,7 +175,7 @@ groups:
rm -fr .ruff_cache

docs:
targets:
tasks:
build:
help: Build documentation
run: |
Expand All @@ -167,12 +184,12 @@ groups:
preview:
help: Preview documentation page locally
dependencies:
- target: docs.build
- task: docs.build
run: |
mkdocs build --config-file docs/mkdocs.yaml

tests:
targets:
tasks:
linter:
help: Run linter tools
run: |
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
A service for rXiv REST API, such as biorxiv and medrxiv

* Free software: BSD 3 Clause
* Documentation: https://osl-incubator.github.io/rxiv-rest-api
* Documentation: https://osl-incubator.github.io/es-journals

## Features

Expand Down
2 changes: 1 addition & 1 deletion conda/base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ dependencies:
- python >=3.8.1,<4
- pip
- poetry
- r-medrxivr
- r-base
- r-devtools
- shellcheck
- typer
- pip:
Expand Down
35 changes: 24 additions & 11 deletions containers/compose.elasticsearch.dev.yaml
Original file line number Diff line number Diff line change
@@ -1,21 +1,34 @@
version: "3.9"

services:
es:
hostname: es
container_name: es
image: docker.elastic.co/elasticsearch/elasticsearch:8.12.0
user: "1000:1000" # Set the UID:GID to run the container
container_name: es
user: "1000:1000"
env_file:
../.env
environment:
- discovery.type=single-node
- node.name=es
- discovery.type=single-node
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
- bootstrap.memory_lock=false
- ELASTIC_PASSWORD=${ES_PASSWORD}
- bootstrap.memory_lock=true
- xpack.security.enabled=false
- xpack.security.enrollment.enabled=false
ulimits:
memlock:
soft: -1
hard: -1
volumes:
- esdata:/usr/share/elasticsearch/data
ports:
- 9200:9200
healthcheck:
test: ["CMD-SHELL", "curl -s -k ${ES_HOSTNAME} | grep -q 'missing authentication credentials'"]
networks:
- elastic


volumes:
esdata:
driver: local
name: es_data

networks:
elastic:
driver: bridge
name: elastic_dev_xnet
19 changes: 19 additions & 0 deletions containers/compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
services:
es-dev:
hostname: es-dev
container_name: es-dev
image: docker.elastic.co/elasticsearch/elasticsearch:8.12.0
user: "1000:1000" # Set the UID:GID to run the container
env_file:
../.env
environment:
- discovery.type=single-node
- node.name=es-dev
- ELASTIC_PASSWORD=${ES_PASSWORD}
- bootstrap.memory_lock=true
- xpack.security.enabled=false
- xpack.security.enrollment.enabled=false
ports:
- 9200:9200
healthcheck:
test: ["CMD-SHELL", "curl -s -k ${ES_HOSTNAME} | grep -q 'missing authentication credentials'"]
10 changes: 5 additions & 5 deletions docs/contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,20 @@ If you are proposing a feature:

## Get Started!

Ready to contribute? Here’s how to set up `rxiv-rest-api` for local development.
Ready to contribute? Here’s how to set up `es-journals` for local development.

1. Fork the `rxiv-rest-api` repo on GitHub.
1. Fork the `es-journals` repo on GitHub.

2. Clone your fork locally::

$ git clone git@github.com:your_name_here/rxiv-rest-api.git
$ git clone git@github.com:your_name_here/es-journals.git

3. Install your local copy into a virtualenv. Assuming you have
virtualenvwrapper installed, this is how you set up your fork for
local development::

$ mkvirtualenv rxiv-rest-api
$ cd rxiv-rest-api/
$ mkvirtualenv es-journals
$ cd es-journals/
$ python setup.py develop

4. Create a branch for local development::
Expand Down
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
A service for rXiv REST API, such as biorxiv and medrxiv

* License: BSD 3 Clause
* Documentation: https://rxiv-rest-api.github.io
* Documentation: https://es-journals.github.io

## Features

Expand Down
2 changes: 1 addition & 1 deletion docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ To install rXiv REST API, run this command in your
terminal:

```bash
$ pip install rxiv-rest-api
$ pip install es-journals
```

This is the preferred method to install rXiv REST API,
Expand Down
2 changes: 1 addition & 1 deletion docs/mkdocs.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
site_name: rXiv REST API
site_url: https://osl-incubator.github.io/rxiv-rest-api
site_url: https://osl-incubator.github.io/es-journals
repo_url: https://github.com/xmnlab/rxiv-restapi.git
docs_dir: ./
site_dir: ../build
Expand Down
4 changes: 2 additions & 2 deletions scripts/cronjobs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ SHELL=/bin/bash

# m h dom mon dow commandid
# “At 02:00.”
0 2 * * * . ~/.bashrc && cd /opt/services/literev-elasticsearch/ && bash /opt/services/literev-elasticsearch/scripts/fetch_rxivx_data.sh medrxiv >> /tmp/medrxiv_cronjob.log 2>&1
0 2 * * * . ~/.bashrc && cd /opt/services/es-journals/ && bash /opt/services/es-journals/scripts/fetch_rxivx_data.sh medrxiv >> /tmp/medrxiv_cronjob.log 2>&1
# “At 02:30.”
30 2 * * * . ~/.bashrc && cd /opt/services/literev-elasticsearch/ && bash /opt/services/literev-elasticsearch/scripts/fetch_rxivx_data.sh biorxiv >> /tmp/biorxiv_cronjob.log 2>&1
30 2 * * * . ~/.bashrc && cd /opt/services/es-journals/ && bash /opt/services/es-journals/scripts/fetch_rxivx_data.sh biorxiv >> /tmp/biorxiv_cronjob.log 2>&1
2 changes: 1 addition & 1 deletion scripts/es_index_doc_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def create_es_client(
Elasticsearch
An instance of Elasticsearch client configured with the given credentials.
"""
return Elasticsearch([host], basic_auth=(username, password), ca_certs=ca_certs)
return Elasticsearch([host], basic_auth=(username, password), verify_certs=False)


def get_total_documents_in_index(es: Elasticsearch, index_name: str) -> int:
Expand Down
2 changes: 1 addition & 1 deletion scripts/fetch_rxivx_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ fi

# Activate the Python environment
activate_path="$(dirname "$(dirname "$conda_path")")/bin/activate"
source "$activate_path" rxiv-rest-api
source "$activate_path" es-journals

# Get the current working directory
path_root=$(pwd)
Expand Down
2 changes: 1 addition & 1 deletion scripts/index_arxiv_to_es.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def create_es_client(host: str, username: str, password: str, ca_certs: str) ->
extra_args = {}
if ca_certs:
extra_args["ca_certs"] = ca_certs
return Elasticsearch([host], basic_auth=(username, password), **extra_args)
return Elasticsearch([host], basic_auth=(username, password), verify_certs=False, **extra_args)

def generate_document_id(doc: dict) -> str:
"""
Expand Down
Loading