thegraphnetwork-literev · xmnlab · May 22, 2024 · May 21, 2024 · May 22, 2024 · May 22, 2024
diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
@@ -45,10 +45,13 @@ jobs:
         environment-file: conda/base.yaml
         channels: conda-forge,nodefaults
         channel-priority: true
-        activate-environment: rxiv-rest-api
+        activate-environment: es-journals
         use-mamba: true
         miniforge-variant: Mambaforge
 
+    - name: Set Environment Variable
+      run: echo "GITHUB_PAT=${{ secrets.DEVTOOLS_PAT }}" >> "$GITHUB_ENV"
+
     - name: Setup development environment for Rxivs
       run: |
         for rxiv_name in biorxiv medrxiv; do
@@ -59,8 +62,8 @@ jobs:
       run: |
         sugar ext start --group dev --options -d
 
-    - name: Copy and test certificates
-      run: bash .github/ci/healthcheck.sh es 80 && makim develop.test-certs
+    # - name: Copy and test certificates
+    #   run: bash .github/ci/healthcheck.sh es 80 && makim develop.test-certs
 
     - name: Fetch and process MedRxiv data
       run: |       

diff --git a/.makim.yaml b/.makim.yaml
@@ -1,7 +1,7 @@
 version: 1.0
 groups:
   containers:
-    targets:
+    tasks:
       get-ip:
         help: Get the IP from a container-ID
         args:
@@ -13,10 +13,10 @@ groups:
         run: |
           docker inspect \
             -f {% raw -%}"{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}"{%- endraw %} \
-            {{ args.container_id }}
+            ${{ args.container_id }}
 
   develop:
-    targets:
+    tasks:
       test-certs:
         env-file: .env
         help: Generate and copy certs
@@ -25,7 +25,7 @@ groups:
           mkdir -p containers/esconfig/certs
           if docker cp es:/usr/share/elasticsearch/config/certs/http_ca.crt ./containers/esconfig/certs/; then
             echo "Certificate copied successfully."
-            curl --cacert ./containers/esconfig/certs/http_ca.crt -u elastic:{{ env.ES_PASSWORD }} https://localhost:9200/
+            curl --cacert ./containers/esconfig/certs/http_ca.crt -u elastic:${{ env.ES_PASSWORD }} https://localhost:9200/
           else
             echo "Error: Could not find the certificate in the container."
           fi
@@ -40,7 +40,7 @@ groups:
             required: true      
         shell: bash
         run: |
-          python scripts/es_index_doc_counter.py {{ args.index_name }}
+          python scripts/es_index_doc_counter.py ${{ args.index_name }}
 
       setup-dev-env:
         help: Setup development environment and prepare .env file if it doesn't exist.
@@ -56,15 +56,15 @@ groups:
           index_data_dir="containers/elasticsearch/data"
           mkdir -p "${index_data_dir}"
 
-          download_dir="data/rxivx/{{ args.server_name }}/downloaded"
+          download_dir="data/rxivx/${{ args.server_name }}/downloaded"
           mkdir -p "${download_dir}"
 
           last_date=$(date -d "yesterday" '+%Y-%m-%d')
           current_date=$(date -d "yesterday" '+%Y-%m-%d')
-          output_filename="{{ args.server_name }}_${last_date}_${current_date}.json"
+          output_filename="${{ args.server_name }}_${last_date}_${current_date}.json"
           touch "${download_dir}/${output_filename}"
           ls -la ${download_dir}/${output_filename}
-          echo "Prepared development environment for server: {{ args.server_name }}"
+          echo "Prepared development environment for server: ${{ args.server_name }}"
 
           if [ -f ".env" ]; then
               echo ".env file already exists. Exiting without modifying environment variables."
@@ -79,7 +79,7 @@ groups:
           fi
 
   scheduler:
-    targets:
+    tasks:
       download-rxivr:
         help: |
           Download data from BioRxiv/MedRxiv API within a specified date range.  
@@ -102,29 +102,46 @@ groups:
             required: true
         shell: Rscript
         run: |
+          library(devtools)
+
+          # Check if the package is installed
+          is_package_installed <- function(pkg) {
+            pkg %in% rownames(installed.packages())
+          }
+
+          package_name <- "medrxivr"
+          repo <- "esloch/medrxivr@fix-numeric-type"
+
+          if (!is_package_installed(package_name)) {
+            install_github(repo, dependencies=TRUE)
+            message("Package installed successfully.")
+          } else {
+            message("Package is already installed.")
+          }
+
           library(medrxivr)
           library(jsonlite)
 
-          print("Starting download data from: {{ args.server }}")
+          print("Starting download data from: ${{ args.server }}")
 
           biorxiv_data <- mx_api_content(
-            server = '{{ args.server }}',
-            from_date = "{{ args.begin }}",
-            to_date = "{{ args.end }}"
+            server = '${{ args.server }}',
+            from_date = "${{ args.begin }}",
+            to_date = "${{ args.end }}"
           )
 
           json_data <- toJSON(biorxiv_data, pretty = TRUE)
 
-          json_file_path <- "{{ args.target }}{{ args.server }}_{{ args.begin }}_{{ args.end }}.json"
+          json_file_path <- "${{ args.target }}${{ args.server }}_${{ args.begin }}_${{ args.end }}.json"
           writeLines(json_data, json_file_path)
 
-          cat("Data object stored to:", "{{ args.server }}", "json file \n")
+          cat("Data object stored to:", "${{ args.server }}", "json file \n")
 
       setup-cron:
         help: Setup cron jobs for the develop user
         shell: bash
         run: |
-          CRON_PATH="/opt/services/literev-elasticsearch/scripts/cronjobs"
+          CRON_PATH="/opt/services/es-journals/scripts/cronjobs"
 
           # Check if cronjob file exists
           if [ ! -f "$CRON_PATH" ]; then
@@ -138,7 +155,7 @@ groups:
           echo "Cron jobs for LiteRev Elasticsearch have been set up successfully for user develop."
 
   clean:
-    targets:
+    tasks:
       all:
         help: Clean unnecessary temporary files
         run: |
@@ -158,7 +175,7 @@ groups:
           rm -fr .ruff_cache
 
   docs:
-    targets:
+    tasks:
       build:
         help: Build documentation
         run: |
@@ -167,12 +184,12 @@ groups:
       preview:
         help: Preview documentation page locally
         dependencies:
-          - target: docs.build
+          - task: docs.build
         run: |
           mkdocs build --config-file docs/mkdocs.yaml
 
   tests:
-    targets:
+    tasks:
       linter:
         help: Run linter tools
         run: |

diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 A service for rXiv REST API, such as biorxiv and medrxiv
 
 * Free software: BSD 3 Clause
-* Documentation: https://osl-incubator.github.io/rxiv-rest-api
+* Documentation: https://osl-incubator.github.io/es-journals
 
 ## Features
 

diff --git a/conda/base.yaml b/conda/base.yaml
@@ -9,8 +9,8 @@ dependencies:
   - python >=3.8.1,<4
   - pip
   - poetry
-  - r-medrxivr
   - r-base
+  - r-devtools
   - shellcheck
   - typer
   - pip:

diff --git a/containers/compose.elasticsearch.dev.yaml b/containers/compose.elasticsearch.dev.yaml
@@ -1,21 +1,34 @@
-version: "3.9"
-
 services:
   es:
-    hostname: es
-    container_name: es
     image: docker.elastic.co/elasticsearch/elasticsearch:8.12.0
-    user: "1000:1000"  # Set the UID:GID to run the container
+    container_name: es
+    user: "1000:1000"
     env_file:
       ../.env
     environment:
-      - discovery.type=single-node
       - node.name=es
+      - discovery.type=single-node
+      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
+      - bootstrap.memory_lock=false
       - ELASTIC_PASSWORD=${ES_PASSWORD}
-      - bootstrap.memory_lock=true
-      - xpack.security.enabled=false
-      - xpack.security.enrollment.enabled=false
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    volumes:
+      - esdata:/usr/share/elasticsearch/data
     ports:
       - 9200:9200
-    healthcheck:
-      test: ["CMD-SHELL", "curl -s -k ${ES_HOSTNAME} | grep -q 'missing authentication credentials'"]
+    networks:
+      - elastic
+
+
+volumes:
+  esdata:
+    driver: local
+    name: es_data
+
+networks:
+  elastic:
+    driver: bridge
+    name: elastic_dev_xnet
diff --git a/containers/compose.yaml b/containers/compose.yaml
@@ -0,0 +1,19 @@
+services:
+  es-dev:
+    hostname: es-dev
+    container_name: es-dev
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.12.0
+    user: "1000:1000"  # Set the UID:GID to run the container
+    env_file:
+      ../.env
+    environment:
+      - discovery.type=single-node
+      - node.name=es-dev
+      - ELASTIC_PASSWORD=${ES_PASSWORD}
+      - bootstrap.memory_lock=true
+      - xpack.security.enabled=false  # HTTP API answers without credentials
+      - xpack.security.enrollment.enabled=false
+    ports:
+      - "9200:9200"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -s -k ${ES_HOSTNAME} | grep -q 'cluster_name'"]  # security is off, so there is no auth error to match
diff --git a/docs/contributing.md b/docs/contributing.md
@@ -66,20 +66,20 @@ If you are proposing a feature:
 
 ## Get Started!
 
-Ready to contribute? Here’s how to set up `rxiv-rest-api` for local development.
+Ready to contribute? Here’s how to set up `es-journals` for local development.
 
-1.  Fork the `rxiv-rest-api` repo on GitHub.
+1.  Fork the `es-journals` repo on GitHub.
 
 2.  Clone your fork locally::
 
-    $ git clone git@github.com:your_name_here/rxiv-rest-api.git
+    $ git clone git@github.com:your_name_here/es-journals.git
 
 3.  Install your local copy into a virtualenv. Assuming you have
     virtualenvwrapper installed, this is how you set up your fork for
     local development::
 
-    $ mkvirtualenv rxiv-rest-api
-    $ cd rxiv-rest-api/
+    $ mkvirtualenv es-journals
+    $ cd es-journals/
     $ python setup.py develop
 
 4.  Create a branch for local development::

diff --git a/docs/index.md b/docs/index.md
@@ -5,7 +5,7 @@
 A service for rXiv REST API, such as biorxiv and medrxiv
 
 * License: BSD 3 Clause
-* Documentation: https://rxiv-rest-api.github.io
+* Documentation: https://osl-incubator.github.io/es-journals
 
 ## Features
 

diff --git a/docs/installation.md b/docs/installation.md
@@ -6,7 +6,7 @@ To install rXiv REST API, run this command in your
 terminal:
 
 ```bash
-$ pip install rxiv-rest-api
+$ pip install es-journals
 ```
 
 This is the preferred method to install rXiv REST API,

diff --git a/docs/mkdocs.yaml b/docs/mkdocs.yaml
@@ -1,5 +1,5 @@
 site_name: rXiv REST API
-site_url: https://osl-incubator.github.io/rxiv-rest-api
+site_url: https://osl-incubator.github.io/es-journals
 repo_url: https://github.com/xmnlab/rxiv-restapi.git
 docs_dir: ./
 site_dir: ../build

diff --git a/scripts/cronjobs b/scripts/cronjobs
@@ -2,6 +2,6 @@ SHELL=/bin/bash
 
 # m h  dom mon dow   commandid
 # “At 02:00.”
-0 2 * * * . ~/.bashrc && cd /opt/services/literev-elasticsearch/ && bash /opt/services/literev-elasticsearch/scripts/fetch_rxivx_data.sh medrxiv  >> /tmp/medrxiv_cronjob.log 2>&1
+0 2 * * * . ~/.bashrc && cd /opt/services/es-journals/ && bash /opt/services/es-journals/scripts/fetch_rxivx_data.sh medrxiv  >> /tmp/medrxiv_cronjob.log 2>&1
 # “At 02:30.”
-30 2 * * * . ~/.bashrc && cd /opt/services/literev-elasticsearch/ && bash /opt/services/literev-elasticsearch/scripts/fetch_rxivx_data.sh biorxiv  >> /tmp/biorxiv_cronjob.log 2>&1
+30 2 * * * . ~/.bashrc && cd /opt/services/es-journals/ && bash /opt/services/es-journals/scripts/fetch_rxivx_data.sh biorxiv  >> /tmp/biorxiv_cronjob.log 2>&1
diff --git a/scripts/es_index_doc_counter.py b/scripts/es_index_doc_counter.py
@@ -38,7 +38,7 @@ def create_es_client(
     Elasticsearch
         An instance of Elasticsearch client configured with the given credentials.
     """
-    return Elasticsearch([host], basic_auth=(username, password), ca_certs=ca_certs)
+    return Elasticsearch([host], basic_auth=(username, password), verify_certs=False)  # NOTE(review): TLS verification is off and ca_certs is no longer passed — confirm this is intended outside dev
 
 
 def get_total_documents_in_index(es: Elasticsearch, index_name: str) -> int:

diff --git a/scripts/fetch_rxivx_data.sh b/scripts/fetch_rxivx_data.sh
@@ -13,7 +13,7 @@ fi
 
 # Activate the Python environment
 activate_path="$(dirname "$(dirname "$conda_path")")/bin/activate"
-source "$activate_path" rxiv-rest-api
+source "$activate_path" es-journals
 
 # Get the current working directory
 path_root=$(pwd)

diff --git a/scripts/index_arxiv_to_es.py b/scripts/index_arxiv_to_es.py
@@ -46,7 +46,7 @@ def create_es_client(host: str, username: str, password: str, ca_certs: str) ->
     extra_args = {}
     if ca_certs:
         extra_args["ca_certs"] = ca_certs
-    return Elasticsearch([host], basic_auth=(username, password), **extra_args)
+    return Elasticsearch([host], basic_auth=(username, password), verify_certs=bool(ca_certs), **extra_args)  # verify TLS whenever a CA bundle is supplied
 
 def generate_document_id(doc: dict) -> str:
     """