Skip to content

Commit

Permalink
Add WARC download cron job
Browse files Browse the repository at this point in the history
  • Loading branch information
janheinrichmerker committed Nov 15, 2023
1 parent 8565d86 commit 8619320
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# CronJob that runs `python -m archive_query_log ... serps download warc`
# on the schedule configured under `serpsDownloadWarc` in the chart values.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ $.Release.Name }}-serps-download-warc
  namespace: {{ $.Release.Namespace }}
  annotations:
    # Hashes of the rendered config map and secret templates, so this manifest
    # changes whenever either of them changes.
    checksum/config-map: {{ include (print $.Template.BasePath "/archive-query-log-config-map.yml") . | sha256sum }}
    checksum/secret: {{ include (print $.Template.BasePath "/archive-query-log-secret.yaml") . | sha256sum }}
spec:
  # The job stays installed but is suspended when `enabled` is false.
  suspend: {{ not $.Values.serpsDownloadWarc.enabled }}
  schedule: "{{ $.Values.serpsDownloadWarc.schedule }}"
  # Never start a new run while the previous one is still active.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      completions: {{ $.Values.serpsDownloadWarc.completions }}
      parallelism: {{ $.Values.serpsDownloadWarc.parallelism }}
      backoffLimit: {{ $.Values.serpsDownloadWarc.backoffLimit }}
      # The chart value is given in minutes; Kubernetes expects seconds.
      ttlSecondsAfterFinished: {{ mul 60 $.Values.serpsDownloadWarc.ttlMinutesAfterFinished }}
      template:
        spec:
          containers:
            - name: {{ $.Release.Name }}-serps-download-warc
              image: "{{ $.Values.image }}:{{ $.Chart.AppVersion }}"
              imagePullPolicy: IfNotPresent
              resources:
                requests:
                  memory: 4Gi
              # Both mounted config files are passed via `-f`, followed by the
              # `serps download warc` sub-command.
              command:
                - /venv/bin/python
                - -m
                - archive_query_log
                - -f
                - /workspace/config.config-map.yml
                - -f
                - /workspace/config.secret.yml
                - serps
                - download
                - warc
              # `subPath` mounts only the `config.yml` key of each volume as a
              # single read-only file.
              volumeMounts:
                - name: {{ $.Release.Name }}-config-map
                  mountPath: /workspace/config.config-map.yml
                  readOnly: true
                  subPath: config.yml
                - name: {{ $.Release.Name }}-secret
                  mountPath: /workspace/config.secret.yml
                  readOnly: true
                  subPath: config.yml
          restartPolicy: OnFailure
          volumes:
            - name: {{ $.Release.Name }}-config-map
              configMap:
                name: {{ $.Release.Name }}-config-map
            - name: {{ $.Release.Name }}-secret
              secret:
                secretName: {{ $.Release.Name }}-secret
8 changes: 8 additions & 0 deletions helm/archive-query-log/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ serpsParseUrlQuery:
backoffLimit: 25
ttlMinutesAfterFinished: 5

# Settings for the "serps download warc" CronJob.
serpsDownloadWarc:
  # When false, the CronJob is rendered with `suspend: true`.
  enabled: true
  # Cron expression; "* * * * *" fires every minute.
  schedule: "* * * * *"
  # Number of successful pods required per run, and how many may run at once.
  completions: 10
  parallelism: 10
  # Retries allowed before a run is marked as failed.
  backoffLimit: 25
  # Minutes to keep a finished job; the template converts this to seconds.
  ttlMinutesAfterFinished: 5

monitoring:
ingressClassName: nginx
host: aql-monitoring.srv.webis.de
Expand Down

0 comments on commit 8619320

Please sign in to comment.