diff --git a/helm/archive-query-log/templates/archive-query-log-cron-job-serps-download-warc.yml b/helm/archive-query-log/templates/archive-query-log-cron-job-serps-download-warc.yml
new file mode 100644
index 00000000..49f80912
--- /dev/null
+++ b/helm/archive-query-log/templates/archive-query-log-cron-job-serps-download-warc.yml
@@ -0,0 +1,62 @@
+# CronJob that runs `archive_query_log serps download warc` on the schedule
+# configured under `serpsDownloadWarc` in values.yaml.
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: {{ $.Release.Name }}-serps-download-warc
+  namespace: {{ $.Release.Namespace }}
+  annotations:
+    # SHA-256 hashes of the rendered config map and secret templates.
+    checksum/config-map: {{ include (print $.Template.BasePath "/archive-query-log-config-map.yml") . | sha256sum }}
+    checksum/secret: {{ include (print $.Template.BasePath "/archive-query-log-secret.yaml") . | sha256sum }}
+spec:
+  # Setting serpsDownloadWarc.enabled to false suspends the CronJob.
+  suspend: {{ not $.Values.serpsDownloadWarc.enabled }}
+  schedule: "{{ $.Values.serpsDownloadWarc.schedule }}"
+  concurrencyPolicy: Forbid
+  jobTemplate:
+    spec:
+      completions: {{ $.Values.serpsDownloadWarc.completions }}
+      parallelism: {{ $.Values.serpsDownloadWarc.parallelism }}
+      backoffLimit: {{ $.Values.serpsDownloadWarc.backoffLimit }}
+      # values.yaml states the TTL in minutes; this field takes seconds.
+      ttlSecondsAfterFinished: {{ mul 60 $.Values.serpsDownloadWarc.ttlMinutesAfterFinished }}
+      template:
+        spec:
+          containers:
+            - name: {{ $.Release.Name }}-serps-download-warc
+              image: "{{ .Values.image }}:{{ .Chart.AppVersion }}"
+              imagePullPolicy: IfNotPresent
+              resources:
+                requests:
+                  memory: 4Gi
+              command:
+                - /venv/bin/python
+                - -m
+                - archive_query_log
+                - -f
+                - /workspace/config.config-map.yml
+                - -f
+                - /workspace/config.secret.yml
+                - serps
+                - download
+                - warc
+              volumeMounts:
+                # Each config file is mounted on its own via subPath and
+                # passed to the CLI with a separate -f flag above.
+                - name: {{ $.Release.Name }}-config-map
+                  mountPath: /workspace/config.config-map.yml
+                  readOnly: true
+                  subPath: config.yml
+                - name: {{ $.Release.Name }}-secret
+                  mountPath: /workspace/config.secret.yml
+                  readOnly: true
+                  subPath: config.yml
+          restartPolicy: OnFailure
+          volumes:
+            - name: {{ $.Release.Name }}-config-map
+              configMap:
+                name: {{ $.Release.Name }}-config-map
+            - name: {{ $.Release.Name }}-secret
+              secret:
+                secretName: {{ $.Release.Name }}-secret
diff --git a/helm/archive-query-log/values.yaml b/helm/archive-query-log/values.yaml
index 39db198f..b798b962 100644
--- a/helm/archive-query-log/values.yaml
+++ b/helm/archive-query-log/values.yaml
@@ -33,6 +33,14 @@ serpsParseUrlQuery:
   backoffLimit: 25
   ttlMinutesAfterFinished: 5
 
+serpsDownloadWarc:
+  enabled: true
+  schedule: "* * * * *"  # every minute
+  completions: 10
+  parallelism: 10
+  backoffLimit: 25
+  ttlMinutesAfterFinished: 5
+
 monitoring:
   ingressClassName: nginx
   host: aql-monitoring.srv.webis.de