I am trying to run distributed Scala code on minikube, using spark-submit in cluster mode.

1. I use this Dockerfile:

FROM datamechanics/spark:2.4.6-hadoop-3.1.0-java-8-scala-2.12-python-3.7-dm18
WORKDIR /opt/application
RUN mkdir /tmp/data-pvc
COPY target/DisDPMM_2_13-1.0-jar-with-dependencies.jar .
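
For context, this is roughly how I make the image available to minikube before submitting (a sketch assuming the docker driver; the tag matches the one passed to spark-submit below):

# Build straight into minikube's Docker daemon so no registry push is needed
eval $(minikube docker-env)
docker build -t spark_kubernetes_concepts_demo:latest .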

2. I create the namespace, ServiceAccount, and RoleBinding:

kubectl create namespace spark-kubernetes-concepts-demo
kubectl create serviceaccount spark-editor --namespace=spark-kubernetes-concepts-demo
kubectl create rolebinding spark-edito-role --clusterrole=cluster-admin --serviceaccount=spark-kubernetes-concepts-demo:spark-editor
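
To sanity-check that the binding took effect, a quick probe (verification only, not part of the setup):

# Should print "yes" if the ServiceAccount can manage pods in the namespace
kubectl auth can-i create pods \
  --namespace=spark-kubernetes-concepts-demo \
  --as=system:serviceaccount:spark-kubernetes-concepts-demo:spark-editor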

3. In addition, I create a StorageClass, a PersistentVolume, and a PersistentVolumeClaim.

StorageClass:

kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: spark-local-dir-v1
  namespace: spark-kubernetes-concepts-demo
provisioner: kubernetes.io/no-provisioner
volumeBindingMode: WaitForFirstConsumer

PersistentVolume:

apiVersion: v1
kind: PersistentVolume
metadata:
  name: spark-local-dir-pv
  namespace: spark-kubernetes-concepts-demo
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: spark-local-dir-v1
  local:
    path: /mnt/data
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - minikube
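
Since this PV uses a local volume, /mnt/data has to exist on the minikube node itself; I create it like this (sketch):

# The local PV path lives inside the minikube VM/container, not on the host
minikube ssh -- sudo mkdir -p /mnt/data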

PersistentVolumeClaim:

kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: spark-claim
  namespace: spark-kubernetes-concepts-demo
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: spark-local-dir-v1
  resources:
    requests:
      storage: 10Gi
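
I then apply the three manifests and check their status (the file names here are placeholders for my actual files); with volumeBindingMode: WaitForFirstConsumer the claim is expected to sit in Pending until a pod mounts it:

kubectl apply -f storageclass.yaml -f pv.yaml -f pvc.yaml
# The PVC stays Pending until the first consumer pod is scheduled
kubectl get pv,pvc --namespace=spark-kubernetes-concepts-demo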

In the end, I submit my job using spark-submit:

spark-submit \
  --master k8s://$KUBERNETES_MASTER \
  --deploy-mode cluster \
  --name DisDPMM \
  --class my_code.DisDPMM \
  --conf spark.executor.instances=1 \
  --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark-editor \
  --conf spark.kubernetes.namespace=spark-kubernetes-concepts-demo \
  --conf spark.kubernetes.container.image=spark_kubernetes_concepts_demo:latest \
  --conf spark.dynamicAllocation.enabled=true \
  --conf spark.dynamicAllocation.shuffleTracking.enabled=true \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-pv.options.claimName=OnDemand \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-pv.options.storageClass=spark-local-dir-v1 \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-pv.options.sizeLimit=10Gi \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-pv.mount.path=/mnt/data \
  --conf spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-pv.mount.readOnly=false \
  --conf spark.kubernetes.context=minikube \
  local:////opt/application/DisDPMM_2_13-1.0-jar-with-dependencies.jar
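
For completeness, $KUBERNETES_MASTER is taken from the active minikube context, roughly like this:

# API server URL of the current kubectl context
KUBERNETES_MASTER=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')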

In the end I get an error. These are the logs in the driver pod:

Unsetting extraneous env vars (UTC): 14:14:00
Finished unsetting extraneous env vars (UTC): 14:14:00
++ id -u
+ myuid=185
++ id -g
+ mygid=0
+ set +e
++ getent passwd 185
+ uidentry=
+ set -e
+ '[' -z '' ']'
+ '[' -w /etc/passwd ']'
+ echo '185:x:185:0:anonymous uid:/opt/spark:/bin/false'
+ SPARK_K8S_CMD=driver
+ case "$SPARK_K8S_CMD" in
+ shift 1
+ SPARK_CLASSPATH=':/opt/spark/jars/*'
+ env
+ grep SPARK_JAVA_OPT_
+ sort -t_ -k4 -n
+ sed 's/[^=]*=\(.*\)/\1/g'
+ readarray -t SPARK_EXECUTOR_JAVA_OPTS
+ '[' -n '' ']'
+ '[' -n '' ']'
+ PYSPARK_ARGS=
+ '[' -n '' ']'
+ R_ARGS=
+ '[' -n '' ']'
+ '[' 3 == 2 ']'
+ '[' 3 == 3 ']'
++ python3 -V
+ pyv3='Python 3.7.13'
+ export PYTHON_VERSION=3.7.13
+ PYTHON_VERSION=3.7.13
+ export PYSPARK_PYTHON=python3
+ PYSPARK_PYTHON=python3
+ export PYSPARK_DRIVER_PYTHON=python3
+ PYSPARK_DRIVER_PYTHON=python3
+ case "$SPARK_K8S_CMD" in
+ CMD=("$SPARK_HOME/bin/spark-submit" --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS" --deploy-mode client "$@")
+ exec /usr/bin/tini -s -- /opt/spark/bin/spark-submit --conf spark.driver.bindAddress=10.244.0.30 --deploy-mode client --properties-file /opt/spark/conf/spark.properties --class my_code.DisDPMM local:////opt/application/DisDPMM_2_13-1.0-jar-with-dependencies.jar
23/04/27 14:14:02 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
log4j:WARN No appenders could be found for logger (org.apache.spark.deploy.SparkSubmit$$anon$2).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
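
To dig further than these warnings, I inspect the driver pod directly (the pod name below is an example; the real one is printed by spark-submit):

kubectl get pods --namespace=spark-kubernetes-concepts-demo
kubectl describe pod disdpmm-xxxxxxxx-driver --namespace=spark-kubernetes-concepts-demo
kubectl logs -f disdpmm-xxxxxxxx-driver --namespace=spark-kubernetes-concepts-demo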