docker-compose.yml
version: '2.1'
services:
  zookeeper-1:
    hostname: zookeeper-1
    container_name: zookeeper-1
    image: zookeeper:3.6
    restart: always
    ports:
      - 2181:2181
    environment:
      ZOO_MY_ID: 1
      ZOO_SERVERS: server.1=zookeeper-1:2888:3888;2181 server.2=zookeeper-2:2888:3888;2181 server.3=zookeeper-3:2888:3888;2181
    volumes:
      - type: bind
        source: ./zk-cluster/zookeeper-1/data
        target: /data
        read_only: false
  zookeeper-2:
    hostname: zookeeper-2
    container_name: zookeeper-2
    image: zookeeper:3.6
    restart: always
    ports:
      - 2182:2181
    environment:
      ZOO_MY_ID: 2
      ZOO_SERVERS: server.1=zookeeper-1:2888:3888;2181 server.2=zookeeper-2:2888:3888;2181 server.3=zookeeper-3:2888:3888;2181
    volumes:
      - type: bind
        source: ./zk-cluster/zookeeper-2/data
        target: /data
        read_only: false
  zookeeper-3:
    hostname: zookeeper-3
    container_name: zookeeper-3
    image: zookeeper:3.6
    restart: always
    ports:
      - 2183:2181
    environment:
      ZOO_MY_ID: 3
      ZOO_SERVERS: server.1=zookeeper-1:2888:3888;2181 server.2=zookeeper-2:2888:3888;2181 server.3=zookeeper-3:2888:3888;2181
    volumes:
      - type: bind
        source: ./zk-cluster/zookeeper-3/data
        target: /data
        read_only: false
  zookeeper-navigator:
    hostname: zookeeper-navigator
    container_name: zookeeper-navigator
    image: elkozmon/zoonavigator:1.1.2
    restart: always
    ports:
      - 9000:9000
    environment:
      HTTP_PORT: 9000
  spark-master-1:
    hostname: spark-master-1
    container_name: spark-master-1
    image: spark-cluster:0.04
    restart: always
    ports:
      - 8081:8080
    stdin_open: true
    tty: true
    environment:
      - SPARK_MODE=master
      - SPARK_MASTER_HOST=spark-master-1
      - SPARK_MASTER_PORT=7077
      - SPARK_MASTER_WEBUI_PORT=8080
      - SPARK_CONF_DIR=/home/spark/conf
      - SPARK_LOG_DIR=/home/spark/logs
      - SPARK_LOG_MAX_FILES=5
      - SPARK_PID_DIR=/tmp
  spark-master-2:
    hostname: spark-master-2
    container_name: spark-master-2
    image: spark-cluster:0.04
    restart: always
    ports:
      - 8082:8080
    stdin_open: true
    tty: true
    environment:
      - SPARK_MODE=master
      - SPARK_MASTER_HOST=spark-master-2
      - SPARK_MASTER_PORT=7077
      - SPARK_MASTER_WEBUI_PORT=8080
      - SPARK_CONF_DIR=/home/spark/conf
      - SPARK_LOG_DIR=/home/spark/logs
      - SPARK_LOG_MAX_FILES=5
      - SPARK_PID_DIR=/tmp
  spark-slave-1:
    hostname: spark-slave-1
    container_name: spark-slave-1
    image: spark-cluster:0.04
    restart: always
    stdin_open: true
    tty: true
    ports:
      - 8091:8081
    environment:
      - SPARK_MODE=slave
      - SPARK_MASTER_HOST=spark-master-1
      - SPARK_MASTER_HOST_SUB=spark-master-2
      - SPARK_MASTER_PORT=7077
      - SPARK_MASTER_PORT_SUB=7077
      - SPARK_WORKER_CORES=4
      - SPARK_WORKER_MEMORY=4g
      - SPARK_WORKER_WEBUI_PORT=8081
      - SPARK_WORKER_PORT=18081
      - SPARK_CONF_DIR=/home/spark/conf
      - SPARK_LOG_DIR=/home/spark/logs
      - SPARK_LOG_MAX_FILES=5
      - SPARK_PID_DIR=/tmp
    depends_on:
      - spark-master-1
      - spark-master-2
  spark-slave-2:
    hostname: spark-slave-2
    container_name: spark-slave-2
    image: spark-cluster:0.04
    restart: always
    stdin_open: true
    tty: true
    ports:
      - 8092:8081
    environment:
      - SPARK_MODE=slave
      - SPARK_MASTER_HOST=spark-master-1
      - SPARK_MASTER_HOST_SUB=spark-master-2
      - SPARK_MASTER_PORT=7077
      - SPARK_MASTER_PORT_SUB=7077
      - SPARK_WORKER_CORES=4
      - SPARK_WORKER_MEMORY=4g
      - SPARK_WORKER_WEBUI_PORT=8081
      - SPARK_WORKER_PORT=18081
      - SPARK_CONF_DIR=/home/spark/conf
      - SPARK_LOG_DIR=/home/spark/logs
      - SPARK_LOG_MAX_FILES=5
      - SPARK_PID_DIR=/tmp
    depends_on:
      - spark-master-1
      - spark-master-2
  spark-slave-3:
    hostname: spark-slave-3
    container_name: spark-slave-3
    image: spark-cluster:0.04
    restart: always
    stdin_open: true
    tty: true
    ports:
      - 8093:8081
    environment:
      - SPARK_MODE=slave
      - SPARK_MASTER_HOST=spark-master-1
      - SPARK_MASTER_HOST_SUB=spark-master-2
      - SPARK_MASTER_PORT=7077
      - SPARK_MASTER_PORT_SUB=7077
      - SPARK_WORKER_CORES=4
      - SPARK_WORKER_MEMORY=4g
      - SPARK_WORKER_WEBUI_PORT=8081
      - SPARK_WORKER_PORT=18081
      - SPARK_CONF_DIR=/home/spark/conf
      - SPARK_LOG_DIR=/home/spark/logs
      - SPARK_LOG_MAX_FILES=5
      - SPARK_PID_DIR=/tmp
    depends_on:
      - spark-master-1
      - spark-master-2
  zeppelin:
    hostname: zeppelin
    container_name: zeppelin
    image: spark-cluster:0.04
    restart: always
    stdin_open: true
    tty: true
    ports:
      - 9999:9999
      - 14040:4040
    environment:
      - SPARK_MODE=zeppelin
      - SPARK_MASTER=spark://spark-master-1:7077,spark-master-2:7077
      - ZEPPELIN_PORT=9999
      - ZEPPELIN_ADDR=0.0.0.0
      - ZEPPELIN_LOG_DIR=/home/zeppelin/logs
      - ZEPPELIN_PID_DIR=/home/zeppelin/run
      - SPARK_SUBMIT_OPTIONS=--total-executor-cores 4 --name MyFirstZeppelin
    volumes:
      - type: bind
        source: ./zeppelin-notebook/
        target: /home/zeppelin/notebook/
        read_only: false
    depends_on:
      - spark-master-1
      - spark-master-2
  spark-client:
    hostname: spark-client
    container_name: spark-client
    image: spark-cluster:0.04
    restart: always
    stdin_open: true
    tty: true
    ports:
      - 4040:4040
      - 4041:4041
      - 4042:4042
    environment:
      - SPARK_MODE=client
    volumes:
      - type: bind
        source: ./dev/
        target: /home/spark/dev/
        read_only: false
      - type: bind
        source: ./result/
        target: /home/spark/result/
        read_only: false
    depends_on:
      - spark-master-1
      - spark-master-2
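
With the compose file in place, the whole stack can be brought up and checked from the host. This is a minimal sketch: it assumes the spark-cluster:0.04 image has already been built from the post's Dockerfile, that the bind-mounted host directories (./zk-cluster/zookeeper-*/data, ./zeppelin-notebook, ./dev, ./result) exist beforehand, and that the official zookeeper image keeps its admin scripts on the PATH.

# Start the cluster in the background
docker-compose up -d

# Each ZooKeeper node should report Mode: leader or Mode: follower once the ensemble has formed
docker exec zookeeper-1 zkServer.sh status
docker exec zookeeper-2 zkServer.sh status
docker exec zookeeper-3 zkServer.sh status

ZooNavigator is then reachable at http://localhost:9000 for browsing the same ensemble in a web UI.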
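
Because the two masters coordinate through ZooKeeper, only one of them is ever active. The Spark standalone master web UI exposes its state as JSON at /json/, so the active/standby split can be checked from the host through the published ports (8081 for spark-master-1, 8082 for spark-master-2):

# Exactly one master should report ALIVE; the other stays STANDBY
curl -s http://localhost:8081/json/ | grep -o '"status" *: *"[A-Z]*"'
curl -s http://localhost:8082/json/ | grep -o '"status" *: *"[A-Z]*"'

The ALIVE master's UI should also list all three workers, whose own UIs are published on 8091-8093.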
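
Jobs are submitted with the HA master URL, i.e. both masters separated by a comma, so the driver can register with whichever master is currently active. The command below is only a sketch: it assumes the spark-cluster image ships a full Spark distribution under /home/spark, and the examples jar is matched with a glob because its exact name depends on the Spark and Scala versions baked into the image.

docker exec -it spark-client bash -c \
  '/home/spark/bin/spark-submit \
     --master spark://spark-master-1:7077,spark-master-2:7077 \
     --class org.apache.spark.examples.SparkPi \
     /home/spark/examples/jars/spark-examples_*.jar 100'

Zeppelin uses the same comma-separated URL through SPARK_MASTER and is reachable at http://localhost:9999; driver UIs started from spark-client are published on 4040-4042, and Zeppelin's driver UI on 14040.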
entrypoint.sh
#!/bin/bash
sleep 1
# Start the Spark master
if [ "${SPARK_MODE}" = "master" ]; then
    bash /home/spark/sbin/start-master.sh --properties-file /home/spark/ha.conf
fi
sleep 1
# Start a Spark worker (slave)
if [ "${SPARK_MODE}" = "slave" ]; then
    bash /home/spark/sbin/start-slave.sh "spark://${SPARK_MASTER_HOST}:${SPARK_MASTER_PORT},${SPARK_MASTER_HOST_SUB}:${SPARK_MASTER_PORT_SUB}" -c "${SPARK_WORKER_CORES}" -m "${SPARK_WORKER_MEMORY}"
fi
sleep 1
# Start Zeppelin
if [ "${SPARK_MODE}" = "zeppelin" ]; then
    bash /home/zeppelin/bin/zeppelin-daemon.sh start
fi
sleep 1
# Keep the container alive with an interactive shell
bash
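
Since each master is started with the ha.conf shown below, stopping the active master is enough to exercise failover. A quick check, assuming spark-master-1 is currently the ALIVE one:

# Stop whichever master is currently ALIVE (spark-master-1 in this example)
docker stop spark-master-1

# Give ZooKeeper a minute or two to elect the standby, then check its state
curl -s http://localhost:8082/json/ | grep -o '"status" *: *"[A-Z]*"'

# The old master rejoins as STANDBY when it comes back
docker start spark-master-1

Workers and running applications should re-register with the newly elected master, which is exactly what the ZooKeeper recovery mode is for.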
ha.conf
spark.deploy.recoveryMode=ZOOKEEPER
spark.deploy.zookeeper.url=zookeeper-1:2181,zookeeper-2:2181,zookeeper-3:2181
spark.deploy.zookeeper.dir=/spark
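
The masters keep their election and recovery state under the znode named by spark.deploy.zookeeper.dir, so the ensemble itself can confirm that HA is wired up. One way to look, using the CLI bundled in the zookeeper image (ZooNavigator on port 9000 shows the same tree graphically):

# Children such as leader_election and master_status should appear once a master has registered
docker exec -it zookeeper-1 zkCli.sh -server localhost:2181 ls /spark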