First, download the following software (fetch commands are sketched after the list):
- jdk_ri-8u40-b25-linux-x64-10_feb_2015.tar.gz
- hadoop-3.1.2.tar.gz
- spark-2.4.3-bin-hadoop2.7.tgz
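The two Apache tarballs can be fetched from the Apache archive; the JDK 8u40 reference implementation is published on the OpenJDK/Java SE RI download page, so fetch that one manually (a minimal sketch):
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.2/hadoop-3.1.2.tar.gz
wget https://archive.apache.org/dist/spark/spark-2.4.3/spark-2.4.3-bin-hadoop2.7.tgz
# jdk_ri-8u40-b25-linux-x64-10_feb_2015.tar.gz: download it from the
# OpenJDK / Java SE 8 Reference Implementation page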
- Modify the hostname
hostnamectl set-hostname <new-hostname>
Then add the following entries to /etc/hosts on all three servers:
192.168.0.103 master
192.168.0.107 slave1
192.168.0.106 slave2
Of course, adjust the IPs to match your own machines.
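A quick sanity check from any of the three servers confirms that the names resolve to the right machines:
# each name should answer from the IP configured in /etc/hosts
ping -c 1 master
ping -c 1 slave1
ping -c 1 slave2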
- Environment setup
Create a spark user on all three servers and grant it sudo privileges by adding the following line to /etc/sudoers:
spark ALL=(ALL) ALL
Also create the following directories on all three servers (a command sketch follows the list):
- /app/soft
- /app/spark
- /home/spark/work
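Run as root on each server, the user and directory layout above can be set up roughly like this (the useradd/chown details are assumptions; adapt to your distribution):
# run as root on all three servers
useradd -m spark
passwd spark
mkdir -p /app/soft /app/spark /home/spark/work
chown -R spark:spark /app /home/spark/work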
- Set up passwordless SSH authentication
On all three servers, edit /etc/ssh/sshd_config and enable the following settings:
RSAAuthentication yes
PubkeyAuthentication yes
AuthorizedKeysFile .ssh/authorized_keys
These settings allow public/private key authentication with the RSA algorithm, with keys kept in the ~/.ssh/authorized_keys file. After editing, run service sshd restart to restart the SSH service. From here on, log in as the spark user and use the commands below to generate a key pair, hand each server's public key to the other two servers, and append it to their ~/.ssh/authorized_keys files.
ssh-keygen -t rsa
cd .ssh
# master server
mv id_rsa.pub authorized_keys_master.pub
# slave1 server
mv id_rsa.pub authorized_keys_slave1.pub
# slave2 server
mv id_rsa.pub authorized_keys_slave2.pub
# master server: copy its public key to the other two machines (do the same on all three)
scp authorized_keys_master.pub spark@slave1:/home/spark/.ssh
scp authorized_keys_master.pub spark@slave2:/home/spark/.ssh
cat authorized_keys_master.pub >> authorized_keys
cat authorized_keys_slave1.pub >> authorized_keys
cat authorized_keys_slave2.pub >> authorized_keys
chmod 400 authorized_keys
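To verify, each server should now reach the other two without a password prompt:
# from master; each command should print the remote hostname with no password prompt
ssh spark@slave1 hostname
ssh spark@slave2 hostname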
- Install the JDK
cd /home/spark/work
tar xzvf jdk_ri-8u40-b25-linux-x64-10_feb_2015.tar.gz
mv java-se-8u40-ri /app/soft
As root, edit /etc/profile and add the following:
#java
JAVA_HOME=/app/soft/java-se-8u40-ri
CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
PATH=$JAVA_HOME/bin:$PATH
export PATH CLASSPATH JAVA_HOME
After editing, run the following command for the settings to take effect:
source /etc/profile
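At this point java should resolve to the new JDK:
# confirm the JDK on the PATH is the one under /app/soft
java -version
which java    # expected: /app/soft/java-se-8u40-ri/bin/java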
- Install Hadoop & Spark
cd /home/spark/work
tar xvzf hadoop-3.1.2.tar.gz
mv hadoop-3.1.2 /app/soft
tar xvzf spark-2.4.3-bin-hadoop2.7.tgz
mv spark-2.4.3-bin-hadoop2.7 /app/spark/spark-2.4.3
As root, edit /etc/profile again and add the following:
# spark
export HADOOP_HOME=/app/soft/hadoop-3.1.2
export SPARK_HOME=/app/spark/spark-2.4.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$SPARK_HOME/bin:$SPARK_HOME/sbin
After editing, run the following command for the settings to take effect:
source /etc/profile
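A quick check that both stacks are now on the PATH:
hadoop version           # should report Hadoop 3.1.2
spark-submit --version   # should report version 2.4.3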
- Configure Spark
On all three servers, edit /app/spark/spark-2.4.3/conf/spark-env.sh (create it from spark-env.sh.template if needed) and add the following:
# SPARK_MASTER_IP is deprecated since Spark 2.0; use SPARK_MASTER_HOST
export SPARK_MASTER_HOST=master
export SPARK_MASTER_PORT=7077
export SPARK_EXECUTOR_INSTANCES=1
export SPARK_WORKER_INSTANCES=1
export SPARK_WORKER_CORES=1
export SPARK_WORKER_MEMORY=1024M
export SPARK_MASTER_WEBUI_PORT=8080
export SPARK_CONF_DIR=/app/spark/spark-2.4.3/conf
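Since every node needs the same settings, the file can simply be pushed from master using the passwordless SSH set up earlier:
cd /app/spark/spark-2.4.3/conf
scp spark-env.sh spark@slave1:/app/spark/spark-2.4.3/conf/
scp spark-env.sh spark@slave2:/app/spark/spark-2.4.3/conf/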
- Start Spark
On master, run:
cd /app/spark/spark-2.4.3/sbin
./start-all.sh
Then, on the two slaves, run the following commands:
cd /app/spark/spark-2.4.3/sbin
./start-slave.sh spark://master:7077
How do you verify it? Open a browser on master and go to http://localhost:8080, the Web UI management console. You should see that the master started correctly and that slave1 and slave2 are connected to it.
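As a final end-to-end check, the bundled SparkPi example can be submitted to the cluster (the examples jar name below assumes the stock Spark 2.4.3 / Scala 2.11 build):
spark-submit \
  --master spark://master:7077 \
  --class org.apache.spark.examples.SparkPi \
  /app/spark/spark-2.4.3/examples/jars/spark-examples_2.11-2.4.3.jar 100
# the driver output should include a line like "Pi is roughly 3.14..."
If the job completes and shows up under "Completed Applications" in the Web UI, the cluster is working end to end.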