Abstract: This tutorial walks through building a big data learning environment (CentOS 6.9 + Hadoop 2.7.3 + Hive 1.2.1 + HBase 1.3.1 + Spark 2.1.1). Hopefully it helps deepen your understanding of the big data stack.
Cluster role layout:

Process                        | Software  | node1 (192.168.1.11) | node2 (192.168.1.12) | node3 (192.168.1.13) | Notes
NameNode                       | Hadoop    | Y | Y |   | HA
DataNode                       | Hadoop    | Y | Y | Y |
ResourceManager                | Hadoop    | Y | Y |   | HA
NodeManager                    | Hadoop    | Y | Y | Y |
JournalNode                    | Hadoop    | Y | Y | Y | Odd number of nodes, at least 3
ZKFC (DFSZKFailoverController) | Hadoop    | Y | Y |   | Runs on every node that has a NameNode
QuorumPeerMain                 | ZooKeeper | Y | Y | Y |
MySQL                          | Hive      | Y |   |   | Hive metastore database
Metastore (RunJar)             | Hive      | Y |   |   |
HIVE (RunJar)                  | Hive      | Y |   |   |
HMaster                        | HBase     | Y | Y |   | HA
HRegionServer                  | HBase     | Y | Y | Y |
Spark (Master)                 | Spark     | Y | Y |   | HA
Spark (Worker)                 | Spark     | Y | Y | Y |
I previously built a cluster with Federation enabled, which needs at least four machines and was far too complex for a laptop to handle. To learn Spark 2.x, I decided to drop Federation and simplify the learning environment, while still keeping it fully distributed.
All software packages:
apache-ant-1.9.9-bin.tar.gz
apache-hive-1.2.1-bin.tar.gz
apache-maven-3.3.9-bin.tar.gz
apache-tomcat-6.0.44.tar.gz
CentOS-6.9-x86_64-minimal.iso
findbugs-3.0.1.tar.gz
hadoop-2.7.3-src.tar.gz
hadoop-2.7.3.tar.gz
hadoop-2.7.3.tar.gz (self-compiled for CentOS 6.9)
hbase-1.3.1-bin.tar.gz (self-compiled)
hbase-1.3.1-src.tar.gz
jdk-8u121-linux-x64.tar.gz
mysql-connector-java-5.6-bin.jar
protobuf-2.5.0.tar.gz
scala-2.11.11.tgz
snappy-1.1.3.tar.gz
spark-2.1.1-bin-hadoop2.7.tgz
Disable the firewall
[root@node1 ~]# service iptables stop
[root@node1 ~]# chkconfig iptables off
zookeeper
[root@node1 ~]# wget -O /root/zookeeper-3.4.9.tar.gz https://mirrors.tuna.tsinghua.edu.cn/apache/zookeeper/zookeeper-3.4.9/zookeeper-3.4.9.tar.gz
[root@node1 ~]# tar -zxvf /root/zookeeper-3.4.9.tar.gz -C /root
[root@node1 ~]# cp /root/zookeeper-3.4.9/conf/zoo_sample.cfg /root/zookeeper-3.4.9/conf/zoo.cfg
[root@node1 ~]# vi /root/zookeeper-3.4.9/conf/zoo.cfg
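The edited zoo.cfg is not reproduced in this post. A minimal sketch for this three-node quorum, assuming the default client port and the zkData directory created below, looks like:

# zoo.cfg (key entries)
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/root/zookeeper-3.4.9/zkData
clientPort=2181
server.1=node1:2888:3888
server.2=node2:2888:3888
server.3=node3:2888:3888

The server.N ids must match the myid files written on each node further down.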
[root@node1 ~]# vi /root/zookeeper-3.4.9/bin/zkEnv.sh
[root@node1 ~]# mkdir /root/zookeeper-3.4.9/logs
[root@node1 ~]# vi /root/zookeeper-3.4.9/conf/log4j.properties
[root@node1 ~]# mkdir /root/zookeeper-3.4.9/zkData
[root@node1 ~]# scp -r /root/zookeeper-3.4.9 node2:/root
[root@node1 ~]# scp -r /root/zookeeper-3.4.9 node3:/root
[root@node1 ~]# touch /root/zookeeper-3.4.9/zkData/myid
[root@node1 ~]# echo 1 > /root/zookeeper-3.4.9/zkData/myid
[root@node2 ~]# touch /root/zookeeper-3.4.9/zkData/myid
[root@node2 ~]# echo 2 > /root/zookeeper-3.4.9/zkData/myid
[root@node3 ~]# touch /root/zookeeper-3.4.9/zkData/myid
[root@node3 ~]# echo 3 > /root/zookeeper-3.4.9/zkData/myid
Environment variables
[root@node1 ~]# vi /etc/profile
export JAVA_HOME=/root/jdk1.8.0_121
export SCALA_HOME=/root/scala-2.11.11
export HADOOP_HOME=/root/hadoop-2.7.3
export HIVE_HOME=/root/apache-hive-1.2.1-bin
export HBASE_HOME=/root/hbase-1.3.1
export SPARK_HOME=/root/spark-2.1.1-bin-hadoop2.7
export PATH=.:$PATH:$JAVA_HOME/bin:$SCALA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:/root:$HIVE_HOME/bin:$HBASE_HOME/bin:$SPARK_HOME/bin
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
[root@node1 ~]# source /etc/profile
[root@node1 ~]# scp /etc/profile node2:/etc
[root@node2 ~]# source /etc/profile
[root@node1~]# scp /etc/profile node3:/etc
[root@node3 ~]# source /etc/profile
Hadoop
[root@node1 ~]# wget -O /root/hadoop-2.7.3.tar.gz //mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz
[root@node1 ~]# tar -zxvf /root/hadoop-2.7.3.tar.gz -C /root
[root@node1 ~]# vi /root/hadoop-2.7.3/etc/hadoop/hadoop-env.sh
[root@node1 ~]# vi /root/hadoop-2.7.3/etc/hadoop/hdfs-site.xml
[root@node1 ~]# vi /root/hadoop-2.7.3/etc/hadoop/core-site.xml
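The contents of hdfs-site.xml and core-site.xml are not shown in this post. A minimal HA sketch, assuming the nameservice mycluster and the NameNode ids nn1/nn2 that the failover command in start.sh refers to (the RPC/journal ports and the fencing key path are assumptions), would contain entries like:

<!-- core-site.xml -->
<property><name>fs.defaultFS</name><value>hdfs://mycluster</value></property>
<property><name>hadoop.tmp.dir</name><value>/root/hadoop-2.7.3/tmp</value></property>
<property><name>ha.zookeeper.quorum</name><value>node1:2181,node2:2181,node3:2181</value></property>

<!-- hdfs-site.xml -->
<property><name>dfs.nameservices</name><value>mycluster</value></property>
<property><name>dfs.ha.namenodes.mycluster</name><value>nn1,nn2</value></property>
<property><name>dfs.namenode.rpc-address.mycluster.nn1</name><value>node1:8020</value></property>
<property><name>dfs.namenode.rpc-address.mycluster.nn2</name><value>node2:8020</value></property>
<property><name>dfs.namenode.shared.edits.dir</name><value>qjournal://node1:8485;node2:8485;node3:8485/mycluster</value></property>
<property><name>dfs.journalnode.edits.dir</name><value>/root/hadoop-2.7.3/tmp/journal</value></property>
<property><name>dfs.client.failover.proxy.provider.mycluster</name><value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value></property>
<property><name>dfs.ha.automatic-failover.enabled</name><value>true</value></property>
<property><name>dfs.ha.fencing.methods</name><value>sshfence</value></property>
<property><name>dfs.ha.fencing.ssh.private-key-files</name><value>/root/.ssh/id_rsa</value></property>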
[root@node1 ~]# vi /root/hadoop-2.7.3/etc/hadoop/slaves
node1
node2
node3
[root@node1 ~]# vi /root/hadoop-2.7.3/etc/hadoop/yarn-env.sh
[root@node1 ~]# vi /root/hadoop-2.7.3/etc/hadoop/mapred-site.xml
[root@node1 ~]# vi /root/hadoop-2.7.3/etc/hadoop/yarn-site.xml
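mapred-site.xml and yarn-site.xml are not shown either. A minimal sketch with ResourceManager HA, assuming the rm1/rm2 ids used by the rmadmin command in start.sh (the cluster-id value is just an example):

<!-- mapred-site.xml -->
<property><name>mapreduce.framework.name</name><value>yarn</value></property>

<!-- yarn-site.xml -->
<property><name>yarn.resourcemanager.ha.enabled</name><value>true</value></property>
<property><name>yarn.resourcemanager.cluster-id</name><value>yarncluster</value></property>
<property><name>yarn.resourcemanager.ha.rm-ids</name><value>rm1,rm2</value></property>
<property><name>yarn.resourcemanager.hostname.rm1</name><value>node1</value></property>
<property><name>yarn.resourcemanager.hostname.rm2</name><value>node2</value></property>
<property><name>yarn.resourcemanager.zk-address</name><value>node1:2181,node2:2181,node3:2181</value></property>
<property><name>yarn.nodemanager.aux-services</name><value>mapreduce_shuffle</value></property>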
[root@node1 ~]# mkdir -p /root/hadoop-2.7.3/tmp/journal
[root@node2 ~]# mkdir -p /root/hadoop-2.7.3/tmp/journal
[root@node3 ~]# mkdir -p /root/hadoop-2.7.3/tmp/journal
Replace /root/hadoop-2.7.3/lib/native with the native libraries from the self-compiled package.
[root@node1 ~]# scp -r /root/hadoop-2.7.3/ node2:/root
[root@node1 ~]# scp -r /root/hadoop-2.7.3/ node3:/root
Check whether your Hadoop native library is 32-bit or 64-bit:
[root@node1 native]# file libhadoop.so.1.0.0
libhadoop.so.1.0.0: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, not stripped
[root@node1 native]# pwd
/root/hadoop-2.7.3/lib/native
Start ZooKeeper
[root@node1 ~]#/root/zookeeper-3.4.9/bin/zkServer.sh start
[root@node2 ~]#/root/zookeeper-3.4.9/bin/zkServer.sh start
[root@node3 ~]#/root/zookeeper-3.4.9/bin/zkServer.sh start
Format ZKFC
[root@node1 ~]# /root/hadoop-2.7.3/bin/hdfs zkfc -formatZK
[root@node1 ~]# /root/zookeeper-3.4.9/bin/zkCli.sh
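The zkCli.sh session is just a quick check that -formatZK created the HA znode; inside the shell (assuming the mycluster nameservice from the configs above):

ls /             # should now list a hadoop-ha entry
ls /hadoop-ha    # should list mycluster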
Start the JournalNodes
[root@node1 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start journalnode
[root@node2 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start journalnode
[root@node3 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start journalnode
Format and start the NameNodes
[root@node1 ~]# /root/hadoop-2.7.3/bin/hdfs namenode -format
[root@node1 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start namenode
[root@node2 ~]# /root/hadoop-2.7.3/bin/hdfs namenode -bootstrapStandby
[root@node2 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start namenode
Start ZKFC
[root@node1 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start zkfc
[root@node2 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start zkfc
Start the DataNodes
[root@node1 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start datanode
[root@node2 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start datanode
[root@node3 ~]# /root/hadoop-2.7.3/sbin/hadoop-daemon.sh start datanode
Start YARN
[root@node1 ~]# /root/hadoop-2.7.3/sbin/yarn-daemon.sh start resourcemanager
[root@node2 ~]# /root/hadoop-2.7.3/sbin/yarn-daemon.sh start resourcemanager
[root@node1 ~]# /root/hadoop-2.7.3/sbin/yarn-daemon.sh start nodemanager
[root@node2 ~]# /root/hadoop-2.7.3/sbin/yarn-daemon.sh start nodemanager
[root@node3 ~]# /root/hadoop-2.7.3/sbin/yarn-daemon.sh start nodemanager
[root@node1 ~]# hdfs dfs -chmod -R 777 /
Install MySQL
[root@node1 ~]# yum remove -y mysql-libs
[root@node1 ~]# yum install mysql-server
[root@node1 ~]# service mysqld start
[root@node1 ~]# chkconfig mysqld on
[root@node1 ~]# mysqladmin -u root password 'AAAaaa111'
[root@node1 ~]# mysqladmin -u root -h node1 password 'AAAaaa111'
[root@node1 ~]# mysql -h localhost -u root -p
Enter password: AAAaaa111
mysql> GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY 'AAAaaa111' WITH GRANT OPTION;
mysql> flush privileges;
[root@node1 ~]# vi /etc/my.cnf
[client]
default-character-set=utf8
[mysql]
default-character-set=utf8
[mysqld]
character-set-server=utf8
lower_case_table_names = 1
[root@node1 ~]# service mysqld restart
Install Hive
Because the Hive bundled in the official spark-2.1.1-bin-hadoop2.7.tgz is 1.2.1, Hive 1.2.1 is used here.
[root@node1 ~]# wget //archive.apache.org/dist/hive/hive-1.2.1/apache-hive-1.2.1-bin.tar.gz
[root@node1 ~]# tar -xvf apache-hive-1.2.1-bin.tar.gz
Place the mysql-connector-java-5.6-bin.jar JDBC driver into the /root/apache-hive-1.2.1-bin/lib/ directory.
[root@node1 ~]# cp /root/apache-hive-1.2.1-bin/conf/hive-env.sh.template /root/apache-hive-1.2.1-bin/conf/hive-env.sh
[root@node1 ~]# vi /root/apache-hive-1.2.1-bin/conf/hive-env.sh
export HADOOP_HOME=/root/hadoop-2.7.3
[root@node1 ~]# cp /root/apache-hive-1.2.1-bin/conf/hive-log4j.properties.template /root/apache-hive-1.2.1-bin/conf/hive-log4j.properties
[root@node1 ~]# vi /root/apache-hive-1.2.1-bin/conf/hive-site.xml
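The hive-site.xml content is not reproduced here. A minimal sketch, assuming Hive reuses the MySQL root account configured above and a metastore database named hive (the one altered to latin1 below), with the default metastore thrift port 9083:

<property><name>javax.jdo.option.ConnectionURL</name><value>jdbc:mysql://node1:3306/hive?createDatabaseIfNotExist=true</value></property>
<property><name>javax.jdo.option.ConnectionDriverName</name><value>com.mysql.jdbc.Driver</value></property>
<property><name>javax.jdo.option.ConnectionUserName</name><value>root</value></property>
<property><name>javax.jdo.option.ConnectionPassword</name><value>AAAaaa111</value></property>
<property><name>hive.metastore.uris</name><value>thrift://node1:9083</value></property>
<property><name>hive.metastore.warehouse.dir</name><value>/user/hive/warehouse</value></property>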
[root@node1 ~]# vi /etc/init.d/hive-metastore
/root/apache-hive-1.2.1-bin/bin/hive --service metastore >/dev/null 2>&1 &
[root@node1 ~]# chmod 777 /etc/init.d/hive-metastore
[root@node1 ~]# ln -s /etc/init.d/hive-metastore /etc/rc.d/rc3.d/S65hive-metastore
[root@node1 ~]# hive
[root@node1 ~]# mysql -h localhost -u root -p
mysql> alter database hive character set latin1;
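A quick smoke test confirms the metastore connection works (the table name is just an example):

[root@node1 ~]# hive
hive> create table test_tb (id int, name string);
hive> show tables;
hive> drop table test_tb;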
Compile and install HBase
//archive.apache.org/dist/hbase/1.3.1/hbase-1.3.1-src.tar.gz
The official binary is built against Hadoop 2.5.1, so HBase has to be recompiled: change the Hadoop version that pom.xml depends on from 2.5.1 to 2.7.3, as sketched below.
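A hedged sketch of the change, assuming HBase 1.3.1 tracks its Hadoop 2 dependency through the hadoop-two.version property in pom.xml:

<!-- pom.xml: Hadoop version property (property name assumed) -->
<hadoop-two.version>2.7.3</hadoop-two.version>   <!-- was 2.5.1 -->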
Then package it, for example with:
mvn clean package -DskipTests -Prelease assembly:single
/root/hbase-1.3.1/hbase-assembly/target/hbase-1.3.1-bin.tar.gz
Install HBase from this self-built package as follows:
[root@node1 ~]# cp /root/hadoop-2.7.3/etc/hadoop/hdfs-site.xml /root/hadoop-2.7.3/etc/hadoop/core-site.xml /root/hbase-1.3.1/conf/
[root@node1 ~]# vi /root/hbase-1.3.1/conf/hbase-env.sh
export JAVA_HOME=/root/jdk1.8.0_121
export HBASE_MANAGES_ZK=false
[root@node1 ~]# vi /root/hbase-1.3.1/conf/hbase-site.xml
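hbase-site.xml is not shown in the post. A minimal sketch, assuming the root directory lives under the mycluster HDFS nameservice and the tmp directory created below:

<property><name>hbase.rootdir</name><value>hdfs://mycluster/hbase</value></property>
<property><name>hbase.cluster.distributed</name><value>true</value></property>
<property><name>hbase.zookeeper.quorum</name><value>node1,node2,node3</value></property>
<property><name>hbase.tmp.dir</name><value>/root/hbase-1.3.1/tmp</value></property>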
[root@node1 ~]# vi /root/hbase-1.3.1/conf/regionservers
node1
node2
node3
[root@node1 ~]# mkdir -p /root/hbase-1.3.1/tmp
[root@node1 ~]# vi /root/hbase-1.3.1/conf/hbase-env.sh
# Configure PermSize. Only needed in JDK7. You can safely remove it for JDK8+
#export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"
#export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -XX:PermSize=128m -XX:MaxPermSize=128m"
Copy /etc/profile and the hbase-1.3.1 directory to the other two nodes.
[root@node1 ~]# start-hbase.sh
# The backup HMaster has to be started by hand
[root@node2 ~]# hbase-daemon.sh start master
[root@node1 ~]# hbase shell
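A short smoke test inside the shell ('t1' is just an example table):

status
create 't1', 'cf1'
put 't1', 'r1', 'cf1:c1', 'v1'
scan 't1'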
spark
https://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz
[root@node1 ~]# cp /root/spark-2.1.1-bin-hadoop2.7/conf/spark-env.sh.template /root/spark-2.1.1-bin-hadoop2.7/conf/spark-env.sh
[root@node1 ~]# vi /root/spark-2.1.1-bin-hadoop2.7/conf/spark-env.sh
export SCALA_HOME=/root/scala-2.11.11
export JAVA_HOME=/root/jdk1.8.0_121
export HADOOP_HOME=/root/hadoop-2.7.3
export HADOOP_CONF_DIR=/root/hadoop-2.7.3/etc/hadoop
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=node1:2181,node2:2181,node3:2181 -Dspark.deploy.zookeeper.dir=/spark"
[root@node1 ~]# cp /root/spark-2.1.1-bin-hadoop2.7/conf/slaves.template /root/spark-2.1.1-bin-hadoop2.7/conf/slaves
[root@node1 ~]# vi /root/spark-2.1.1-bin-hadoop2.7/conf/slaves
node1
node2
node3
[root@node1 ~]# scp -r /root/spark-2.1.1-bin-hadoop2.7 node2:/root
[root@node1 ~]# scp -r /root/spark-2.1.1-bin-hadoop2.7 node3:/root
[root@node1 ~]# /root/spark-2.1.1-bin-hadoop2.7/sbin/start-all.sh
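To verify the standalone cluster and the HA master list, a SparkPi job can be submitted (the examples jar path below is the one shipped inside the 2.1.1 binary distribution; adjust it if yours differs):

[root@node1 ~]# /root/spark-2.1.1-bin-hadoop2.7/bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master spark://node1:7077,node2:7077 \
  /root/spark-2.1.1-bin-hadoop2.7/examples/jars/spark-examples_2.11-2.1.1.jar 100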
./start.sh
/root/zookeeper-3.4.9/bin/zkServer.sh start
ssh root@node2 'export BASH_ENV=/etc/profile;/root/zookeeper-3.4.9/bin/zkServer.sh start'
ssh root@node3 'export BASH_ENV=/etc/profile;/root/zookeeper-3.4.9/bin/zkServer.sh start'
/root/hadoop-2.7.3/sbin/start-dfs.sh
/root/hadoop-2.7.3/sbin/start-yarn.sh
# Uncomment if YARN HA is enabled
#ssh root@node2 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/sbin/yarn-daemon.sh start resourcemanager'
/root/hadoop-2.7.3/sbin/hadoop-daemon.sh start zkfc
ssh root@node2 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/sbin/hadoop-daemon.sh start zkfc'
ssh root@node3 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/sbin/hadoop-daemon.sh start zkfc'
/root/hadoop-2.7.3/bin/hdfs haadmin -ns mycluster -failover nn2 nn1
echo 'Y' | ssh root@node1 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/bin/yarn rmadmin -transitionToActive --forcemanual rm1'
/root/hbase-1.3.1/bin/start-hbase.sh
# Uncomment if HBase HA is enabled
#ssh root@node2 'export BASH_ENV=/etc/profile;/root/hbase-1.3.1/bin/hbase-daemon.sh start master'
/root/spark-2.1.1-bin-hadoop2.7/sbin/start-all.sh
# Uncomment if Spark HA is enabled
#ssh root@node2 'export BASH_ENV=/etc/profile;/root/spark-2.1.1-bin-hadoop2.7/sbin/start-master.sh'
/root/hadoop-2.7.3/sbin/mr-jobhistory-daemon.sh start historyserver
echo '--------------node1---------------'
jps | grep -v Jps | sort -k 2 -t ' '
echo '--------------node2---------------'
ssh root@node2 "export PATH=/usr/bin:$PATH;jps | grep -v Jps | sort -k 2 -t ' '"
echo '--------------node3---------------'
ssh root@node3 "export PATH=/usr/bin:$PATH;jps | grep -v Jps | sort -k 2 -t ' '"
./stop.sh
/root/spark-2.1.1-bin-hadoop2.7/sbin/stop-all.sh
/root/hbase-1.3.1/bin/stop-hbase.sh
# Uncomment if YARN HA is enabled
#ssh root@node2 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/sbin/yarn-daemon.sh stop resourcemanager'
/root/hadoop-2.7.3/sbin/stop-yarn.sh
/root/hadoop-2.7.3/sbin/stop-dfs.sh
/root/hadoop-2.7.3/sbin/hadoop-daemon.sh stop zkfc
ssh root@node2 'export BASH_ENV=/etc/profile;/root/hadoop-2.7.3/sbin/hadoop-daemon.sh stop zkfc'
/root/zookeeper-3.4.9/bin/zkServer.sh stop
ssh root@node2 'export BASH_ENV=/etc/profile;/root/zookeeper-3.4.9/bin/zkServer.sh stop'
ssh root@node3 'export BASH_ENV=/etc/profile;/root/zookeeper-3.4.9/bin/zkServer.sh stop'
/root/hadoop-2.7.3/sbin/mr-jobhistory-daemon.sh stop historyserver
./shutdown.sh
ssh root@node2 "export PATH=/usr/bin:$PATH;shutdown -h now"
ssh root@node3 "export PATH=/usr/bin:$PATH;shutdown -h now"
shutdown -h now
./reboot.sh
ssh root@node2 "export PATH=/usr/bin:$PATH;reboot"
ssh root@node3 "export PATH=/usr/bin:$PATH;reboot"
reboot