Hive On Spark 概述、安装配置、计算引擎更换、应用、异常处理 - IT评测·应用市场-qidao123.com

tar -xvf spark-2.3.0-bin-without-hadoop.tgz -C /opt/module/

复制代码

cd /opt/module
mv spark-2.3.0-bin-without-hadoop/ spark-2.3.0

复制代码

#SPARK_HOME
export SPARK_HOME=/opt/module/spark-2.3.0
export PATH=$PATH:$SPARK_HOME/bin

复制代码

cd $SPARK_HOME/conf
mv spark-env.sh.template spark-env.sh
vim spark-env.sh

复制代码

export SPARK_DIST_CLASSPATH=$(hadoop classpath)

复制代码

cd $HIVE_HOME/conf
vim spark-defaults.conf

复制代码

# 指定提交到 yarn 运行
spark.master yarn
# 开启日志并存储到 HDFS 上
spark.eventLog.enabled true
spark.eventLog.dir hdfs://hadoop120:8020/spark-logDir
# 指定每个执行器的内存
spark.executor.memory 1g
# 指定驱动器（Driver）的内存
spark.driver.memory 1g

复制代码

hadoop fs -mkdir /spark-logDir

复制代码

cp $HIVE_HOME/lib/hive-exec-3.1.3.jar $SPARK_HOME/jars/

复制代码

hadoop fs -mkdir /spark-jars
cd $SPARK_HOME
hadoop fs -put ./jars/* /spark-jars

复制代码

cd $HIVE_HOME/conf
vim hive-site.xml

复制代码

<property>
<name>spark.yarn.jars</name>
<value>hdfs://hadoop120:8020/spark-jars/*</value>
</property>
<property>
<name>hive.execution.engine</name>
<value>spark</value>
</property>
<property>
<name>hive.spark.client.connect.timeout</name>
<value>5000</value>
</property>

复制代码

drop table if exists books;
create table books(id int,book_name string);

复制代码

insert into books values (1,'bigdata');
insert into books values (2,'hive');
insert into books values (3,'spark');

复制代码

select * from books;

复制代码

cd $HADOOP_HOME/etc/hadoop
vim capacity-scheduler.xml

复制代码

Job failed with java.lang.IllegalAccessError: tried to access method com.google.common.base.Stopwatch.<init>()V from class org.apache.hadoop.mapreduce.lib.input.FileInputFormat
at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.listStatus(FileInputFormat.java:262)
at org.apache.hadoop.hive.shims.Hadoop23Shims$1.listStatus(Hadoop23Shims.java:134)
at org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.getSplits(CombineFileInputFormat.java:217)
at org.apache.hadoop.mapred.lib.CombineFileInputFormat.getSplits(CombineFileInputFormat.java:75)
at org.apache.hadoop.hive.shims.HadoopShimsSecure$CombineFileInputFormatShim.getSplits(HadoopShimsSecure.java:321)
at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getCombineSplits(CombineHiveInputFormat.java:444)
at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:564)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:200)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
at org.apache.spark.rdd.RDD.getNumPartitions(RDD.scala:267)
at org.apache.spark.api.java.JavaRDDLike$class.getNumPartitions(JavaRDDLike.scala:65)
at org.apache.spark.api.java.AbstractJavaRDDLike.getNumPartitions(JavaRDDLike.scala:45)
at org.apache.hadoop.hive.ql.exec.spark.SparkPlanGenerator.generateMapInput(SparkPlanGenerator.java:215)
at org.apache.hadoop.hive.ql.exec.spark.SparkPlanGenerator.generateParentTran(SparkPlanGenerator.java:142)
at org.apache.hadoop.hive.ql.exec.spark.SparkPlanGenerator.generate(SparkPlanGenerator.java:114)
at org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient$JobStatusJob.call(RemoteHiveSparkClient.java:359)
at org.apache.hive.spark.client.RemoteDriver$JobWrapper.call(RemoteDriver.java:378)
at org.apache.hive.spark.client.RemoteDriver$JobWrapper.call(RemoteDriver.java:343)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask. Spark job failed during runtime. Please check stacktrace for the root cause.

复制代码

# 备份 Hive 自带的高版本 guava（改名为 .bak 后不再被当作 jar 加载）
cd $HIVE_HOME/lib
mv guava-19.0.jar guava-19.0.jar.bak
# 将低版本放入 Hive 与 Spark 中
# 注意：以下命令假定 guava-13.0.jar 已放在当前目录（$HIVE_HOME/lib）下；
# 此时下一行复制到同一目录的 cp 会提示源和目标是同一文件，可以省略
cp guava-13.0.jar $HIVE_HOME/lib
cp guava-13.0.jar $SPARK_HOME/jars
# 还需上传到 HDFS 中存储 Spark Jars 的目录下
hadoop fs -put guava-13.0.jar /spark-jars

复制代码