我有一个项目,需要先训练一个模型,然后通过 Kafka 主题把最佳模型的结果存储到 Hive 中。
我尝试了各种配置和解决方案,但都没有成功。
这是我使用的 docker-compose 文件。
# Docker Compose stack: HDFS (namenode/datanode), Spark (master/worker),
# Hue file browser, Zeppelin, Hive (server, metastore, metastore DB),
# ZooKeeper, Kafka and NiFi.
#
# Indentation has been restored (2 spaces, indented sequences) and all port
# mappings are quoted so they are always parsed as strings.
version: "3"

services:
  namenode:
    image: bde2020/hadoop-namenode:1.1.0-hadoop2.8-java8
    container_name: namenode
    volumes:
      - namenode:/hadoop/dfs/name
      - ./infra/zeppelin/examples:/opt/sansa-examples
    environment:
      - CLUSTER_NAME=test
    env_file:
      - ./infra/hadoop/hadoop-hive.env
    ports:
      - "50070:50070"
      - "8020:8020"
      - "8081:8081"

  datanode:
    image: bde2020/hadoop-datanode:1.1.0-hadoop2.8-java8
    container_name: datanode
    volumes:
      - datanode:/hadoop/dfs/data
    env_file:
      - ./infra/hadoop/hadoop-hive.env
    links:
      - namenode

  spark-master:
    image: bde2020/spark-master:2.1.0-hadoop2.8-hive-java8
    container_name: spark-master
    ports:
      # Was "8090:800". The Spark standalone master web UI defaults to
      # container port 8080, so host port 8090 is mapped to that instead.
      - "8090:8080"
      - "7077:7077"
    environment:
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
      - SPARK_PUBLIC_DNS=localhost
    depends_on:
      - namenode
      - datanode
    links:
      - namenode
      - datanode

  spark-worker:
    image: bde2020/spark-worker:2.1.0-hadoop2.8-hive-java8
    container_name: spark-worker
    ports:
      - "8083:8083"
    # The original declared `environment` twice; with duplicate keys the last
    # one wins, which silently dropped SPARK_MASTER. Both lists are merged.
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
      - SPARK_PUBLIC_DNS=localhost
    links:
      - spark-master

  hue:
    image: bde2020/hdfs-filebrowser:3.11
    container_name: hue
    ports:
      - "8088:8088"
    environment:
      - NAMENODE_HOST=namenode
      - SPARK_MASTER=spark://spark-master:7077
    links:
      - spark-master

  zeppelin:
    image: bde2020/zeppelin:0.0.1-zeppelin-0.7.1-hadoop-2.8.0-spark-2.1.0
    container_name: zeppelin
    ports:
      - "8080:8080"
    volumes:
      - ./data:/data
      - ./data:/opt/zeppelin/data
      # - ./infra/zeppelin/conf:/opt/zeppelin/conf
      - ./infra/zeppelin/logs:/opt/zeppelin/logs
      - ./infra/zeppelin/notebooks:/opt/zeppelin/notebook
      - ./infra/zeppelin/examples:/opt/sansa-examples
    environment:
      CORE_CONF_fs_defaultFS: "hdfs://namenode:8020"
      SPARK_MASTER: "spark://spark-master:7077"
      MASTER: "spark://spark-master:7077"
      SPARK_SUBMIT_OPTIONS: "--jars /opt/sansa-examples/jars/sansa-examples-spark.jar --conf spark.serializer=org.apache.spark.serializer.KryoSerializer"
    links:
      - spark-master

  hive-server:
    image: bde2020/hive
    container_name: hive-server
    env_file:
      - ./infra/hadoop/hadoop-hive.env
    environment:
      # NOTE(review): this JDBC URL targets the `hive-metastore` host, while
      # the env file points the same setting at `hive-metastore-postgresql`.
      # Confirm which one is intended.
      - "HIVE_CORE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore/metastore"
    links:
      - namenode
      - hive-metastore
    ports:
      - "10000:10000"

  hive-metastore-postgresql:
    image: bde2020/hive-metastore-postgresql
    container_name: hive-metastore-postgresql

  hive-metastore:
    image: bde2020/hive
    container_name: hive-metastore
    env_file:
      - ./infra/hadoop/hadoop-hive.env
    links:
      - namenode
      - hive-metastore-postgresql
    command: /opt/hive/bin/hive --service metastore
    ports:
      - "9083:9083"

  zookeeper:
    image: confluentinc/cp-zookeeper
    container_name: zookeeper
    environment:
      ZOOKEEPER_CLIENT_PORT: "2181"
    volumes:
      - zookeeper:/var/lib/zookeeper

  kafka:
    image: wurstmeister/kafka
    container_name: kafka
    ports:
      - "9092:9092"
    environment:
      # NOTE(review): the broker advertises itself as `localhost`. Clients in
      # other containers (NiFi, Spark, Hive) that bootstrap via `kafka:9092`
      # will presumably be redirected to their own localhost. Confirm whether
      # in-network clients need a separate advertised listener.
      KAFKA_ADVERTISED_HOST_NAME: localhost
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
    links:
      - zookeeper
    depends_on:
      - zookeeper

  # Alternative Kafka service based on the Confluent image (disabled).
  # kafka:
  #   image: confluentinc/cp-kafka
  #   container_name: kafka
  #   ports:
  #     - 9092:9092
  #   environment:
  #     KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
  #     KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
  #     KAFKA_NUM_PARTITIONS: 1
  #     KAFKA_DEFAULT_REPLICATION_FACTOR: 1
  #     KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
  #     KAFKA_DELETE_TOPIC_ENABLE: "true"
  #   volumes:
  #     - kafka:/var/lib/kafka
  #   links:
  #     - zookeeper
  #   depends_on:
  #     - zookeeper

  nifi:
    image: xemuliam/nifi
    container_name: nifi
    ports:
      - "5080:5080"
      - "5443:8443"
      - "5081:5081"
      ## for scaling we have to do this
      # - 8080
    links:
      - zookeeper
      - kafka
    depends_on:
      - zookeeper
      - kafka
    volumes:
      - ./infra/nifi/conf:/opt/nifi/conf
      - ./infra/nifi/logs:/opt/nifi/logs
      - ./data:/opt/datafiles
      # NOTE(review): the same named volume backs all four repository
      # directories, so they share one storage location. Confirm this is
      # intended rather than one volume per repository.
      - nifi:/opt/nifi/flowfile_repository
      - nifi:/opt/nifi/database_repository
      - nifi:/opt/nifi/content_repository
      - nifi:/opt/nifi/provenance_repository
    environment:
      ZK_NODES_LIST: zookeeper
      IS_CLUSTER_NODE: "1"
      ELECTION_TIME: 1 min

# Named volumes. `kafka` is only referenced by the disabled Kafka service.
volumes:
  namenode:
  datanode:
  zookeeper:
  kafka:
  nifi:
这是hadoop环境
# Environment file shared by the Hadoop and Hive containers
# (presumably ./infra/hadoop/hadoop-hive.env referenced by the compose file;
# confirm). Variables are grouped by prefix: HIVE_SITE_CONF_, CORE_CONF_,
# HDFS_CONF_ and YARN_CONF_.

# Hive: metastore database connection (PostgreSQL JDBC) and metastore URI.
HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083
HIVE_SITE_CONF_hive_fetch_task_conversion=none
# Hadoop core: default filesystem on the namenode, plus static/proxy users.
CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
# HDFS: WebHDFS enabled, permission checks disabled.
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
# YARN: log aggregation, resource manager recovery, timeline service.
# NOTE(review): these point at `resourcemanager` and `historyserver` hosts,
# which are not defined as services in the compose file shown with this
# env file. Confirm they are provided elsewhere or unused.
YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
我的 Hadoop 版本是 2.7.4,我的 Hive 版本是 2.3.2。
这是 Zeppelin 中 Hive 解释器的配置:(在这里输入图像描述)
这是错误信息:(在这里输入图像描述)
发布于 2022-11-30 13:35:51
您需要将 Kafka 处理程序的 JAR 添加到 HiveServer2 容器的类路径中,因为真正执行查询的是 HiveServer2,而不是 Zeppelin。
要做到这一点,唯一的方法是在 docker-compose 文件中挂载一个卷,并把它挂载到 Hive 读取库文件的路径下。
否则,只需改用 Spark Structured Streaming 或您的 NiFi 容器来写入 Kafka。只有元数据存储在 Hive 中,真正的数据存储在 Kafka 中。另外,我不太确定 Cloudera 是否维护了 Hive,而且它似乎也没有发布到 Maven Central。
https://stackoverflow.com/questions/74619927
复制相似问题