Can't connect Hive with Kafka

Stack Overflow user
Asked 2022-11-29 20:29:12
1 answer · 19 views · 0 followers · Score 0

I have a project that involves training a model and then storing the best model's results in Hive, using a Kafka topic.

I have tried various configurations and solutions, but none of them worked.

This is the docker-compose file being used.

version: "3"

services:
  namenode:
    image: bde2020/hadoop-namenode:1.1.0-hadoop2.8-java8
    container_name: namenode
    volumes:
      - namenode:/hadoop/dfs/name
      - ./infra/zeppelin/examples:/opt/sansa-examples
    environment:
      - CLUSTER_NAME=test
    env_file:
      - ./infra/hadoop/hadoop-hive.env
    ports:
      - "50070:50070"
      - "8020:8020"
      - "8081:8081"

  datanode:
    image: bde2020/hadoop-datanode:1.1.0-hadoop2.8-java8
    container_name: datanode
    volumes:
      - datanode:/hadoop/dfs/data
    env_file:
      - ./infra/hadoop/hadoop-hive.env
    links:
      - namenode

  spark-master:
    image: bde2020/spark-master:2.1.0-hadoop2.8-hive-java8
    container_name: spark-master
    ports:
      - "8090:800"
      - "7077:7077"
    environment:
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
      - SPARK_PUBLIC_DNS=localhost
    depends_on:
      - namenode
      - datanode
    links:
      - namenode
      - datanode

  spark-worker:
    image: bde2020/spark-worker:2.1.0-hadoop2.8-hive-java8
    container_name: spark-worker
    ports:
      - "8083:8083"
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - CORE_CONF_fs_defaultFS=hdfs://namenode:8020
      - SPARK_PUBLIC_DNS=localhost
    links:
      - spark-master

  hue:
    image: bde2020/hdfs-filebrowser:3.11
    container_name: hue
    ports:
      - 8088:8088
    environment:
      - NAMENODE_HOST=namenode
      - SPARK_MASTER=spark://spark-master:7077
    links:
      - spark-master

  zeppelin:
    image: bde2020/zeppelin:0.0.1-zeppelin-0.7.1-hadoop-2.8.0-spark-2.1.0
    container_name: zeppelin
    ports:
      - 8080:8080
    volumes:
      - ./data:/data
      - ./data:/opt/zeppelin/data
  #    - ./infra/zeppelin/conf:/opt/zeppelin/conf
      - ./infra/zeppelin/logs:/opt/zeppelin/logs
      - ./infra/zeppelin/notebooks:/opt/zeppelin/notebook
      - ./infra/zeppelin/examples:/opt/sansa-examples
    environment:
      CORE_CONF_fs_defaultFS: "hdfs://namenode:8020"
      SPARK_MASTER: "spark://spark-master:7077"
      MASTER: "spark://spark-master:7077"
      SPARK_SUBMIT_OPTIONS: "--jars /opt/sansa-examples/jars/sansa-examples-spark.jar --conf spark.serializer=org.apache.spark.serializer.KryoSerializer"
    links:
      - spark-master

  hive-server:
    image: bde2020/hive
    container_name: hive-server
    env_file:
      - ./infra/hadoop/hadoop-hive.env
    environment:
      - "HIVE_CORE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore/metastore"
    links:
      - namenode
      - hive-metastore
    ports:
      - 10000:10000

  hive-metastore-postgresql:
    image: bde2020/hive-metastore-postgresql
    container_name: hive-metastore-postgresql

  hive-metastore:
    image: bde2020/hive
    container_name: hive-metastore
    env_file:
      - ./infra/hadoop/hadoop-hive.env
    links:
      - namenode
      - hive-metastore-postgresql
    command: /opt/hive/bin/hive --service metastore
    ports:
      - 9083:9083

  zookeeper:
    image: confluentinc/cp-zookeeper
    container_name: zookeeper
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181
    volumes:
      - zookeeper:/var/lib/zookeeper

  kafka:
    image: wurstmeister/kafka
    container_name: kafka
    ports:
      - "9092:9092"
    environment:
      KAFKA_ADVERTISED_HOST_NAME: localhost
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
    links:
      - zookeeper
    depends_on:
      - zookeeper  
      
  # kafka:
  #   image: confluentinc/cp-kafka
  #   container_name: kafka
  #   ports:
  #     - 9092:9092
  #   environment:
  #     KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
  #     KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
  #     KAFKA_NUM_PARTITIONS: 1
  #     KAFKA_DEFAULT_REPLICATION_FACTOR: 1
  #     KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
  #     KAFKA_DELETE_TOPIC_ENABLE: "true"
  #   volumes:
  #     - kafka:/var/lib/kafka
  #   links:
  #     - zookeeper
  #   depends_on:
  #     - zookeeper

  nifi:
    image: xemuliam/nifi
    container_name: nifi
    ports:
      - 5080:5080
      - 5443:8443
      - 5081:5081
## for scaling we have to do this
#      - 8080
    links:
      - zookeeper
      - kafka
    depends_on:
      - zookeeper
      - kafka
    volumes:
      - ./infra/nifi/conf:/opt/nifi/conf
      - ./infra/nifi/logs:/opt/nifi/logs
      - ./data:/opt/datafiles
      - nifi:/opt/nifi/flowfile_repository
      - nifi:/opt/nifi/database_repository
      - nifi:/opt/nifi/content_repository
      - nifi:/opt/nifi/provenance_repository
    environment:
      ZK_NODES_LIST: zookeeper
      IS_CLUSTER_NODE: 1
      ELECTION_TIME: 1 min

volumes:
  namenode:
  datanode:
  zookeeper:
  kafka:
  nifi:

This is the Hadoop environment file (hadoop-hive.env).

HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://hive-metastore:9083
HIVE_SITE_CONF_hive_fetch_task_conversion=none

CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*

HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false

YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031

My Hadoop version is 2.7.4 and my Hive version is 2.3.2.

This is the Hive interpreter configuration inside Zeppelin (screenshot not included here).

This is the error (screenshot not included here).


1 Answer

Stack Overflow user

Answered 2022-11-30 13:35:51

You need to add the Kafka handler JAR to the HiveServer2 container's classpath, since that is what actually executes the queries, not Zeppelin.

The only way to do that here is to mount a volume in the compose file at a path from which Hive loads its libraries.
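
As a rough sketch of what that mount could look like, here is the hive-server service with a volumes section added. The host folder ./infra/hive/lib and the JAR file names are hypothetical, and the assumption that this image keeps Hive under /opt/hive (so that JARs placed in /opt/hive/lib end up on the HiveServer2 classpath) is only inferred from the metastore command /opt/hive/bin/hive in the same compose file:

  hive-server:
    image: bde2020/hive
    container_name: hive-server
    # env_file, environment, links and ports stay exactly as in the question
    volumes:
      # Hypothetical host files: download a Hive Kafka storage handler JAR and
      # the kafka-clients JAR it depends on into ./infra/hive/lib beforehand.
      # Mounting them into the assumed Hive lib directory makes them visible
      # to HiveServer2 when the container starts.
      - ./infra/hive/lib/kafka-handler.jar:/opt/hive/lib/kafka-handler.jar
      - ./infra/hive/lib/kafka-clients.jar:/opt/hive/lib/kafka-clients.jar

Setting HIVE_AUX_JARS_PATH to a mounted directory is another common way to get extra JARs onto Hive's classpath, but either way the JARs have to be visible inside the hive-server container, not inside Zeppelin.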

Otherwise, just use Spark Structured Streaming, or your NiFi container, to write to Kafka instead. Only metadata is stored in Hive; the actual data lives in Kafka. Also, I'm not really sure the Kafka handler is maintained outside of Cloudera's Hive, and it doesn't seem to be published to Maven Central either.

Score 0
The original page content was provided by Stack Overflow; translation support by Tencent Cloud Xiaowei's IT-domain engine.
Original link:

https://stackoverflow.com/questions/74619927
