我正在尝试读取配置单元查询中GCP存储桶中存在的文件。
基本上,我想做的就是
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.StorageOptions;
Storage storage = StorageOptions.getDefaultInstance().getService();
Blob blob = storage.get(BlobId.of(bucketName, srcFilename));
String fileContent = new String(blob.getContent());
return fileContent;现在,当我在我的mac上运行它时,它可以工作(我已经设置了gcloud,这样我就可以访问存储桶了)
现在,我想在一个hive udf中拥有相同的功能。因此,我构建了一个非常简单的jar
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.StorageOptions;
@UDFType(deterministic = true)
public class MyAwesomeUDF extends GenericUDF{
@Override
public String process(String srcFilename, String bucketName) throws IOException {
Storage storage = StorageOptions.getDefaultInstance().getService();
Blob blob = storage.get(BlobId.of(bucketName, srcFilename));
String fileContent = new String(blob.getContent());
return fileContent;
}
}这是我的pom.xml
<dependencies>
<!-- https://mvnrepository.com/artifact/com.google.cloud/google-cloud-storage -->
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-storage</artifactId>
<version>1.71.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
<version>1.2.1</version>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>apache-log4j-extras</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<finalName>hive-exe-jar-with-dependencies</finalName>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<relocations>
<relocation>
<pattern>com.google.common</pattern>
<shadedPattern>repackaged.com.google.common</shadedPattern>
</relocation>
</relocations>
</configuration>
</execution>
</executions>
</plugin>
</plugins>接下来,我构建这个jar,并能够在VM上运行它。
最后,下面是我想要运行的配置单元查询
add jar /path/to/my/awesome/jar;
use myDb;
create temporary function awesome_fun as 'package.path.to.my.MyAwesomeUDF';
select
awesome_fun('bucketName','srcFileName');但在这里我得到了
Exception in thread "main" java.lang.NoSuchMethodError: com.google.api.services.storage.Storage$Objects$Get.setUserProject(Ljava/lang/String;)Lcom/google/api/services/storage/Storage$Objects$Get;
at com.google.cloud.storage.spi.v1.HttpStorageRpc.getCall(HttpStorageRpc.java:403)
at com.google.cloud.storage.spi.v1.HttpStorageRpc.get(HttpStorageRpc.java:411)
at com.google.cloud.storage.StorageImpl$5.call(StorageImpl.java:198)
at com.google.cloud.storage.StorageImpl$5.call(StorageImpl.java:195)
at com.google.api.gax.retrying.DirectRetryingExecutor.submit(DirectRetryingExecutor.java:89)
at com.google.cloud.RetryHelper.run(RetryHelper.java:74)
at com.google.cloud.RetryHelper.runWithRetries(RetryHelper.java:51)
at com.google.cloud.storage.StorageImpl.get(StorageImpl.java:195)
at com.google.cloud.storage.StorageImpl.get(StorageImpl.java:209)错误发生在
Storage storage = StorageOptions.getDefaultInstance().getService();
此外,在构建jar之后,我可以(使用jar -tf)看到com.google.api.services.storage.Storage$Objects$Get已经存在。
我做错了什么?
发布于 2019-05-14 06:03:07
问题是缺少方法,请确保在编译或验证编译后的类和库的版本相同时更新实际运行的类文件。
https://stackoverflow.com/questions/56084366
复制相似问题