diff --git a/api/src/main/java/org/apache/gravitino/credential/COSSecretKeyCredential.java b/api/src/main/java/org/apache/gravitino/credential/COSSecretKeyCredential.java new file mode 100644 index 00000000000..a6737e50c6a --- /dev/null +++ b/api/src/main/java/org/apache/gravitino/credential/COSSecretKeyCredential.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.credential; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.commons.lang3.StringUtils; + +/** Tencent Cloud COS secret key credential. */ +public class COSSecretKeyCredential implements Credential { + + /** COS secret key credential type. */ + public static final String COS_SECRET_KEY_CREDENTIAL_TYPE = "cos-secret-key"; + /** The static access key ID (a.k.a. SecretId in Tencent Cloud) used to access COS data. */ + public static final String GRAVITINO_COS_STATIC_ACCESS_KEY_ID = "cos-access-key-id"; + /** The static secret access key (a.k.a. SecretKey in Tencent Cloud) used to access COS data. */ + public static final String GRAVITINO_COS_STATIC_SECRET_ACCESS_KEY = "cos-secret-access-key"; + + private String accessKeyId; + private String secretAccessKey; + + /** + * Constructs an instance of {@link COSSecretKeyCredential} with the static COS access key ID and + * secret access key. + * + * @param accessKeyId The COS static access key ID. + * @param secretAccessKey The COS static secret access key. + */ + public COSSecretKeyCredential(String accessKeyId, String secretAccessKey) { + validate(accessKeyId, secretAccessKey, 0); + this.accessKeyId = accessKeyId; + this.secretAccessKey = secretAccessKey; + } + + /** + * This is the constructor that is used by credential factory to create an instance of credential + * according to the credential information. + */ + public COSSecretKeyCredential() {} + + @Override + public String credentialType() { + return COS_SECRET_KEY_CREDENTIAL_TYPE; + } + + @Override + public long expireTimeInMs() { + return 0; + } + + @Override + public Map credentialInfo() { + return (new ImmutableMap.Builder()) + .put(GRAVITINO_COS_STATIC_ACCESS_KEY_ID, accessKeyId) + .put(GRAVITINO_COS_STATIC_SECRET_ACCESS_KEY, secretAccessKey) + .build(); + } + + /** + * Initialize the credential with the credential information. + * + *

This method is invoked to deserialize the credential in client side. + * + * @param credentialInfo The credential information from {@link #credentialInfo}. + * @param expireTimeInMs The expire-time from {@link #expireTimeInMs()}. + */ + @Override + public void initialize(Map credentialInfo, long expireTimeInMs) { + String accessKeyId = credentialInfo.get(GRAVITINO_COS_STATIC_ACCESS_KEY_ID); + String secretAccessKey = credentialInfo.get(GRAVITINO_COS_STATIC_SECRET_ACCESS_KEY); + validate(accessKeyId, secretAccessKey, expireTimeInMs); + this.accessKeyId = accessKeyId; + this.secretAccessKey = secretAccessKey; + } + + /** + * Get COS static access key ID. + * + * @return The COS access key ID. + */ + public String accessKeyId() { + return accessKeyId; + } + + /** + * Get COS static secret access key. + * + * @return The COS secret access key. + */ + public String secretAccessKey() { + return secretAccessKey; + } + + private void validate(String accessKeyId, String secretAccessKey, long expireTimeInMs) { + Preconditions.checkArgument( + StringUtils.isNotBlank(accessKeyId), "COS access key Id should not empty"); + Preconditions.checkArgument( + StringUtils.isNotBlank(secretAccessKey), "COS secret access key should not empty"); + Preconditions.checkArgument( + expireTimeInMs == 0, "The expiration time of COSSecretKeyCredential is not 0"); + } +} diff --git a/bundles/tencent-bundle/build.gradle.kts b/bundles/tencent-bundle/build.gradle.kts new file mode 100644 index 00000000000..6e421e0a2b6 --- /dev/null +++ b/bundles/tencent-bundle/build.gradle.kts @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + +plugins { + `maven-publish` + id("java") + alias(libs.plugins.shadow) +} + +dependencies { + implementation(project(":bundles:tencent")) + + implementation(libs.commons.collections3) + implementation(libs.commons.io) + implementation(libs.hadoop3.client.api) + implementation(libs.hadoop3.client.runtime) + implementation(libs.hadoop3.cos) + implementation(libs.httpclient) +} + +tasks.withType(ShadowJar::class.java) { + isZip64 = true + configurations = listOf(project.configurations.runtimeClasspath.get()) + archiveClassifier.set("") + mergeServiceFiles() + + dependencies { + exclude(dependency("org.slf4j:slf4j-api")) + + // Exclude Gravitino modules to prevent class duplication and "Split Packages" issues. + // These modules (api, common, catalogs) are already provided by the Gravitino server and gravitino-filesystem-hadoop3-runtime. + // Including them here would cause the Relocation rules below to incorrectly modify + // method signatures (e.g., JsonUtils.anyFieldMapper returning a shaded ObjectMapper), + // leading to java.lang.NoSuchMethodError at runtime. + exclude(project(":api")) + exclude(project(":common")) + exclude(project(":catalogs:catalog-common")) + exclude(project(":catalogs:hadoop-common")) + } + + // Relocate dependencies to avoid conflicts. + // hadoop-cos (from com.qcloud.cos:hadoop-cos) bundles the qcloud-cos SDK and a few common + // libraries; relocate them under "org.apache.gravitino.tencent.shaded.*" following the + // same pattern as the aws/aliyun/azure/gcp bundles. + relocate("com.fasterxml.jackson", "org.apache.gravitino.tencent.shaded.com.fasterxml.jackson") + relocate("com.google", "org.apache.gravitino.tencent.shaded.com.google") + relocate("com.qcloud", "org.apache.gravitino.tencent.shaded.com.qcloud") + relocate("org.apache.commons", "org.apache.gravitino.tencent.shaded.org.apache.commons") + relocate("org.apache.http", "org.apache.gravitino.tencent.shaded.org.apache.http") + relocate("org.checkerframework", "org.apache.gravitino.tencent.shaded.org.checkerframework") + relocate("org.jacoco.agent.rt", "org.apache.gravitino.tencent.shaded.org.jacoco.agent.rt") +} + +tasks.jar { + dependsOn(tasks.named("shadowJar")) + archiveClassifier.set("empty") +} + +tasks.compileJava { + dependsOn(":catalogs:catalog-fileset:runtimeJars") +} diff --git a/bundles/tencent/build.gradle.kts b/bundles/tencent/build.gradle.kts new file mode 100644 index 00000000000..98aa70196d4 --- /dev/null +++ b/bundles/tencent/build.gradle.kts @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +plugins { + `maven-publish` + id("java") +} + +dependencies { + implementation(project(":api")) { + exclude("*") + } + implementation(project(":catalogs:catalog-common")) { + exclude("*") + } + implementation(project(":catalogs:hadoop-common")) { + exclude("*") + } + implementation(project(":common")) { + exclude("*") + } + + implementation(libs.commons.lang3) + implementation(libs.guava) + implementation(libs.jackson.databind) + + compileOnly(libs.hadoop3.client.api) + compileOnly(libs.hadoop3.cos) + compileOnly(libs.lombok) + + testImplementation(libs.hadoop3.client.api) + testImplementation(libs.hadoop3.client.runtime) + testImplementation(libs.hadoop3.cos) + testImplementation(libs.junit.jupiter.api) + testImplementation(libs.junit.jupiter.params) + testRuntimeOnly(libs.junit.jupiter.engine) + testRuntimeOnly(libs.bundles.log4j) +} + +tasks.compileJava { + dependsOn(":catalogs:catalog-fileset:runtimeJars") +} diff --git a/bundles/tencent/src/main/java/org/apache/gravitino/cos/credential/COSSecretKeyProvider.java b/bundles/tencent/src/main/java/org/apache/gravitino/cos/credential/COSSecretKeyProvider.java new file mode 100644 index 00000000000..2abc609134f --- /dev/null +++ b/bundles/tencent/src/main/java/org/apache/gravitino/cos/credential/COSSecretKeyProvider.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.cos.credential; + +import java.util.Map; +import org.apache.gravitino.credential.COSSecretKeyCredential; +import org.apache.gravitino.credential.Credential; +import org.apache.gravitino.credential.CredentialContext; +import org.apache.gravitino.credential.CredentialProvider; +import org.apache.gravitino.credential.config.COSCredentialConfig; + +/** Generate COS access key and secret key to access Tencent Cloud COS data. */ +public class COSSecretKeyProvider implements CredentialProvider { + + private String accessKey; + private String secretKey; + + @Override + public void initialize(Map properties) { + COSCredentialConfig cosCredentialConfig = new COSCredentialConfig(properties); + this.accessKey = cosCredentialConfig.accessKeyID(); + this.secretKey = cosCredentialConfig.secretAccessKey(); + } + + @Override + public void close() {} + + @Override + public boolean supportsScheme(String scheme) { + // hadoop-cos exposes the `cosn://` scheme, matching `COSFileSystemProvider#scheme()`. + return "cosn".equalsIgnoreCase(scheme); + } + + @Override + public String credentialType() { + return COSSecretKeyCredential.COS_SECRET_KEY_CREDENTIAL_TYPE; + } + + @Override + public Credential getCredential(CredentialContext context) { + return new COSSecretKeyCredential(accessKey, secretKey); + } +} diff --git a/bundles/tencent/src/main/java/org/apache/gravitino/cos/fs/COSCredentialsProvider.java b/bundles/tencent/src/main/java/org/apache/gravitino/cos/fs/COSCredentialsProvider.java new file mode 100644 index 00000000000..038271ffcff --- /dev/null +++ b/bundles/tencent/src/main/java/org/apache/gravitino/cos/fs/COSCredentialsProvider.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.cos.fs; + +import com.qcloud.cos.auth.BasicCOSCredentials; +import com.qcloud.cos.auth.COSCredentials; +import java.net.URI; +import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils; +import org.apache.gravitino.catalog.hadoop.fs.GravitinoFileSystemCredentialsProvider; +import org.apache.gravitino.credential.COSSecretKeyCredential; +import org.apache.gravitino.credential.Credential; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.auth.AbstractCOSCredentialProvider; + +/** + * Hadoop-COS credential provider that pulls vended credentials out of Gravitino and feeds them to + * the underlying {@code com.qcloud.cos.auth.COSCredentialsProvider} contract. PR-A only handles + * static secret-key credentials; STS / token credentials will be added by a follow-up PR. + */ +public class COSCredentialsProvider extends AbstractCOSCredentialProvider { + + private final GravitinoFileSystemCredentialsProvider gravitinoFileSystemCredentialsProvider; + private volatile COSCredentials basicCredentials; + private volatile long expirationTime = Long.MAX_VALUE; + private static final double EXPIRATION_TIME_FACTOR = 0.5D; + + public COSCredentialsProvider(URI uri, Configuration conf) { + super(uri, conf); + this.gravitinoFileSystemCredentialsProvider = FileSystemUtils.getGvfsCredentialProvider(conf); + } + + @Override + public COSCredentials getCredentials() { + if (basicCredentials == null || System.currentTimeMillis() >= expirationTime) { + synchronized (this) { + if (basicCredentials == null || System.currentTimeMillis() >= expirationTime) { + refresh(); + } + } + } + return basicCredentials; + } + + @Override + public void refresh() { + Credential[] gravitinoCredentials = gravitinoFileSystemCredentialsProvider.getCredentials(); + Credential credential = COSUtils.getSuitableCredential(gravitinoCredentials); + if (credential == null) { + throw new RuntimeException("No suitable credential for COS found..."); + } + + if (credential instanceof COSSecretKeyCredential) { + COSSecretKeyCredential cosSecretKeyCredential = (COSSecretKeyCredential) credential; + this.basicCredentials = + new BasicCOSCredentials( + cosSecretKeyCredential.accessKeyId(), cosSecretKeyCredential.secretAccessKey()); + } else { + // Defensive: COSUtils#getSuitableCredential currently only returns + // COSSecretKeyCredential, but a follow-up PR will add STS / token support. Failing fast + // here avoids silently leaving {@link #basicCredentials} stale (or null) if a new + // credential type is wired into the selector without updating this branch. + throw new RuntimeException( + "Unsupported credential type for COS: " + credential.getClass().getName()); + } + + if (credential.expireTimeInMs() > 0) { + this.expirationTime = + System.currentTimeMillis() + + (long) + ((credential.expireTimeInMs() - System.currentTimeMillis()) + * EXPIRATION_TIME_FACTOR); + } + } +} diff --git a/bundles/tencent/src/main/java/org/apache/gravitino/cos/fs/COSFileSystemProvider.java b/bundles/tencent/src/main/java/org/apache/gravitino/cos/fs/COSFileSystemProvider.java new file mode 100644 index 00000000000..cdc430f27bc --- /dev/null +++ b/bundles/tencent/src/main/java/org/apache/gravitino/cos/fs/COSFileSystemProvider.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.cos.fs; + +import static org.apache.gravitino.catalog.hadoop.fs.Constants.COS_CONNECTION_TIMEOUT_KEY; +import static org.apache.gravitino.catalog.hadoop.fs.Constants.COS_MAX_RETRIES_KEY; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import java.io.IOException; +import java.util.Map; +import org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider; +import org.apache.gravitino.catalog.hadoop.fs.FileSystemUtils; +import org.apache.gravitino.catalog.hadoop.fs.SupportsCredentialVending; +import org.apache.gravitino.credential.COSSecretKeyCredential; +import org.apache.gravitino.credential.Credential; +import org.apache.gravitino.storage.COSProperties; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CosFileSystem; +import org.apache.hadoop.fs.CosNConfigKeys; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +public class COSFileSystemProvider implements FileSystemProvider, SupportsCredentialVending { + + private static final String COS_FILESYSTEM_IMPL = "fs.cosn.impl"; + + // Default connection timeout (ms) used when caller does not specify one. Aligns with the + // OSS bundle's "fail fast in tests" defaults rather than hadoop-cos's own 10s default. + @VisibleForTesting static final String DEFAULT_COS_CONNECTION_TIMEOUT_MS = "5000"; + + // Default max retries used when caller does not specify one. hadoop-cos's own default is 200, + // which is too aggressive for non-production usage. + @VisibleForTesting static final String DEFAULT_COS_MAX_RETRIES = "2"; + + // Mapping from Gravitino property keys to the corresponding hadoop-cos + // (CosNConfigKeys) keys. Mirrors OSSFileSystemProvider.GRAVITINO_KEY_TO_OSS_HADOOP_KEY. + @VisibleForTesting + public static final Map GRAVITINO_KEY_TO_COS_HADOOP_KEY = + ImmutableMap.of( + COSProperties.GRAVITINO_COS_REGION, CosNConfigKeys.COSN_REGION_KEY, + COSProperties.GRAVITINO_COS_ENDPOINT, CosNConfigKeys.COSN_ENDPOINT_SUFFIX_KEY, + COSProperties.GRAVITINO_COS_ACCESS_KEY_ID, CosNConfigKeys.COSN_USERINFO_SECRET_ID_KEY, + COSProperties.GRAVITINO_COS_ACCESS_KEY_SECRET, + CosNConfigKeys.COSN_USERINFO_SECRET_KEY_KEY); + + @Override + public FileSystem getFileSystem(Path path, Map config) throws IOException { + Map hadoopConfMap = + FileSystemUtils.toHadoopConfigMap(config, GRAVITINO_KEY_TO_COS_HADOOP_KEY); + // hadoop-cos relies on fs.cosn.impl rather than ServiceLoader for FileSystem registration. + if (!hadoopConfMap.containsKey(COS_FILESYSTEM_IMPL)) { + hadoopConfMap.put(COS_FILESYSTEM_IMPL, CosFileSystem.class.getCanonicalName()); + } + + hadoopConfMap = additionalCOSConfig(hadoopConfMap); + + Configuration configuration = FileSystemUtils.createCompatibleConfiguration(hadoopConfMap); + + // CosFileSystem does not expose a `newInstance(URI, Configuration)` static helper the way + // AliyunOSSFileSystem does, so we go through Hadoop's generic FileSystem.newInstance which + // is the recommended path and honours `fs.cosn.impl`. + return FileSystem.newInstance(path.toUri(), configuration); + } + + @Override + public Map getFileSystemCredentialConf(Credential[] credentials) { + Credential credential = COSUtils.getSuitableCredential(credentials); + Map result = Maps.newHashMap(); + if (credential instanceof COSSecretKeyCredential) { + result.put( + CosNConfigKeys.COSN_CREDENTIALS_PROVIDER, + COSCredentialsProvider.class.getCanonicalName()); + } + + return result; + } + + @Override + public String scheme() { + // hadoop-cos uses the `cosn://` scheme. We expose the same scheme to Gravitino users so + // existing Hadoop tooling keeps working unchanged. + return "cosn"; + } + + @Override + public String name() { + return "cos"; + } + + /** + * Add additional COS configurations for better performance and reliability in test/dev + * environments. Production deployments are free to override these via catalog properties. + * + * @param configs Original configurations + * @return Configurations with additional COS settings + */ + private Map additionalCOSConfig(Map configs) { + Map additionalConfigs = Maps.newHashMap(configs); + + if (!configs.containsKey(COS_CONNECTION_TIMEOUT_KEY)) { + additionalConfigs.put(COS_CONNECTION_TIMEOUT_KEY, DEFAULT_COS_CONNECTION_TIMEOUT_MS); + } + + if (!configs.containsKey(COS_MAX_RETRIES_KEY)) { + additionalConfigs.put(COS_MAX_RETRIES_KEY, DEFAULT_COS_MAX_RETRIES); + } + + return ImmutableMap.copyOf(additionalConfigs); + } +} diff --git a/bundles/tencent/src/main/java/org/apache/gravitino/cos/fs/COSUtils.java b/bundles/tencent/src/main/java/org/apache/gravitino/cos/fs/COSUtils.java new file mode 100644 index 00000000000..1a1dfcf664b --- /dev/null +++ b/bundles/tencent/src/main/java/org/apache/gravitino/cos/fs/COSUtils.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.cos.fs; + +import org.apache.gravitino.credential.COSSecretKeyCredential; +import org.apache.gravitino.credential.Credential; + +public class COSUtils { + + /** + * Get the credential from the credential array. PR-A only ships static secret-key support; STS + * token credentials will be added by a follow-up PR, at which point this helper should mirror + * {@code OSSUtils#getSuitableCredential} and prefer dynamic over static credentials. + * + * @param credentials The credential array. + * @return A credential. Null if not found. + */ + static Credential getSuitableCredential(Credential[] credentials) { + for (Credential credential : credentials) { + if (credential instanceof COSSecretKeyCredential) { + return credential; + } + } + return null; + } + + private COSUtils() {} +} diff --git a/bundles/tencent/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider b/bundles/tencent/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider new file mode 100644 index 00000000000..6c5369c6e47 --- /dev/null +++ b/bundles/tencent/src/main/resources/META-INF/services/org.apache.gravitino.catalog.hadoop.fs.FileSystemProvider @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +org.apache.gravitino.cos.fs.COSFileSystemProvider diff --git a/bundles/tencent/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider b/bundles/tencent/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider new file mode 100644 index 00000000000..88770144110 --- /dev/null +++ b/bundles/tencent/src/main/resources/META-INF/services/org.apache.gravitino.credential.CredentialProvider @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +org.apache.gravitino.cos.credential.COSSecretKeyProvider diff --git a/bundles/tencent/src/test/java/org/apache/gravitino/cos/credential/TestCOSCredentialProvider.java b/bundles/tencent/src/test/java/org/apache/gravitino/cos/credential/TestCOSCredentialProvider.java new file mode 100644 index 00000000000..1a83c3795e5 --- /dev/null +++ b/bundles/tencent/src/test/java/org/apache/gravitino/cos/credential/TestCOSCredentialProvider.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.cos.credential; + +import com.google.common.collect.ImmutableMap; +import org.apache.gravitino.credential.COSSecretKeyCredential; +import org.apache.gravitino.credential.Credential; +import org.apache.gravitino.storage.COSProperties; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestCOSCredentialProvider { + + @Test + void testCredentialType() { + COSSecretKeyProvider provider = new COSSecretKeyProvider(); + Assertions.assertEquals( + COSSecretKeyCredential.COS_SECRET_KEY_CREDENTIAL_TYPE, provider.credentialType()); + } + + @Test + void testSupportsScheme() { + COSSecretKeyProvider provider = new COSSecretKeyProvider(); + Assertions.assertTrue(provider.supportsScheme("cosn")); + Assertions.assertTrue(provider.supportsScheme("COSN")); + Assertions.assertFalse(provider.supportsScheme("cos")); + Assertions.assertFalse(provider.supportsScheme("oss")); + Assertions.assertFalse(provider.supportsScheme("s3a")); + } + + @Test + void testInitializeAndGetCredential() { + COSSecretKeyProvider provider = new COSSecretKeyProvider(); + provider.initialize( + ImmutableMap.of( + COSProperties.GRAVITINO_COS_ACCESS_KEY_ID, "ak", + COSProperties.GRAVITINO_COS_ACCESS_KEY_SECRET, "sk")); + + Credential credential = provider.getCredential(null); + + Assertions.assertTrue(credential instanceof COSSecretKeyCredential); + COSSecretKeyCredential typed = (COSSecretKeyCredential) credential; + Assertions.assertEquals("ak", typed.accessKeyId()); + Assertions.assertEquals("sk", typed.secretAccessKey()); + } +} diff --git a/bundles/tencent/src/test/java/org/apache/gravitino/cos/fs/TestCOSCredentialsProvider.java b/bundles/tencent/src/test/java/org/apache/gravitino/cos/fs/TestCOSCredentialsProvider.java new file mode 100644 index 00000000000..f1b1244bec8 --- /dev/null +++ b/bundles/tencent/src/test/java/org/apache/gravitino/cos/fs/TestCOSCredentialsProvider.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.cos.fs; + +import com.qcloud.cos.auth.BasicCOSCredentials; +import com.qcloud.cos.auth.COSCredentials; +import java.net.URI; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.gravitino.catalog.hadoop.fs.GravitinoFileSystemCredentialsProvider; +import org.apache.gravitino.credential.COSSecretKeyCredential; +import org.apache.gravitino.credential.Credential; +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for {@link COSCredentialsProvider}, the Hadoop-side bridge that turns Gravitino-vended + * credentials into {@code com.qcloud.cos.auth.COSCredentials} instances. The stub {@link + * StubGravitinoFileSystemCredentialsProvider} below is wired via {@code + * fs.gvfs.credential.provider} so that {@code FileSystemUtils.getGvfsCredentialProvider} can + * reflectively instantiate it during the {@link COSCredentialsProvider} constructor. + */ +public class TestCOSCredentialsProvider { + + private static final URI COS_URI = URI.create("cosn://test-bucket-1250000000/"); + + @BeforeEach + void resetStub() { + StubGravitinoFileSystemCredentialsProvider.reset(); + } + + @AfterEach + void clearStub() { + StubGravitinoFileSystemCredentialsProvider.reset(); + } + + @Test + void testRefreshWithSecretKeyCredential() { + StubGravitinoFileSystemCredentialsProvider.nextCredentials = + new Credential[] {new COSSecretKeyCredential("test-ak", "test-sk")}; + + COSCredentialsProvider provider = new COSCredentialsProvider(COS_URI, newStubConf()); + + COSCredentials credentials = provider.getCredentials(); + + Assertions.assertNotNull(credentials); + Assertions.assertTrue(credentials instanceof BasicCOSCredentials); + Assertions.assertEquals("test-ak", credentials.getCOSAccessKeyId()); + Assertions.assertEquals("test-sk", credentials.getCOSSecretKey()); + } + + @Test + void testRefreshThrowsWhenNoSuitableCredential() { + // No COSSecretKeyCredential in the array -> COSUtils#getSuitableCredential returns null + // -> COSCredentialsProvider#refresh throws. + StubGravitinoFileSystemCredentialsProvider.nextCredentials = new Credential[] {}; + + COSCredentialsProvider provider = new COSCredentialsProvider(COS_URI, newStubConf()); + + RuntimeException ex = Assertions.assertThrows(RuntimeException.class, provider::getCredentials); + Assertions.assertTrue( + ex.getMessage() != null && ex.getMessage().contains("No suitable credential"), + "Expected message about no suitable credential, but got: " + ex.getMessage()); + } + + @Test + void testGetCredentialsIsCachedForNonExpiringCredential() { + // COSSecretKeyCredential#expireTimeInMs() returns 0, so expirationTime stays at + // Long.MAX_VALUE and subsequent getCredentials() calls must return the cached + // BasicCOSCredentials without re-asking the upstream provider. + StubGravitinoFileSystemCredentialsProvider.nextCredentials = + new Credential[] {new COSSecretKeyCredential("ak1", "sk1")}; + + COSCredentialsProvider provider = new COSCredentialsProvider(COS_URI, newStubConf()); + + COSCredentials first = provider.getCredentials(); + COSCredentials second = provider.getCredentials(); + COSCredentials third = provider.getCredentials(); + + Assertions.assertSame(first, second); + Assertions.assertSame(second, third); + Assertions.assertEquals( + 1, + StubGravitinoFileSystemCredentialsProvider.callCount.get(), + "Static, non-expiring credentials should only be fetched from the upstream provider once"); + } + + private static Configuration newStubConf() { + Configuration conf = new Configuration(false); + conf.set( + GravitinoFileSystemCredentialsProvider.GVFS_CREDENTIAL_PROVIDER, + StubGravitinoFileSystemCredentialsProvider.class.getName()); + return conf; + } + + /** + * Test stub for {@link GravitinoFileSystemCredentialsProvider}. Must be {@code public} with a + * public no-arg constructor because {@code FileSystemUtils#getGvfsCredentialProvider} + * instantiates it reflectively. + * + *

Tests communicate with the stub through the {@link #nextCredentials} static field, because + * the reflective construction prevents passing values via the constructor. + */ + public static final class StubGravitinoFileSystemCredentialsProvider + implements GravitinoFileSystemCredentialsProvider { + + static volatile Credential[] nextCredentials = new Credential[] {}; + static final AtomicInteger callCount = new AtomicInteger(0); + + private Configuration conf; + + public StubGravitinoFileSystemCredentialsProvider() {} + + static void reset() { + nextCredentials = new Credential[] {}; + callCount.set(0); + } + + @Override + public Credential[] getCredentials() { + callCount.incrementAndGet(); + return nextCredentials; + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return conf; + } + } +} diff --git a/bundles/tencent/src/test/java/org/apache/gravitino/cos/fs/TestCOSFileSystemProvider.java b/bundles/tencent/src/test/java/org/apache/gravitino/cos/fs/TestCOSFileSystemProvider.java new file mode 100644 index 00000000000..399e7827548 --- /dev/null +++ b/bundles/tencent/src/test/java/org/apache/gravitino/cos/fs/TestCOSFileSystemProvider.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.cos.fs; + +import java.util.HashMap; +import java.util.Map; +import org.apache.gravitino.credential.COSSecretKeyCredential; +import org.apache.gravitino.credential.Credential; +import org.apache.gravitino.storage.COSProperties; +import org.apache.hadoop.fs.CosNConfigKeys; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestCOSFileSystemProvider { + + @Test + void testSchemeAndName() { + COSFileSystemProvider provider = new COSFileSystemProvider(); + Assertions.assertEquals("cosn", provider.scheme()); + Assertions.assertEquals("cos", provider.name()); + } + + @Test + void testGravitinoToHadoopKeyMapping() { + Map mapping = COSFileSystemProvider.GRAVITINO_KEY_TO_COS_HADOOP_KEY; + + Assertions.assertEquals( + CosNConfigKeys.COSN_REGION_KEY, mapping.get(COSProperties.GRAVITINO_COS_REGION)); + Assertions.assertEquals( + CosNConfigKeys.COSN_ENDPOINT_SUFFIX_KEY, mapping.get(COSProperties.GRAVITINO_COS_ENDPOINT)); + Assertions.assertEquals( + CosNConfigKeys.COSN_USERINFO_SECRET_ID_KEY, + mapping.get(COSProperties.GRAVITINO_COS_ACCESS_KEY_ID)); + Assertions.assertEquals( + CosNConfigKeys.COSN_USERINFO_SECRET_KEY_KEY, + mapping.get(COSProperties.GRAVITINO_COS_ACCESS_KEY_SECRET)); + } + + @Test + void testGetFileSystemCredentialConfWithSecretKey() { + COSFileSystemProvider provider = new COSFileSystemProvider(); + Credential[] credentials = new Credential[] {new COSSecretKeyCredential("ak", "sk")}; + + Map conf = provider.getFileSystemCredentialConf(credentials); + + Assertions.assertEquals( + COSCredentialsProvider.class.getCanonicalName(), + conf.get(CosNConfigKeys.COSN_CREDENTIALS_PROVIDER)); + } + + @Test + void testGetFileSystemCredentialConfWithEmptyCredentials() { + COSFileSystemProvider provider = new COSFileSystemProvider(); + + Map conf = provider.getFileSystemCredentialConf(new Credential[0]); + + Assertions.assertTrue( + conf.isEmpty(), + "No credentials should yield an empty conf map (no credentials provider injected)"); + } + + @Test + void testKeyMappingDoesNotLeakUnknownProperties() { + // Sanity: random non-COS properties must not appear in the Gravitino->Hadoop mapping. + Map mapping = COSFileSystemProvider.GRAVITINO_KEY_TO_COS_HADOOP_KEY; + Map notMapped = new HashMap<>(); + notMapped.put("oss-region", "value"); + notMapped.put("s3-region", "value"); + for (String key : notMapped.keySet()) { + Assertions.assertFalse( + mapping.containsKey(key), + "Gravitino->Hadoop COS mapping should not contain unrelated key: " + key); + } + } +} diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/credential/config/COSCredentialConfig.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/credential/config/COSCredentialConfig.java new file mode 100644 index 00000000000..b411cc99555 --- /dev/null +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/credential/config/COSCredentialConfig.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.credential.config; + +import java.util.Map; +import javax.validation.constraints.NotNull; +import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.Config; +import org.apache.gravitino.config.ConfigBuilder; +import org.apache.gravitino.config.ConfigConstants; +import org.apache.gravitino.config.ConfigEntry; +import org.apache.gravitino.storage.COSProperties; + +/** + * Slim credential config for Tencent Cloud COS, covering only the static secret-key path. STS / + * token-related entries (role arn, token expire) will be added by a follow-up PR that introduces + * the dynamic credential vending support. + */ +public class COSCredentialConfig extends Config { + + public static final ConfigEntry COS_REGION = + new ConfigBuilder(COSProperties.GRAVITINO_COS_REGION) + .doc("The region of the Tencent Cloud COS service") + .version(ConfigConstants.VERSION_1_4_0) + .stringConf() + .create(); + + public static final ConfigEntry COS_ACCESS_KEY_ID = + new ConfigBuilder(COSProperties.GRAVITINO_COS_ACCESS_KEY_ID) + .doc("The static access key ID (Tencent Cloud SecretId) used to access COS data") + .version(ConfigConstants.VERSION_1_4_0) + .stringConf() + .checkValue(StringUtils::isNotBlank, ConfigConstants.NOT_BLANK_ERROR_MSG) + .create(); + + public static final ConfigEntry COS_SECRET_ACCESS_KEY = + new ConfigBuilder(COSProperties.GRAVITINO_COS_ACCESS_KEY_SECRET) + .doc("The static secret access key (Tencent Cloud SecretKey) used to access COS data") + .version(ConfigConstants.VERSION_1_4_0) + .stringConf() + .checkValue(StringUtils::isNotBlank, ConfigConstants.NOT_BLANK_ERROR_MSG) + .create(); + + public COSCredentialConfig(Map properties) { + super(false); + loadFromMap(properties, k -> true); + } + + public String region() { + return this.get(COS_REGION); + } + + @NotNull + public String accessKeyID() { + return this.get(COS_ACCESS_KEY_ID); + } + + @NotNull + public String secretAccessKey() { + return this.get(COS_SECRET_ACCESS_KEY); + } +} diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/COSProperties.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/COSProperties.java new file mode 100644 index 00000000000..7326ebc3358 --- /dev/null +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/storage/COSProperties.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.storage; + +// Properties for Tencent Cloud COS. +public class COSProperties { + + // The region of Tencent Cloud COS, e.g. "ap-guangzhou". + public static final String GRAVITINO_COS_REGION = "cos-region"; + // The endpoint of Tencent Cloud COS (optional, normally derived from region). + public static final String GRAVITINO_COS_ENDPOINT = "cos-endpoint"; + // The static access key ID (Tencent Cloud SecretId) used to access COS data. + public static final String GRAVITINO_COS_ACCESS_KEY_ID = "cos-access-key-id"; + // The static secret access key (Tencent Cloud SecretKey) used to access COS data. + public static final String GRAVITINO_COS_ACCESS_KEY_SECRET = "cos-secret-access-key"; + + private COSProperties() {} +} diff --git a/catalogs/catalog-fileset/build.gradle.kts b/catalogs/catalog-fileset/build.gradle.kts index 6e048200dc0..1127c211119 100644 --- a/catalogs/catalog-fileset/build.gradle.kts +++ b/catalogs/catalog-fileset/build.gradle.kts @@ -53,6 +53,7 @@ dependencies { testImplementation(project(":bundles:aws-bundle", configuration = "shadow")) testImplementation(project(":bundles:azure-bundle", configuration = "shadow")) testImplementation(project(":bundles:gcp-bundle", configuration = "shadow")) + testImplementation(project(":bundles:tencent-bundle", configuration = "shadow")) testImplementation(project(":clients:client-java")) testImplementation(project(":common")) testImplementation(project(":core")) @@ -141,6 +142,7 @@ tasks.test { dependsOn(":bundles:aws-bundle:jar") dependsOn(":bundles:aliyun-bundle:jar") dependsOn(":bundles:gcp-bundle:jar") + dependsOn(":bundles:tencent-bundle:jar") } tasks.getByName("generateMetadataFileForMavenJavaPublication") { diff --git a/catalogs/catalog-fileset/src/test/java/org/apache/gravitino/catalog/fileset/integration/test/FilesetCOSCatalogIT.java b/catalogs/catalog-fileset/src/test/java/org/apache/gravitino/catalog/fileset/integration/test/FilesetCOSCatalogIT.java new file mode 100644 index 00000000000..05f370dabe3 --- /dev/null +++ b/catalogs/catalog-fileset/src/test/java/org/apache/gravitino/catalog/fileset/integration/test/FilesetCOSCatalogIT.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.fileset.integration.test; + +import static org.apache.gravitino.catalog.fileset.FilesetCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import java.io.IOException; +import java.net.URI; +import java.util.Map; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.Schema; +import org.apache.gravitino.file.Fileset; +import org.apache.gravitino.integration.test.util.GravitinoITUtils; +import org.apache.gravitino.storage.COSProperties; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIf; +import org.junit.platform.commons.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Integration test for the fileset catalog backed by Tencent Cloud COS. It is disabled unless the + * required environment variables (COS_*) are present, mirroring the OSS / AWS counterparts. + */ +@EnabledIf(value = "cosIsConfigured", disabledReason = "Tencent Cloud COS is not configured.") +public class FilesetCOSCatalogIT extends FilesetCatalogIT { + private static final Logger LOG = LoggerFactory.getLogger(FilesetCOSCatalogIT.class); + + public static final String BUCKET_NAME = System.getenv("COS_BUCKET_NAME"); + public static final String COS_ACCESS_KEY = System.getenv("COS_ACCESS_KEY_ID"); + public static final String COS_SECRET_KEY = System.getenv("COS_SECRET_ACCESS_KEY"); + public static final String COS_REGION = System.getenv("COS_REGION"); + // Optional: caller may override the endpoint suffix (e.g. for cos-internal endpoints). + public static final String COS_ENDPOINT = System.getenv("COS_ENDPOINT"); + + @VisibleForTesting + public void startIntegrationTest() throws Exception {} + + @BeforeAll + public void setup() throws IOException { + copyBundleJarsToHadoop("tencent-bundle"); + + try { + super.startIntegrationTest(); + } catch (Exception e) { + throw new RuntimeException("Failed to start integration test", e); + } + + metalakeName = GravitinoITUtils.genRandomName("CatalogFilesetIT_metalake"); + catalogName = GravitinoITUtils.genRandomName("CatalogFilesetIT_catalog"); + schemaName = GravitinoITUtils.genRandomName(SCHEMA_PREFIX); + Configuration conf = new Configuration(); + + conf.set("fs.cosn.userinfo.secretId", COS_ACCESS_KEY); + conf.set("fs.cosn.userinfo.secretKey", COS_SECRET_KEY); + conf.set("fs.cosn.bucket.region", COS_REGION); + if (StringUtils.isNotBlank(COS_ENDPOINT)) { + conf.set("fs.cosn.bucket.endpoint_suffix", COS_ENDPOINT); + } + conf.set("fs.cosn.impl", "org.apache.hadoop.fs.CosFileSystem"); + fileSystem = FileSystem.get(URI.create(String.format("cosn://%s", BUCKET_NAME)), conf); + + createMetalake(); + createCatalog(); + createSchema(); + } + + @AfterAll + public void stop() throws IOException { + Catalog catalog = metalake.loadCatalog(catalogName); + catalog.asSchemas().dropSchema(schemaName, true); + metalake.dropCatalog(catalogName, true); + client.dropMetalake(metalakeName, true); + + try { + closer.close(); + } catch (Exception e) { + LOG.error("Failed to close CloseableGroup", e); + } + } + + protected String defaultBaseLocation() { + if (defaultBaseLocation == null) { + try { + Path bucket = + new Path( + String.format( + "cosn://%s/%s", + BUCKET_NAME, GravitinoITUtils.genRandomName("CatalogFilesetIT"))); + if (!fileSystem.exists(bucket)) { + fileSystem.mkdirs(bucket); + } + + defaultBaseLocation = bucket.toString(); + } catch (IOException e) { + throw new RuntimeException("Failed to create default base location", e); + } + } + + return defaultBaseLocation; + } + + protected void createCatalog() { + Map map = Maps.newHashMap(); + map.put(COSProperties.GRAVITINO_COS_ACCESS_KEY_ID, COS_ACCESS_KEY); + map.put(COSProperties.GRAVITINO_COS_ACCESS_KEY_SECRET, COS_SECRET_KEY); + map.put(COSProperties.GRAVITINO_COS_REGION, COS_REGION); + if (StringUtils.isNotBlank(COS_ENDPOINT)) { + map.put(COSProperties.GRAVITINO_COS_ENDPOINT, COS_ENDPOINT); + } + map.put(FILESYSTEM_PROVIDERS, "cos"); + + metalake.createCatalog(catalogName, Catalog.Type.FILESET, provider, "comment", map); + + catalog = metalake.loadCatalog(catalogName); + } + + protected String generateLocation(String filesetName) { + return String.format("%s/%s", defaultBaseLocation, filesetName); + } + + @Test + public void testCreateSchemaAndFilesetWithSpecialLocation() { + String localCatalogName = GravitinoITUtils.genRandomName("local_catalog"); + + String cosLocation = String.format("cosn://%s", BUCKET_NAME); + Map catalogProps = Maps.newHashMap(); + catalogProps.put("location", cosLocation); + catalogProps.put(COSProperties.GRAVITINO_COS_ACCESS_KEY_ID, COS_ACCESS_KEY); + catalogProps.put(COSProperties.GRAVITINO_COS_ACCESS_KEY_SECRET, COS_SECRET_KEY); + catalogProps.put(COSProperties.GRAVITINO_COS_REGION, COS_REGION); + if (StringUtils.isNotBlank(COS_ENDPOINT)) { + catalogProps.put(COSProperties.GRAVITINO_COS_ENDPOINT, COS_ENDPOINT); + } + catalogProps.put(FILESYSTEM_PROVIDERS, "cos"); + + Catalog localCatalog = + metalake.createCatalog( + localCatalogName, Catalog.Type.FILESET, provider, "comment", catalogProps); + Assertions.assertEquals(cosLocation, localCatalog.properties().get("location")); + + // Create schema without specifying location. + Schema localSchema = + localCatalog + .asSchemas() + .createSchema("local_schema", "comment", ImmutableMap.of("key1", "val1")); + + Fileset localFileset = + localCatalog + .asFilesetCatalog() + .createFileset( + NameIdentifier.of(localSchema.name(), "local_fileset"), + "fileset comment", + Fileset.Type.MANAGED, + null, + ImmutableMap.of("k1", "v1")); + Assertions.assertEquals( + cosLocation + "/local_schema/local_fileset", localFileset.storageLocation()); + + // Delete schema + localCatalog.asSchemas().dropSchema(localSchema.name(), true); + + // Create schema with specifying location. + Map schemaProps = ImmutableMap.of("location", cosLocation); + Schema localSchema2 = + localCatalog.asSchemas().createSchema("local_schema2", "comment", schemaProps); + Assertions.assertEquals(cosLocation, localSchema2.properties().get("location")); + + Fileset localFileset2 = + localCatalog + .asFilesetCatalog() + .createFileset( + NameIdentifier.of(localSchema2.name(), "local_fileset2"), + "fileset comment", + Fileset.Type.MANAGED, + null, + ImmutableMap.of("k1", "v1")); + Assertions.assertEquals(cosLocation + "/local_fileset2", localFileset2.storageLocation()); + + // Delete schema + localCatalog.asSchemas().dropSchema(localSchema2.name(), true); + + // Delete catalog + metalake.dropCatalog(localCatalogName, true); + } + + protected static boolean cosIsConfigured() { + return StringUtils.isNotBlank(System.getenv("COS_ACCESS_KEY_ID")) + && StringUtils.isNotBlank(System.getenv("COS_SECRET_ACCESS_KEY")) + && StringUtils.isNotBlank(System.getenv("COS_REGION")) + && StringUtils.isNotBlank(System.getenv("COS_BUCKET_NAME")); + } +} diff --git a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/Constants.java b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/Constants.java index 39ef923b39b..76edc5b9cc3 100644 --- a/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/Constants.java +++ b/catalogs/hadoop-common/src/main/java/org/apache/gravitino/catalog/hadoop/fs/Constants.java @@ -67,6 +67,12 @@ public class Constants { public static final String OSS_ESTABLISH_TIMEOUT_KEY = "fs.oss.connection.establish.timeout"; public static final String OSS_MAX_ERROR_RETRIES_KEY = "fs.oss.attempts.maximum"; + /** Maps to {@code CosNConfigKeys.COSN_CONNECTION_TIMEOUT}. hadoop-cos default is 10000 ms. */ + public static final String COS_CONNECTION_TIMEOUT_KEY = "fs.cosn.connection.timeout"; + + /** Maps to {@code CosNConfigKeys.COSN_MAX_RETRIES_KEY}. hadoop-cos default is 200. */ + public static final String COS_MAX_RETRIES_KEY = "fs.cosn.maxRetries"; + // Azure Blob Storage specific configuration keys, please see: AbfsConfiguration public static final String ADLS_MAX_RETRIES = "fs.azure.io.retry.max.retries"; diff --git a/clients/filesystem-hadoop3-runtime/build.gradle.kts b/clients/filesystem-hadoop3-runtime/build.gradle.kts index 2a5b36b79fd..28aa61be43f 100644 --- a/clients/filesystem-hadoop3-runtime/build.gradle.kts +++ b/clients/filesystem-hadoop3-runtime/build.gradle.kts @@ -30,6 +30,7 @@ dependencies { implementation(project(":bundles:aws")) implementation(project(":bundles:azure")) implementation(project(":bundles:gcp")) + implementation(project(":bundles:tencent")) implementation(project(":clients:filesystem-hadoop3")) { exclude(group = "org.slf4j") } diff --git a/common/src/main/java/org/apache/gravitino/config/ConfigConstants.java b/common/src/main/java/org/apache/gravitino/config/ConfigConstants.java index 78e5b69b112..6370e134466 100644 --- a/common/src/main/java/org/apache/gravitino/config/ConfigConstants.java +++ b/common/src/main/java/org/apache/gravitino/config/ConfigConstants.java @@ -92,6 +92,9 @@ private ConfigConstants() {} /** The version number for the 1.3.0 release. */ public static final String VERSION_1_3_0 = "1.3.0"; + /** The version number for the 1.4.0 release. */ + public static final String VERSION_1_4_0 = "1.4.0"; + /** The current version of backend storage initialization script. */ public static final String CURRENT_SCRIPT_VERSION = VERSION_1_3_0; } diff --git a/docs/fileset-catalog-with-cos.md b/docs/fileset-catalog-with-cos.md new file mode 100644 index 00000000000..418c0e5a2d2 --- /dev/null +++ b/docs/fileset-catalog-with-cos.md @@ -0,0 +1,548 @@ +--- +title: "Fileset Catalog with COS" +slug: "/fileset-catalog-with-cos" +date: 2026-06-17 +keyword: "Fileset catalog COS Tencent" +license: "This software is licensed under the Apache License version 2." +--- + +## Introduction + +This document explains how to configure a Fileset catalog with Tencent Cloud COS (Cloud Object Storage) in Gravitino. + +## Prerequisites + +To set up a Fileset catalog with COS, follow these steps: + +1. Download the [`gravitino-tencent-bundle-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-tencent-bundle) file. +2. Place the downloaded file into the Gravitino Fileset catalog classpath at `${GRAVITINO_HOME}/catalogs/fileset/libs/`. +3. Start the Gravitino server by running the following command: + +```bash +$ ${GRAVITINO_HOME}/bin/gravitino-server.sh start +``` + +Once the server is up and running, you can proceed to configure the Fileset catalog with COS. In the rest of this document we will use `http://localhost:8090` as the Gravitino server URL, replace with your actual server URL. + +## COS Catalog Configuration + +### COS Fileset Catalog Configuration + +In addition to the basic configurations mentioned in [Fileset-catalog-catalog-configuration](./fileset-catalog.md#catalog-properties), the following properties are required to configure a Fileset catalog with COS: + +| Configuration item | Description | Default value | Required | Since version | +|-------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------| +| `cos-region` | The region of the Tencent Cloud COS bucket, e.g. `ap-guangzhou`, `ap-shanghai`. | (none) | Yes | 1.4.0 | +| `cos-endpoint` | The endpoint *suffix* of the Tencent Cloud COS service (mapped to `fs.cosn.bucket.endpoint_suffix`). It is a host suffix, not a full URL — e.g. `cos.ap-guangzhou.myqcloud.com`, **not** `https://cos.ap-guangzhou.myqcloud.com`. Optional; when not set, hadoop-cos derives it from `cos-region` (`cos.${region}.myqcloud.com`). Set this only if you need to point to a non-public endpoint (e.g. an internal/VPC endpoint). | (none) | No | 1.4.0 | +| `cos-access-key-id` | The static access key ID (Tencent Cloud `SecretId`) used to access COS data. | (none) | Yes | 1.4.0 | +| `cos-secret-access-key` | The static secret access key (Tencent Cloud `SecretKey`) used to access COS data. | (none) | Yes | 1.4.0 | +| `credential-providers` | The credential provider types, separated by comma. The currently supported value is `cos-secret-key`. Setting this enables credential vending provided by the Gravitino server, so the GVFS client no longer needs `cos-access-key-id` / `cos-secret-access-key` locally. See [cos-credential-vending](#fileset-with-credential-vending) below for details. | (none) | No | 1.4.0 | + +:::note +The fileset catalog automatically loads filesystem providers on the classpath. The COS provider is registered when the `gravitino-tencent-bundle` jar is present, so `default-filesystem-provider` and `filesystem-providers` do not need to be set. +::: + +:::note +`cos-region` is mandatory for hadoop-cos: signing requests, building the default endpoint and selecting the right CAM scope all require the region. Even if you also set `cos-endpoint`, please keep `cos-region` set. +::: + +### Schema Configuration + +To create a schema, refer to [Schema configurations](./fileset-catalog.md#schema-properties). + +### Fileset Configuration + +For instructions on how to create a fileset, refer to [Fileset configurations](./fileset-catalog.md#fileset-properties) for more details. + +## Create the Catalog, Schema, and Fileset + +This section will show you how to use the Fileset catalog with COS in Gravitino, including detailed examples. + +### Step 1: Create a Fileset Catalog with COS + +First, you need to create a Fileset catalog for COS. The following examples demonstrate how to create a Fileset catalog with COS: + + + + +```shell +curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \ +-H "Content-Type: application/json" -d '{ + "name": "test_catalog", + "type": "FILESET", + "comment": "This is a COS fileset catalog", + "properties": { + "location": "cosn://my-bucket-1250000000/root", + "cos-region": "ap-guangzhou", + "cos-access-key-id": "access_key", + "cos-secret-access-key": "secret_key" + } +}' http://localhost:8090/api/metalakes/metalake/catalogs +``` + + + + +```java +GravitinoClient gravitinoClient = GravitinoClient + .builder("http://localhost:8090") + .withMetalake("metalake") + .build(); + +Map cosProperties = ImmutableMap.builder() + .put("location", "cosn://my-bucket-1250000000/root") + .put("cos-region", "ap-guangzhou") + .put("cos-access-key-id", "access_key") + .put("cos-secret-access-key", "secret_key") + .build(); + +Catalog cosCatalog = gravitinoClient.createCatalog("test_catalog", + Type.FILESET, + "This is a COS fileset catalog", + cosProperties); +// ... + +``` + + + + +```python +gravitino_client: GravitinoClient = GravitinoClient(uri="http://localhost:8090", metalake_name="metalake") +cos_properties = { + "location": "cosn://my-bucket-1250000000/root", + "cos-region": "ap-guangzhou", + "cos-access-key-id": "access_key", + "cos-secret-access-key": "secret_key" +} + +cos_catalog = gravitino_client.create_catalog(name="test_catalog", + catalog_type=Catalog.Type.FILESET, + provider=None, + comment="This is a COS fileset catalog", + properties=cos_properties) +``` + + + + +:::note +Tencent Cloud COS bucket names always end with the appid suffix, for example `my-bucket-1250000000`. Use that full bucket name in `location` and in any `cosn://` URI. +::: + +### Step 2: Create a Schema + +Once the Fileset catalog with COS is created, you can create a schema inside that catalog. Below are examples of how to do this: + + + + +```shell +curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \ +-H "Content-Type: application/json" -d '{ + "name": "test_schema", + "comment": "This is a COS schema", + "properties": { + "location": "cosn://my-bucket-1250000000/root/schema" + } +}' http://localhost:8090/api/metalakes/metalake/catalogs/test_catalog/schemas +``` + + + + +```java +Catalog catalog = gravitinoClient.loadCatalog("test_catalog"); + +SupportsSchemas supportsSchemas = catalog.asSchemas(); + +Map schemaProperties = ImmutableMap.builder() + .put("location", "cosn://my-bucket-1250000000/root/schema") + .build(); +Schema schema = supportsSchemas.createSchema("test_schema", + "This is a COS schema", + schemaProperties +); +// ... +``` + + + + +```python +gravitino_client: GravitinoClient = GravitinoClient(uri="http://localhost:8090", metalake_name="metalake") +catalog: Catalog = gravitino_client.load_catalog(name="test_catalog") +catalog.as_schemas().create_schema(name="test_schema", + comment="This is a COS schema", + properties={"location": "cosn://my-bucket-1250000000/root/schema"}) +``` + + + + + +### Step 3: Create a Fileset + +Now that the schema is created, you can create a fileset inside it. Here's how: + + + + +```shell +curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \ +-H "Content-Type: application/json" -d '{ + "name": "example_fileset", + "comment": "This is an example fileset", + "type": "MANAGED", + "storageLocation": "cosn://my-bucket-1250000000/root/schema/example_fileset", + "properties": { + "k1": "v1" + } +}' http://localhost:8090/api/metalakes/metalake/catalogs/test_catalog/schemas/test_schema/filesets +``` + + + + +```java +GravitinoClient gravitinoClient = GravitinoClient + .builder("http://localhost:8090") + .withMetalake("metalake") + .build(); + +Catalog catalog = gravitinoClient.loadCatalog("test_catalog"); +FilesetCatalog filesetCatalog = catalog.asFilesetCatalog(); + +Map propertiesMap = ImmutableMap.builder() + .put("k1", "v1") + .build(); + +filesetCatalog.createFileset( + NameIdentifier.of("test_schema", "example_fileset"), + "This is an example fileset", + Fileset.Type.MANAGED, + "cosn://my-bucket-1250000000/root/schema/example_fileset", + propertiesMap, +); +``` + + + + +```python +gravitino_client: GravitinoClient = GravitinoClient(uri="http://localhost:8090", metalake_name="metalake") + +catalog: Catalog = gravitino_client.load_catalog(name="test_catalog") +catalog.as_fileset_catalog().create_fileset(ident=NameIdentifier.of("test_schema", "example_fileset"), + type=Fileset.Type.MANAGED, + comment="This is an example fileset", + storage_location="cosn://my-bucket-1250000000/root/schema/example_fileset", + properties={"k1": "v1"}) +``` + + + + +## Access a Fileset with COS + +### Access the Fileset with the GVFS Java Client + +To access fileset with COS using the GVFS Java client, based on the [basic GVFS configurations](./how-to-use-gvfs.md#configuration-1), you need to add the following configurations: + +| Configuration item | Description | Default value | Required | Since version | +|-------------------------|--------------------------------------------------------------|---------------|----------|---------------| +| `cos-region` | The region of the Tencent Cloud COS bucket. | (none) | Yes | 1.4.0 | +| `cos-endpoint` | The endpoint *suffix* of the Tencent Cloud COS service (e.g. `cos.ap-guangzhou.myqcloud.com`, not a full URL). Optional. | (none) | No | 1.4.0 | +| `cos-access-key-id` | The access key ID (Tencent Cloud SecretId) for COS data. | (none) | Yes | 1.4.0 | +| `cos-secret-access-key` | The secret access key (Tencent Cloud SecretKey) for COS data.| (none) | Yes | 1.4.0 | + +:::note +If the catalog has enabled [credential vending](security/credential-vending.md), the AK/SK properties above can be omitted. More details can be found in [Fileset with credential vending](#fileset-with-credential-vending). +::: + +```java +Configuration conf = new Configuration(); +conf.set("fs.AbstractFileSystem.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.Gvfs"); +conf.set("fs.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem"); +conf.set("fs.gravitino.server.uri", "http://localhost:8090"); +conf.set("fs.gravitino.client.metalake", "test_metalake"); +conf.set("cos-region", "ap-guangzhou"); +conf.set("cos-access-key-id", "access_key"); +conf.set("cos-secret-access-key", "secret_key"); +Path filesetPath = new Path("gvfs://fileset/test_catalog/test_schema/test_fileset/new_dir"); +FileSystem fs = filesetPath.getFileSystem(conf); +fs.mkdirs(filesetPath); +... +``` + +Similar to Spark configurations, you need to add COS (bundle) jars to the classpath according to your environment. +If you want to customise your hadoop version or there is already a hadoop version in your project, you can add the following dependencies to your `pom.xml`: + +```xml + + org.apache.hadoop + hadoop-common + ${HADOOP_VERSION} + + + + + com.qcloud.cos + hadoop-cos + 3.3.0-8.3.23 + + + + org.apache.gravitino + gravitino-filesystem-hadoop3-runtime + ${GRAVITINO_VERSION} + +``` + +:::note +Unlike the S3, OSS, GCS and Azure connectors, COS does **not** ship with Apache Hadoop. The HCFS adapter for COS is published by Tencent Cloud as `com.qcloud.cos:hadoop-cos`. Make sure the version you pick is compatible with your Hadoop version (the `-` form encodes both, e.g. `3.3.0-8.3.23` targets Hadoop 3.3.0). +::: + +:::note +Since version 1.4.0, the `gravitino-tencent` JAR is no longer required separately, as it is included in the `gravitino-filesystem-hadoop3-runtime` JAR. +::: + +Or use the bundle jar with Hadoop environment if there is no Hadoop environment: + +```xml + + org.apache.gravitino + gravitino-tencent-bundle + ${GRAVITINO_VERSION} + + + + org.apache.gravitino + gravitino-filesystem-hadoop3-runtime + ${GRAVITINO_VERSION} + +``` + +### Access the Fileset with Spark + +The following code snippet shows how to use **PySpark 3.5.0 with Hadoop environment(Hadoop 3.3.4)** to access the fileset: + +Before running the following code, you need to install required packages: + +```bash +pip install pyspark==3.5.0 +pip install apache-gravitino==${GRAVITINO_VERSION} +``` +Then you can run the following code: + +```python +from pyspark.sql import SparkSession +import os + +gravitino_url = "http://localhost:8090" +metalake_name = "test" + +catalog_name = "your_cos_catalog" +schema_name = "your_cos_schema" +fileset_name = "your_cos_fileset" + +# JDK8 as follows. JDK17 will be slightly different, you need to add +# '--conf "spark.driver.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED" +# --conf "spark.executor.extraJavaOptions=--add-opens=java.base/sun.nio.ch=ALL-UNNAMED"' +# to the submit args. +os.environ["PYSPARK_SUBMIT_ARGS"] = ( + "--jars " + "/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar," + "/path/to/hadoop-cos-3.3.0-8.3.23.jar," + "/path/to/cos_api-bundle-5.6.227.jar " + "--master local[1] pyspark-shell" +) +spark = SparkSession.builder \ + .appName("cos_fileset_test") \ + .config("spark.hadoop.fs.AbstractFileSystem.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.Gvfs") \ + .config("spark.hadoop.fs.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem") \ + .config("spark.hadoop.fs.gravitino.server.uri", gravitino_url) \ + .config("spark.hadoop.fs.gravitino.client.metalake", "test") \ + .config("spark.hadoop.cos-region", "ap-guangzhou") \ + .config("spark.hadoop.cos-access-key-id", os.environ["COS_ACCESS_KEY_ID"]) \ + .config("spark.hadoop.cos-secret-access-key", os.environ["COS_SECRET_ACCESS_KEY"]) \ + .config("spark.driver.memory", "2g") \ + .config("spark.driver.port", "2048") \ + .getOrCreate() + +data = [("Alice", 25), ("Bob", 30), ("Cathy", 45)] +columns = ["Name", "Age"] +spark_df = spark.createDataFrame(data, schema=columns) +gvfs_path = f"gvfs://fileset/{catalog_name}/{schema_name}/{fileset_name}/people" + +spark_df.coalesce(1).write \ + .mode("overwrite") \ + .option("header", "true") \ + .csv(gvfs_path) +``` + +If your Spark is **without Hadoop environment**, you can use the following code snippet to access the fileset: + +```python +## Replace the following code snippet with the above code snippet with the same environment variables + +os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars /path/to/gravitino-tencent-bundle-{gravitino-version}.jar,/path/to/gravitino-filesystem-hadoop3-runtime-{gravitino-version}.jar --master local[1] pyspark-shell" +``` + +- [`gravitino-tencent-bundle-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-tencent-bundle): A "fat" JAR that includes `gravitino-tencent` functionality and all necessary dependencies like `hadoop-cos` and the Tencent Cloud COS Java SDK. Use this if your Spark environment doesn't have a pre-existing Hadoop setup. +- [`gravitino-filesystem-hadoop3-runtime-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-filesystem-hadoop3-runtime): A "fat" JAR that bundles Gravitino's virtual filesystem client and includes the functionality of `gravitino-tencent`. It is required for accessing Gravitino filesets. +- `hadoop-cos-3.3.0-8.3.23.jar` and `cos_api-bundle-5.6.227.jar`: The Tencent-Cloud-published HCFS adapter and Java SDK for COS. If you are running in an existing Hadoop environment, you need to provide these JARs yourself; they are not part of the standard Apache Hadoop distribution and must be downloaded from Maven Central or Tencent's release page. +- [`gravitino-tencent-${gravitino-version}.jar`](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-tencent): A "thin" JAR that only provides the COS integration code. Its functionality is already included in the `gravitino-tencent-bundle` and `gravitino-filesystem-hadoop3-runtime` JARs, so you do not need to add it as a direct dependency unless you want to manage all Hadoop and COS dependencies manually. + +Please choose the correct jar according to your environment. + +:::note +In some Spark versions, a Hadoop environment is needed by the driver, adding the bundle jars with `--jars` may not work. If this is the case, you should add the jars to the spark CLASSPATH directly. +::: + +### Access a Fileset Using the Hadoop Fs Command + +The following are examples of how to use the `hadoop fs` command to access the fileset in Hadoop 3.1.3: + +1. Add the following contents to the `${HADOOP_HOME}/etc/hadoop/core-site.xml` file: + +```xml + + fs.AbstractFileSystem.gvfs.impl + org.apache.gravitino.filesystem.hadoop.Gvfs + + + + fs.gvfs.impl + org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem + + + + fs.gravitino.server.uri + http://localhost:8090 + + + + fs.gravitino.client.metalake + test + + + + cos-region + ap-guangzhou + + + + cos-access-key-id + access-key + + + + cos-secret-access-key + secret-key + +``` + +2. Add the necessary jars to the Hadoop classpath. + +For COS, you need to add `gravitino-filesystem-hadoop3-runtime-${gravitino-version}.jar`, `hadoop-cos-3.3.0-8.3.23.jar` and `cos_api-bundle-5.6.227.jar` to the Hadoop classpath. Unlike `hadoop-aws` or `hadoop-aliyun`, these jars are *not* part of the Apache Hadoop distribution; download them from Maven Central and place them under `${HADOOP_HOME}/share/hadoop/tools/lib/`. + +3. Run the following command to access the fileset: + +```shell +./${HADOOP_HOME}/bin/hadoop dfs -ls gvfs://fileset/cos_catalog/cos_schema/cos_fileset +./${HADOOP_HOME}/bin/hadoop dfs -put /path/to/local/file gvfs://fileset/cos_catalog/cos_schema/cos_fileset +``` + +### Access the Fileset with the GVFS Python Client / Pandas + +:::note +The GVFS **Python** client does not yet ship a COS storage handler, so reading +and writing COS-backed filesets via the Python `gvfs.GravitinoVirtualFileSystem` +or pandas `read_csv("gvfs://...")` is **not supported in this release**. Only +the GVFS **Java** client (and Spark / `hadoop fs` on top of it) can do fileset +I/O against COS today. + +The Python `GravitinoClient` itself is not affected: you can still create, +inspect, update and delete COS catalogs, schemas and filesets through the +metadata API exactly as shown in the [Step 1 / 2 / 3 examples above](#step-1-create-a-fileset-catalog-with-cos). +What is missing is purely the *data plane* (`gvfs.ls`, `gvfs.open`, +pandas `read_csv("gvfs://...")`, etc.) for COS, which will be added in a +follow-up release together with a `COSStorageHandler` and the corresponding +fsspec adapter. +::: + +For other use cases of the Java GVFS client, refer to the [Gravitino Virtual File System](./how-to-use-gvfs.md) document. + +## Fileset with Credential Vending + +Since 1.4.0, Gravitino supports credential vending for COS fileset. If the catalog has been [configured with credential](./security/credential-vending.md), you can access COS fileset without providing authentication information like `cos-access-key-id` and `cos-secret-access-key` in the GVFS client configuration. + +The currently supported credential providers are listed below: + +| Credential provider | Description | Vended credential type | +|---------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------| +| `cos-secret-key` | The Gravitino server hands out the static `cos-access-key-id` / `cos-secret-access-key` configured on the catalog. Useful for centralising credentials on the server side. | Static AK/SK | + +:::note +STS-token-based credential vending (`cos-token`) for COS, including role-arn / assume-role configuration, will be added in a follow-up release. Until then, only the static `cos-secret-key` provider is available. +::: + +### Create a COS Fileset Catalog with Credential Vending + +In addition to the configuration described in [COS fileset catalog configuration](#cos-fileset-catalog-configuration), you only need to set `credential-providers` to `cos-secret-key` to enable credential vending: + +```shell +curl -X POST -H "Accept: application/vnd.gravitino.v1+json" \ +-H "Content-Type: application/json" -d '{ + "name": "cos-catalog-with-credential-vending", + "type": "FILESET", + "comment": "This is a COS fileset catalog with credential vending", + "properties": { + "location": "cosn://my-bucket-1250000000/root", + "cos-region": "ap-guangzhou", + "cos-access-key-id": "access_key", + "cos-secret-access-key": "secret_key", + "credential-providers": "cos-secret-key" + } +}' http://localhost:8090/api/metalakes/metalake/catalogs +``` + +### Access a COS Fileset with Credential Vending + +When the catalog is configured with credentials and client-side credential vending is enabled, +you can access COS filesets directly using the GVFS Java client or Spark without providing AK/SK in the client. + +GVFS Java client: + +```java +Configuration conf = new Configuration(); +conf.setBoolean("fs.gravitino.enableCredentialVending", true); +conf.set("fs.AbstractFileSystem.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.Gvfs"); +conf.set("fs.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem"); +conf.set("fs.gravitino.server.uri", "http://localhost:8090"); +conf.set("fs.gravitino.client.metalake", "test_metalake"); +// No need to set cos-access-key-id and cos-secret-access-key +Path filesetPath = new Path("gvfs://fileset/cos_test_catalog/test_schema/test_fileset/new_dir"); +FileSystem fs = filesetPath.getFileSystem(conf); +fs.mkdirs(filesetPath); +... +``` + +Spark: + +```python +spark = SparkSession.builder \ + .appName("cos_fileset_test") \ + .config("spark.hadoop.fs.gravitino.enableCredentialVending", "true") \ + .config("spark.hadoop.fs.AbstractFileSystem.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.Gvfs") \ + .config("spark.hadoop.fs.gvfs.impl", "org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem") \ + .config("spark.hadoop.fs.gravitino.server.uri", "http://localhost:8090") \ + .config("spark.hadoop.fs.gravitino.client.metalake", "test") \ + # No need to set cos-access-key-id and cos-secret-access-key + .config("spark.driver.memory", "2g") \ + .config("spark.driver.port", "2048") \ + .getOrCreate() +``` + +The Hadoop `fs` command is similar to the above examples. The GVFS **Python** client cannot access COS-backed filesets yet — see the note in [Access the Fileset with the GVFS Python Client / Pandas](#access-the-fileset-with-the-gvfs-python-client--pandas). diff --git a/docs/fileset-catalog.md b/docs/fileset-catalog.md index bb7b315fd1f..8feaf148b2c 100644 --- a/docs/fileset-catalog.md +++ b/docs/fileset-catalog.md @@ -12,9 +12,10 @@ Fileset catalog is a fileset catalog that using Hadoop Compatible File System (H the storage location of the fileset. It supports the local filesystem and HDFS. Since 0.7.0-incubating, Gravitino supports [S3](fileset-catalog-with-s3.md), [GCS](fileset-catalog-with-gcs.md), [OSS](fileset-catalog-with-oss.md) and [Azure Blob Storage](fileset-catalog-with-adls.md) through Fileset catalog. +Since 1.3.0, Gravitino also supports [Tencent Cloud COS](fileset-catalog-with-cos.md). The rest of this document will use HDFS or local file as an example to illustrate how to use the Fileset catalog. -For S3, GCS, OSS and Azure Blob Storage, the configuration is similar to HDFS, +For S3, GCS, OSS, Azure Blob Storage and COS, the configuration is similar to HDFS, refer to the corresponding document for more details. Note that Gravitino uses Hadoop 3 dependencies to build Fileset catalog. Theoretically, it should be @@ -43,7 +44,7 @@ the Fileset catalog has the following properties: | `fs.path.config.` | Defines a logical location entry. Set `fs.path.config.` to the real base URI (for example, `hdfs://cluster1/`). Any key that starts with the same prefix (such as `fs.path.config..config.resource`) is treated as a location-scoped property and will be forwarded to the underlying filesystem client. | (none) | No | 1.2.0 | :::note -`default-filesystem-provider` and `filesystem-providers` are deprecated since 1.2.0. The fileset catalog automatically loads filesystem providers on the classpath, including buildin filesystem provider and cloud providers when the corresponding bundle jar is present (for example, `gravitino-aws-bundle`, `gravitino-azure-bundle`, `gravitino-aliyun-bundle`, or `gravitino-gcp-bundle`). +`default-filesystem-provider` and `filesystem-providers` are deprecated since 1.2.0. The fileset catalog automatically loads filesystem providers on the classpath, including the built-in filesystem provider and cloud providers when the corresponding bundle jar is present (for example, `gravitino-aws-bundle`, `gravitino-azure-bundle`, `gravitino-aliyun-bundle`, `gravitino-gcp-bundle`, or `gravitino-tencent-bundle`). ::: Refer to [Credential vending](./security/credential-vending.md) for more details about credential vending. @@ -75,13 +76,14 @@ The Gravitino Fileset extends the following properties in the `xxx-site.xml`: ### Fileset Catalog with Cloud Storage In the current implementation, the fileset uses the HDFS protocol to access its location. If users use S3, GCS, OSS, -or Azure Blob Storage, they can also configure the `config.resources` to specify custom configuration +Azure Blob Storage or Tencent Cloud COS, they can also configure the `config.resources` to specify custom configuration files. - For S3, refer to [Fileset-catalog-with-s3](./fileset-catalog-with-s3.md) for more details. - For GCS, refer to [Fileset-catalog-with-gcs](./fileset-catalog-with-gcs.md) for more details. - For OSS, refer to [Fileset-catalog-with-oss](./fileset-catalog-with-oss.md) for more details. - For Azure Blob Storage, refer to [Fileset-catalog-with-adls](./fileset-catalog-with-adls.md) for more details. +- For Tencent Cloud COS, refer to [Fileset-catalog-with-cos](./fileset-catalog-with-cos.md) for more details. ### Implement a Custom HCFS File System Fileset @@ -96,11 +98,11 @@ the jar [gravitino-hadoop-common](https://repo1.maven.org/maven2/org/apache/grav throws IOException; // The schema name of the file system provider. 'file' for Local file system, - // 'hdfs' for HDFS, 's3a' for AWS S3, 'gs' for GCS, 'oss' for Aliyun OSS. + // 'hdfs' for HDFS, 's3a' for AWS S3, 'gs' for GCS, 'oss' for Aliyun OSS, 'cosn' for Tencent Cloud COS. String scheme(); // Name of the file system provider. 'builtin-local' for Local file system, 'builtin-hdfs' for HDFS, - // 's3' for AWS S3, 'gcs' for GCS, 'oss' for Aliyun OSS. + // 's3' for AWS S3, 'gcs' for GCS, 'oss' for Aliyun OSS, 'cos' for Tencent Cloud COS. String name(); ``` diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index bd9a1d15ff6..e1f033f1cf1 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -45,6 +45,7 @@ hadoop3 = "3.3.6" hadoop3-gcs = "1.9.4-hadoop3" hadoop3-abs = "3.3.6" hadoop3-aliyun = "3.3.6" +hadoop-cos = "3.3.0-8.3.23" hadoop3-thirdparty = "1.1.1" hadoop-minikdc = "3.3.6" re2j = "1.7" @@ -236,6 +237,7 @@ hadoop3-mapreduce-client-core = { group = "org.apache.hadoop", name = "hadoop-ma hadoop3-minicluster = { group = "org.apache.hadoop", name = "hadoop-minicluster", version.ref = "hadoop-minikdc"} hadoop3-gcs = { group = "com.google.cloud.bigdataoss", name = "gcs-connector", version.ref = "hadoop3-gcs"} hadoop3-oss = { group = "org.apache.hadoop", name = "hadoop-aliyun", version.ref = "hadoop3-aliyun"} +hadoop3-cos = { group = "com.qcloud.cos", name = "hadoop-cos", version.ref = "hadoop-cos"} hadoop3-abs = { group = "org.apache.hadoop", name = "hadoop-azure", version.ref = "hadoop3-abs"} hadoop3-shaded-guava = { group = "org.apache.hadoop.thirdparty", name = "hadoop-shaded-guava", version.ref = "hadoop3-thirdparty" } hadoop3-shaded-protobuf = { group = "org.apache.hadoop.thirdparty", name = "hadoop-shaded-protobuf_3_7", version.ref = "hadoop3-thirdparty" } diff --git a/settings.gradle.kts b/settings.gradle.kts index 5f21cc77906..1e347c5286f 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -122,6 +122,7 @@ include("integration-test-common") include(":bundles:aws", ":bundles:aws-bundle", ":bundles:iceberg-aws-bundle") include(":bundles:gcp", ":bundles:gcp-bundle", ":bundles:iceberg-gcp-bundle") include(":bundles:aliyun", ":bundles:aliyun-bundle", ":bundles:iceberg-aliyun-bundle") +include(":bundles:tencent", ":bundles:tencent-bundle") include(":bundles:azure", ":bundles:azure-bundle", ":bundles:iceberg-azure-bundle") include(":catalogs:hadoop-common") include(":lineage")