diff --git a/python-package/basedosdados/core/base.py b/python-package/basedosdados/core/base.py index 65b54db88..488044550 100644 --- a/python-package/basedosdados/core/base.py +++ b/python-package/basedosdados/core/base.py @@ -33,6 +33,7 @@ def __init__( self, config_path=".basedosdados", bucket_name=None, + billing_project_id=None, overwrite_cli_config=False, ): """ @@ -50,6 +51,10 @@ def __init__( self.config = self._load_config() self._config_log(config.verbose) self.bucket_name = bucket_name or self.config["bucket_name"] + self.billing_project_id = ( + billing_project_id + or self.config["gcloud-projects"]["staging"]["name"] + ) self.uri = f"gs://{self.bucket_name}" + "/staging/{dataset}/{table}/*" self._backend = Backend(self.config.get("api", {}).get("url", None)) diff --git a/python-package/basedosdados/upload/storage.py b/python-package/basedosdados/upload/storage.py index 62d68d001..57f6f0e98 100644 --- a/python-package/basedosdados/upload/storage.py +++ b/python-package/basedosdados/upload/storage.py @@ -38,7 +38,9 @@ def __init__(self, dataset_id: str, table_id: str, **kwargs): """ super().__init__(**kwargs) - self.bucket = self.client["storage_staging"].bucket(self.bucket_name) + self.bucket = self.client["storage_staging"].bucket( + self.bucket_name, user_project=self.billing_project_id + ) self.dataset_id = dataset_id.replace("-", "_") self.table_id = table_id.replace("-", "_") @@ -124,7 +126,9 @@ def init(self, replace: bool = False, very_sure: bool = False) -> None: ) self.bucket.delete(force=True) - self.client["storage_staging"].create_bucket(self.bucket) + self.client["storage_staging"].create_bucket( + self.bucket, user_project=self.billing_project_id + ) for folder in ["staging/", "raw/"]: self.bucket.blob(folder).upload_from_string("") @@ -399,7 +403,7 @@ def delete_table( if bucket_name is not None: table_blobs = list( self.client["storage_staging"] - .bucket(f"{bucket_name}") + .bucket(f"{bucket_name}", user_project=self.billing_project_id) .list_blobs(prefix=prefix) ) @@ -469,7 +473,7 @@ def copy_table( source_table_ref = list( self.client["storage_staging"] - .bucket(source_bucket_name) + .bucket(source_bucket_name, user_project=self.billing_project_id) .list_blobs(prefix=f"{mode}/{self.dataset_id}/{self.table_id}/") ) @@ -483,7 +487,7 @@ def copy_table( else: destination_bucket = self.client["storage_staging"].bucket( - destination_bucket_name + destination_bucket_name, user_project=self.billing_project_id ) # Divides source_table_ref list for maximum batch request size diff --git a/python-package/basedosdados/upload/table.py b/python-package/basedosdados/upload/table.py index 963530491..d7cc73d28 100644 --- a/python-package/basedosdados/upload/table.py +++ b/python-package/basedosdados/upload/table.py @@ -274,8 +274,10 @@ def _parser_blobs_to_partition_dict(self) -> Optional[dict[Any, Any]]: return blobs = ( self.client["storage_staging"] - .bucket(self.bucket_name) - .list_blobs(prefix=f"staging/{self.dataset_id}/{self.table_id}/") + .bucket(self.bucket_name, user_project=self.billing_project_id) + .list_blobs( + prefix=f"staging/{self.dataset_id}/{self.table_id}/", + ) ) partitions_dict = {} # only needs the first bloob @@ -558,12 +560,12 @@ def create( exists on your bucket: * `raise`: Raises a Conflict exception * `replace`: Replaces the table - * `pass`: Does nothing + * `pass`: Do nothing if_dataset_exists: Determines what to do if the dataset already exists: * `raise`: Raises a Conflict exception * `replace`: Replaces the dataset - * `pass`: Does nothing + * `pass`: Do nothing dataset_is_public: Controls if the prod dataset is public or not. By default, staging datasets like `dataset_id_staging` are not public. @@ -587,9 +589,12 @@ def create( if path is None: # Look if table data already exists at Storage - data = self.client["storage_staging"].list_blobs( - self.bucket_name, - prefix=f"staging/{self.dataset_id}/{self.table_id}", + data = ( + self.client["storage_staging"] + .bucket(self.bucket_name, user_project=self.billing_project_id) + .list_blobs( + prefix=f"staging/{self.dataset_id}/{self.table_id}", + ) ) # Raise: Cannot create table without external data