Source code for visual_viper.dataset_builders.gs_dataset_builder

import gspread
from google.oauth2 import service_account as sa
from googleapiclient.discovery import build

from .abstract_dataset_builder import *

[docs] class GoogleSpreadsheetDatasetBuilder(AbstractDatasetBuilder): """ The GoogleSpreadsheetDatasetBuilder class is responsible for building datasets from Google Sheets using named ranges. It leverages the gspread library to interact with the Google Sheets API. Attributes: DEFAULT_SA_PATH (str): The default path to the service account credentials JSON file. DEFAULT_SCOPES (list): The default OAuth 2.0 scopes required for authentication. Methods: __init__(sa_path=None): Initializes the dataset builder, authenticating with Google Drive. build_dataset(params, ws_index=0): Builds and returns a dataset based on the given parameters. """ DEFAULT_SA_PATH = "./service_account.json" DEFAULT_SCOPES = ['https://www.googleapis.com/auth/drive'] def __init__(self, file_id=None, sa_path=None) -> None: self.file_id = file_id self.sa_path = sa_path or self.DEFAULT_SA_PATH self.auth = sa.Credentials.from_service_account_file( self.sa_path, scopes=self.DEFAULT_SCOPES ) self.dataset = dict()
[docs] def build(self, params=None, ws_index=0): """ Builds and returns a dataset based on the provideparameters, including named ranges and an optional worksheet index Args: params (dict): Dictionary containing requireparameters such as file_id and ranges. ws_index (int, optional): Index of the worksheet tbe used (default is 0) Returns: Dataset: A dictionary containing the retrieved datmapped by named ranges. """ gs = gspread.service_account(self.sa_path) range_sets = dict() for el in params["ranges"]: if not isinstance(el, tuple): el = (el, self.file_id) named_range, file_id = el if not file_id in range_sets: range_sets[file_id] = [] range_sets[file_id].append(named_range) for file_id, ranges in range_sets.items(): sheet = gs.open_by_key(file_id) worksheet = sheet.get_worksheet(ws_index) response = worksheet.batch_get( ranges, value_render_option="UNFORMATTED_VALUE", ) response_ = {} for i in range(len(response)): # TODO @mariana.pais Make sure the system is able to handle missing data. This is just a quick fix for the process to work. try: response_[ranges[i]] = round(response[i][0][0], 3) except Exception: pass self.dataset.update(response_) return self.dataset