Try to populate csv from tableau report, but get error about single positional, not sure if it's due to the over 100,000,000 data shows in report
Describe the bug I try to use the custom download funtion to get data from tableau report, the project name and view name is correct. it shows error saying: An unexpected error occurred: single positional indexer is out-of-bounds. there are some data shows over 100M, not sure if this is due to the long text data that affect the download procedure.
Versions Details of your environment, including:
Tableau Server version (or note if using Tableau Online): 2023.1.8 Python version: python 3.11.6 TSC library version: 0.30
To Reproduce
``
def get_workbooks_data(server):
Function to sign in to Tableau, retrieve workbook data, and create a DataFrame.
Parameters:
tableau_auth (object): Tableau authentication object.
server (object): Tableau server object.
Returns:
pd.DataFrame: DataFrame containing workbook details.
"""
all_workbooks_items, pagination_item = server.workbooks.get()
# Print the total number of workbooks
# Extract the relevant fields from each workbook item
workbook_data = []
for workbook in all_workbooks_items:
workbook_data.append({
'ID': workbook.id,
'Name': workbook.name,
'Created At': workbook.created_at.replace(tzinfo=None),
'Updated At': workbook.updated_at.replace(tzinfo=None),
'Project Name': workbook.project_name,
'Description': workbook.description,
'Owner ID': workbook.owner_id,
'Project ID': workbook.project_id
})
# Create a DataFrame from the extracted data
df_workbooks = pd.DataFrame(workbook_data)
return df_workbooks
def get_workbook_id_by_name_and_project(folder_name, workbook_name, established_server):
# Establish authentication with Tableau Server
# Get all workbooks from the Tableau Server
all_workbooks = list(tsc.Pager(established_server.workbooks))
# Initialize variable to store the workbook ID
workbook_id = None
# Iterate through each workbook
for workbook in all_workbooks:
# Check if the workbook belongs to the specified folder and has the specified name
if workbook.project_name == folder_name and workbook.name == workbook_name:
workbook_id = workbook.id
break
return workbook_id
def get_tabs_data_by_workbook_id(server, workbook_id):
"""
Function to retrieve views data from a specific workbook by its ID and create a DataFrame.
Parameters:
tableau_auth: Tableau authentication object.
server: Tableau server object.
workbook_id (str): ID of the workbook to retrieve views from.
Returns:
pd.DataFrame: DataFrame containing views data.
"""
# Sign in to Tableau Server
# Get the workbook by its ID
wm_workbook = server.workbooks.get_by_id(workbook_id)
# Get the views from the workbook
wm_workbook_views = wm_workbook.views
# Extract view data
view_data = []
for view in wm_workbook_views:
view_data.append({
'ID': view.id,
'Name': view.name
})
# Create a DataFrame from the extracted data
df_views = pd.DataFrame(view_data)
return df_views
def get_tab_id_by_name(df, name):
"""
Function to retrieve the ID for a specific name from a DataFrame.
Parameters:
df (pd.DataFrame): DataFrame containing 'ID' and 'Name' columns.
name (str): The name for which the ID is to be retrieved.
Returns:
str: The ID corresponding to the provided name, or None if the name is not found.
"""
# Filter the DataFrame to find the row with the specific name
filtered_df = df[df['Name'] == name]
if not filtered_df.empty:
# If the filtered DataFrame is not empty, return the ID
return filtered_df.iloc[0]['ID']
else:
# If the name is not found, return None
return None
def download_tableau_crosstab(tableau_server, tableau_auth,folder_name,workbook_name,view_name,req_option=None):
try:
with tableau_server.auth.sign_in(tableau_auth):
# Find the view by name
view = None
workbook_id=get_workbook_id_by_name_and_project(folder_name,workbook_name,tableau_server)
df_tabs=get_tabs_data_by_workbook_id(tableau_server,workbook_id)
resource_id=get_tab_id_by_name(df_tabs,view_name)
for resource in tsc.Pager(tableau_server.views):
if resource.name == view_name:
view = tableau_server.views.get_by_id(resource_id)
break
if not view:
raise ValueError(f"Worksheet '{view_name}' not found on the Tableau server.")
print(f"Populating - {view_name}")
# Populate CSV data
tableau_server.views.populate_csv(view,req_option)
# Convert CSV bytes to DataFrame
bytes_file = b''.join(view.csv)
string = bytes_file.decode('utf-8')
csv_reader = csv.reader(string.split('\n'), delimiter=',')
downloaded_df = pd.DataFrame(csv_reader)
# Clear rows with all null values in all columns
downloaded_df = downloaded_df.dropna(how='all')
# Convert the top row to headers and then drop that row
downloaded_df.columns = downloaded_df.iloc[0]
downloaded_df.drop(downloaded_df.index[0], inplace=True)
downloaded_df.reset_index(drop=True, inplace=True)
print(f"Downloaded - {view_name}")
except tsc.ServerResponseError as e:
print(f"Tableau Server error: {e}")
except Exception as e:
print(f"An unexpected error occurred: {e}")
return downloaded_df
Results What are the results or error messages received?
Populating - view_name An unexpected error occurred: single positional indexer is out-of-bounds
NOTE: Be careful not to post user names, passwords, auth tokens or any other private or sensitive information.
This sounds like a problem with your dataframe handling. If you print the full exception stack, what line is actually throwing the error? (traceback.print_stack() should do it)
The function get_workbook_id_by_name_and_project should be replaced by using the Queryset operations rather than retrieve and loop over every workbook on the server.
workbooks = server.workbooks.filter(name=workbook_name, project_name=folder_name)
# Check how many were retrieved
if len(workbooks) > 0:
workbook_id = workbooks[0].id
Or, since you're ultimately trying to retrieve a view, why not query for the view directly in the same way?
views = server.views.filter(name=view_name, workbook_name=workbook_name, project_name=folder_name)
if len(views) > 0:
view = views[0]
As jacalata said, the error seems to be produced in your dataframe operations, not within TSC. Have you tried length checking the bytes_file variable returned? My hypothesis is that whatever view you end up querying is not returning any CSV data.
In fact, the way you load the dataframe could be simpler by using io.BytesIO and pd.read_csv
# Convert CSV bytes to DataFrame
bytes_file = io.BytesIO()
written = bytes_file.write(b''.join(view.csv))
if written in (0, None):
raise RuntimeError("No data to download")
bytes_file.seek(0)
downloaded_df = pd.read_csv(bytes_file)
# Clear rows with all null values in all columns
downloaded_df = downloaded_df.dropna(how='all')
print(f"Downloaded - {view_name}")
Hopefully the comments were enough info to fix your issue - feel free to re-open if it still doesn't work.