Source code for FAIRLinked.QBWorkflow.data_template_generator

import openpyxl
from openpyxl.styles import (
    PatternFill, Alignment, Font, Border, Side
)
from FAIRLinked.QBWorkflow.utility import (
    CATEGORY_COLORS, ALT_LABEL_INSTR, 
    UNIT_INSTR, IS_MEASURE_INSTR, EXISTING_URI_INSTR, 
    EXPERIMENT_ID_INSTR, NO_TERMS_MSG, TEMPLATE_GENERATED_MSG
)

[docs] def generate_data_xlsx_template(children_terms, output_xlsx_file_path): """ Generates an Excel template for data collection with various features such as merged cells, category headers, colors, formatting, and borders. If no categories are provided, a 'Miscellaneous' category is generated. Args: children_terms (dict): A dictionary where keys represent ontology categories (e.g., 'mds:tool') and values are lists of terms (e.g., ['InstrumentId', 'InstrumentName']). If categories are empty, a default 'Miscellaneous' category is created. output_xlsx_file_path (str): The path where the generated Excel file should be saved. Returns: None: The function creates and saves an Excel file with the specified formatting at the provided path. """ # Create a new workbook and select the active worksheet wb = openpyxl.Workbook() ws = wb.active # Define border style thin_border = Border( left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style='thin') ) # Row indices based on 1-indexing row_category_headers = 1 row_static_instructions_start = 2 row_static_instructions_end = 5 row_variable_names = 6 row_data_start = 7 # Add the instruction in Row 1, Column B cell = ws.cell(row=row_category_headers, column=2, value=EXPERIMENT_ID_INSTR) cell.alignment = Alignment(wrap_text=True, horizontal='center', vertical='center') cell.font = Font(bold=True) cell.border = thin_border ws.column_dimensions['B'].width = 50 # Static instructions in Column A instruction_colors = ["FFF2CC", "E2EFDA", "D9E1F2", "FCE4D6"] # Light colors for each instruction static_instructions = [ ALT_LABEL_INSTR, UNIT_INSTR, IS_MEASURE_INSTR, EXISTING_URI_INSTR ] for idx, instruction in enumerate(static_instructions, start=row_static_instructions_start): cell = ws.cell(row=idx, column=1, value=instruction) cell.alignment = Alignment(wrap_text=True, horizontal='center', vertical='center') cell.font = Font(bold=True) fill_color = instruction_colors[idx - row_static_instructions_start] cell.fill = PatternFill(start_color=fill_color, end_color=fill_color, fill_type="solid") cell.border = thin_border ws.row_dimensions[idx].height = 60 # Adjust row height for wrapping ws.column_dimensions['A'].width = 80 # To accommodate long instructions # Column B: ExperimentId header (Row 6) cell = ws.cell(row=row_variable_names, column=2, value="ExperimentId") cell.alignment = Alignment(wrap_text=True, horizontal='center', vertical='center') cell.font = Font(bold=True) cell.border = thin_border ws.column_dimensions['B'].width = 20 # Determine which categories have terms categories_with_terms = {category: terms for category, terms in children_terms.items() if terms} if not categories_with_terms: # No categories have terms, create a 'Miscellaneous' category print(NO_TERMS_MSG) categories_with_terms = {'Miscellaneous': ['var1', 'var2', 'var3']} category_colors = {'Miscellaneous': 'CCCCCC'} # Light gray else: # Use the existing CATEGORY_COLORS for the categories present category_colors = {category: CATEGORY_COLORS.get(category, "FFFFFF") for category in categories_with_terms.keys()} # Starting from Column C col_idx = 3 # Column C for category, terms in categories_with_terms.items(): # Add debug print # print(f"Processing category: {category}") # print(f"Available colors: {CATEGORY_COLORS}") start_col = col_idx # Write variable names for term in terms: cell = ws.cell(row=row_variable_names, column=col_idx, value=term) cell.alignment = Alignment(wrap_text=True, horizontal='center', vertical='center') cell.font = Font(bold=True) cell.border = thin_border ws.column_dimensions[openpyxl.utils.get_column_letter(col_idx)].width = max(len(term) + 2, 15) col_idx += 1 end_col = col_idx - 1 # Merge category headers (Row 1) ws.merge_cells(start_row=row_category_headers, start_column=start_col, end_row=row_category_headers, end_column=end_col) category_name = category.replace("mds:", "").capitalize() category_cell = ws.cell(row=row_category_headers, column=start_col, value=category_name) # Apply formatting to category header fill_color = category_colors.get(category, "FFFFFF") # print(f"Using color {fill_color} for category {category}") fill = PatternFill(start_color=fill_color, end_color=fill_color, fill_type="solid") category_cell.fill = fill category_cell.alignment = Alignment(wrap_text=True, horizontal='center', vertical='center') category_cell.font = Font(bold=True) # Apply borders to each cell in the merged range of category header for col in range(start_col, end_col + 1): cell = ws.cell(row=row_category_headers, column=col) cell.border = thin_border # Apply fill and borders to variable name cells and data cells for col in range(start_col, end_col + 1): # Variable name cells cell = ws.cell(row=row_variable_names, column=col) cell.fill = fill cell.alignment = Alignment(wrap_text=True, horizontal='center', vertical='center') cell.font = Font(bold=True) cell.border = thin_border # Data cells for row in range(row_data_start, row_data_start + 2): # Adjust range as needed data_cell = ws.cell(row=row, column=col) data_cell.alignment = Alignment(horizontal='center', vertical='center') data_cell.border = thin_border # Add the "Data (if any)" column cell = ws.cell(row=row_category_headers, column=col_idx, value="Data (if any) Supply the name of your data file") cell.alignment = Alignment(wrap_text=True, horizontal='center', vertical='center') cell.font = Font(bold=True) cell.border = thin_border ws.column_dimensions[openpyxl.utils.get_column_letter(col_idx)].width = 30 # FileName header in Row 6 cell = ws.cell(row=row_variable_names, column=col_idx, value="FileName") cell.alignment = Alignment(wrap_text=True, horizontal='center', vertical='center') cell.font = Font(bold=True) cell.border = thin_border # Apply alignment and borders to FileName data cells for row in range(row_data_start, row_data_start + 2): # Adjust range as needed data_cell = ws.cell(row=row, column=col_idx) data_cell.alignment = Alignment(horizontal='center', vertical='center') data_cell.border = thin_border # Sample data rows ws.cell(row=row_data_start, column=2, value="1") # ExperimentId ws.cell(row=row_data_start, column=col_idx, value="Image_1.jpg") # FileName ws.cell(row=row_data_start + 1, column=2, value="2") # Next ExperimentId # Apply borders and alignment to ExperimentId data cells for row in range(row_data_start, row_data_start + 2): cell = ws.cell(row=row, column=2) cell.alignment = Alignment(horizontal='center', vertical='center') cell.border = thin_border # Center align all cells and apply borders max_row = ws.max_row max_col = ws.max_column for row in ws.iter_rows(min_row=1, max_row=max_row, min_col=1, max_col=max_col): for cell in row: if cell.value is None: continue if cell.row > row_variable_names: # Data cells cell.alignment = Alignment(horizontal='center', vertical='center') cell.border = thin_border # Save the workbook wb.save(output_xlsx_file_path) print(TEMPLATE_GENERATED_MSG.format(output_xlsx_file_path))