Source code for FAIRLinked.QBWorkflow.namespace_parser

from FAIRLinked.QBWorkflow.utility import NAMESPACE_MAP
import openpyxl

def parse_excel_to_namespace_map(excel_file_path):
    """
    Parses the Excel file containing namespaces and base URIs.
    Updates and returns the namespace map.

    The active worksheet must have a header row (row 1) containing the
    columns "Namespace you are using" and "Base URI". Every following row
    that has a non-empty value in both columns adds (or overrides) one
    entry in a copy of ``NAMESPACE_MAP``; namespace prefixes are
    lower-cased and both values are stripped of surrounding whitespace.

    Errors are handled on a best-effort basis: they are reported with
    ``print`` and never propagated to the caller. When parsing stops early
    (missing file, missing column, conflicting URI, ...), the map built so
    far is returned, i.e. the defaults plus any rows processed before the
    failure.

    Args:
        excel_file_path (str): The path to the Excel file to parse.

    Returns:
        dict: Updated namespace map (prefix -> base URI).
    """
    namespace_column = "Namespace you are using"
    base_uri_column = "Base URI"

    # Start with the default namespace map
    namespace_map = NAMESPACE_MAP.copy()

    try:
        # Load the workbook and select the active worksheet
        wb = openpyxl.load_workbook(excel_file_path)
        ws = wb.active

        # Get headers from the first row. Header cells are not guaranteed
        # to hold text (a cell may contain a number or a date), so convert
        # to str before stripping instead of calling .strip() on the raw
        # value, which would abort the whole parse with an AttributeError.
        headers = [
            str(cell.value).strip() if cell.value is not None else ""
            for cell in ws[1]
        ]

        # Check if required columns are present
        if namespace_column not in headers or base_uri_column not in headers:
            raise KeyError("Missing expected columns in the Excel file.")

        # Map header names to column indices (for a duplicated header the
        # right-most column wins)
        header_indices = {header: idx for idx, header in enumerate(headers)}
        namespace_idx = header_indices[namespace_column]
        base_uri_idx = header_indices[base_uri_column]

        # Prepare a reverse mapping to detect conflicts
        uri_to_namespace = {uri: prefix for prefix, uri in namespace_map.items()}

        # Iterate over the rows starting from the second row
        for row in ws.iter_rows(min_row=2, values_only=True):
            namespace = row[namespace_idx]
            base_uri = row[base_uri_idx]

            # Skip rows where either value is missing
            if not (namespace and base_uri):
                continue

            namespace = str(namespace).strip().lower()
            base_uri = str(base_uri).strip()

            # A base URI may be bound to one namespace prefix only
            if base_uri in uri_to_namespace and uri_to_namespace[base_uri] != namespace:
                raise ValueError(f"Conflict detected: URI '{base_uri}' is already mapped to namespace '{uri_to_namespace[base_uri]}'.")

            namespace_map[namespace] = base_uri
            uri_to_namespace[base_uri] = namespace

    except FileNotFoundError:
        print(f"The file '{excel_file_path}' was not found.")
    except KeyError as e:
        print(f"Missing expected column in the Excel file: {e}")
    except ValueError as e:
        print(e)
    except Exception as e:
        # Deliberate catch-all: parsing is best-effort and must not crash
        # the caller; the (possibly partial) map is still returned below.
        print(f"An unexpected error occurred while parsing the Excel file: {e}")

    return namespace_map