Source code for FAIRLinked.InterfaceMDS.term_search_general

import rdflib
from rdflib import Graph, RDFS, Namespace
import FAIRLinked.InterfaceMDS.load_mds_ontology
from FAIRLinked.InterfaceMDS.load_mds_ontology import load_mds_ontology_graph


def term_search_general(mds_ontology_graph=None, query_term=None, search_types=None, ttl_extr=False, ttl_path=None):
    """
    Search an RDF ontology for subjects by Domain, SubDomain, or Study Stage.

    For every requested search type the matching MDS predicate
    (``hasDomain``, ``hasSubDomain``, ``hasStudyStage``) is looked up in the
    graph. A subject matches when the object of that predicate equals
    ``query_term`` (case-insensitive), or unconditionally when ``query_term``
    is None. Matches from all search types are merged into one set.

    Args:
        mds_ontology_graph (rdflib.Graph, optional): An existing RDF graph.
            If None, one is loaded with ``load_mds_ontology_graph()``.
        query_term (str, optional): Term to match against the object of the
            predicate. If None, every subject carrying the predicate matches.
        search_types (list[str] | str): Search types to use: "Domain",
            "SubDomain", or "Study Stage". A single string is treated as a
            one-element list.
        ttl_extr (bool, optional): If truthy, copy every triple of the
            matching subjects into a new graph and save it. Defaults to False.
        ttl_path (str, optional): The file path to save the extracted turtle
            (.ttl) file. Required if ttl_extr is truthy.

    Returns:
        None. Results are reported on stdout.

    Raises:
        ValueError: If ttl_extr is truthy and ttl_path is None.

    Prints:
        - The labels of all matching subjects, ordered by subject IRI.
    """
    if ttl_extr and ttl_path is None:
        raise ValueError("A file path must be provided via ttl_path to save the results when ttl_extr is enabled.")

    if not search_types:
        print("No search types specified.")
        return

    # A bare string would otherwise be iterated character by character and
    # every character reported as an unsupported search type.
    if isinstance(search_types, str):
        search_types = [search_types]

    # Search type -> local name of the predicate inside the MDS namespace.
    type_to_local_name = {
        "Domain": "hasDomain",
        "SubDomain": "hasSubDomain",
        "Study Stage": "hasStudyStage",
    }

    # Validate the requested types before touching the ontology, so that a
    # request with nothing valid in it never triggers a graph load.
    local_names = []
    for search_type in search_types:
        local_name = type_to_local_name.get(search_type)
        if local_name is None:
            print(f"Unsupported search type: {search_type}")
            continue
        local_names.append(local_name)

    if not local_names:
        print("No matches found.")
        return

    # Define namespace
    MDS = Namespace("https://cwrusdle.bitbucket.io/mds/")

    # Load ontology
    if mds_ontology_graph is None:
        mds_ontology_graph = load_mds_ontology_graph()

    if query_term:
        query_term = query_term.lower()

    # Step 1: Collect all unique subjects that match any of the criteria.
    all_matching_subjects = set()
    for local_name in local_names:
        pred = MDS[local_name]
        for subj, obj in mds_ontology_graph.subject_objects(predicate=pred):
            if query_term is None or str(obj).lower() == query_term:
                all_matching_subjects.add(subj)

    if not all_matching_subjects:
        print("No matches found.")
        return

    # Print the human-readable results first, in a stable order.
    print("\nFound matching subjects:")
    for subj in sorted(all_matching_subjects, key=str):
        label = mds_ontology_graph.value(subject=subj, predicate=RDFS.label)
        label_str = str(label) if label else f"[no label for {subj}]"
        print(f"  {label_str}")

    if not ttl_extr:
        return

    # Step 2: Extraction is enabled, so build and save the results graph.
    results_graph = Graph()

    # Carry the prefixes over so the serialized turtle stays readable.
    for prefix, namespace in mds_ontology_graph.namespace_manager.namespaces():
        results_graph.bind(prefix, namespace)

    # (subj, None, None) selects every triple whose subject is subj.
    for subj in all_matching_subjects:
        for triple in mds_ontology_graph.triples((subj, None, None)):
            results_graph.add(triple)

    # Serialize once, after the graph is complete.
    print(f"\nSaving {len(results_graph)} triples to {ttl_path}...")
    results_graph.serialize(destination=ttl_path, format="turtle")
    print("Save complete.")
def filter_interface(args):
    """
    Term search using Domain, SubDomain, or Study Stage.

    For the complete list of Domains and SubDomains, run the following
    commands in bash:

        FAIRLinked view-domains
        FAIRLinked dir-make

    The current list of Study Stages includes: Synthesis, Formulation,
    Materials Processing, Sample, Tool, Recipe, Result, Analysis, Modeling.
    For more details about Study Stages, please see
    https://cwrusdle.bitbucket.io/.

    Args:
        args: Object exposing the attributes ``ontology_path``, ``ttl_extr``,
            ``query_term``, ``search_types`` and ``ttl_path`` (presumably an
            ``argparse.Namespace`` built by the CLI; confirm against the
            caller). ``ontology_path == "default"`` loads the bundled MDS
            ontology; any other value is parsed as an RDF file. ``ttl_extr``
            accepts "T"/"F" (also "true"/"false", in any letter case).
    """
    if args.ontology_path == "default":
        ontology_graph = load_mds_ontology_graph()
    else:
        ontology_graph = Graph()
        ontology_graph.parse(args.ontology_path)

    # Turn the textual flag into a real boolean. Matching is case-insensitive
    # because any unrecognised non-empty string is truthy and would silently
    # switch extraction ON in term_search_general (e.g. "f" or "false").
    # Values that are not recognised are passed through unchanged.
    ttl_extr = args.ttl_extr
    if isinstance(ttl_extr, str):
        flag = ttl_extr.strip().lower()
        if flag in ("t", "true"):
            ttl_extr = True
        elif flag in ("f", "false"):
            ttl_extr = False

    # Written back onto args, as before, so the namespace reflects the
    # normalized value.
    args.ttl_extr = ttl_extr

    term_search_general(
        mds_ontology_graph=ontology_graph,
        query_term=args.query_term,
        search_types=args.search_types,
        ttl_extr=args.ttl_extr,
        ttl_path=args.ttl_path,
    )