Differences
This shows you the differences between two versions of the page.
| Both sides previous revision Previous revision Next revision | Previous revision | ||
| ivci:nuva-utils [2024/03/20 11:47] – fkaag | ivci:nuva-utils [2025/04/25 09:03] (current) – fkaag | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| ====== Python utilities to handle NUVA ====== | ====== Python utilities to handle NUVA ====== | ||
| - | A [[https://github.com/fkaag71/ | + | [[https://pypi.org/project/ |
| - | The supported functions are so far: | + | It can be installed with the command: |
| - | < | + | < |
| - | get_nuva_version() | + | pip install nuva-utils |
| </ | </ | ||
| - | Returns the version index for the last publication of NUVA. | ||
| + | The functions supported so far are: | ||
| <code python> | <code python> | ||
| - | get_nuva(version) | + | def nuva_version(): |
| - | </ | + | """ |
| - | Uploads in the current | + | |
| + | """ | ||
| + | def nuva_core_graph(): | ||
| + | """ | ||
| + | Returns the core graph of NUVA as a RDFLib graph | ||
| + | :return: the core graph | ||
| + | """ | ||
| + | def nuva_add_codes_to_graph(g, | ||
| + | """ | ||
| + | Adds the alignments for an external code system. | ||
| - | <code python> | + | g: The graph where the alignments are to be added |
| - | split_nuva() | + | codesystem: The code system of the alignments | ||
| - | </ | + | |
| - | From the uploaded **nuva_ivci.rdf** file, creates | + | """ |
| - | * **nuva_core.ttl** including | + | def nuva_add_lang(g,lang): |
| - | * **nuva_lang_XX.ttl** includes | + | """ |
| - | * **nuva_refcode_YYY.ttl** includes | + | Adds a language graph to a base graph |
| + | """ | ||
| + | def nuva_get_vaccines(g, | ||
| + | """ | ||
| + | Return a Dict of all NUVA vaccines and their properties | ||
| + | """ | ||
| + | def nuva_translate(g, | ||
| + | """ | ||
| + | Extracts from a graph the translation across 2 languages | ||
| + | """ | ||
| + | def nuva_optimize(g,codesystem,onlyAbstract): | ||
| + | """ | ||
| + | Determines the optimal mapping of a code system to NUVA, either full or limited to abstract vaccines. | ||
| + | Returns a dictionary with three items: | ||
| + | - bestcodes, a dictionary of all NUVA concepts | ||
| + | - revcodes, a dictionary of all codes in the code system | ||
| + | - metrics, the computed metrics of the code system | ||
| - | <code python> | + | For each NUVA concept, bestcodes is formed by: |
| - | refturtle_to_map(code) | + | - label: the English label of the concept |
| - | </ | + | - isAbstract: whether the concept is abstract |
| - | Starting from the **nuva_refcode_YYY.ttl** file for the given code, creates a simple CSV file **nuva_refcode_YYY.csv** with alignments between | + | - nbequiv: |
| + | - blur: the number of concepts covered by the narrowest codes for the NUVA concept. If nbequiv is not 0, blur should be 1 | ||
| + | - codes: the list of codes with the given blur | ||
| - | <code python> | + | For each code in the code system, revcodes is formed by: |
| - | map_to_turtle(code) | + | - label: the English label of the corresponding NUVA concept |
| + | - cardinality: | ||
| + | - may: the list of these NUVA concepts | ||
| + | - blur: the number of NUVA concepts for which the given code is the best possible one | ||
| + | - best: the list of these NUVA concepts, that is a subset of " | ||
| + | |||
| + | The metrics are formed by: | ||
| + | - completeness: | ||
| + | - precision: the inverse of the average blur over all the codes in the code system, when using the optimal one for each concept. | ||
| + | - redundancy: for the NUVA concepts that have exact alignments in the code system, the average number of such alignments. | ||
| + | """ | ||
| </ | </ | ||
| - | Assuming that the **nuva_refcode_YYY.csv** file has been copied to work file **nuva_code_YYY.csv**, | ||
| - | Note that the refcode file contains | + | Here is an example of use: | ||
| + | - Retrieve | ||
| + | - Retrieve | ||
| + | - Complement it with ATC alignments | ||
| + | - Complement it with French | ||
| + | - Display the list of vaccines | ||
| + | - Display a translation table from English to French | ||
| + | - Determine the best possible mapping | ||
| - | < | + | < |
| - | query_core(q) | + | import os |
| - | </ | + | import nuva_utils |
| - | Runs a SPARQL query q against the core graph loaded | + | from pathlib import Path |
| + | from nuva_utils.nuva_utils import | ||
| - | <code python> | + | # Here the main program - Adapt the work directory |
| - | query_code(q, | + | |
| - | </ | + | |
| - | Runs a SPARQL query q against a graph formed by merging **nuva_core.ttl** and the work file **nuva_code_YYY.ttl**, | + | |
| + | os.chdir(str(Path.home())+"/ | ||
| + | version = nuva_version() | ||
| + | print(version) | ||
| - | < | + | g = nuva_core_graph() |
| - | eval_code(code) | + | print ("Core graph loaded" |
| - | </ | + | |
| - | Produces the metrics for a code system, given a nuva_code_YYY.csv file for alignments. | + | |
| - | Subproducts are: | + | codes = [] |
| - | * nuva_reverse_YYY.csv : file with all NUVA codes matching a given external code | + | csv_file = open(" |
| - | * nuva_best_YYY.csv: file with the best possible external code for a given NUVA code | + | reader = csv.DictReader(csv_file, |
| + | codesystem = reader.fieldnames[0] | ||
| + | for row in reader: | ||
| + | codes.append(row) | ||
| - | An example use sequence is included in the file: | + | nuva_add_codes_to_graph(g, |
| - | <code python> | + | nuva_add_lang(g,' |
| - | # Here the main program - Adapt the work directory to your environment | + | vaccines = nuva_get_vaccines(g,' |
| + | print(vaccines) | ||
| + | trans = nuva_translate(g,' | ||
| + | print(trans) | ||
| + | eval_codes = nuva_optimize(g, | ||
| + | bestcodes = eval_codes[' | ||
| + | revcodes = eval_codes[' | ||
| + | metrics = eval_codes[' | ||
| - | os.chdir(str(Path.home())+"/Documents/ | + | rev_fname = f"{codesystem}/nuva_reverse_{codesystem}.csv" |
| - | get_nuva(get_nuva_version()) | + | best_fname= f"{codesystem}/ |
| - | split_nuva() | + | metrics_fname=f"{codesystem}/ |
| - | refturtle_to_map(" | + | |
| - | shutil.copyfile(" | + | |
| - | map_to_turtle("CVX") | + | |
| - | q1 = """ | + | print ("Create best codes report |
| - | # All vaccines against smallpox | + | best_file = open(best_fname,' |
| - | SELECT ?vcode ?vl WHERE { | + | best_writer = csv.writer(best_file, |
| - | ?dis rdfs: | + | best_writer.writerow([" |
| - | ?dis rdfs: | + | for nuva_code |
| - | ?vac rdfs: | + | |
| - | ?vac rdfs:label ?vl . | + | bestcodes[nuva_code][' |
| - | ?vac skos: | + | best_file.close() | ||
| - | ?vac nuvs: | + | |
| - | ?val nuvs: | + | |
| - | } | + | |
| - | """ | + | |
| - | res = query_core(q1) | + | |
| - | for row in res: | + | |
| - | | + | |
| - | q2="" | + | print (" |
| - | # List CVX Codes | + | rev_file = open(rev_fname,' |
| - | SELECT ?cvx ?nuva ?lvac WHERE { | + | rev_writer = csv.writer(rev_file, |
| - | ?vac rdfs: | + | rev_writer.writerow([codesystem," |
| - | ?vac skos: | + | for extcode |
| - | ?vac skos: | + | |
| - | ?code rdfs: | + | revcodes[extcode][' |
| - | ?code skos: | + | revcodes[extcode][' |
| - | ?vac rdfs:label $lvac | + | rev_file.close() | ||
| - | } | + | |
| - | """ | + | |
| - | res=query_code(q2,"CVX") | + | |
| - | for row in res: | + | |
| - | | + | |
| - | </ | + | |
| + | nbnuva = len(bestcodes) | ||
| + | nbcodes = len(revcodes) | ||
| + | |||
| + | print (f" | ||
| + | print (f" | ||
| + | print (" | ||
| + | print (f" | ||
| + | print (" | ||
| + | print (" | ||
| + | </ | ||