Differences
This shows you the differences between two versions of the page.
| Both sides previous revision Previous revision Next revision | Previous revision | ||
| ivci:nuva-utils [2024/01/05 14:21] – fkaag | ivci:nuva-utils [2025/04/25 09:03] (current) – fkaag | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| ====== Python utilities to handle NUVA ====== | ====== Python utilities to handle NUVA ====== | ||
| - | A [[https://github.com/fkaag71/ | + | [[https://pypi.org/project/ |
| - | The supported functions are so far: | + | It can be installed with the command: |
| - | < | + | < |
| - | get_nuva_version() | + | pip install nuva-utils |
| </ | </ | ||
| - | Returns the version index for the last publication of NUVA. | ||
| + | The functions supported so far are: | ||
| <code python> | <code python> | ||
| - | get_nuva(version) | + | def nuva_version(): |
| - | </ | + | """ |
| - | Uploads in the current | + | |
| + | """ | ||
| + | def nuva_core_graph(): | ||
| + | """ | ||
| + | Returns the core graph of NUVA as a RDFLib graph | ||
| + | :return: the core graph | ||
| + | """ | ||
| + | def nuva_add_codes_to_graph(g, | ||
| + | """ | ||
| + | Adds the alignments for an external code system. | ||
| - | <code python> | + | g: The graph where the alignments are to be added |
| - | split_nuva() | + | codesystem: The code system of the alignments |
| - | </ | + | |
| - | From the uploaded **nuva_ivci.rdf** file, creates | + | """ |
| - | * **nuva_core.ttl** including | + | def nuva_add_lang(g,lang): |
| - | * **nuva_lang_XX.ttl** includes | + | """ |
| - | * **nuva_refcode_YYY.ttl** includes | + | Adds a language graph to a base graph |
| + | """ | ||
| + | def nuva_get_vaccines(g, | ||
| + | """ | ||
| + | Return a Dict of all NUVA vaccines and their properties | ||
| + | """ | ||
| + | def nuva_translate(g, | ||
| + | """ | ||
| + | Extracts from a graph the translation across 2 languages | ||
| + | """ | ||
| + | def nuva_optimize(g,codesystem,onlyAbstract): | ||
| + | """ | ||
| + | Determines the optimal mapping of a code system to NUVA, either full or limited to abstract vaccines. | ||
| + | Returns a dictionary with three items: | ||
| + | - bestcodes, a dictionary of all NUVA concepts | ||
| + | - revcodes, a dictionary of all codes in the code system | ||
| + | - metrics, the computed metrics of the code system | ||
| - | <code python> | + | For each NUVA concept, bestcodes is formed by: |
| - | refturtle_to_map(code) | + | - label: the English label of the concept |
| - | </ | + | - isAbstract: whether the concept is abstract |
| - | Starting from the **nuva_refcode_YYY.ttl** file for the given code, creates a simple CSV file **nuva_refcode_YYY.csv** with alignments between | + | - nbequiv: |
| + | - blur: the number of concepts covered by the narrowest codes for the NUVA concept. If nbequiv is not 0, blur should be 1 | ||
| + | - codes: the list of codes with the given blur | ||
| - | <code python> | + | For each code in the code system, revcodes is formed by: |
| - | map_to_turtle(code) | + | - label: the English label of the corresponding NUVA concept |
| - | </code> | + | - cardinality: |
| - | Assuming that the **nuva_refcode_YYY.csv** file has been copied to work file **nuva_code_YYY.csv**, | + | - may: the list of these NUVA concepts |
| + | - blur: the number of NUVA concepts | ||
| + | - best: the list of these NUVA concepts, that is a subset of " | ||
| - | Note that the refcode file contains the NUVA English labels | + | The metrics are formed by: |
| - | + | - completeness: | |
| - | <code python> | + | - precision: the inverse of the average blur over all the codes in the code system, when using the optimal one for each concept. |
| - | query_core(q) | + | - redundancy: for the NUVA concepts that have exact alignments in the code system, the average number of such alignments. |
| + | """ | ||
| </ | </ | ||
| - | Runs a SPARQL query q against the core graph loaded from **nuva_core.ttl** | ||
| - | <code python> | + | Here is an example of use: |
| - | query_code(q, | + | - Retrieve the NUVA version |
| - | </ | + | - Retrieve the NUVA core graph |
| - | Runs a SPARQL query q against a graph formed by merging **nuva_core.ttl** and the work file **nuva_code_YYY.ttl**, | + | - Complement it with ATC alignments |
| + | - Complement it with French labels | ||
| + | - Display the list of vaccines | ||
| + | - Display | ||
| + | - Determine | ||
| + | <code Python> | ||
| + | import os | ||
| + | import nuva_utils | ||
| + | from pathlib import Path | ||
| + | from nuva_utils.nuva_utils import * | ||
| - | An example use sequence is included in the file: | ||
| - | <code python> | ||
| # Here the main program - Adapt the work directory to your environment | # Here the main program - Adapt the work directory to your environment | ||
| os.chdir(str(Path.home())+"/ | os.chdir(str(Path.home())+"/ | ||
| - | get_nuva(get_nuva_version()) | + | version = nuva_version() |
| - | split_nuva() | + | print(version) |
| - | refturtle_to_map(" | + | |
| - | shutil.copyfile(" | + | |
| - | map_to_turtle(" | + | |
| - | q1 = """ | + | g = nuva_core_graph() |
| - | # All vaccines against smallpox | + | print ("Core graph loaded") |
| - | SELECT ?vcode ?vl WHERE { | + | |
| - | ?dis rdfs: | + | |
| - | ?dis rdfs:label " | + | |
| - | ?vac rdfs: | + | |
| - | ?vac rdfs:label ?vl . | + | |
| - | ?vac skos: | + | |
| - | ?vac nuvs: | + | |
| - | ?val nuvs: | + | |
| - | } | + | |
| - | """ | + | |
| - | res = query_core(q1) | + | |
| - | for row in res: | + | |
| - | | + | |
| - | q2=""" | + | codes = [] |
| - | # List CVX Codes | + | csv_file = open("NUVA_refcode_ATC.csv",' |
| - | | + | reader = csv.DictReader(csv_file, |
| - | ?vac rdfs: | + | codesystem = reader.fieldnames[0] |
| - | ?vac skos:notation ?nuva . | + | for row in reader: |
| - | | + | |
| - | ?code rdfs: | + | |
| - | ?code skos: | + | nuva_add_codes_to_graph(g, |
| - | ?vac rdfs:label $lvac | + | nuva_add_lang(g,' |
| - | | + | vaccines = nuva_get_vaccines(g,' |
| - | """ | + | print(vaccines) |
| - | res=query_code(q2,"CVX") | + | trans = nuva_translate(g,' |
| - | for row in res: | + | print(trans) |
| - | print (f"CVX {row.cvx} | + | eval_codes = nuva_optimize(g, |
| - | </code> | + | bestcodes = eval_codes[' |
| + | revcodes = eval_codes[' | ||
| + | metrics = eval_codes[' | ||
| + | |||
| + | rev_fname = f" | ||
| + | best_fname= f" | ||
| + | metrics_fname=f" | ||
| + | |||
| + | print ("Create best codes report | ||
| + | best_file = open(best_fname,' | ||
| + | best_writer | ||
| + | best_writer.writerow([" | ||
| + | for nuva_code | ||
| + | | ||
| + | bestcodes[nuva_code][' | ||
| + | best_file.close | ||
| + | |||
| + | print ("Create reverse codes report " | ||
| + | rev_file | ||
| + | rev_writer = csv.writer(rev_file, | ||
| + | rev_writer.writerow([codesystem," | ||
| + | for extcode in revcodes: | ||
| + | rev_writer.writerow([extcode, | ||
| + | revcodes[extcode][' | ||
| + | revcodes[extcode][' | ||
| + | rev_file.close | ||
| + | nbnuva = len(bestcodes) | ||
| + | nbcodes = len(revcodes) | ||
| + | |||
| + | print (f" | ||
| + | print (f" | ||
| + | print (" | ||
| + | print (f" | ||
| + | print (" | ||
| + | print (" | ||
| + | </ | ||