Source code for gunshotmatch_pipeline.exporters

#!/usr/bin/env python3
#
#  exporters.py
"""
Functions and classes for export to disk, and verification of saved data.
"""
#
#  Copyright © 2020-2023 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all
#  copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
#  OR OTHER DEALINGS IN THE SOFTWARE.
#

# stdlib
from typing import List

# 3rd party
import sdjson
from domdf_python_tools.paths import PathPlus
from libgunshotmatch.datafile import Datafile, Repeat
from libgunshotmatch.peak import QualifiedPeak
from libgunshotmatch.project import Project
from numpy import allclose, array_equal

# this package
import gunshotmatch_pipeline.results

# Public API of this module: the names exported by ``from gunshotmatch_pipeline.exporters import *``.
__all__ = ("verify_saved_datafile", "verify_saved_project", "write_combined_csv", "write_matches_json")


def verify_saved_datafile(in_process: Datafile, from_file: Datafile) -> None:
    """
    Verify the data in a saved :class:`~libgunshotmatch.datafile.Datafile` matches the data in memory.

    The ``time_list``, ``mass_list`` and ``intensity_array`` of the two intensity matrices
    must have the same shape and be equal within the default tolerances of :func:`numpy.allclose`.
    The metadata attributes (``name``, ``user``, ``device``, ``date_created``, ``date_modified``,
    ``version``, ``original_filename``, ``original_filetype`` and ``description``) are compared with ``==``.

    Will raise an :exc:`AssertionError` if the data do not match.

    :param in_process: The :class:`~libgunshotmatch.datafile.Datafile` already in memory.
    :param from_file: A :class:`~libgunshotmatch.datafile.Datafile` loaded from disk.

    :raises AssertionError: If either intensity matrix is :py:obj:`None`,
        or if any of the compared values differ.
    """

    # Imported locally; the module-level import only provides ``allclose`` and ``array_equal``.
    from numpy import shape

    # NOTE: The checks raise AssertionError explicitly instead of using ``assert`` statements.
    # ``assert`` is stripped when Python runs with ``-O``, which would silently turn
    # this whole function into a no-op.

    in_memory_matrix = in_process.intensity_matrix
    saved_matrix = from_file.intensity_matrix

    if in_memory_matrix is None:
        raise AssertionError("The in-memory Datafile has no intensity matrix.")
    if saved_matrix is None:
        raise AssertionError("The Datafile loaded from disk has no intensity matrix.")

    for attribute in ("time_list", "mass_list", "intensity_array"):
        expected = getattr(in_memory_matrix, attribute)
        saved = getattr(saved_matrix, attribute)

        # ``allclose`` broadcasts its arguments, so differently shaped data could either
        # raise ValueError or compare as equal. Check the shapes explicitly first.
        if shape(expected) != shape(saved):
            raise AssertionError(
                f"intensity_matrix.{attribute} has shape {shape(saved)} on disk "
                f"but {shape(expected)} in memory."
            )

        if not allclose(expected, saved):
            raise AssertionError(f"intensity_matrix.{attribute} on disk does not match the data in memory.")

    metadata_attributes = (
        "name",
        "user",
        "device",
        "date_created",
        "date_modified",
        "version",
        "original_filename",
        "original_filetype",
        "description",
    )

    for attribute in metadata_attributes:
        saved = getattr(from_file, attribute)
        expected = getattr(in_process, attribute)

        if not (saved == expected):
            raise AssertionError(f"Datafile.{attribute} is {saved!r} on disk but {expected!r} in memory.")


def verify_saved_project(in_process: Project, from_file: Project) -> None:
    """
    Verify the data in a saved :class:`~libgunshotmatch.project.Project` matches the data in memory.

    The project name, the alignment (``peakpos``, ``expr_code``, ``peakalgt`` and ``similarity``)
    and the keys of ``datafile_data`` are compared. Then, for each entry in ``datafile_data``,
    the datafile is checked with :func:`~.verify_saved_datafile` and the ``peaks`` and
    ``qualified_peaks`` are compared with ``==``.

    Will raise an :exc:`AssertionError` if the data do not match.

    :param in_process: The :class:`~libgunshotmatch.project.Project` already in memory.
    :param from_file: A :class:`~libgunshotmatch.project.Project` loaded from disk.

    :raises AssertionError: If any of the compared values differ.
    """

    # TODO: consolidated_peaks

    def require(matches: object, what: str) -> None:
        # Raise explicitly rather than using ``assert``, which is stripped when Python
        # runs with ``-O`` and would silently disable the verification.
        if not matches:
            raise AssertionError(f"Project.{what} on disk does not match the data in memory.")

    in_memory_alignment = in_process.alignment
    saved_alignment = from_file.alignment

    require(in_process.name == from_file.name, "name")
    require(in_memory_alignment.peakpos == saved_alignment.peakpos, "alignment.peakpos")
    require(in_memory_alignment.expr_code == saved_alignment.expr_code, "alignment.expr_code")
    require(array_equal(in_memory_alignment.peakalgt, saved_alignment.peakalgt), "alignment.peakalgt")  # type: ignore[arg-type]
    require(in_memory_alignment.similarity == saved_alignment.similarity, "alignment.similarity")

    left_dd = in_process.datafile_data
    right_dd = from_file.datafile_data

    # The key sets must agree before indexing both mappings with the same IDs below.
    require(left_dd.keys() == right_dd.keys(), "datafile_data (keys)")

    for datafile_id in left_dd:
        in_memory_entry = left_dd[datafile_id]
        saved_entry = right_dd[datafile_id]

        verify_saved_datafile(in_memory_entry.datafile, saved_entry.datafile)
        require(in_memory_entry.peaks == saved_entry.peaks, f"datafile_data[{datafile_id!r}].peaks")
        require(
            in_memory_entry.qualified_peaks == saved_entry.qualified_peaks,
            f"datafile_data[{datafile_id!r}].qualified_peaks",
        )


def write_matches_json(project: Project, output_dir: PathPlus) -> None:
    """
    Export the "best match" determined for each peak in the project to a JSON file.

    The data comes from :func:`gunshotmatch_pipeline.results.matches` and is serialised
    with ``sdjson``, using an indent of two.

    :param project: The project to export matches for. Its name gives the output filename.
    :param output_dir: The directory in which the ``<project.name>.json`` file is created.
    """

    json_file = output_dir / f"{project.name}.json"
    best_matches = gunshotmatch_pipeline.results.matches(project)

    json_file.dump_json(
        best_matches,
        indent=2,
        json_library=sdjson,  # type: ignore[arg-type]
    )


def write_combined_csv(repeat: Repeat, output_dir: PathPlus) -> None:
    """
    Write a CSV file listing the top hits for each peak in the :class:`~.libgunshotmatch.datafile.Repeat`, with associated data.

    The file is called ``<repeat.datafile.name>_COMBINED.csv`` and is semicolon-delimited.
    Its first line is the datafile name and its second line is the column header.
    Each peak then contributes one row giving its retention time and area (each divided by 60),
    followed by one row per hit giving the match factor, reverse match factor, name and CAS number.

    Fields containing the delimiter, a double quote or a line break are quoted
    following the :mod:`csv` module's default (minimal) quoting rules.

    :param repeat: The repeat whose qualified peaks are written. Its ``qualified_peaks`` must not be :py:obj:`None`.
    :param output_dir: Directory to save the file in

    :rtype:

    .. latex:clearpage::
    """

    # Imported locally; this is the only function in the module that needs it.
    import csv

    csv_header_row = "Retention Time;Peak Area;;Match;R Match;Name;CAS Number;Notes"

    qualified_peaks = repeat.qualified_peaks
    assert qualified_peaks is not None

    def to_csv(peak: QualifiedPeak) -> List[List[str]]:
        # One row for the peak itself, then one row for each of its hits.
        assert peak.area is not None

        area = f"{peak.area / 60:,}"
        peak_rows = [[str(peak.rt / 60), area, '', '', '', '', '', '']]

        # Hit rows carry seven fields (no trailing "Notes" field), matching the existing output format.
        for hit in peak.hits:
            peak_rows.append([
                '',
                '',
                '',
                str(hit.match_factor),
                str(hit.reverse_match_factor),
                hit.name,
                hit.cas,
            ])

        return peak_rows

    # Format every row before touching the disk, so a peak that fails the check above
    # does not leave a truncated CSV file behind.
    rows: List[List[str]] = []
    for peak in qualified_peaks:
        rows.extend(to_csv(peak))

    combined_csv_file = output_dir / f"{repeat.datafile.name}_COMBINED.csv"

    # Write output to CSV file
    with combined_csv_file.open('w') as combine_csv:
        # Sample name and header row
        combine_csv.write(f"{repeat.datafile.name}\n{csv_header_row}\n")

        # ``csv.writer`` quotes any field containing the delimiter, a quote or a line break,
        # which a plain ``";".join(row)`` would let corrupt the column layout.
        # The output is unchanged for rows that need no quoting.
        writer = csv.writer(combine_csv, delimiter=';', lineterminator='\n')
        writer.writerows(rows)