Source code for dials.util.export_mtz

from __future__ import annotations

import logging
import time
import warnings
from collections import Counter
from copy import deepcopy
from dataclasses import dataclass, field
from math import isclose

import gemmi
import numpy as np
import pandas as pd

from cctbx import uctbx
from dxtbx import flumpy
from iotbx import mtz
from libtbx import env
from rstbx.cftbx.coordinate_frame_helpers import align_reference_frame
from scitbx import matrix
from scitbx.math import r3_rotation_axis_and_angle_from_matrix

import dials.util.ext
from dials.algorithms.scaling.scaling_library import (
    MergedHalfDatasets,
    determine_best_unit_cell,
)
from dials.array_family import flex
from dials.util.batch_handling import (
    assign_batches_to_reflections,
    calculate_batch_offsets,
    get_image_ranges,
)
from dials.util.filter_reflections import filter_reflection_table
from dials.util.multi_dataset_handling import (
    assign_unique_identifiers,
    parse_multiple_datasets,
)
from dials.util.reindex import reindex_experiments, reindex_reflections
from dials.util.version import dials_version

logger = logging.getLogger(__name__)


[docs] class MTZWriterBase: """Helper for adding metadata, crystals and datasets to an mtz file object."""
[docs] def __init__(self, space_group, unit_cell=None): """If a unit cell is provided, will be used as default unless specified for each crystal.""" warnings.warn( "MTZWriterBase classes (MergedMTZWriter and MADMergedMTZWriter) are deprecated. Use MergedMTZCreator instead.\n", DeprecationWarning, stacklevel=2, ) mtz_file = mtz.object() mtz_file.set_title(f"From {env.dispatcher_name}") date_str = time.strftime("%Y-%m-%d at %H:%M:%S %Z") if time.strftime("%Z") != "GMT": date_str += time.strftime(" (%Y-%m-%d at %H:%M:%S %Z)", time.gmtime()) mtz_file.add_history(f"From {dials_version()}, run on {date_str}") mtz_file.set_space_group_info(space_group.info()) self.mtz_file = mtz_file if unit_cell: self.unit_cell = unit_cell self.current_crystal = None self.current_dataset = None self.n_crystals = 0 self.n_datasets = 0
[docs] def add_crystal(self, crystal_name=None, project_name=None, unit_cell=None): """Add a crystal to the mtz file object.""" if not unit_cell: if not self.unit_cell: raise ValueError("Unit cell must be provided.") else: unit_cell = self.unit_cell if not crystal_name: crystal_name = f"crystal_{self.n_crystals + 1}" if not project_name: project_name = "DIALS" self.current_crystal = self.mtz_file.add_crystal( crystal_name, project_name, unit_cell.parameters() ) self.n_crystals += 1
[docs] def add_empty_dataset(self, wavelength, name=None): """Add an empty dataset object to the mtz file.""" if not name: name = "FROMDIALS" self.current_dataset = self.current_crystal.add_dataset(name, wavelength) self.n_datasets += 1
[docs] class MergedMTZWriter(MTZWriterBase): """Mtz writer for merged data."""
[docs] def add_dataset( self, merged_array=None, anom_array=None, amplitudes=None, anom_amplitudes=None, dano=None, multiplicities=None, anom_multiplicities=None, suffix=None, half_datasets: MergedHalfDatasets | None = None, r_free_array=None, ): """Add merged data to the most recent dataset. Args: merged_array: A merged miller array of IMEAN intensities wavelength: The wavelength of the dataset anom_array (Optional): An anomalous merged miller array amplitudes (Optional): A merged miller array of amplitudes anom_amplitudes (Optional): An anomalous merged array of amplitudes suffix (Optional[str]): Column name suffix to use for this dataset. """ if not suffix: suffix = "" if merged_array: self.current_dataset.add_miller_array(merged_array, "IMEAN" + suffix) if multiplicities: self.current_dataset.add_miller_array(multiplicities, "N" + suffix) if amplitudes: self.current_dataset.add_miller_array(amplitudes, "F" + suffix) if anom_array: self.current_dataset.add_miller_array(anom_array, "I" + suffix) if anom_multiplicities: self.current_dataset.add_miller_array(anom_multiplicities, "N" + suffix) if anom_amplitudes: self.current_dataset.add_miller_array(anom_amplitudes, "F" + suffix) if dano: self.current_dataset.add_miller_array( dano, "DANO" + suffix, column_types="DQ" ) if half_datasets: self.current_dataset.add_miller_array( half_datasets.data1, "IHALF1" + suffix, column_types="JQ" ) self.current_dataset.add_miller_array( half_datasets.data2, "IHALF2" + suffix, column_types="JQ" ) self.current_dataset.add_miller_array( half_datasets.multiplicity1, "NHALF1" + suffix, ) self.current_dataset.add_miller_array( half_datasets.multiplicity2, "NHALF2" + suffix, ) if r_free_array: self.current_dataset.add_miller_array( r_free_array, column_root_label="FreeR_flag", column_types="I" )
[docs] class MADMergedMTZWriter(MergedMTZWriter): """Mtz writer for multi-wavelength merged data."""
[docs] def add_dataset( self, merged_array=None, anom_array=None, amplitudes=None, anom_amplitudes=None, dano=None, multiplicities=None, anom_multiplicities=None, suffix=None, half_datasets: MergedHalfDatasets | None = None, r_free_array=None, ): if not suffix: suffix = f"_WAVE{self.n_datasets}" super().add_dataset( merged_array, anom_array, amplitudes, anom_amplitudes, dano, multiplicities, anom_multiplicities, suffix, half_datasets, r_free_array=r_free_array, )
[docs] def add_batch_list( mtz, image_range, experiment, wavelength, dataset_id, batch_offset, force_static_model, ): """Add batch metadata to the gemmi mtz object.""" # Recalculate useful numbers and references here n_batches = image_range[1] - image_range[0] + 1 phi_start = flex.float(n_batches, 0) phi_range = flex.float(n_batches, 0) umat_array = flex.float(flex.grid(n_batches, 9)) cell_array = flex.float(flex.grid(n_batches, 6)) # Reciprocal lattice vectors in the lab frame at zero scan angle if experiment.goniometer: S = matrix.sqr(experiment.goniometer.get_setting_rotation()) F = matrix.sqr(experiment.goniometer.get_fixed_rotation()) UBlab = S * F * matrix.sqr(experiment.crystal.get_A()) axis = matrix.col(experiment.goniometer.get_rotation_axis()) axis_datum = matrix.col(experiment.goniometer.get_rotation_axis_datum()) else: UBlab = matrix.sqr(experiment.crystal.get_A()) i0 = image_range[0] for i in range(n_batches): if experiment.scan and experiment.scan.get_oscillation()[1] != 0.0: phi_start[i], phi_range[i] = experiment.scan.get_image_oscillation(i + i0) # Unit cell and UB matrix for the centre of the image for scan-varying model if ( not force_static_model and experiment.crystal.num_scan_points > 0 and experiment.goniometer ): # Get the index of the image in the sequence e.g. first => 0, second => 1 image_index = i + i0 - experiment.scan.get_image_range()[0] # Find the U matrix at the frame centre by calculating the linear transform # that goes from the start of the frame to the end, and then applying half of # that to the start value U0 = matrix.sqr(experiment.crystal.get_U_at_scan_point(image_index)) U1 = matrix.sqr(experiment.crystal.get_U_at_scan_point(image_index + 1)) M = U1 * U0.inverse() ( angle_M, axis_M, ) = M.r3_rotation_matrix_as_unit_quaternion().unit_quaternion_as_axis_and_angle( deg=False ) M_half = axis_M.axis_and_angle_as_r3_rotation_matrix(angle_M / 2, deg=False) Ucentre = M_half * U0 # Find the B matrix at the frame centre by interpolation B0 = matrix.sqr(experiment.crystal.get_B_at_scan_point(image_index)) B1 = matrix.sqr(experiment.crystal.get_B_at_scan_point(image_index + 1)) Bcentre = (B0 + B1) / 2 # Unit cell at the frame centre unit_cell = uctbx.unit_cell( orthogonalization_matrix=Bcentre.transpose().inverse() ) # Get full lab frame UB then unwind to zero scan angle phi_centre = phi_start[i] + phi_range[i] / 2 R = matrix.sqr( axis_datum.axis_and_angle_as_r3_rotation_matrix(phi_centre, deg=False) ) Rlab_inv = matrix.sqr( axis.axis_and_angle_as_r3_rotation_matrix(-phi_centre, deg=False) ) _UBlab = Rlab_inv * S * R * F * Ucentre * Bcentre else: unit_cell = experiment.crystal.get_unit_cell() _UBlab = UBlab # We assume a single-axis goniometer as it is not clear that multi- # axis goniometry was ever fully supported in MTZ format. Orientation # will be taken from the laboratory frame for this image. U = matrix.sqr(dials.util.ext.ub_to_mosflm_u(_UBlab, unit_cell)) # FIXME need to get what was refined and what was constrained from the # crystal model - see https://github.com/dials/dials/issues/355 _unit_cell_params = unit_cell.parameters() for j in range(6): cell_array[i, j] = _unit_cell_params[j] # Transpose to put in column-major order for MTZ export U_t_elements = U.transpose().elems for j in range(9): umat_array[i, j] = U_t_elements[j] # We ignore panels beyond the first one, at the moment panel = experiment.detector[0] panel_size = panel.get_image_size() panel_distance = panel.get_directed_distance() if experiment.goniometer: axis = flex.float(experiment.goniometer.get_rotation_axis()) else: axis = flex.float((0.0, 0.0, 0.0)) source = flex.float(experiment.beam.get_sample_to_source_direction()) # get the mosaic spread though today it may not actually be set - should # this be in the BATCH headers? try: mosaic = experiment.crystal.get_mosaicity() except AttributeError: mosaic = 0.0 max_batch_number = 0 if mtz.batches: max_batch_number = mtz.batches[-1].number batch_offset += image_range[0] - 1 if max_batch_number > batch_offset: batch_offset = max_batch_number batch = gemmi.Mtz.Batch() # Setting fields that are the same for all batches batch.dataset_id = dataset_id batch.wavelength = wavelength batch.ints[12] = 1 # ncryst batch.ints[14] = 2 # ldtype 3D batch.ints[15] = 1 # jsaxs - goniostat scan axis number batch.ints[17] = 1 # ngonax - number of goniostat axes batch.ints[19] = 1 # ndet batch.floats[21] = mosaic # crydat[0] for j in range(3): batch.floats[38 + j] = axis[j] # scanax batch.floats[43] = 1.0 # bscale (batch scale) for j in range(3): batch.floats[59 + j] = axis[j] # e1 batch.floats[80 + flex.min_index(source)] = -1.0 # idealised source vector for j in range(3): batch.floats[83 + j] = source[j] # source including tilts batch.floats[111] = panel_distance # dx batch.floats[114] = panel_size[0] # NX batch.floats[116] = panel_size[1] # NY batch.axes = ["AXIS"] # gonlab[0] # Setting fields that differ for i_batch in range(n_batches): batch.number = batch_offset + i_batch + 1 batch.title = f"Batch {batch.number}" for j in range(6): batch.floats[j] = cell_array[i_batch, j] # cell for j in range(9): batch.floats[6 + j] = umat_array[i_batch, j] # Umat batch.floats[36] = phi_start[i_batch] # phistt batch.floats[37] = phi_start[i_batch] + phi_range[i_batch] # phiend batch.floats[47] = phi_range[i_batch] # phirange # Append this batch mtz.batches.append(batch) return
[docs] def write_columns(mtz, reflection_table): """Write the column definitions AND data to the current dataset.""" nref = len(reflection_table["miller_index"]) assert nref xdet, ydet, _ = ( flex.double(x) for x in reflection_table["xyzobs.px.value"].parts() ) type_table = { "H": "H", "K": "H", "L": "H", "I": "J", "SIGI": "Q", "IPR": "J", "SIGIPR": "Q", "BG": "R", "SIGBG": "R", "XDET": "R", "YDET": "R", "BATCH": "B", "BGPKRATIOS": "R", "WIDTH": "R", "MPART": "I", "M_ISYM": "Y", "FLAG": "I", "LP": "R", "FRACTIONCALC": "R", "ROT": "R", "QE": "R", } mtz_data = pd.DataFrame( flumpy.to_numpy(reflection_table["miller_index"]).astype("float32"), columns=["H", "K", "L"], ) mtz_data.insert(3, "M/ISYM", np.zeros(nref, dtype="float32")) # H, K, L are in the base dataset, but we have to add M/ISYM mtz.add_column("M/ISYM", type_table["M_ISYM"]) mtz.add_column("BATCH", type_table["BATCH"]) mtz_data.insert( 4, "BATCH", flumpy.to_numpy(reflection_table["batch"]).astype("float32") ) # if intensity values used in scaling exist, then just export these as I, SIGI if "intensity.scale.value" in reflection_table: I_scaling = reflection_table["intensity.scale.value"] V_scaling = reflection_table["intensity.scale.variance"] assert V_scaling.all_gt(0) # Trap negative variances mtz.add_column("I", type_table["I"]) mtz_data.insert( len(mtz_data.columns), "I", flumpy.to_numpy(I_scaling).astype("float32") ) mtz.add_column("SIGI", type_table["SIGI"]) mtz_data.insert( len(mtz_data.columns), "SIGI", flumpy.to_numpy(flex.sqrt(V_scaling)).astype("float32"), ) mtz.add_column("SCALEUSED", "R") mtz_data.insert( len(mtz_data.columns), "SCALEUSED", flumpy.to_numpy(reflection_table["inverse_scale_factor"]).astype("float32"), ) mtz.add_column("SIGSCALEUSED", "R") mtz_data.insert( len(mtz_data.columns), "SIGSCALEUSED", flumpy.to_numpy( flex.sqrt(reflection_table["inverse_scale_factor_variance"]) ).astype("float32"), ) else: if "intensity.prf.value" in reflection_table: if "intensity.sum.value" in reflection_table: col_names = ("IPR", "SIGIPR") else: col_names = ("I", "SIGI") I_profile = reflection_table["intensity.prf.value"] V_profile = reflection_table["intensity.prf.variance"] assert V_profile.all_gt(0) # Trap negative variances mtz.add_column(col_names[0], type_table["I"]) mtz_data.insert( len(mtz_data.columns), col_names[0], flumpy.to_numpy(I_profile.as_float()).astype("float32"), ) mtz.add_column(col_names[1], type_table["SIGI"]) mtz_data.insert( len(mtz_data.columns), col_names[1], flumpy.to_numpy(flex.sqrt(V_profile)).astype("float32"), ) if "intensity.sum.value" in reflection_table: I_sum = reflection_table["intensity.sum.value"] V_sum = reflection_table["intensity.sum.variance"] assert V_sum.all_gt(0) # Trap negative variances mtz.add_column("I", type_table["I"]) mtz_data.insert( len(mtz_data.columns), "I", flumpy.to_numpy(I_sum).astype("float32") ) mtz.add_column("SIGI", type_table["SIGI"]) mtz_data.insert( len(mtz_data.columns), "SIGI", flumpy.to_numpy(flex.sqrt(V_sum)).astype("float32"), ) if ( "background.sum.value" in reflection_table and "background.sum.variance" in reflection_table ): bg = reflection_table["background.sum.value"] varbg = reflection_table["background.sum.variance"] assert (varbg >= 0).count(False) == 0 sigbg = flex.sqrt(varbg) mtz.add_column("BG", type_table["BG"]) mtz_data.insert( len(mtz_data.columns), "BG", flumpy.to_numpy(bg).astype("float32") ) mtz.add_column("SIGBG", type_table["SIGBG"]) mtz_data.insert( len(mtz_data.columns), "SIGBG", flumpy.to_numpy(sigbg).astype("float32") ) mtz.add_column("FRACTIONCALC", type_table["FRACTIONCALC"]) mtz_data.insert( len(mtz_data.columns), "FRACTIONCALC", flumpy.to_numpy(reflection_table["fractioncalc"]).astype("float32"), ) mtz.add_column("XDET", type_table["XDET"]) mtz_data.insert( len(mtz_data.columns), "XDET", flumpy.to_numpy(xdet).astype("float32") ) mtz.add_column("YDET", type_table["YDET"]) mtz_data.insert( len(mtz_data.columns), "YDET", flumpy.to_numpy(ydet).astype("float32") ) mtz.add_column("ROT", type_table["ROT"]) mtz_data.insert( len(mtz_data.columns), "ROT", flumpy.to_numpy(reflection_table["ROT"]).astype("float32"), ) if "lp" in reflection_table: mtz.add_column("LP", type_table["LP"]) mtz_data.insert( len(mtz_data.columns), "LP", flumpy.to_numpy(reflection_table["lp"]).astype("float32"), ) if "qe" in reflection_table: mtz.add_column("QE", type_table["QE"]) mtz_data.insert( len(mtz_data.columns), "QE", flumpy.to_numpy(reflection_table["qe"]).astype("float32"), ) elif "dqe" in reflection_table: mtz.add_column("QE", type_table["QE"]) mtz_data.insert( len(mtz_data.columns), "QE", flumpy.to_numpy(reflection_table["dqe"]).astype("float32"), ) else: mtz.add_column("QE", type_table["QE"]) mtz_data.insert(len(mtz_data.columns), "QE", np.ones(nref).astype("float32")) mtz.switch_to_original_hkl() mtz.set_data(mtz_data.to_numpy())
[docs] def export_mtz( reflection_table, experiment_list, intensity_choice, filename, best_unit_cell=None, partiality_threshold=0.4, combine_partials=True, min_isigi=-5, filter_ice_rings=False, d_min=None, force_static_model=False, crystal_name=None, project_name=None, wavelength_tolerance=1e-4, ): """Export data from reflection_table corresponding to experiment_list to an MTZ file hklout.""" # First get the experiment identifier information out of the data expids_in_table = reflection_table.experiment_identifiers() if not list(expids_in_table.keys()): reflection_tables = parse_multiple_datasets([reflection_table]) experiment_list, refl_list = assign_unique_identifiers( experiment_list, reflection_tables ) reflection_table = flex.reflection_table() for reflections in refl_list: reflection_table.extend(reflections) expids_in_table = reflection_table.experiment_identifiers() reflection_table.assert_experiment_identifiers_are_consistent(experiment_list) expids_in_list = list(experiment_list.identifiers()) # Convert geometry to the Cambridge frame experiment_list = convert_to_cambridge(experiment_list) # Validate multi-experiment assumptions if len(experiment_list) > 1: # All experiments should match crystals, or else we need multiple crystals/datasets if not all( x.crystal == experiment_list[0].crystal for x in experiment_list[1:] ): logger.warning( "Experiment crystals differ. Using first experiment crystal for file-level data." ) # At least, all experiments must have the same space group if len({x.crystal.get_space_group().make_tidy() for x in experiment_list}) != 1: raise ValueError("Experiments do not have a unique space group") # Reindex to a tabulated setting if necessary sg = experiment_list[0].crystal.get_space_group() if sg.match_tabulated_settings().number() == 0: logger.warning( "The data will be reindexed to a tabulated setting of the space group" ) cb_op = sg.info().change_of_basis_op_to_reference_setting() experiment_list = reindex_experiments(experiment_list, cb_op) reflection_table = reindex_reflections( [ reflection_table, ], cb_op, ) # Convert experiment_list to a real python list or else identity assumptions # fail like: # assert experiment_list[0] is experiment_list[0] # And assumptions about added attributes break experiment_list = list(experiment_list) wavelengths = match_wavelengths(experiment_list, wavelength_tolerance) for w in wavelengths.values(): w.calculate_weighted_mean([reflection_table]) if len(wavelengths) > 1: identifiers_list = [e.identifier for e in experiment_list] logger.info( "Multiple wavelengths found: \n%s", "\n".join( " Wavelength: {:.5f}, experiment numbers: {} ".format( v.weighted_mean, ",".join( map(str, [identifiers_list.index(i) for i in v.identifiers]) ), ) for v in wavelengths.values() ), ) # also only work correctly with one panel (for the moment) if any(len(experiment.detector) != 1 for experiment in experiment_list): logger.warning("Ignoring multiple panels in output MTZ") if best_unit_cell is None: best_unit_cell = determine_best_unit_cell(experiment_list) reflection_table["d"] = best_unit_cell.d(reflection_table["miller_index"]) # Clean up the data with the passed in options reflection_table = filter_reflection_table( reflection_table, intensity_choice=intensity_choice, partiality_threshold=partiality_threshold, combine_partials=combine_partials, min_isigi=min_isigi, filter_ice_rings=filter_ice_rings, d_min=d_min, ) # get batch offsets and image ranges - even for scanless experiments batch_offsets = [ expt.scan.get_batch_offset() for expt in experiment_list if expt.scan is not None ] unique_offsets = set(batch_offsets) if len(set(unique_offsets)) <= 1: logger.debug("Calculating new batches") batch_offsets = calculate_batch_offsets(experiment_list) batch_starts = [ e.scan.get_image_range()[0] if e.scan else 0 for e in experiment_list ] effective_offsets = [o + s for o, s in zip(batch_offsets, batch_starts)] unique_offsets = set(effective_offsets) else: logger.debug("Keeping existing batches") image_ranges = get_image_ranges(experiment_list) if len(unique_offsets) != len(batch_offsets): raise ValueError( "Duplicate batch offsets detected: %s" % ", ".join( str(item) for item, count in Counter(batch_offsets).items() if count > 1 ) ) # Create the mtz file mtz = gemmi.Mtz(with_base=True) mtz.title = f"From {env.dispatcher_name}" date_str = time.strftime("%Y-%m-%d at %H:%M:%S %Z") if time.strftime("%Z") != "GMT": date_str += time.strftime(" (%Y-%m-%d at %H:%M:%S %Z)", time.gmtime()) mtz.history += [ f"From {dials_version()}, run on {date_str}", ] # Create the right gemmi spacegroup from the crystal's cctbx space_group # via a Hall symbol hall = experiment_list[0].crystal.get_space_group().type().hall_symbol() ops = gemmi.symops_from_hall(hall) mtz.spacegroup = gemmi.find_spacegroup_by_ops(ops) # FIXME TODO for more than one experiment into an MTZ file: # # - add an epoch (or recover an epoch) from the scan and add this as an extra # column to the MTZ file for scaling, so we know that the two lattices were # integrated at the same time # ✓ decide a sensible BATCH increment to apply to the BATCH value between # experiments and add this for id_ in expids_in_table.keys(): # Grab our subset of the data loc = expids_in_list.index( expids_in_table[id_] ) # get strid and use to find loc in list experiment = experiment_list[loc] identifier = experiment.identifier if len(wavelengths) > 1: for i, wl in enumerate(wavelengths.values()): if identifier in wl.identifiers: wavelength = wl.weighted_mean dataset_id = i + 1 break else: wavelength = list(wavelengths.values())[0].weighted_mean dataset_id = 1 reflections = reflection_table.select(reflection_table["id"] == id_) batch_offset = batch_offsets[loc] image_range = image_ranges[loc] reflections = assign_batches_to_reflections([reflections], [batch_offset])[0] experiment.data = dict(reflections) s0n = matrix.col(experiment.beam.get_s0()).normalize().elems logger.debug("Beam vector: {:.4f} {:.4f} {:.4f}".format(*s0n)) add_batch_list( mtz, image_range, experiment, wavelength, dataset_id, batch_offset=batch_offset, force_static_model=force_static_model, ) # Create the batch offset array. This gives us an experiment (id)-dependent # batch offset to calculate the correct batch from image number. experiment.data["batch_offset"] = flex.int( len(experiment.data["id"]), batch_offset ) # Calculate whether we have a ROT value for this experiment, and set the column _, _, z = experiment.data["xyzcal.px"].parts() if experiment.scan: experiment.data["ROT"] = experiment.scan.get_angle_from_array_index(z) else: experiment.data["ROT"] = z mtz.set_cell_for_all(gemmi.UnitCell(*best_unit_cell.parameters())) # For multi-wave unmerged mtz, we add an empty dataset for each wavelength, # but only write the data into the final dataset (for unmerged the batches # link the unmerged data to the individual wavelengths). for wavelength in wavelengths.values(): ds = mtz.add_dataset("FROMDIALS") ds.crystal_name = crystal_name ds.project_name = project_name ds.wavelength = wavelength.weighted_mean # Combine all of the experiment data columns before writing combined_data = {k: v.deep_copy() for k, v in experiment_list[0].data.items()} for experiment in experiment_list[1:]: for k, v in experiment.data.items(): combined_data[k].extend(v) # ALL columns must be the same length assert len({len(v) for v in combined_data.values()}) == 1, "Column length mismatch" assert len(combined_data["id"]) == len( reflection_table["id"] ), "Lost rows in split/combine" # Write all the data and columns to the mtz file write_columns(mtz, combined_data) # Switch to ASU indices and sort file in standard order mtz.switch_to_asu_hkl() mtz.sort(5) logger.info( "Saving %s integrated reflections to %s", len(combined_data["id"]), filename ) mtz.write_to_file(filename) log_summary(mtz) return mtz
[docs] def log_summary(mtz): """Log a summary of an MTZ object, based on the output of `gemmi mtz --dump`""" logger.info("Title: " + mtz.title) logger.info(f"Total Number of Datasets = {len(mtz.datasets)}\n") for ds in mtz.datasets: logger.info( f"Dataset {ds.id:4d} {ds.project_name} > {ds.crystal_name} > {ds.dataset_name}:" ) logger.info( " cell {:7g} {:7g} {:7g} {:6g} {:6g} {:6g}".format( *ds.cell.parameters ) ) logger.info(f" wavelength {ds.wavelength:g}") logger.info(f"\nNumber of Columns = {len(mtz.columns)}") logger.info( f"Number of Reflections = {mtz.nreflections}", ) logger.info(f"Number of Batches = {len(mtz.batches)}") logger.info(f"Missing values marked as: {mtz.valm}") logger.info( "Global Cell (obsolete): {:7g} {:7g} {:7g} {:6g} {:6g} {:6g}".format( *mtz.cell.parameters ) ) mtz.update_reso() logger.info( f"Resolution: {mtz.resolution_high():.2f} - {mtz.resolution_low():.2f} A" ) logger.info("Sort Order: {:d} {:d} {:d} {:d} {:d}".format(*mtz.sort_order)) logger.info(f"Space Group: {mtz.spacegroup.hm}") logger.info(f"Space Group Number: {mtz.spacegroup.ccp4}") logger.info("Header info:") logger.info("Column Type Dataset Min Max") for col in mtz.columns: # col.min_value and col.max_value are not set, so we have to calculate them here logger.info( f"{col.label:<12s} {col.type} {col.dataset_id:2d} {np.nanmin(col.array):12.6g} {np.nanmax(col.array):10.6g}" ) logger.info(f"History ({len(mtz.history)} lines):") for line in mtz.history: logger.info(line)
[docs] @dataclass class WavelengthGroup: min_wl: float max_possible_wl: float identifiers: list[str] = field(default_factory=list) exp_nos: list[int] = field(default_factory=list) wavelengths: list[float] = field(default_factory=list) weighted_mean: float = 0
[docs] def add_experiment(self, identifier: str, loc_in_list: int, wl: float) -> None: self.identifiers.append(identifier) self.exp_nos.append(loc_in_list) self.wavelengths.append(wl)
[docs] def calculate_weighted_mean( self, reflection_tables: list[flex.reflection_table] ) -> None: n, nw = (0, 0) for i, w in zip(self.identifiers, self.wavelengths): for table in reflection_tables: refls = table.select_on_experiment_identifiers([i]) n_this = refls.select( refls.get_flags(refls.flags.integrated, all=False) ).size() if n_this: n += n_this nw += n_this * w break if n: self.weighted_mean = nw / n
[docs] def match_wavelengths(experiments, absolute_tolerance=1e-4): wavelengths = {} for i, x in enumerate(experiments): w = x.beam.get_wavelength() matches = [isclose(w, k, abs_tol=absolute_tolerance) for k in wavelengths] if not any(matches): wavelengths[w] = WavelengthGroup(w, w + absolute_tolerance) wavelengths[w].add_experiment(x.identifier, i, w) else: match_w = list(wavelengths.keys())[matches.index(True)] wavelengths[match_w].add_experiment(x.identifier, i, w) return wavelengths
[docs] def convert_to_cambridge(experiments): """Rotate the geometry of an experiment list to match the Cambridge frame, in which X is along the idealized X-ray beam and Z is along the primary rotation axis""" # First handle the potential for shared experiment models - we don't # want to apply multiple transformations to shared models, simplest way is # to make a copy for each experiment. n_expt = len(experiments) if len(experiments.crystals()) < n_expt: for expt in experiments: expt.crystal = deepcopy(expt.crystal) if len(experiments.beams()) < n_expt: for expt in experiments: expt.beam = deepcopy(expt.beam) if len(experiments.detectors()) < n_expt: for expt in experiments: expt.detector = deepcopy(expt.detector) if any(experiments.goniometers()) and len(experiments.goniometers()) < n_expt: for expt in experiments: expt.goniometer = deepcopy(expt.goniometer) for expt in experiments: if expt.goniometer: primary_axis = matrix.col(expt.goniometer.get_rotation_axis_datum()) else: primary_axis = matrix.col((1.0, 0.0, 0.0)) us0 = expt.beam.get_unit_s0() R = align_reference_frame(primary_axis, (0, 0, 1), us0, (1, 0, 0)) axis_angle = r3_rotation_axis_and_angle_from_matrix(R) axis = matrix.col(axis_angle.axis) angle = axis_angle.angle() logger.debug( f"Rotating experiment{'s' if len(experiments) else ''} about axis {axis.elems} by {np.degrees(angle):.2f}°" ) expt.detector.rotate_around_origin(axis, angle, deg=False) expt.beam.rotate_around_origin(axis, angle, deg=False) # For the goniometer, each component needs transformation if expt.goniometer: F = matrix.sqr(expt.goniometer.get_fixed_rotation()) expt.goniometer.set_fixed_rotation(R * F * R.transpose()) expt.goniometer.set_rotation_axis_datum(R * primary_axis) S = matrix.sqr(expt.goniometer.get_setting_rotation()) expt.goniometer.set_setting_rotation(R * S * R.transpose()) if expt.crystal is not None: expt.crystal = rotate_crystal(expt.crystal, R, axis, angle) return experiments
[docs] def rotate_crystal(crystal, Rmat, axis, angle): Amats = [] if crystal.num_scan_points > 0: scan_pts = list(range(crystal.num_scan_points)) Amats = [ Rmat * matrix.sqr(crystal.get_U_at_scan_point(t)) * matrix.sqr(crystal.get_B_at_scan_point(t)) for t in scan_pts ] crystal.rotate_around_origin(axis, angle, deg=False) if Amats: crystal.set_A_at_scan_points(Amats) return crystal