xia2.multiplex¶
Introduction¶
xia2.multiplex performs symmetry analysis, scaling and merging of multi-crystal data sets, as well as analysis of various pathologies that typically affect multi-crystal data sets, including non-isomorphism, radiation damage and preferred orientation.
It uses a number of DIALS programs internally, including dials.cosym, dials.two_theta_refine, dials.scale and dials.symmetry:
Preliminary filtering of datasets using hierarchical unit cell clustering
Laue group determination and resolution of indexing ambiguities with dials.cosym
Determination of “best” overall unit cell with dials.two_theta_refine
Initial round of scaling with dials.scale
Estimation of resolution limit with dials.estimate_resolution
Final round of scaling after application of the resolution limit
Analysis of systematic absences with dials.symmetry
Optional ΔCC½ filtering to remove outlier data sets
Analysis of non-isomorphism, radiation damage and preferred orientation
For further details, and to cite usage, please see: Gildea, R. J. et al. (2022) Acta Cryst. D78, 752-769.
Example use cases¶
Multiple integrated experiments and reflections in combined files:
xia2.multiplex integrated.expt integrated.refl
Integrated experiments and reflections in separate input files:
xia2.multiplex integrated_1.expt integrated_1.refl \
integrated_2.expt integrated_2.refl
Override the automatic space group determination and resolution estimation:
xia2.multiplex space_group=C2 resolution.d_min=2.5 \
integrated_1.expt integrated_1.refl \
integrated_2.expt integrated_2.refl
Filter potential outlier data sets using the ΔCC½ method:
xia2.multiplex filtering.method=deltacchalf \
integrated.expt integrated.refl
Basic parameters¶
unit_cell_clustering {
threshold = 5000
log = False
}
scaling {
anomalous = False
brotation.spacing = None
secondary {
}
model = physical dose_decay array KB *auto
outlier_rejection = simple standard
min_partiality = None
partiality_cutoff = None
reflection_selection {
method = quasi_random intensity_ranges use_all random
Isigma_range = None
}
}
symmetry {
resolve_indexing_ambiguity = True
cosym {
min_reflections = 10
seed = 230
normalisation = kernel quasi *ml_iso ml_aniso
d_min = Auto
min_i_mean_over_sigma_mean = 4
min_cc_half = 0.6
dimensions = Auto
use_curvatures = True
weights = *count standard_error
cc_weights = None sigma
min_pairs = 3
minimization {
engine = *scitbx scipy
max_iterations = 100
max_calls = None
}
nproc = Auto
lattice_group = None
space_group = None
lattice_symmetry_max_delta = 5.0
best_monoclinic_beta = False
relative_length_tolerance = 0.05
absolute_angle_tolerance = 2
}
laue_group = None
space_group = None
}
resolution {
d_max = None
d_min = None
cc_half_method = half_dataset *sigma_tau
reflections_per_bin = 10
labels = None
reference = None
emax = 4
}
rescale_after_resolution_cutoff = False
filtering {
method = None deltacchalf
deltacchalf {
max_cycles = None
max_percent_removed = None
min_completeness = None
mode = dataset image_group
group_size = None
stdcutoff = None
}
}
multi_crystal_analysis {
unit_cell = None
n_bins = 20
d_min = None
batch {
id = None
range = None
}
}
unit_cell {
refine = *two_theta
}
two_theta_refine {
combine_crystal_models = True
}
clustering {
output_clusters = False
method = *hierarchical coordinate
min_cluster_size = 5
min_completeness = 0
min_multiplicity = 0
max_output_clusters = 10
hierarchical {
method = *cos_angle correlation
max_cluster_height = 100
max_cluster_height_cc = 100
max_cluster_height_cos = 100
distinct_clusters = False
}
}
identifiers = None
dose = None
nproc = Auto
remove_profile_fitting_failures = True
r_free_flags {
generate = True
fraction = 0.1
use_lattice_symmetry = True
use_dataman_shells = False
n_shells = 20
extend = True
relative_to_complete_set = False
reference = None
}
wavelength_tolerance = 0.0001
seed = 42
output {
log = xia2.multiplex.log
}
Full parameter definitions¶
unit_cell_clustering
.short_caption = "Unit cell clustering"
{
threshold = 5000
.help = "Threshold value for the clustering"
.type = float(value_min=0, allow_none=True)
log = False
.help = "Display the dendrogram with a log scale"
.type = bool
}
scaling
.short_caption = Scaling
{
anomalous = False
.help = "Separate anomalous pairs in scaling and error model optimisation."
.type = bool
rotation.spacing = None
.short_caption = "Interval (in degrees) between scale factors on rotation"
"axis"
.type = int(allow_none=True)
.expert_level = 2
brotation.spacing = None
.short_caption = "Interval (in degrees) between B-factors on rotation axis"
.type = int(allow_none=True)
secondary {
lmax = 0
.short_caption = "Number of spherical harmonics for absorption"
"correction"
.type = int(allow_none=True)
.expert_level = 2
share.absorption = False
.help = "Apply a shared absorption correction between sweeps. Only"
"suitable for scaling measurements from a single crystal."
.short_caption = "Shared absorption correction"
.type = bool
.expert_level = 2
}
absorption_level = low medium high
.help = "Set the extent of absorption correction in scaling"
.short_caption = "Absorption level"
.type = choice
.expert_level = 2
model = physical dose_decay array KB *auto
.short_caption = "Scaling model"
.type = choice
outlier_rejection = simple standard
.short_caption = "Outlier rejection"
.type = choice
min_partiality = None
.short_caption = "Minimum partiality"
.type = float(value_min=0, value_max=1, allow_none=True)
partiality_cutoff = None
.short_caption = "Partiality cutoff"
.type = float(value_min=0, value_max=1, allow_none=True)
reflection_selection
.short_caption = "Reflection selection"
{
method = quasi_random intensity_ranges use_all random
.help = "Method to use when choosing a reflection subset for scaling"
"model minimisation. The quasi_random option randomly selects"
"reflection groups within a dataset, and also selects groups"
"which have good connectedness across datasets for multi-dataset"
"cases. The random option selects reflection groups randomly for"
"both single and multi dataset scaling, so for a single dataset"
"quasi_random == random. The intensity_ranges option uses the"
"E2_range, Isigma_range and d_range options to choose the subset of"
"reflections. The use_all option uses all suitable reflections,"
"which may be slow for large datasets."
.type = choice
Isigma_range = None
.help = "Minimum and maximum I/sigma values used to select a subset of"
"reflections for minimisation. A value of 0.0 for the maximum"
"indicates that no upper limit should be applied."
.short_caption = "I/σ range"
.type = floats(size=2)
}
}
symmetry
.short_caption = Symmetry
{
resolve_indexing_ambiguity = True
.short_caption = "Resolve indexing ambiguity"
.type = bool
cosym {
min_reflections = 10
.help = "The minimum number of merged reflections per experiment"
"required to perform cosym analysis."
.type = int(value_min=0, allow_none=True)
seed = 230
.type = int(value_min=0, allow_none=True)
normalisation = kernel quasi *ml_iso ml_aniso
.type = choice
d_min = Auto
.type = float(value_min=0, allow_none=True)
min_i_mean_over_sigma_mean = 4
.short_caption = "Minimum <I>/<σ>"
.type = float(value_min=0, allow_none=True)
min_cc_half = 0.6
.short_caption = "Minimum CC½"
.type = float(value_min=0, value_max=1, allow_none=True)
dimensions = Auto
.short_caption = Dimensions
.type = int(value_min=2, allow_none=True)
use_curvatures = True
.short_caption = "Use curvatures"
.type = bool
weights = *count standard_error
.help = "If not None, a weights matrix is used in the cosym procedure."
"weights=count uses the number of reflections used to calculate"
"a pairwise correlation coefficient as its weight."
"weights=standard_error uses the reciprocal of the standard"
"error as the weight. The standard error is given by the sqrt of"
"(1-CC^2)/(n-2), where (n-2) are the degrees of freedom in a"
"pairwise CC calculation."
.short_caption = Weights
.type = choice
cc_weights = None sigma
.help = "If not None, a weighted cc-half formula is used for calculating"
"pairwise correlation coefficients and degrees of freedom in the"
"cosym procedure. cc_weights=sigma uses the intensity uncertainties"
"to perform inverse variance weighting during the cc"
"calculation."
.type = choice
min_pairs = 3
.help = "Minimum number of pairs for inclusion of correlation"
"coefficient in calculation of Rij matrix."
.short_caption = "Minimum number of pairs"
.type = int(value_min=1, allow_none=True)
minimization
.short_caption = Minimization
{
engine = *scitbx scipy
.short_caption = Engine
.type = choice
max_iterations = 100
.short_caption = "Maximum number of iterations"
.type = int(value_min=0, allow_none=True)
max_calls = None
.short_caption = "Maximum number of calls"
.type = int(value_min=0, allow_none=True)
}
nproc = Auto
.help = "Number of processes"
.type = int(value_min=1, allow_none=True)
lattice_group = None
.short_caption = "Lattice group"
.type = space_group
space_group = None
.short_caption = "Space group"
.type = space_group
lattice_symmetry_max_delta = 5.0
.short_caption = "Lattice symmetry max δ"
.type = float(value_min=0, allow_none=True)
best_monoclinic_beta = False
.help = "If True, then for monoclinic centered cells, I2 will be"
"preferred over C2 if it gives a less oblique cell (i.e. smaller"
"beta angle)."
.short_caption = "Best monoclinic β"
.type = bool
relative_length_tolerance = 0.05
.help = "Datasets are only accepted if unit cell lengths fall within"
"this relative tolerance of the median cell lengths."
.type = float(value_min=0, allow_none=True)
absolute_angle_tolerance = 2
.help = "Datasets are only accepted if unit cell angles fall within this"
"absolute tolerance of the median cell angles."
.type = float(value_min=0, allow_none=True)
}
laue_group = None
.help = "Specify the Laue group. If None, then the Laue group will be"
"determined by dials.cosym."
.short_caption = "Laue group"
.type = space_group
space_group = None
.help = "Specify the space group. If None, then dials.symmetry will"
"perform analysis of systematically absent reflections to"
"determine the space group."
.short_caption = "Space group"
.type = space_group
}
reference = None
.help = "A file containing a reference set of intensities e.g. MTZ/cif, or a"
"file from which a reference set of intensities can be calculated"
"e.g. .pdb or .cif. The space group of the reference file will be"
"used and if an indexing ambiguity is present, the input data will"
"be reindexed to be consistent with the indexing mode of this"
"reference file."
.type = path
.expert_level = 2
resolution
.short_caption = Resolution
{
d_max = None
.help = "Low resolution cutoff."
.short_caption = "Low resolution cutoff"
.type = float(value_min=0, allow_none=True)
d_min = None
.help = "High resolution cutoff."
.short_caption = "High resolution cutoff"
.type = float(value_min=0, allow_none=True)
rmerge = None
.help = "Maximum value of Rmerge in the outer resolution shell"
.short_caption = "Outer shell Rmerge"
.type = float(value_min=0, allow_none=True)
.expert_level = 1
completeness = None
.help = "Minimum completeness in the outer resolution shell"
.short_caption = "Outer shell completeness"
.type = float(value_min=0, allow_none=True)
.expert_level = 1
cc_ref = 0.1
.help = "Minimum value of CC vs reference data set in the outer resolution"
"shell"
.short_caption = "Outer shell CCref"
.type = float(value_min=0, allow_none=True)
.expert_level = 1
cc_half = 0.3
.help = "Minimum value of CC½ in the outer resolution shell"
.short_caption = "Outer shell CC½"
.type = float(value_min=0, allow_none=True)
.expert_level = 1
cc_half_method = half_dataset *sigma_tau
.short_caption = "CC½ method"
.type = choice
cc_half_significance_level = 0.1
.short_caption = "CC½ significance level"
.type = float(value_min=0, value_max=1, allow_none=True)
.expert_level = 1
cc_half_fit = polynomial *tanh
.short_caption = "CC½ fit"
.type = choice
.expert_level = 1
isigma = None
.help = "Minimum value of the unmerged <I/sigI> in the outer resolution"
"shell"
.short_caption = "Outer shell unmerged <I/sigI>"
.type = float(value_min=0, allow_none=True)
.expert_level = 1
misigma = None
.help = "Minimum value of the merged <I/sigI> in the outer resolution"
"shell"
.short_caption = "Outer shell merged <I/sigI>"
.type = float(value_min=0, allow_none=True)
.expert_level = 1
i_mean_over_sigma_mean = None
.help = "Minimum value of the unmerged <I>/<sigI> in the outer resolution"
"shell"
.short_caption = "Outer shell unmerged <I>/<sigI>"
.type = float(value_min=0, allow_none=True)
.expert_level = 2
nbins = 50
.help = "Maximum number of resolution bins to use for estimation of"
"resolution limit."
.short_caption = "Number of resolution bins."
.type = int(allow_none=True)
.expert_level = 1
reflections_per_bin = 10
.help = "Minimum number of reflections per bin."
.short_caption = "Minimum number of reflections per bin"
.type = int(allow_none=True)
binning_method = *counting_sorted volume
.help = "Use equal-volume bins or bins with approximately equal numbers of"
"reflections per bin."
.short_caption = "Equal-volume or equal #ref binning."
.type = choice
.expert_level = 1
anomalous = False
.help = "Keep anomalous pairs separate in merging statistics"
.short_caption = Anomalous
.type = bool
.expert_level = 1
labels = None
.short_caption = Labels
.type = strings
space_group = None
.short_caption = "Space group"
.type = space_group
.expert_level = 1
reference = None
.short_caption = Reference
.type = path
emax = 4
.help = "Reject reflections with normalised intensities E^2 > emax^2"
.short_caption = "Maximum normalised intensity"
.type = float(value_min=0, allow_none=True)
}
rescale_after_resolution_cutoff = False
.help = "Re-scale the data after application of a resolution cutoff"
.short_caption = "Rescale after resolution cutoff"
.type = bool
filtering
.short_caption = Filtering
{
method = None deltacchalf
.help = "Choice of whether to do any filtering cycles, default None."
.type = choice
deltacchalf
.short_caption = "ΔCC½"
{
max_cycles = None
.short_caption = "Maximum number of cycles"
.type = int(value_min=1, allow_none=True)
max_percent_removed = None
.short_caption = "Maximum percentage removed"
.type = float(allow_none=True)
min_completeness = None
.help = "Desired minimum completeness, as a percentage (0 - 100)."
.short_caption = "Minimum completeness"
.type = float(value_min=0, value_max=100, allow_none=True)
mode = dataset image_group
.help = "Perform analysis on whole datasets or batch groups"
.type = choice
group_size = None
.help = "The number of images to group together when calculating delta"
"cchalf in image_group mode"
.short_caption = "Group size"
.type = int(value_min=1, allow_none=True)
stdcutoff = None
.help = "Datasets with a ΔCC½ below (mean - stdcutoff*std) are removed"
.short_caption = "Standard deviation cutoff"
.type = float(allow_none=True)
}
}
multi_crystal_analysis {
unit_cell = None
.short_caption = "Unit cell"
.type = unit_cell
n_bins = 20
.short_caption = "Number of bins"
.type = int(value_min=1, allow_none=True)
d_min = None
.short_caption = "High resolution cutoff"
.type = float(value_min=0, allow_none=True)
batch
.multiple = True
{
id = None
.type = str
range = None
.type = ints(size=2, value_min=0)
}
}
unit_cell
.short_caption = "Unit cell"
{
refine = *two_theta
.type = choice(multi=True)
}
two_theta_refine
.short_caption = "2θ refinement"
{
combine_crystal_models = True
.short_caption = "Combine crystal models"
.type = bool
}
clustering
.short_caption = Clustering
{
output_clusters = False
.help = "Set this to true to enable scaling and merging of individual"
"clusters"
.short_caption = "Output individual clusters"
.type = bool
method = *hierarchical coordinate
.short_caption = "Clustering method to use - analyse the clusters"
"generated from the hierarchical dendrograms or the"
"density based clustering analysis of the cosym"
"coordinates."
.type = choice(multi=True)
min_cluster_size = 5
.short_caption = "Minimum number of datasets for an output cluster"
.type = int(allow_none=True)
min_completeness = 0
.short_caption = "Minimum completeness"
.type = float(value_min=0, value_max=1, allow_none=True)
min_multiplicity = 0
.short_caption = "Minimum multiplicity"
.type = float(value_min=0, allow_none=True)
max_output_clusters = 10
.short_caption = "Maximum number of clusters to be output"
.type = int(value_min=1, allow_none=True)
hierarchical {
method = *cos_angle correlation
.short_caption = "Metric on which to perform hierarchical clustering"
.type = choice(multi=True)
max_cluster_height = 100
.short_caption = "Maximum height in dendrogram for clusters"
.type = float(allow_none=True)
max_cluster_height_cc = 100
.short_caption = "Maximum height in correlation dendrogram for clusters"
.type = float(allow_none=True)
max_cluster_height_cos = 100
.short_caption = "Maximum height in cos angle dendrogram for clusters"
.type = float(allow_none=True)
distinct_clusters = False
.help = "This will determine whether optional cluster analysis is"
"undertaken. To assist in decreasing computation time, only"
"clusters that have no datasets in common but eventually combine"
"to form a joined cluster in the output dendrogram will be"
"scaled and merged. These may contain interesting differences"
"worth comparing in downstream analysis."
.short_caption = "Find distinct clusters"
.type = bool
}
}
identifiers = None
.help = "Unique DIALS identifiers of experiments to be merged"
.short_caption = Identifiers
.type = strings
dose = None
.short_caption = Dose
.type = ints(size=2, value_min=0)
nproc = Auto
.help = "The number of processors to use"
.type = int(value_min=1, allow_none=True)
.expert_level = 0
remove_profile_fitting_failures = True
.short_caption = "Remove profile fitting failures"
.type = bool
r_free_flags {
generate = True
.short_caption = "Generate R-free flags if not already present"
.type = bool
.style = "bold noauto"
fraction = 0.1
.short_caption = "Fraction of reflections in test set"
.type = float(allow_none=True)
.expert_level = 0
max_free = 2000
.short_caption = "Maximum number of reflections in test set"
.type = int(allow_none=True)
.expert_level = 2
lattice_symmetry_max_delta = 5
.type = float(allow_none=True)
.expert_level = 2
use_lattice_symmetry = True
.short_caption = "Use lattice symmetry to generate test set"
.type = bool
.expert_level = 0
use_dataman_shells = False
.help = "Used to avoid biasing of the test set by certain types of"
"non-crystallographic symmetry."
.short_caption = "Assign test set in thin resolution shells"
.type = bool
n_shells = 20
.short_caption = "Number of resolution shells"
.type = int(allow_none=True)
extend = True
.short_caption = "Extend existing R-free array(s) to full resolution range"
.type = bool
.style = "bold noauto"
old_test_flag_value = None
.help = "Overrides automatic guess of test flag value from existing set."
"This will usually be 1 for reflection files generated by Phenix,"
"and 0 for reflection files from CCP4. Do not change unless you're"
"sure you know what flag to use!"
.short_caption = "Original test flag value"
.type = int(allow_none=True)
.expert_level = 2
d_eps = 0.0001
.short_caption = "Resolution buffer"
.type = float(allow_none=True)
.expert_level = 2
d_max = None
.short_caption = "The maximum resolution to which to generate R-free flags"
.type = float(allow_none=True)
.expert_level = 2
d_min = None
.short_caption = "The minimum resolution to which to generate R-free flags"
.type = float(allow_none=True)
.expert_level = 2
relative_to_complete_set = False
.short_caption = "Generate R-free flags relative to complete set"
.type = bool
reference = None
.type = path
}
exclude_images = None
.help = "Input in the format exp:start:end. Exclude a range of images (start,"
"stop) from the dataset with experiment identifier exp (inclusive"
"of frames start, stop). Multiple ranges can be given in one go,"
"e.g. exclude_images=0:150:200,1:200:250 or exclude_images='0:150:200"
"1:200:250'"
.short_caption = "Exclude images"
.type = strings
.multiple = True
.expert_level = 1
exclude_images_multiple = None
.help = "Exclude this single image and each multiple of this image number in"
"each experiment. This is provided as a convenient shorthand to"
"specify image exclusions for cRED data, where the scan of"
"diffraction images is interrupted at regular intervals by a crystal"
"positioning image (typically every 20th image)."
.type = int(value_min=2, allow_none=True)
.expert_level = 2
wavelength_tolerance = 0.0001
.help = "Absolute tolerance, in Angstroms, for determining whether to merge"
"data from different wavelengths in the output mtz/sca files."
"Increasing this number significantly may reduce downstream data"
"quality due to loss of information on wavelength."
.type = float(allow_none=True)
seed = 42
.type = int(value_min=0, allow_none=True)
output {
log = xia2.multiplex.log
.type = str
}