xia2.multiplex

Introduction

xia2.multiplex performs symmetry analysis, scaling and merging of multi-crystal data sets, as well as analysis of various pathologies that typically affect multi-crystal data sets, including non-isomorphism, radiation damage and preferred orientation.

It uses a number of DIALS programs internally, including dials.cosym, dials.two_theta_refine, dials.scale, dials.estimate_resolution and dials.symmetry, to carry out the following steps:

  • Preliminary filtering of datasets using hierarchical unit cell clustering

  • Laue group determination and resolution of indexing ambiguities with dials.cosym

  • Determination of “best” overall unit cell with dials.two_theta_refine

  • Initial round of scaling with dials.scale

  • Estimation of resolution limit with dials.estimate_resolution

  • Final round of scaling after application of the resolution limit

  • Analysis of systematic absences with dials.symmetry

  • Optional ΔCC½ filtering to remove outlier data sets

  • Analysis of non-isomorphism, radiation damage and preferred orientation

Example use cases

Multiple integrated experiments and reflections in combined files:

xia2.multiplex integrated.expt integrated.refl

Integrated experiments and reflections in separate input files:

xia2.multiplex integrated_1.expt integrated_1.refl \
  integrated_2.expt integrated_2.refl

Override the automatic space group determination and resolution estimation:

xia2.multiplex space_group=C2 resolution.d_min=2.5 \
  integrated_1.expt integrated_1.refl \
  integrated_2.expt integrated_2.refl

Filter potential outlier data sets using the ΔCC½ method:

xia2.multiplex filtering.method=deltacchalf \
  integrated.expt integrated.refl

Basic parameters

unit_cell_clustering {
  threshold = 5000
  log = False
}
scaling {
  brotation.spacing = None
  secondary {
  }
  model = physical dose_decay array KB *auto
  outlier_rejection = simple *standard
  Isigma_range = 2.0,100000
  min_partiality = None
  partiality_cutoff = None
}
symmetry {
  resolve_indexing_ambiguity = True
  cosym {
    normalisation = kernel quasi *ml_iso ml_aniso
    d_min = Auto
    min_i_mean_over_sigma_mean = 4
    min_cc_half = 0.6
    lattice_group = None
    space_group = None
    lattice_symmetry_max_delta = 5.0
    best_monoclinic_beta = False
    dimensions = Auto
    use_curvatures = True
    weights = count standard_error
    min_pairs = 3
    termination_params {
      max_iterations = 100
      max_calls = None
      traditional_convergence_test = True
      traditional_convergence_test_eps = 1
      drop_convergence_test_n_test_points = 5
      drop_convergence_test_max_drop_eps = 1.e-5
      drop_convergence_test_iteration_coefficient = 2
    }
    cluster {
      method = dbscan bisect minimize_divide agglomerative *seed
      n_clusters = auto
      dbscan {
        eps = 0.5
        min_samples = 5
      }
      bisect {
        axis = 0
      }
      seed {
        min_silhouette_score = 0.2
      }
    }
    nproc = 1
  }
  le_page_max_delta = 5
  laue_group = None
  space_group = None
}
resolution {
  d_max = None
  d_min = None
  cc_half_method = half_dataset *sigma_tau
  reflections_per_bin = 10
  labels = None
  reference = None
}
rescale_after_resolution_cutoff = False
filtering {
  method = None deltacchalf
  deltacchalf {
    max_cycles = 6
    min_completeness = None
    stdcutoff = 4.0
  }
}
multi_crystal_analysis {
  unit_cell = None
  n_bins = 20
  d_min = None
  batch {
    id = None
    range = None
  }
}
unit_cell {
  refine = *two_theta
}
two_theta_refine {
  combine_crystal_models = True
}
min_completeness = None
min_multiplicity = None
max_clusters = None
cluster_method = *cos_angle correlation unit_cell
identifiers = None
dose = None
nproc = Auto
remove_profile_fitting_failures = True
seed = 42
output {
  log = xia2.multiplex.log
}

Full parameter definitions

unit_cell_clustering {
  threshold = 5000
    .help = "Threshold value for the clustering"
    .type = float(value_min=0, allow_none=True)
  log = False
    .help = "Display the dendrogram with a log scale"
    .type = bool
}
scaling {
  rotation.spacing = None
    .short_caption = "Interval (in degrees) between scale factors on rotation"
                     "axis"
    .type = int(allow_none=True)
    .expert_level = 2
  brotation.spacing = None
    .short_caption = "Interval (in degrees) between B-factors on rotation axis"
    .type = int(allow_none=True)
  secondary {
    lmax = 0
      .short_caption = "Number of spherical harmonics for absorption"
                       "correction"
      .type = int(allow_none=True)
      .expert_level = 2
  }
  model = physical dose_decay array KB *auto
    .type = choice
  outlier_rejection = simple *standard
    .type = choice
  Isigma_range = 2.0,100000
    .type = floats(size=2)
  min_partiality = None
    .type = float(value_min=0, value_max=1, allow_none=True)
  partiality_cutoff = None
    .type = float(value_min=0, value_max=1, allow_none=True)
}
symmetry {
  resolve_indexing_ambiguity = True
    .type = bool
  cosym {
    normalisation = kernel quasi *ml_iso ml_aniso
      .type = choice
    d_min = Auto
      .type = float(value_min=0, allow_none=True)
    min_i_mean_over_sigma_mean = 4
      .type = float(value_min=0, allow_none=True)
    min_cc_half = 0.6
      .type = float(value_min=0, value_max=1, allow_none=True)
    lattice_group = None
      .type = space_group
    space_group = None
      .type = space_group
    lattice_symmetry_max_delta = 5.0
      .type = float(value_min=0, allow_none=True)
    best_monoclinic_beta = False
      .help = "If True, then for monoclinic centered cells, I2 will be"
              "preferred over C2 if it gives a less oblique cell (i.e. smaller"
              "beta angle)."
      .type = bool
    dimensions = Auto
      .type = int(value_min=2, allow_none=True)
    use_curvatures = True
      .type = bool
    weights = count standard_error
      .type = choice
    min_pairs = 3
      .help = "Minimum number of pairs for inclusion of correlation"
              "coefficient in calculation of Rij matrix."
      .type = int(value_min=1, allow_none=True)
    termination_params {
      max_iterations = 100
        .type = int(value_min=0, allow_none=True)
      max_calls = None
        .type = int(value_min=0, allow_none=True)
      traditional_convergence_test = True
        .type = bool
      traditional_convergence_test_eps = 1
        .type = float(allow_none=True)
      drop_convergence_test_n_test_points = 5
        .type = int(value_min=2, allow_none=True)
      drop_convergence_test_max_drop_eps = 1.e-5
        .type = float(value_min=0, allow_none=True)
      drop_convergence_test_iteration_coefficient = 2
        .type = float(value_min=1, allow_none=True)
    }
    cluster {
      method = dbscan bisect minimize_divide agglomerative *seed
        .type = choice
      n_clusters = auto
        .type = int(value_min=1, allow_none=True)
      dbscan {
        eps = 0.5
          .type = float(value_min=0, allow_none=True)
        min_samples = 5
          .type = int(value_min=1, allow_none=True)
      }
      bisect {
        axis = 0
          .type = int(value_min=0, allow_none=True)
      }
      seed {
        min_silhouette_score = 0.2
          .type = float(value_min=-1, value_max=1, allow_none=True)
      }
    }
    nproc = 1
      .help = "The number of processes to use."
      .type = int(value_min=1, allow_none=True)
  }
  le_page_max_delta = 5
    .type = float(value_min=0, allow_none=True)
  laue_group = None
    .help = "Specify the Laue group. If None, then the Laue group will be"
            "determined by dials.cosym."
    .type = space_group
  space_group = None
    .help = "Specify the space group. If None, then dials.symmetry will"
            "perform analysis of systematically absent reflections to"
            "determine the space group."
    .type = space_group
}
resolution
  .short_caption = Resolution
{
  d_max = None
    .help = "Low resolution cutoff."
    .short_caption = "Low resolution cutoff"
    .type = float(value_min=0, allow_none=True)
  d_min = None
    .help = "High resolution cutoff."
    .short_caption = "High resolution cutoff"
    .type = float(value_min=0, allow_none=True)
  rmerge = None
    .help = "Maximum value of Rmerge in the outer resolution shell"
    .short_caption = "Outer shell Rmerge"
    .type = float(value_min=0, allow_none=True)
    .expert_level = 1
  completeness = None
    .help = "Minimum completeness in the outer resolution shell"
    .short_caption = "Outer shell completeness"
    .type = float(value_min=0, allow_none=True)
    .expert_level = 1
  cc_ref = 0.1
    .help = "Minimum value of CC vs reference dataset in the outer resolution"
            "shell"
    .short_caption = "Outer shell CCref"
    .type = float(value_min=0, allow_none=True)
    .expert_level = 1
  cc_half = 0.3
    .help = "Minimum value of CC½ in the outer resolution shell"
    .short_caption = "Outer shell CC½"
    .type = float(value_min=0, allow_none=True)
    .expert_level = 1
  cc_half_method = half_dataset *sigma_tau
    .type = choice
  cc_half_significance_level = 0.1
    .type = float(value_min=0, value_max=1, allow_none=True)
    .expert_level = 1
  cc_half_fit = polynomial *tanh
    .type = choice
    .expert_level = 1
  isigma = None
    .help = "Minimum value of the unmerged <I/sigI> in the outer resolution"
            "shell"
    .short_caption = "Outer shell unmerged <I/sigI>"
    .type = float(value_min=0, allow_none=True)
    .expert_level = 1
  misigma = None
    .help = "Minimum value of the merged <I/sigI> in the outer resolution"
            "shell"
    .short_caption = "Outer shell merged <I/sigI>"
    .type = float(value_min=0, allow_none=True)
    .expert_level = 1
  i_mean_over_sigma_mean = None
    .help = "Minimum value of the unmerged <I>/<sigI> in the outer resolution"
            "shell"
    .short_caption = "Outer shell unmerged <I>/<sigI>"
    .type = float(value_min=0, allow_none=True)
    .expert_level = 2
  nbins = 100
    .help = "Maximum number of resolution bins to use for estimation of"
            "resolution limit."
    .short_caption = "Number of resolution bins."
    .type = int(allow_none=True)
    .expert_level = 1
  reflections_per_bin = 10
    .help = "Minimum number of reflections per bin."
    .type = int(allow_none=True)
  binning_method = *counting_sorted volume
    .help = "Use equal-volume bins or bins with approximately equal numbers of"
            "reflections per bin."
    .short_caption = "Equal-volume or equal #ref binning."
    .type = choice
    .expert_level = 1
  anomalous = False
    .short_caption = "Keep anomalous pairs separate in merging statistics"
    .type = bool
    .expert_level = 1
  labels = None
    .type = strings
  space_group = None
    .type = space_group
    .expert_level = 1
  reference = None
    .type = path
}
rescale_after_resolution_cutoff = False
  .help = "Re-scale the data after application of a resolution cutoff"
  .type = bool
filtering {
  method = None deltacchalf
    .help = "Choice of whether to do any filtering cycles, default None."
    .type = choice
  deltacchalf {
    max_cycles = 6
      .type = int(value_min=1, allow_none=True)
    min_completeness = None
      .help = "Desired minimum completeness, as a percentage (0 - 100)."
      .type = float(value_min=0, value_max=100, allow_none=True)
    stdcutoff = 4.0
      .help = "Datasets with a ΔCC½ below (mean - stdcutoff*std) are removed"
      .type = float(allow_none=True)
  }
}
multi_crystal_analysis {
  unit_cell = None
    .type = unit_cell
  n_bins = 20
    .type = int(value_min=1, allow_none=True)
  d_min = None
    .type = float(value_min=0, allow_none=True)
  batch
    .multiple = True
  {
    id = None
      .type = str
    range = None
      .type = ints(size=2, value_min=0)
  }
}
unit_cell {
  refine = *two_theta
    .type = choice(multi=True)
}
two_theta_refine {
  combine_crystal_models = True
    .type = bool
}
min_completeness = None
  .type = float(value_min=0, value_max=1, allow_none=True)
min_multiplicity = None
  .type = float(value_min=0, allow_none=True)
max_clusters = None
  .type = int(value_min=1, allow_none=True)
cluster_method = *cos_angle correlation unit_cell
  .type = choice
identifiers = None
  .type = strings
dose = None
  .type = ints(size=2, value_min=0)
nproc = Auto
  .help = "The number of processors to use"
  .type = int(value_min=1, allow_none=True)
  .expert_level = 0
remove_profile_fitting_failures = True
  .type = bool
exclude_images = None
  .help = "Input in the format exp:start:end. Exclude a range of images"
          "(start, end) from the dataset with experiment identifier exp"
          "(inclusive of frames start and end)."
  .type = strings
  .multiple = True
  .expert_level = 1
seed = 42
  .type = int(value_min=0, allow_none=True)
output {
  log = xia2.multiplex.log
    .type = str
}