add_ligands.m 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. #!/usr/bin/env octave
  2. % Copyright (C) 2019 Kei Kebreau
  3. %
  4. % This program is free software: you can redistribute it and/or modify
  5. % it under the terms of the GNU General Public License as published by
  6. % the Free Software Foundation, either version 3 of the License, or
  7. % (at your option) any later version.
  8. %
  9. % This program is distributed in the hope that it will be useful,
  10. % but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. % GNU General Public License for more details.
  13. %
  14. % You should have received a copy of the GNU General Public License
  15. % along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. clear -g;
  17. global PROJECTION_DISTANCE = 2.2;
  18. global HYDROXL_ATOM = 'O';
  19. % It's probably not good practice to compare floating-point values like this.
  20. global TOLERANCE = 1e-6;
  21. % Arguments: 1x3 location of atom and the planes it may be on.
  22. %
  23. % Returns: Indices of the planes where the atom is located.
  24. function indices = which_planes (location, planes)
  25. global TOLERANCE;
  26. indices = [];
  27. for plane = 1:size(planes)(1)
  28. if abs(distancePointPlane(location, planes(plane, :))) < TOLERANCE
  29. indices(end + 1) = plane;
  30. end
  31. end
  32. end
  33. % Arguments: Location of atom, the normals to planes, and the index of the plane
  34. % to project from.
  35. %
  36. % Returns: New location projected from the given plane.
  37. function new_location = project(location, plane_normals, plane_index)
  38. global PROJECTION_DISTANCE;
  39. new_location = zeros(1, 3);
  40. new_location(1) = location(1) + sign(plane_normals(plane_index, 1)) * PROJECTION_DISTANCE / sqrt(3);
  41. new_location(2) = location(2) + sign(plane_normals(plane_index, 2)) * PROJECTION_DISTANCE / sqrt(3);
  42. new_location(3) = location(3) + sign(plane_normals(plane_index, 3)) * PROJECTION_DISTANCE / sqrt(3);
  43. end
  44. % Let user select input file.
  45. [input_file, file_path] = uigetfile({'*.xyz*', 'Supported file formats'});
  46. % Load the input file.
  47. raw_data = fileread([file_path input_file]);
  48. [~, file_name, ~] = fileparts(input_file);
  49. cd_regexp = ['Cd' repmat('\s+([+-]?[0-9]*\.?[0-9]+)', [1,3])];
  50. cd_locations = regexp(raw_data, cd_regexp, 'tokens');
  51. % Convert cd_locations from a nested cell array of strings to rows of XYZ coordinates.
  52. cd_locations = reshape(cellfun(@str2num, cell2mat(cd_locations)), 3, [])';
  53. num_atoms = str2num(regexp(raw_data, '^(\d+)', 'tokens', 'once'){1});
  54. max_hydroxl = num_atoms - length(cd_locations);
  55. new_atom_count = 0;
  56. new_atoms = '';
  57. % Find vertices and faces of the minimum convex hull and their planes.
  58. faces = convhull(cd_locations);
  59. vertices = cd_locations(unique(faces), :);
  60. planes = zeros(length(faces), 9);
  61. plane_normals = zeros(length(faces), 3);
  62. for plane = 1:size(planes)(1)
  63. planes(plane, :) = createPlane(vertices([1 2 3 4]([1 2 3 4] ~= plane), :));
  64. plane_normals(plane, :) = planeNormal(planes(plane, :));
  65. end
  66. % Orient the normals of these planes away from the center of the tetrahedron.
  67. plane_normals(1, :) = plane_normals(1, :) * -1;
  68. plane_normals(3, :) = plane_normals(3, :) * -1;
  69. for row = 1:length(cd_locations)
  70. cd_atom = cd_locations(row, :);
  71. cd_faces = which_planes(cd_atom, planes);
  72. % After the coordinates, indices representing the planes that the new atom is
  73. % on will be placed for further processing by the hydroxl distance calculation
  74. % script! This should not interfere with proper processing of the resulting
  75. % .xyz files.
  76. for cd_face = 1:length(cd_faces)
  77. new_atom_location = project(cd_atom, plane_normals, cd_faces(cd_face));
  78. new_atoms{new_atom_count + 1} = sprintf('N %s',
  79. sprintf('%f ', new_atom_location));
  80. new_atom_count = new_atom_count + 1;
  81. end
  82. end
  83. output_dir = uigetdir('.', 'Select output directory for output xyz files');
  84. % Let the user input how many output files they want.
  85. num_clusters = str2num(inputdlg('How many clusters to generate?', 'Cluster count'){1});
  86. % Determine how many leading zeros are necessary for correctly listing output clusters.
  87. leading_zeros = floor(log10(num_clusters)) + 1;
  88. original_new_atoms = new_atoms;
  89. xyz_format_str = ['%s%s%s_random_%0' num2str(leading_zeros) 'd.xyz'];
  90. loading_bar = waitbar(0, 'Generating randomly ligated clusters...');
  91. for cluster = 1:num_clusters
  92. new_atoms = original_new_atoms;
  93. rand_mutations = randperm(new_atom_count)(1:max_hydroxl);
  94. for r = rand_mutations
  95. new_atoms{r} = strrep(new_atoms{r}, 'N', HYDROXL_ATOM);
  96. end
  97. xyz_name = sprintf(xyz_format_str, output_dir, filesep, file_name, cluster);
  98. xyz_id = fopen(xyz_name, 'w');
  99. fdisp(xyz_id, sprintf('%d\n', num_atoms + new_atom_count));
  100. fdisp(xyz_id, regexp(raw_data, '(Cd.*)\n', 'tokens', 'once'){1});
  101. fdisp(xyz_id, strjoin(new_atoms, '\n'));
  102. fclose(xyz_id);
  103. waitbar(cluster / num_clusters, loading_bar);
  104. end
  105. close(loading_bar);
  106. clear -g;