calc_hydroxl_dist.m 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. #!/usr/bin/env octave
  2. % Copyright (C) 2019 Kei Kebreau
  3. %
  4. % This program is free software: you can redistribute it and/or modify
  5. % it under the terms of the GNU General Public License as published by
  6. % the Free Software Foundation, either version 3 of the License, or
  7. % (at your option) any later version.
  8. %
  9. % This program is distributed in the hope that it will be useful,
  10. % but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. % GNU General Public License for more details.
  13. %
  14. % You should have received a copy of the GNU General Public License
  15. % along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. % A function of a different name cannot come first in a function file.
  17. % Use this otherwise useless statement to get around that fact.
  18. 1;
  19. % This function was taken from the Octave-Forge statistics package, licensed
  20. % under the GPL version 3 or later.
  21. function y = pdist (x)
  22. y = [];
  23. if (rows(x) == 1)
  24. return;
  25. endif
  26. order = nchoosek(1:rows(x),2);
  27. Xi = order(:,1);
  28. Yi = order(:,2);
  29. X = x';
  30. d = X(:,Xi) - X(:,Yi);
  31. y = norm (d, 'cols');
  32. endfunction
  33. % Operating system specific adjustments.
  34. if ispc()
  35. newline = sprintf('\r\n');
  36. else
  37. newline = sprintf('\n');
  38. endif
  39. % Let user select input directory.
  40. input_dir = uigetdir('.', 'Select directory of xyz files for comparison');
  41. loaded_dir = dir(fullfile(input_dir, '*.xyz'));
  42. [output_file, output_dir] = uiputfile('.csv', 'Select output file');
  43. numel_loaded_dir = numel(loaded_dir);
  44. dist_array = zeros(numel_loaded_dir, 1);
  45. name_array = cell(1, numel_loaded_dir);
  46. loading_bar = waitbar(0, 'Calculating distances...');
  47. o_regexp = ['O' repmat('\s+([+-]?[0-9]*\.?[0-9]+)', [1,3])];
  48. % Load the input files.
  49. for entry = 1:numel_loaded_dir
  50. input_file = loaded_dir(entry).name;
  51. file_path = fullfile(input_dir, input_file);
  52. [file_path, file_name, ~] = fileparts(file_path);
  53. raw_data = fileread([file_path filesep input_file]);
  54. name_array{entry} = input_file;
  55. % Get the positions of all oxygen atoms in the file.
  56. hydroxl_group_cells = regexp(raw_data, o_regexp, 'tokens');
  57. hydroxl_group_cells = [hydroxl_group_cells{:}];
  58. hydroxl_groups = reshape(sscanf(sprintf('%s ', hydroxl_group_cells{:}), '%f'),
  59. 3, numel(hydroxl_group_cells) / 3)';
  60. distances = pdist(hydroxl_groups).^-100;
  61. dist_array(entry) = sum(distances);
  62. waitbar(entry / numel_loaded_dir, loading_bar);
  63. endfor
  64. close(loading_bar);
  65. % Scale array so the minimum is equal to 1.
  66. % Minimum "energy" is best.
  67. dist_array ./= min(dist_array);
  68. file_id = fopen([output_dir output_file], 'w');
  69. for ii = 1:numel_loaded_dir
  70. fprintf(file_id, '%s,%.15f%s', name_array{ii}, dist_array(ii), newline);
  71. endfor
  72. fclose(file_id);