12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
- #!/usr/bin/env octave
- % Copyright (C) 2019 Kei Kebreau
- %
- % This program is free software: you can redistribute it and/or modify
- % it under the terms of the GNU General Public License as published by
- % the Free Software Foundation, either version 3 of the License, or
- % (at your option) any later version.
- %
- % This program is distributed in the hope that it will be useful,
- % but WITHOUT ANY WARRANTY; without even the implied warranty of
- % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- % GNU General Public License for more details.
- %
- % You should have received a copy of the GNU General Public License
- % along with this program. If not, see <http://www.gnu.org/licenses/>.
- % A function of a different name cannot come first in a function file.
- % Use this otherwise useless statement to get around that fact.
- 1;
- % This function was taken from the Octave-Forge statistics package, licensed
- % under the GPL version 3 or later.
- function y = pdist (x)
- y = [];
- if (rows(x) == 1)
- return;
- endif
- order = nchoosek(1:rows(x),2);
- Xi = order(:,1);
- Yi = order(:,2);
- X = x';
- d = X(:,Xi) - X(:,Yi);
- y = norm (d, 'cols');
- endfunction
- % Operating system specific adjustments.
- if ispc()
- newline = sprintf('\r\n');
- else
- newline = sprintf('\n');
- endif
- % Let user select input directory.
- input_dir = uigetdir('.', 'Select directory of xyz files for comparison');
- loaded_dir = dir(fullfile(input_dir, '*.xyz'));
- [output_file, output_dir] = uiputfile('.csv', 'Select output file');
- numel_loaded_dir = numel(loaded_dir);
- dist_array = zeros(numel_loaded_dir, 1);
- name_array = cell(1, numel_loaded_dir);
- loading_bar = waitbar(0, 'Calculating distances...');
- o_regexp = ['O' repmat('\s+([+-]?[0-9]*\.?[0-9]+)', [1,3])];
- % Load the input files.
- for entry = 1:numel_loaded_dir
- input_file = loaded_dir(entry).name;
- file_path = fullfile(input_dir, input_file);
- [file_path, file_name, ~] = fileparts(file_path);
- raw_data = fileread([file_path filesep input_file]);
- name_array{entry} = input_file;
- % Get the positions of all oxygen atoms in the file.
- hydroxl_group_cells = regexp(raw_data, o_regexp, 'tokens');
- hydroxl_group_cells = [hydroxl_group_cells{:}];
- hydroxl_groups = reshape(sscanf(sprintf('%s ', hydroxl_group_cells{:}), '%f'),
- 3, numel(hydroxl_group_cells) / 3)';
- distances = pdist(hydroxl_groups).^-100;
-
- dist_array(entry) = sum(distances);
- waitbar(entry / numel_loaded_dir, loading_bar);
- endfor
- close(loading_bar);
- % Scale array so the minimum is equal to 1.
- % Minimum "energy" is best.
- dist_array ./= min(dist_array);
- file_id = fopen([output_dir output_file], 'w');
- for ii = 1:numel_loaded_dir
- fprintf(file_id, '%s,%.15f%s', name_array{ii}, dist_array(ii), newline);
- endfor
- fclose(file_id);
|