lev.awk 1.1 KB

12345678910111213141516171819202122232425262728293031323334353637
  1. BEGIN {
  2. a = ARGV[1];
  3. b = ARGV[2];
  4. print levenshteinDistance(a, b);
  5. }
  6. function levenshteinDistance(s, t, s1, t1, distA, distB, distC, minDist) {
  7. # If either string is empty,
  8. # then distance is insertion of the other's characters.
  9. if (length(s) == 0) return length(t);
  10. if (length(t) == 0) return length(s);
  11. # Rest of process uses first characters
  12. # and remainder of each string.
  13. s1 = substr(s, 2, length(s));
  14. t1 = substr(t, 2, length(t));
  15. # If leading characters are the same,
  16. # then distance is that between the rest of the strings.
  17. if (substr(s, 1, 1) == substr(t, 1, 1)) {
  18. return levenshteinDistance(s1, t1);
  19. }
  20. # Find the distances between sub strings.
  21. distA = levenshteinDistance(s1, t1);
  22. distB = levenshteinDistance(s, t1);
  23. distC = levenshteinDistance(s1, t );
  24. # Return the minimum distance between substrings.
  25. minDist = distA;
  26. if (distB < minDist) minDist = distB;
  27. if (distC < minDist) minDist = distC;
  28. return minDist + 1; # Include change for the first character.
  29. }