hash.c 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. /* Copyright (C) 1995,1996,1997, 2000, 2001, 2004 Free Software Foundation, Inc.
  2. *
  3. * This program is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2, or (at your option)
  6. * any later version.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License
  14. * along with this software; see the file COPYING. If not, write to
  15. * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  16. * Boston, MA 02110-1301 USA
  17. *
  18. * As a special exception, the Free Software Foundation gives permission
  19. * for additional uses of the text contained in its release of GUILE.
  20. *
  21. * The exception is that, if you link the GUILE library with other files
  22. * to produce an executable, this does not by itself cause the
  23. * resulting executable to be covered by the GNU General Public License.
  24. * Your use of that executable is in no way restricted on account of
  25. * linking the GUILE library code into it.
  26. *
  27. * This exception does not however invalidate any other reasons why
  28. * the executable file might be covered by the GNU General Public License.
  29. *
  30. * This exception applies only to the code released by the
  31. * Free Software Foundation under the name GUILE. If you copy
  32. * code from other Free Software Foundation releases into a copy of
  33. * GUILE, as the General Public License permits, the exception does
  34. * not apply to the code that you add in this way. To avoid misleading
  35. * anyone as to the status of such modified files, you must delete
  36. * this exception notice from them.
  37. *
  38. * If you write modifications of your own for GUILE, it is your choice
  39. * whether to permit this exception to apply to your modifications.
  40. * If you do not wish that, delete this exception notice. */
  41. #include "libguile/_scm.h"
  42. #include "libguile/chars.h"
  43. #include "libguile/ports.h"
  44. #include "libguile/strings.h"
  45. #include "libguile/symbols.h"
  46. #include "libguile/vectors.h"
  47. #include "libguile/validate.h"
  48. #include "libguile/hash.h"
  49. #ifndef floor
  50. extern double floor();
  51. #endif
  52. unsigned long
  53. scm_string_hash (const unsigned char *str, size_t len)
  54. {
  55. if (len > 5)
  56. {
  57. size_t i = 5;
  58. unsigned long h = 264;
  59. while (i--)
  60. h = (h << 8) + (unsigned) str[h % len];
  61. return h;
  62. }
  63. else
  64. {
  65. size_t i = len;
  66. unsigned long h = 0;
  67. while (i)
  68. h = (h << 8) + (unsigned) str[--i];
  69. return h;
  70. }
  71. }
  72. /* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */
  73. /* Dirk:FIXME:: scm_hasher could be made static. */
  74. unsigned long
  75. scm_hasher(SCM obj, unsigned long n, size_t d)
  76. {
  77. switch (SCM_ITAG3 (obj)) {
  78. case scm_tc3_int_1:
  79. case scm_tc3_int_2:
  80. return SCM_INUM(obj) % n; /* SCM_INUMP(obj) */
  81. case scm_tc3_imm24:
  82. if (SCM_CHARP(obj))
  83. return (unsigned)(scm_downcase(SCM_CHAR(obj))) % n;
  84. switch (SCM_UNPACK (obj)) {
  85. #ifndef SICP
  86. case SCM_UNPACK(SCM_EOL):
  87. d = 256;
  88. break;
  89. #endif
  90. case SCM_UNPACK(SCM_BOOL_T):
  91. d = 257;
  92. break;
  93. case SCM_UNPACK(SCM_BOOL_F):
  94. d = 258;
  95. break;
  96. case SCM_UNPACK(SCM_EOF_VAL):
  97. d = 259;
  98. break;
  99. default:
  100. d = 263; /* perhaps should be error */
  101. }
  102. return d % n;
  103. default:
  104. return 263 % n; /* perhaps should be error */
  105. case scm_tc3_cons:
  106. switch SCM_TYP7(obj) {
  107. default:
  108. return 263 % n;
  109. case scm_tc7_smob:
  110. switch SCM_TYP16 (obj) {
  111. case scm_tc16_big:
  112. return SCM_INUM (scm_modulo (obj, SCM_MAKINUM (n)));
  113. default:
  114. return 263 % n;
  115. case scm_tc16_real:
  116. {
  117. double r = SCM_REAL_VALUE (obj);
  118. if (floor (r) == r) {
  119. obj = scm_inexact_to_exact (obj);
  120. if SCM_IMP (obj) return SCM_INUM (obj) % n;
  121. return SCM_INUM (scm_modulo (obj, SCM_MAKINUM (n)));
  122. }
  123. }
  124. case scm_tc16_complex:
  125. obj = scm_number_to_string (obj, SCM_MAKINUM (10));
  126. }
  127. case scm_tc7_string:
  128. case scm_tc7_substring:
  129. return scm_string_hash (SCM_STRING_UCHARS (obj), SCM_STRING_LENGTH (obj)) % n;
  130. case scm_tc7_symbol:
  131. return SCM_SYMBOL_HASH (obj) % n;
  132. case scm_tc7_wvect:
  133. case scm_tc7_vector:
  134. {
  135. size_t len = SCM_VECTOR_LENGTH(obj);
  136. SCM *data = SCM_VELTS(obj);
  137. if (len > 5)
  138. {
  139. size_t i = d/2;
  140. unsigned long h = 1;
  141. while (i--) h = ((h << 8) + (scm_hasher (data[h % len], n, 2))) % n;
  142. return h;
  143. }
  144. else
  145. {
  146. size_t i = len;
  147. unsigned long h = (n)-1;
  148. while (i--) h = ((h << 8) + (scm_hasher (data[i], n, d/len))) % n;
  149. return h;
  150. }
  151. }
  152. case scm_tcs_cons_imcar:
  153. case scm_tcs_cons_nimcar:
  154. if (d) return (scm_hasher (SCM_CAR (obj), n, d/2)
  155. + scm_hasher (SCM_CDR (obj), n, d/2)) % n;
  156. else return 1;
  157. case scm_tc7_port:
  158. return ((SCM_RDNG & SCM_CELL_WORD_0 (obj)) ? 260 : 261) % n;
  159. case scm_tcs_closures:
  160. case scm_tcs_subrs:
  161. return 262 % n;
  162. }
  163. }
  164. }
  165. unsigned long
  166. scm_ihashq (SCM obj, unsigned long n)
  167. {
  168. return (SCM_UNPACK (obj) >> 1) % n;
  169. }
  170. SCM_DEFINE (scm_hashq, "hashq", 2, 0, 0,
  171. (SCM key, SCM size),
  172. "Determine a hash value for @var{key} that is suitable for\n"
  173. "lookups in a hashtable of size @var{size}, where @code{eq?} is\n"
  174. "used as the equality predicate. The function returns an\n"
  175. "integer in the range 0 to @var{size} - 1. Note that\n"
  176. "@code{hashq} may use internal addresses. Thus two calls to\n"
  177. "hashq where the keys are @code{eq?} are not guaranteed to\n"
  178. "deliver the same value if the key object gets garbage collected\n"
  179. "in between. This can happen, for example with symbols:\n"
  180. "@code{(hashq 'foo n) (gc) (hashq 'foo n)} may produce two\n"
  181. "different values, since @code{foo} will be garbage collected.")
  182. #define FUNC_NAME s_scm_hashq
  183. {
  184. SCM_VALIDATE_INUM_MIN (2, size, 1);
  185. return SCM_MAKINUM (scm_ihashq (key, SCM_INUM (size)));
  186. }
  187. #undef FUNC_NAME
  188. unsigned long
  189. scm_ihashv (SCM obj, unsigned long n)
  190. {
  191. if (SCM_CHARP(obj))
  192. return ((unsigned long) (scm_downcase (SCM_CHAR (obj)))) % n; /* downcase!?!! */
  193. if (SCM_NUMP(obj))
  194. return (unsigned long) scm_hasher(obj, n, 10);
  195. else
  196. return SCM_UNPACK (obj) % n;
  197. }
  198. SCM_DEFINE (scm_hashv, "hashv", 2, 0, 0,
  199. (SCM key, SCM size),
  200. "Determine a hash value for @var{key} that is suitable for\n"
  201. "lookups in a hashtable of size @var{size}, where @code{eqv?} is\n"
  202. "used as the equality predicate. The function returns an\n"
  203. "integer in the range 0 to @var{size} - 1. Note that\n"
  204. "@code{(hashv key)} may use internal addresses. Thus two calls\n"
  205. "to hashv where the keys are @code{eqv?} are not guaranteed to\n"
  206. "deliver the same value if the key object gets garbage collected\n"
  207. "in between. This can happen, for example with symbols:\n"
  208. "@code{(hashv 'foo n) (gc) (hashv 'foo n)} may produce two\n"
  209. "different values, since @code{foo} will be garbage collected.")
  210. #define FUNC_NAME s_scm_hashv
  211. {
  212. SCM_VALIDATE_INUM_MIN (2, size, 1);
  213. return SCM_MAKINUM (scm_ihashv (key, SCM_INUM (size)));
  214. }
  215. #undef FUNC_NAME
  216. unsigned long
  217. scm_ihash (SCM obj, unsigned long n)
  218. {
  219. return (unsigned long) scm_hasher (obj, n, 10);
  220. }
  221. SCM_DEFINE (scm_hash, "hash", 2, 0, 0,
  222. (SCM key, SCM size),
  223. "Determine a hash value for @var{key} that is suitable for\n"
  224. "lookups in a hashtable of size @var{size}, where @code{equal?}\n"
  225. "is used as the equality predicate. The function returns an\n"
  226. "integer in the range 0 to @var{size} - 1.")
  227. #define FUNC_NAME s_scm_hash
  228. {
  229. SCM_VALIDATE_INUM_MIN (2, size, 1);
  230. return SCM_MAKINUM (scm_ihash (key, SCM_INUM (size)));
  231. }
  232. #undef FUNC_NAME
  233. void
  234. scm_init_hash ()
  235. {
  236. #include "libguile/hash.x"
  237. }
  238. /*
  239. Local Variables:
  240. c-file-style: "gnu"
  241. End:
  242. */