rb.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <stdint.h>
  5. #include "rb.h"
  6. // de-color a pointer
  7. #define DC(p) ((rb_node*)((intptr_t)p & (~((intptr_t)1ul))))
  8. // get the color of a pointer
  9. #define GC(p) ((intptr_t)((intptr_t)p & ((intptr_t)1ul)))
  10. // copy a pointer's color
  11. #define CC(p, c) ((rb_node*)((intptr_t)DC(p) | GC(c)))
  12. rb_node* rb_find_node(rb_node* n, char* key);
  13. rb_node* rb_bt_node_insert(rb_tree_* tree, rb_node* n, char* key, void* val);
  14. void rb_delete_node(rb_tree_* tree, rb_node* n);
  15. int rb_is_red(rb_node* n) {
  16. return n && GC(n->parent) == 1;
  17. }
  18. int rb_is_black(rb_node* n) {
  19. return !rb_is_red(n);
  20. }
  21. int rb_is_left_child(rb_node* n) {
  22. return n && DC(n->parent) && DC(n->parent)->kids[0] == n;
  23. }
  24. void rb_set_red(rb_node* n) {
  25. if(!n) return;
  26. n->parent = CC(n->parent, 1);
  27. }
  28. void rb_copy_color(rb_node* dst, rb_node* src) {
  29. if(!dst || !src) return;
  30. dst->parent = CC(dst->parent, src->parent);
  31. }
  32. static void rb_swap_color(rb_node* a, rb_node* b) {
  33. int ca = rb_is_red(a);
  34. int cb = rb_is_red(b);
  35. if(a) a->parent = CC(a->parent, cb);
  36. if(b) b->parent = CC(b->parent, ca);
  37. }
  38. void rb_set_black(rb_node* n) {
  39. if(n) n->parent = CC(n->parent, 0);
  40. }
  41. rb_node* rb_has_red_child(rb_node* p) {
  42. if(rb_is_red(p->kids[0])) return p->kids[0];
  43. if(rb_is_red(p->kids[1])) return p->kids[1];
  44. return NULL;
  45. }
  46. rb_node* rb_parent(rb_node* n) {
  47. return n ? DC(n->parent) : NULL;
  48. }
  49. rb_node* rb_grandparent(rb_node* n) {
  50. return n ? (DC(n->parent) ? DC(DC(n->parent)->parent) : NULL) : NULL;
  51. }
  52. rb_node* rb_uncle(rb_node* n) {
  53. if(!n || !DC(n->parent) || !DC(DC(n->parent)->parent)) return NULL;
  54. rb_node* p = DC(n->parent);
  55. rb_node* g = DC(p->parent);
  56. return g->kids[p == g->kids[0]];
  57. }
  58. rb_node* rb_sibling(rb_node* n) {
  59. if(!n || !DC(n->parent)) return NULL;
  60. rb_node* p = DC(n->parent);
  61. return p->kids[n == p->kids[0]];
  62. }
  63. void rb_copy_value_to(rb_node* to, rb_node* from) {
  64. if(to->key) free(to->key);
  65. to->key = strdup(from->key);
  66. to->data = from->data;
  67. // to->color = from->color;
  68. }
  69. // swaps out one of the nodes's children with a different node
  70. static void rb_swap_child(rb_node* p, rb_node* oc, rb_node* nc) {
  71. if(!p) {
  72. if(nc) nc->parent = CC(NULL, nc->parent);
  73. return;
  74. }
  75. if(p->kids[0] == oc) {
  76. p->kids[0] = nc;
  77. }
  78. else if(p->kids[1] == oc) {
  79. p->kids[1] = nc;
  80. }
  81. if(nc) nc->parent = CC(p, nc->parent);
  82. }
  83. #include <stdint.h>
  84. #define mask(x) (void*)((uint64_t)0xff & (uint64_t)(x)>>4)
  85. void rb_rotate_right(rb_tree_* tree, rb_node* g) {
  86. if(!g) return;
  87. // printf("rotating right %p\n", g);
  88. rb_node* op = DC(g->parent);
  89. rb_node* l = g->kids[0];
  90. if(l) {
  91. rb_node* t3 = l->kids[1];
  92. g->kids[0] = t3;
  93. if(t3) t3->parent = CC(g, t3->parent);
  94. l->kids[1] = g;
  95. }
  96. g->parent = CC(l, g->parent);
  97. if(tree->root == g) tree->root = l;
  98. rb_swap_child(op, g, l);
  99. }
  100. void rb_rotate_left(rb_tree_* tree, rb_node* g) {
  101. if(!g) return;
  102. // printf("rotating left %p\n", g);
  103. rb_node* op = DC(g->parent);
  104. rb_node* r = g->kids[1];
  105. if(r) {
  106. rb_node* t3 = r->kids[0];
  107. g->kids[1] = t3;
  108. if(t3) t3->parent = CC(g, t3->parent);
  109. r->kids[0] = g;
  110. g->parent = CC(r, g->parent);
  111. }
  112. if(tree->root == g) tree->root = r;
  113. rb_swap_child(op, g, r);
  114. }
  115. void rb_set_parent(rb_node* n, rb_node* parent) {
  116. // void* c = rb_is_red(n);
  117. // DC(n->parent) = c | parent;
  118. n->parent = CC(parent, n->parent);
  119. }
  120. static void rb_rebalance(rb_tree_* tree, rb_node* n);
  121. rb_node* rb_new_node(rb_node* parent, char* key, void* data) {
  122. rb_node* n = calloc(1, sizeof(*n));
  123. rb_set_parent(n, parent);
  124. n->key = strdup(key);
  125. n->data = data;
  126. n->kids[0] = NULL;
  127. n->kids[1] = NULL;
  128. rb_set_red(n);
  129. return n;
  130. }
  131. void rb_free_node(rb_node* n) {
  132. // printf("freeing node: %p %s\n", n, n->key);
  133. free(n->key);
  134. free(n);
  135. }
  136. void rb_insert_(rb_tree_* tree, char* key, void* val) {
  137. if(tree->root == NULL) {
  138. tree->root = rb_new_node(NULL, key, val);
  139. rb_set_black(tree->root);
  140. return;
  141. }
  142. rb_node* n = rb_bt_node_insert(tree, tree->root, key, val);
  143. if(!n) return;
  144. tree->size++;
  145. rb_rebalance(tree, n);
  146. }
  147. static void rb_rebalance(rb_tree_* tree, rb_node* n) {
  148. if(!n) return;
  149. // printf("---rebalance---\n");
  150. // fprintf(dbg, "prebalance:\n");
  151. // html_print_node(tree->root, get_max_height(tree->root)); html_spacer();
  152. if(n == tree->root) {
  153. // printf(" root set to black\n");
  154. rb_set_black(n);
  155. return;
  156. }
  157. // printf("%x ", mask(n));
  158. rb_node* p = rb_parent(n);
  159. if(!p || !rb_is_red(p)) {
  160. // printf("black parent %p (%s)\n", p, p ? p->key : "");
  161. return;
  162. }
  163. rb_node* u = rb_uncle(n);
  164. rb_node* g = rb_grandparent(n);
  165. if(!g) {
  166. // printf(" no grandparent (%x)\n", mask(g));
  167. return;
  168. }
  169. // printf(" grandparent: %x (%s)\n", mask(g), g ? g->key : "");
  170. // print_tree(tree->root, 3);
  171. if(rb_is_red(u)) {
  172. // printf("red uncle %p (%s)\n", u, u ? u->key : "");
  173. rb_set_black(u);
  174. rb_set_black(p);
  175. rb_set_red(g);
  176. // html_print_node(tree->root, get_max_height(tree->root)); html_spacer();
  177. rb_rebalance(tree, g);
  178. return;
  179. }
  180. else { // black uncle
  181. // printf("black uncle %x (%s): ", mask(u), u ? u->key : "");
  182. if(rb_is_left_child(p)) {
  183. // left left case
  184. if(rb_is_left_child(n)) {
  185. // printf("LL\n");
  186. rb_rotate_right(tree, g);
  187. // print_tree(tree->root, 5);
  188. rb_swap_color(g, p);
  189. }
  190. else { // left right case
  191. // printf("LR\n");
  192. rb_rotate_left(tree, p);
  193. rb_rotate_right(tree, g);
  194. rb_swap_color(g, n);
  195. }
  196. }
  197. else {
  198. // right left case
  199. if(rb_is_left_child(n)) {
  200. // printf("RL\n");
  201. rb_rotate_right(tree, p);
  202. rb_rotate_left(tree, g);
  203. rb_swap_color(g, n);
  204. }
  205. else { // right right case
  206. // printf("RR\n");
  207. rb_rotate_left(tree, g);
  208. rb_swap_color(g, p);
  209. }
  210. }
  211. }
  212. }
  213. // static int N = 0;
  214. rb_node* rb_bt_node_insert(rb_tree_* tree, rb_node* n, char* key, void* val) {
  215. // if(++N > 10) return NULL;
  216. // printf("\nN: %d \n", N);
  217. int d = strcmp(key, n->key);
  218. if(d == 0) {
  219. n->data = val;
  220. return NULL; // no balancing needed
  221. }
  222. else if(d < 0) {
  223. if(n->kids[0]) return rb_bt_node_insert(tree, n->kids[0], key, val);
  224. n->kids[0] = rb_new_node(n, key, val);
  225. return n->kids[0];
  226. }
  227. else {
  228. if(n->kids[1]) return rb_bt_node_insert(tree, n->kids[1], key, val);
  229. n->kids[1] = rb_new_node(n, key, val);
  230. return n->kids[1];
  231. }
  232. }
  233. void* rb_find_(rb_tree_* tree, char* key, int* found) {
  234. rb_node* n = rb_find_node(tree->root, key);
  235. if(n) {
  236. if(found) *found = 1;
  237. return n->data;
  238. }
  239. return NULL;
  240. }
  241. rb_node* rb_find_node(rb_node* n, char* key) {
  242. if(!n) return NULL;
  243. int d = strcmp(key, n->key);
  244. if(d == 0) {
  245. return n;
  246. }
  247. else if(d < 0) {
  248. if(n->kids[0]) return rb_find_node(n->kids[0], key);
  249. }
  250. else {
  251. if(n->kids[1]) return rb_find_node(n->kids[1], key);
  252. }
  253. return NULL; // key not in tree
  254. }
  255. int rb_delete_(rb_tree_* tree, char* key, void** data) {
  256. rb_node* n = rb_find_node(tree->root, key);
  257. if(!n) return 0;
  258. if(data) *data = n->data; // for the caller to clean up
  259. rb_delete_node(tree, n);
  260. tree->size--;
  261. return 1;
  262. }
  263. int rb_is_leaf(rb_node* n) {
  264. return (!n->kids[0] && !n->kids[1]);
  265. }
  266. rb_node* rb_has_one_child(rb_node* n) {
  267. if(n->kids[0] == NULL || n->kids[1] == NULL) {
  268. return n->kids[0] ? n->kids[0] : n->kids[1];
  269. }
  270. return NULL;
  271. }
  272. rb_node* rb_minimum_successor(rb_node* n) {
  273. if(n->kids[0] == NULL) return n;
  274. return rb_minimum_successor(n->kids[0]);
  275. }
  276. void rb_delete_node(rb_tree_* tree, rb_node* n) {
  277. // n is the node the user requested to be deleted
  278. rb_node* c = NULL;
  279. rb_node* d;
  280. rb_node* s = NULL;
  281. rb_node* p;
  282. rb_node* dead;
  283. if(n == tree->root && n->kids[0] == NULL && n->kids[1] == NULL) {
  284. // printf("deleting root\n");
  285. rb_free_node(n);
  286. tree->root = NULL;
  287. return;
  288. }
  289. NEW_N:
  290. if(rb_is_leaf(n)) { // c will be left null
  291. // printf("n is leaf n=%s\n", n->key);
  292. dead = n;
  293. }
  294. else if((c = rb_has_one_child(n))) { // assigns the child to c
  295. // printf("n has one child n:%s, c:%s\n", n->key, c->key);
  296. rb_swap_child(DC(n->parent), n, c); // promote single child
  297. dead = n;
  298. }
  299. else { // cannot get here twice
  300. c = rb_minimum_successor(n->kids[1]);
  301. // printf("internal node, replacing %s with %s\n", n->key, c->key);
  302. rb_copy_value_to(n, c);
  303. n = c;
  304. c = NULL;
  305. goto NEW_N;
  306. }
  307. p = rb_parent(n);
  308. // deleting a red node does nothing to the tree properties
  309. // bail early
  310. if(rb_is_red(n)) {
  311. // printf("n is red\n");
  312. rb_swap_child(DC(dead->parent), dead, NULL); // delete from tree
  313. rb_free_node(dead);
  314. return;
  315. }
  316. // n is black
  317. // if c is red, replace n with c and set c to black
  318. // no change in black height of tree
  319. if(c && rb_is_red(c)) {
  320. // printf("c is red p:%p, n:%s, c:%s\n", p, n->key, c->key);
  321. if(p) {
  322. rb_swap_child(p, n, c);
  323. rb_set_black(c);
  324. }
  325. else {
  326. tree->root = c;
  327. rb_set_black(c);
  328. }
  329. rb_free_node(dead);
  330. return;
  331. }
  332. // if(!c) {
  333. // printf("no child when there should be\n");
  334. // exit(2);
  335. // }
  336. if(p) s = p->kids[p->kids[0] == n];
  337. rb_swap_child(p, n, c);
  338. // n is now the node being removed from the tree. it is black.
  339. // c is its only child, or NULL if n is a leaf
  340. // d will be the 'double black' node
  341. // we fix the tree starting with d = q
  342. d = c;
  343. while(d != tree->root) { // "Case 1" d is (not) root
  344. // get the sibling. rb_sibling() may not work due to now-broken structure and NULLs
  345. // s = p->kids[p->kids[0] == d];
  346. //
  347. // printf("\nrebalancing d:%s, s:%s\n", d?d->key:"-", s?s->key:"-");
  348. if(rb_is_red(d)) {
  349. rb_set_black(d);
  350. break;
  351. }
  352. if(rb_is_red(s)) { // s is red ("Case 2")
  353. rb_set_red(p);
  354. rb_set_black(s);
  355. if(!rb_is_left_child(s)) { // right case
  356. // printf("Case 2, right. p:%s\n", p?p->key:"-");
  357. rb_rotate_left(tree, p);
  358. }
  359. else { // left case
  360. // printf("Case 2, left. p:%s\n", p?p->key:"-");
  361. rb_rotate_right(tree, p);
  362. }
  363. // fprintf(dbg, "End of Case 2");
  364. // html_print_node(tree->root, get_max_height(tree->root)); html_spacer();
  365. // update sibling after rotations.
  366. // d and p stay unchanged
  367. /*if(s == tree->root) {
  368. printf("hacky root bail\n");
  369. rb_set_black(s);// root is always black
  370. break;
  371. }*/
  372. s = p ? p->kids[p->kids[0] == d] : NULL;
  373. // d is black, and the new s is black
  374. }
  375. // s is black
  376. if(rb_is_black(p)) {
  377. if(rb_is_black(s->kids[0]) && rb_is_black(s->kids[1])) {
  378. // printf("Case 3, s:%s\n", s->key);
  379. // case 3, both kids of s are black
  380. rb_set_red(s);
  381. d = p;
  382. p = rb_parent(d);
  383. if(p) s = p->kids[p->kids[0] == d];
  384. // printf("case 3 continue \n");
  385. continue; // restart balancing
  386. }
  387. }
  388. // s is known to be black because of above.
  389. if(rb_is_red(p) && rb_is_black(s->kids[0]) && rb_is_black(s->kids[1])) {
  390. // "Case 4", red p, black s children
  391. // printf("Case 4\n");
  392. rb_set_red(s);
  393. rb_set_black(p);
  394. break; // balancing done
  395. }
  396. // at least one child of s must be red
  397. c = s->kids[0];
  398. s = p->kids[p->kids[0] == d];
  399. p = DC(s->parent);
  400. if(rb_is_black(s)) {
  401. // "Case 5"
  402. // printf("d:%p [%s], d->p:%p\n", d, d?d->key:"-", d?DC(d->parent):NULL);
  403. // printf("s:%p [%s], s->p:%p\n", s, s?s->key:"-", s?DC(s->parent):NULL);
  404. if(!rb_is_left_child(s) && rb_is_red(s->kids[0]) && rb_is_black(s->kids[1])) {
  405. // s's left child is red, right is black
  406. // printf("Case 5, left\n");
  407. rb_set_red(s);
  408. rb_set_black(s->kids[0]);
  409. rb_rotate_right(tree, s);
  410. s = p->kids[p->kids[0] == d];
  411. p = DC(s->parent);
  412. // fall through to Case 6
  413. }
  414. else if(rb_is_left_child(s) && rb_is_red(s->kids[1]) && rb_is_black(s->kids[0])) {
  415. // s's left child is red, right is black
  416. // printf("Case 5, right. s:%s\n", s->key);
  417. rb_set_red(s);
  418. rb_set_black(s->kids[1]);
  419. rb_rotate_left(tree, s);
  420. s = p->kids[p->kids[0] == d];
  421. p = DC(s->parent);
  422. // fall through to Case 6
  423. }
  424. }
  425. // "Case 6"
  426. rb_copy_color(s, p);
  427. rb_set_black(p);
  428. if(!rb_is_left_child(s)) {
  429. // printf("Case 6, left\n");
  430. rb_rotate_left(tree, p);
  431. rb_set_black(s->kids[1]);
  432. }
  433. else {
  434. // printf("Case 6, right\n");
  435. rb_rotate_right(tree, p);
  436. rb_set_black(s->kids[0]);
  437. }
  438. break; // recoloring complete
  439. }
  440. // delete the removed child
  441. if(dead == tree->root) {
  442. tree->root = NULL;
  443. }
  444. else{
  445. rb_swap_child(DC(dead->parent), dead, NULL);
  446. }
  447. rb_free_node(dead);
  448. return;
  449. }
  450. void rb_trunc_node(rb_node* n) {
  451. if(!n) return;
  452. rb_trunc_node(n->kids[0]);
  453. if(n->kids[0]) rb_free_node(n->kids[0]);
  454. rb_trunc_node(n->kids[1]);
  455. if(n->kids[1]) rb_free_node(n->kids[1]);
  456. }
  457. void rb_trunc_(rb_tree_* t) { // TODO needs free fn
  458. if(!t->root) return;
  459. rb_trunc_node(t->root);
  460. rb_free_node(t->root);
  461. t->root = NULL;
  462. t->size = 0;
  463. }
  464. long rb_traverse_node(rb_node* n, rb_traverse_fn fn, void* user_data) {
  465. if(!n) return 0;
  466. long cont;
  467. cont = rb_traverse_node(n->kids[0], fn, user_data);
  468. if(cont) return cont;
  469. cont = fn(n->key, n->data, user_data);
  470. if(cont) return cont;
  471. return rb_traverse_node(n->kids[1], fn, user_data);
  472. }
  473. long rb_r_traverse_node(rb_node* n, rb_traverse_fn fn, void* user_data) {
  474. if(!n) return 0;
  475. long cont;
  476. cont = rb_r_traverse_node(n->kids[1], fn, user_data);
  477. if(cont) return cont;
  478. cont = fn(n->key, n->data, user_data);
  479. if(cont) return cont;
  480. return rb_r_traverse_node(n->kids[0], fn, user_data);
  481. }
  482. long rb_traverse_(rb_tree_* tree, rb_traverse_fn fn, void* user_data) {
  483. return rb_traverse_node(tree->root, fn, user_data);
  484. }
  485. long rb_r_traverse_(rb_tree_* tree, rb_traverse_fn fn, void* user_data) {
  486. return rb_r_traverse_node(tree->root, fn, user_data);
  487. }
  488. // --- debugging functions ---
  489. #ifdef DEBUG_RED_BLACK_TREE
  490. int get_max_height(rb_node* n) {
  491. if(n == NULL) return 0;
  492. int a = get_max_height(n->kids[0]);
  493. int b = get_max_height(n->kids[1]);
  494. return (a > b ? a : b) + 1;
  495. }
  496. void html_print_node(rb_node* n, int h) { return;
  497. if(h == 0) return;
  498. char* type = n ? (rb_is_red(n) ? "red" : "black") : "dummy";
  499. char* barren = n ? ((n->kids[0] == NULL && n->kids[1] == NULL) ? " barren" : "") : "";
  500. fprintf(dbg, "<div class=\"node %s%s\">\n", type, barren);
  501. fprintf(dbg, "<div class=\"key\">%s</div>", n ? n->key : "");
  502. fprintf(dbg, "<div class=\"lines\"></div>");
  503. fprintf(dbg, "<div class=\"child left\">\n");
  504. html_print_node(n ? n->kids[0] : NULL, h-1);
  505. fprintf(dbg, "</div>\n");
  506. fprintf(dbg, "<div class=\"child right\">\n");
  507. html_print_node(n ? n->kids[1] : NULL, h-1);
  508. fprintf(dbg, "</div>\n");
  509. fprintf(dbg, "</div>\n");
  510. fflush(dbg);
  511. }
  512. void html_header(char* file_path) {
  513. dbg = fopen(file_path, "wb");
  514. fprintf(dbg, "<html>\n<head>\n");
  515. fprintf(dbg, "<link rel=\"stylesheet\" type=\"text/css\" href=\"debug.css\" />\n");
  516. fprintf(dbg, "</head>\n<body>\n");
  517. }
  518. void html_spacer() { return;
  519. fprintf(dbg, "<br><br><hr><br><br><br>");
  520. }
  521. void html_footer() { return;
  522. fprintf(dbg, "</body>\n</html>\n");
  523. fclose(dbg);
  524. }
  525. #endif // DEBUG_RED_BLACK_TREE