test_lru_dist.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
/*
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
  8. #define _GNU_SOURCE
  9. #include <linux/types.h>
  10. #include <stdio.h>
  11. #include <unistd.h>
  12. #include <linux/bpf.h>
  13. #include <errno.h>
  14. #include <string.h>
  15. #include <assert.h>
  16. #include <sched.h>
  17. #include <sys/wait.h>
  18. #include <sys/stat.h>
  19. #include <sys/resource.h>
  20. #include <fcntl.h>
  21. #include <stdlib.h>
  22. #include <time.h>
  23. #include "libbpf.h"
  24. #include "bpf_util.h"
  25. #define min(a, b) ((a) < (b) ? (a) : (b))
  26. #define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
  27. #define container_of(ptr, type, member) ({ \
  28. const typeof( ((type *)0)->member ) *__mptr = (ptr); \
  29. (type *)( (char *)__mptr - offsetof(type,member) );})
  30. static int nr_cpus;
  31. static unsigned long long *dist_keys;
  32. static unsigned int dist_key_counts;
  33. struct list_head {
  34. struct list_head *next, *prev;
  35. };
  36. static inline void INIT_LIST_HEAD(struct list_head *list)
  37. {
  38. list->next = list;
  39. list->prev = list;
  40. }
  41. static inline int list_empty(const struct list_head *head)
  42. {
  43. return head->next == head;
  44. }
  45. static inline void __list_add(struct list_head *new,
  46. struct list_head *prev,
  47. struct list_head *next)
  48. {
  49. next->prev = new;
  50. new->next = next;
  51. new->prev = prev;
  52. prev->next = new;
  53. }
  54. static inline void list_add(struct list_head *new, struct list_head *head)
  55. {
  56. __list_add(new, head, head->next);
  57. }
  58. static inline void __list_del(struct list_head *prev, struct list_head *next)
  59. {
  60. next->prev = prev;
  61. prev->next = next;
  62. }
  63. static inline void __list_del_entry(struct list_head *entry)
  64. {
  65. __list_del(entry->prev, entry->next);
  66. }
  67. static inline void list_move(struct list_head *list, struct list_head *head)
  68. {
  69. __list_del_entry(list);
  70. list_add(list, head);
  71. }
  72. #define list_entry(ptr, type, member) \
  73. container_of(ptr, type, member)
  74. #define list_last_entry(ptr, type, member) \
  75. list_entry((ptr)->prev, type, member)
  76. struct pfect_lru_node {
  77. struct list_head list;
  78. unsigned long long key;
  79. };
  80. struct pfect_lru {
  81. struct list_head list;
  82. struct pfect_lru_node *free_nodes;
  83. unsigned int cur_size;
  84. unsigned int lru_size;
  85. unsigned int nr_unique;
  86. unsigned int nr_misses;
  87. unsigned int total;
  88. int map_fd;
  89. };
  90. static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
  91. unsigned int nr_possible_elems)
  92. {
  93. lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
  94. sizeof(unsigned long long),
  95. sizeof(struct pfect_lru_node *),
  96. nr_possible_elems, 0);
  97. assert(lru->map_fd != -1);
  98. lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
  99. assert(lru->free_nodes);
  100. INIT_LIST_HEAD(&lru->list);
  101. lru->cur_size = 0;
  102. lru->lru_size = lru_size;
  103. lru->nr_unique = lru->nr_misses = lru->total = 0;
  104. }
  105. static void pfect_lru_destroy(struct pfect_lru *lru)
  106. {
  107. close(lru->map_fd);
  108. free(lru->free_nodes);
  109. }
  110. static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
  111. unsigned long long key)
  112. {
  113. struct pfect_lru_node *node = NULL;
  114. int seen = 0;
  115. lru->total++;
  116. if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) {
  117. if (node) {
  118. list_move(&node->list, &lru->list);
  119. return 1;
  120. }
  121. seen = 1;
  122. }
  123. if (lru->cur_size < lru->lru_size) {
  124. node = &lru->free_nodes[lru->cur_size++];
  125. INIT_LIST_HEAD(&node->list);
  126. } else {
  127. struct pfect_lru_node *null_node = NULL;
  128. node = list_last_entry(&lru->list,
  129. struct pfect_lru_node,
  130. list);
  131. bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
  132. }
  133. node->key = key;
  134. list_move(&node->list, &lru->list);
  135. lru->nr_misses++;
  136. if (seen) {
  137. assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
  138. } else {
  139. lru->nr_unique++;
  140. assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
  141. }
  142. return seen;
  143. }
  144. static unsigned int read_keys(const char *dist_file,
  145. unsigned long long **keys)
  146. {
  147. struct stat fst;
  148. unsigned long long *retkeys;
  149. unsigned int counts = 0;
  150. int dist_fd;
  151. char *b, *l;
  152. int i;
  153. dist_fd = open(dist_file, 0);
  154. assert(dist_fd != -1);
  155. assert(fstat(dist_fd, &fst) == 0);
  156. b = malloc(fst.st_size);
  157. assert(b);
  158. assert(read(dist_fd, b, fst.st_size) == fst.st_size);
  159. close(dist_fd);
  160. for (i = 0; i < fst.st_size; i++) {
  161. if (b[i] == '\n')
  162. counts++;
  163. }
  164. counts++; /* in case the last line has no \n */
  165. retkeys = malloc(counts * sizeof(unsigned long long));
  166. assert(retkeys);
  167. counts = 0;
  168. for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
  169. retkeys[counts++] = strtoull(l, NULL, 10);
  170. free(b);
  171. *keys = retkeys;
  172. return counts;
  173. }
  174. static int create_map(int map_type, int map_flags, unsigned int size)
  175. {
  176. int map_fd;
  177. map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
  178. sizeof(unsigned long long), size, map_flags);
  179. if (map_fd == -1)
  180. perror("bpf_create_map");
  181. return map_fd;
  182. }
  183. static int sched_next_online(int pid, int next_to_try)
  184. {
  185. cpu_set_t cpuset;
  186. if (next_to_try == nr_cpus)
  187. return -1;
  188. while (next_to_try < nr_cpus) {
  189. CPU_ZERO(&cpuset);
  190. CPU_SET(next_to_try++, &cpuset);
  191. if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
  192. break;
  193. }
  194. return next_to_try;
  195. }
  196. static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
  197. void *data)
  198. {
  199. int next_sched_cpu = 0;
  200. pid_t pid[tasks];
  201. int i;
  202. for (i = 0; i < tasks; i++) {
  203. pid[i] = fork();
  204. if (pid[i] == 0) {
  205. next_sched_cpu = sched_next_online(0, next_sched_cpu);
  206. fn(i, data);
  207. exit(0);
  208. } else if (pid[i] == -1) {
  209. printf("couldn't spawn #%d process\n", i);
  210. exit(1);
  211. }
  212. /* It is mostly redundant and just allow the parent
  213. * process to update next_shced_cpu for the next child
  214. * process
  215. */
  216. next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
  217. }
  218. for (i = 0; i < tasks; i++) {
  219. int status;
  220. assert(waitpid(pid[i], &status, 0) == pid[i]);
  221. assert(status == 0);
  222. }
  223. }
  224. static void do_test_lru_dist(int task, void *data)
  225. {
  226. unsigned int nr_misses = 0;
  227. struct pfect_lru pfect_lru;
  228. unsigned long long key, value = 1234;
  229. unsigned int i;
  230. unsigned int lru_map_fd = ((unsigned int *)data)[0];
  231. unsigned int lru_size = ((unsigned int *)data)[1];
  232. unsigned long long key_offset = task * dist_key_counts;
  233. pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);
  234. for (i = 0; i < dist_key_counts; i++) {
  235. key = dist_keys[i] + key_offset;
  236. pfect_lru_lookup_or_insert(&pfect_lru, key);
  237. if (!bpf_map_lookup_elem(lru_map_fd, &key, &value))
  238. continue;
  239. if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
  240. printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n",
  241. key, errno);
  242. assert(0);
  243. }
  244. nr_misses++;
  245. }
  246. printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
  247. task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
  248. dist_key_counts);
  249. printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
  250. task, pfect_lru.nr_unique, pfect_lru.total,
  251. pfect_lru.nr_misses, pfect_lru.total);
  252. pfect_lru_destroy(&pfect_lru);
  253. close(lru_map_fd);
  254. }
  255. static void test_parallel_lru_dist(int map_type, int map_flags,
  256. int nr_tasks, unsigned int lru_size)
  257. {
  258. int child_data[2];
  259. int lru_map_fd;
  260. printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
  261. map_flags);
  262. if (map_flags & BPF_F_NO_COMMON_LRU)
  263. lru_map_fd = create_map(map_type, map_flags,
  264. nr_cpus * lru_size);
  265. else
  266. lru_map_fd = create_map(map_type, map_flags,
  267. nr_tasks * lru_size);
  268. assert(lru_map_fd != -1);
  269. child_data[0] = lru_map_fd;
  270. child_data[1] = lru_size;
  271. run_parallel(nr_tasks, do_test_lru_dist, child_data);
  272. close(lru_map_fd);
  273. }
  274. static void test_lru_loss0(int map_type, int map_flags)
  275. {
  276. unsigned long long key, value[nr_cpus];
  277. unsigned int old_unused_losses = 0;
  278. unsigned int new_unused_losses = 0;
  279. unsigned int used_losses = 0;
  280. int map_fd;
  281. printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
  282. map_flags);
  283. assert(sched_next_online(0, 0) != -1);
  284. if (map_flags & BPF_F_NO_COMMON_LRU)
  285. map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
  286. else
  287. map_fd = create_map(map_type, map_flags, 900);
  288. assert(map_fd != -1);
  289. value[0] = 1234;
  290. for (key = 1; key <= 1000; key++) {
  291. int start_key, end_key;
  292. assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
  293. start_key = 101;
  294. end_key = min(key, 900);
  295. while (start_key <= end_key) {
  296. bpf_map_lookup_elem(map_fd, &start_key, value);
  297. start_key++;
  298. }
  299. }
  300. for (key = 1; key <= 1000; key++) {
  301. if (bpf_map_lookup_elem(map_fd, &key, value)) {
  302. if (key <= 100)
  303. old_unused_losses++;
  304. else if (key <= 900)
  305. used_losses++;
  306. else
  307. new_unused_losses++;
  308. }
  309. }
  310. close(map_fd);
  311. printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
  312. "newer-elem-losses:%d(/100)\n",
  313. old_unused_losses, used_losses, new_unused_losses);
  314. }
  315. static void test_lru_loss1(int map_type, int map_flags)
  316. {
  317. unsigned long long key, value[nr_cpus];
  318. int map_fd;
  319. unsigned int nr_losses = 0;
  320. printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
  321. map_flags);
  322. assert(sched_next_online(0, 0) != -1);
  323. if (map_flags & BPF_F_NO_COMMON_LRU)
  324. map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
  325. else
  326. map_fd = create_map(map_type, map_flags, 1000);
  327. assert(map_fd != -1);
  328. value[0] = 1234;
  329. for (key = 1; key <= 1000; key++)
  330. assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
  331. for (key = 1; key <= 1000; key++) {
  332. if (bpf_map_lookup_elem(map_fd, &key, value))
  333. nr_losses++;
  334. }
  335. close(map_fd);
  336. printf("nr_losses:%d(/1000)\n", nr_losses);
  337. }
  338. static void do_test_parallel_lru_loss(int task, void *data)
  339. {
  340. const unsigned int nr_stable_elems = 1000;
  341. const unsigned int nr_repeats = 100000;
  342. int map_fd = *(int *)data;
  343. unsigned long long stable_base;
  344. unsigned long long key, value[nr_cpus];
  345. unsigned long long next_ins_key;
  346. unsigned int nr_losses = 0;
  347. unsigned int i;
  348. stable_base = task * nr_repeats * 2 + 1;
  349. next_ins_key = stable_base;
  350. value[0] = 1234;
  351. for (i = 0; i < nr_stable_elems; i++) {
  352. assert(bpf_map_update_elem(map_fd, &next_ins_key, value,
  353. BPF_NOEXIST) == 0);
  354. next_ins_key++;
  355. }
  356. for (i = 0; i < nr_repeats; i++) {
  357. int rn;
  358. rn = rand();
  359. if (rn % 10) {
  360. key = rn % nr_stable_elems + stable_base;
  361. bpf_map_lookup_elem(map_fd, &key, value);
  362. } else {
  363. bpf_map_update_elem(map_fd, &next_ins_key, value,
  364. BPF_NOEXIST);
  365. next_ins_key++;
  366. }
  367. }
  368. key = stable_base;
  369. for (i = 0; i < nr_stable_elems; i++) {
  370. if (bpf_map_lookup_elem(map_fd, &key, value))
  371. nr_losses++;
  372. key++;
  373. }
  374. printf(" task:%d nr_losses:%u\n", task, nr_losses);
  375. }
  376. static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
  377. {
  378. int map_fd;
  379. printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
  380. map_flags);
  381. /* Give 20% more than the active working set */
  382. if (map_flags & BPF_F_NO_COMMON_LRU)
  383. map_fd = create_map(map_type, map_flags,
  384. nr_cpus * (1000 + 200));
  385. else
  386. map_fd = create_map(map_type, map_flags,
  387. nr_tasks * (1000 + 200));
  388. assert(map_fd != -1);
  389. run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);
  390. close(map_fd);
  391. }
  392. int main(int argc, char **argv)
  393. {
  394. struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
  395. int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
  396. const char *dist_file;
  397. int nr_tasks = 1;
  398. int lru_size;
  399. int f;
  400. if (argc < 4) {
  401. printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
  402. argv[0]);
  403. return -1;
  404. }
  405. dist_file = argv[1];
  406. lru_size = atoi(argv[2]);
  407. nr_tasks = atoi(argv[3]);
  408. setbuf(stdout, NULL);
  409. assert(!setrlimit(RLIMIT_MEMLOCK, &r));
  410. srand(time(NULL));
  411. nr_cpus = bpf_num_possible_cpus();
  412. assert(nr_cpus != -1);
  413. printf("nr_cpus:%d\n\n", nr_cpus);
  414. nr_tasks = min(nr_tasks, nr_cpus);
  415. dist_key_counts = read_keys(dist_file, &dist_keys);
  416. if (!dist_key_counts) {
  417. printf("%s has no key\n", dist_file);
  418. return -1;
  419. }
  420. for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
  421. test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
  422. test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
  423. test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
  424. nr_tasks);
  425. test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
  426. nr_tasks, lru_size);
  427. printf("\n");
  428. }
  429. free(dist_keys);
  430. return 0;
  431. }