/*
 * Memory arbiter functions. Allocates bandwidth through the
 * arbiter and sets up arbiter breakpoints.
 *
 * The algorithm first assigns slots to the clients that have
 * specified a bandwidth (e.g. ethernet), and then the remaining
 * slots are divided among all the active clients.
 *
 * Copyright (c) 2004-2007 Axis Communications AB.
 */
#include <hwregs/reg_map.h>
#include <hwregs/reg_rdwr.h>
#include <hwregs/marb_defs.h>
#include <arbiter.h>
#include <hwregs/intr_vect.h>
#include <linux/interrupt.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <asm/io.h>
#include <asm/irq_regs.h>
struct crisv32_watch_entry {
        unsigned long instance;
        watch_callback *cb;
        unsigned long start;
        unsigned long end;
        int used;
};

#define NUMBER_OF_BP 4
#define NBR_OF_CLIENTS 14
#define NBR_OF_SLOTS 64
#define SDRAM_BANDWIDTH 100000000 /* Some kind of expected value */
#define INTMEM_BANDWIDTH 400000000
#define NBR_OF_REGIONS 2

static struct crisv32_watch_entry watches[NUMBER_OF_BP] = {
        {regi_marb_bp0},
        {regi_marb_bp1},
        {regi_marb_bp2},
        {regi_marb_bp3}
};

static u8 requested_slots[NBR_OF_REGIONS][NBR_OF_CLIENTS];
static u8 active_clients[NBR_OF_REGIONS][NBR_OF_CLIENTS];
static int max_bandwidth[NBR_OF_REGIONS] =
        { SDRAM_BANDWIDTH, INTMEM_BANDWIDTH };

DEFINE_SPINLOCK(arbiter_lock);

static irqreturn_t crisv32_arbiter_irq(int irq, void *dev_id);
/*
 * "I'm the arbiter, I know the score.
 * From square one I'll be watching all 64."
 * (memory arbiter slots, that is)
 *
 * Or in other words:
 * Program the memory arbiter slots for "region" according to what's
 * in requested_slots[] and active_clients[], while minimizing
 * latency. A caller may pass a positive number for "unused_slots",
 * which must then be the unallocated, remaining number of slots,
 * free to hand out to any client.
 */
static void crisv32_arbiter_config(int region, int unused_slots)
{
        int slot;
        int client;
        int interval = 0;

        /*
         * This vector corresponds to the hardware arbiter slots (see
         * the hardware documentation for semantics). We initialize
         * each slot with a suitable sentinel value outside the valid
         * range {0 .. NBR_OF_CLIENTS - 1} and replace them with
         * client indexes. Then it's fed to the hardware.
         */
        s8 val[NBR_OF_SLOTS];

        for (slot = 0; slot < NBR_OF_SLOTS; slot++)
                val[slot] = -1;

        for (client = 0; client < NBR_OF_CLIENTS; client++) {
                int pos;

                /*
                 * Allocate the requested non-zero number of slots, but
                 * also give clients with zero requests one slot each
                 * while stocks last. We do the latter here, in client
                 * order. This makes sure zero-request clients are the
                 * first to get any spare slots, else those slots
                 * could, when bandwidth is allocated close to the limit,
                 * all be allocated to low-index non-zero-request clients
                 * in the default-fill loop below. Another positive but
                 * secondary effect is a somewhat better spread of the
                 * zero-bandwidth clients in the vector, avoiding some of
                 * the latency that could otherwise be caused by the
                 * partitioning of non-zero-bandwidth clients at low
                 * indexes and zero-bandwidth clients at high
                 * indexes. (Note that this spreading can only affect the
                 * unallocated bandwidth.) All the above only matters for
                 * memory-intensive situations, of course.
                 */
                if (!requested_slots[region][client]) {
                        /*
                         * Skip inactive clients. Also skip zero-slot
                         * allocations in this pass when there are no known
                         * free slots.
                         */
                        if (!active_clients[region][client]
                            || unused_slots <= 0)
                                continue;

                        unused_slots--;

                        /* Only allocate one slot for this client. */
                        interval = NBR_OF_SLOTS;
                } else
                        interval =
                                NBR_OF_SLOTS / requested_slots[region][client];

                pos = 0;
                while (pos < NBR_OF_SLOTS) {
                        if (val[pos] >= 0)
                                pos++;
                        else {
                                val[pos] = client;
                                pos += interval;
                        }
                }
        }

        client = 0;
        for (slot = 0; slot < NBR_OF_SLOTS; slot++) {
                /*
                 * Allocate remaining slots in round-robin
                 * client-number order for active clients. For this
                 * pass, we ignore requested bandwidth and previous
                 * allocations.
                 */
                if (val[slot] < 0) {
                        int first = client;

                        while (!active_clients[region][client]) {
                                client = (client + 1) % NBR_OF_CLIENTS;
                                if (client == first)
                                        break;
                        }
                        val[slot] = client;
                        client = (client + 1) % NBR_OF_CLIENTS;
                }
                if (region == EXT_REGION)
                        REG_WR_INT_VECT(marb, regi_marb, rw_ext_slots, slot,
                                        val[slot]);
                else if (region == INT_REGION)
                        REG_WR_INT_VECT(marb, regi_marb, rw_int_slots, slot,
                                        val[slot]);
        }
}
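
/*
 * A worked example of the interleaving above (numbers derived from the
 * constants in this file, not from any external documentation): a
 * client with requested_slots = 16 gets interval = 64 / 16 = 4, so the
 * first pass places it in every fourth free slot (0, 4, 8, ...),
 * spreading its accesses evenly to minimize latency. An active
 * zero-request client gets interval = NBR_OF_SLOTS and therefore lands
 * in exactly one free slot, provided unused_slots > 0. Any slot still
 * holding the -1 sentinel afterwards is handed out round-robin by the
 * second pass.
 */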
extern char _stext, _etext;

static void crisv32_arbiter_init(void)
{
        static int initialized;

        if (initialized)
                return;

        initialized = 1;

        /*
         * CPU caches are always set to active, but with zero
         * bandwidth allocated. It should be OK to allocate zero
         * bandwidth for the caches, because DMA for other channels
         * will supposedly finish, once their programmed amount is
         * done, and then the caches will get access according to the
         * "fixed scheme" for unclaimed slots. Though, if some
         * use-case somewhere requires a maximum CPU latency for
         * e.g. some interrupt, we have to start allocating specific
         * bandwidth for the CPU caches too.
         */
        active_clients[EXT_REGION][10] = active_clients[EXT_REGION][11] = 1;
        crisv32_arbiter_config(EXT_REGION, 0);
        crisv32_arbiter_config(INT_REGION, 0);

        if (request_irq(MEMARB_INTR_VECT, crisv32_arbiter_irq, 0,
                        "arbiter", NULL))
                printk(KERN_ERR "Couldn't allocate arbiter IRQ\n");

#ifndef CONFIG_ETRAX_KGDB
        /* Global watch for writes to kernel text segment. */
        crisv32_arbiter_watch(virt_to_phys(&_stext), &_etext - &_stext,
                              arbiter_all_clients, arbiter_all_write, NULL);
#endif
}
/* Main entry for bandwidth allocation. */
int crisv32_arbiter_allocate_bandwidth(int client, int region,
                                       unsigned long bandwidth)
{
        int i;
        int total_assigned = 0;
        int total_clients = 0;
        int req;

        crisv32_arbiter_init();

        for (i = 0; i < NBR_OF_CLIENTS; i++) {
                total_assigned += requested_slots[region][i];
                total_clients += active_clients[region][i];
        }

        /* Avoid division by 0 for 0-bandwidth requests. */
        req = bandwidth == 0
                ? 0 : NBR_OF_SLOTS / (max_bandwidth[region] / bandwidth);

        /*
         * We make sure that there are enough slots only for non-zero
         * requests. Requesting 0 bandwidth *may* allocate slots,
         * though if all bandwidth is allocated, such a client won't
         * get any and will have to rely on getting memory access
         * according to the fixed scheme that's the default when one
         * of the slot-allocated clients doesn't claim their slot.
         */
        if (total_assigned + req > NBR_OF_SLOTS)
                return -ENOMEM;

        active_clients[region][client] = 1;
        requested_slots[region][client] = req;
        crisv32_arbiter_config(region, NBR_OF_SLOTS - total_assigned);
        return 0;
}
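
/*
 * A minimal usage sketch (ETHERNET_CLIENT is a made-up client id for
 * illustration; client numbering comes from the chip documentation,
 * not this file): a driver needing 25 MB/s from external SDRAM would
 * request
 *
 *        err = crisv32_arbiter_allocate_bandwidth(ETHERNET_CLIENT,
 *                                                 EXT_REGION, 25000000);
 *
 * With max_bandwidth[EXT_REGION] = SDRAM_BANDWIDTH = 100000000, this
 * reserves req = 64 / (100000000 / 25000000) = 16 of the 64 slots,
 * and fails with -ENOMEM if fewer than 16 slots remain unassigned.
 */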
/*
 * Main entry for bandwidth deallocation.
 *
 * Strictly speaking, for a somewhat constant set of clients where
 * each client gets a constant bandwidth and is just enabled or
 * disabled (somewhat dynamically), no action is necessary here to
 * avoid starvation for non-zero-allocation clients, as the allocated
 * slots will just be unused. However, handing out those unused slots
 * to active clients avoids needless latency if the "fixed scheme"
 * would give unclaimed slots to an eager low-index client.
 */
void crisv32_arbiter_deallocate_bandwidth(int client, int region)
{
        int i;
        int total_assigned = 0;

        requested_slots[region][client] = 0;
        active_clients[region][client] = 0;

        for (i = 0; i < NBR_OF_CLIENTS; i++)
                total_assigned += requested_slots[region][i];

        crisv32_arbiter_config(region, NBR_OF_SLOTS - total_assigned);
}
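
/*
 * Continuing the sketch above (same hypothetical client id): the
 * matching release when the device is shut down, after which the 16
 * freed slots are redistributed among the remaining active clients:
 *
 *        crisv32_arbiter_deallocate_bandwidth(ETHERNET_CLIENT, EXT_REGION);
 */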
int crisv32_arbiter_watch(unsigned long start, unsigned long size,
                          unsigned long clients, unsigned long accesses,
                          watch_callback *cb)
{
        int i;

        crisv32_arbiter_init();

        if (start > 0x80000000) {
                printk(KERN_ERR "Arbiter: %lX doesn't look like a "
                       "physical address\n", start);
                return -EFAULT;
        }

        spin_lock(&arbiter_lock);

        for (i = 0; i < NUMBER_OF_BP; i++) {
                if (!watches[i].used) {
                        reg_marb_rw_intr_mask intr_mask =
                                REG_RD(marb, regi_marb, rw_intr_mask);

                        watches[i].used = 1;
                        watches[i].start = start;
                        watches[i].end = start + size;
                        watches[i].cb = cb;

                        REG_WR_INT(marb_bp, watches[i].instance, rw_first_addr,
                                   watches[i].start);
                        REG_WR_INT(marb_bp, watches[i].instance, rw_last_addr,
                                   watches[i].end);
                        REG_WR_INT(marb_bp, watches[i].instance, rw_op,
                                   accesses);
                        REG_WR_INT(marb_bp, watches[i].instance, rw_clients,
                                   clients);

                        if (i == 0)
                                intr_mask.bp0 = regk_marb_yes;
                        else if (i == 1)
                                intr_mask.bp1 = regk_marb_yes;
                        else if (i == 2)
                                intr_mask.bp2 = regk_marb_yes;
                        else if (i == 3)
                                intr_mask.bp3 = regk_marb_yes;

                        REG_WR(marb, regi_marb, rw_intr_mask, intr_mask);
                        spin_unlock(&arbiter_lock);

                        return i;
                }
        }
        spin_unlock(&arbiter_lock);
        return -ENOMEM;
}
int crisv32_arbiter_unwatch(int id)
{
        reg_marb_rw_intr_mask intr_mask = REG_RD(marb, regi_marb, rw_intr_mask);

        crisv32_arbiter_init();

        spin_lock(&arbiter_lock);

        if ((id < 0) || (id >= NUMBER_OF_BP) || (!watches[id].used)) {
                spin_unlock(&arbiter_lock);
                return -EINVAL;
        }

        /*
         * Clear the entry but preserve .instance, which holds the
         * breakpoint register block set up in the static initializer;
         * zeroing the whole entry would break later re-use of this
         * breakpoint slot.
         */
        watches[id].used = 0;
        watches[id].cb = NULL;
        watches[id].start = 0;
        watches[id].end = 0;

        if (id == 0)
                intr_mask.bp0 = regk_marb_no;
        else if (id == 1)
                intr_mask.bp1 = regk_marb_no;
        else if (id == 2)
                intr_mask.bp2 = regk_marb_no;
        else if (id == 3)
                intr_mask.bp3 = regk_marb_no;

        REG_WR(marb, regi_marb, rw_intr_mask, intr_mask);
        spin_unlock(&arbiter_lock);
        return 0;
}
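
/*
 * A usage sketch for the watch API (my_buf, MY_BUF_SIZE and the
 * callback are hypothetical names for illustration): trap any client
 * writing into a DMA buffer and log it from the callback, which runs
 * from the arbiter interrupt handler below.
 *
 *        static void my_buf_trashed(void)
 *        {
 *                printk(KERN_ERR "someone wrote into my_buf!\n");
 *        }
 *
 *        int id = crisv32_arbiter_watch(virt_to_phys(my_buf),
 *                                       MY_BUF_SIZE,
 *                                       arbiter_all_clients,
 *                                       arbiter_all_write,
 *                                       my_buf_trashed);
 *        ...
 *        if (id >= 0)
 *                crisv32_arbiter_unwatch(id);
 */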
extern void show_registers(struct pt_regs *regs);

static irqreturn_t crisv32_arbiter_irq(int irq, void *dev_id)
{
        reg_marb_r_masked_intr masked_intr =
                REG_RD(marb, regi_marb, r_masked_intr);
        reg_marb_bp_r_brk_clients r_clients;
        reg_marb_bp_r_brk_addr r_addr;
        reg_marb_bp_r_brk_op r_op;
        reg_marb_bp_r_brk_first_client r_first;
        reg_marb_bp_r_brk_size r_size;
        reg_marb_bp_rw_ack ack = { 0 };
        reg_marb_rw_ack_intr ack_intr = {
                .bp0 = 1, .bp1 = 1, .bp2 = 1, .bp3 = 1
        };
        struct crisv32_watch_entry *watch;

        if (masked_intr.bp0) {
                watch = &watches[0];
                ack_intr.bp0 = regk_marb_yes;
        } else if (masked_intr.bp1) {
                watch = &watches[1];
                ack_intr.bp1 = regk_marb_yes;
        } else if (masked_intr.bp2) {
                watch = &watches[2];
                ack_intr.bp2 = regk_marb_yes;
        } else if (masked_intr.bp3) {
                watch = &watches[3];
                ack_intr.bp3 = regk_marb_yes;
        } else {
                return IRQ_NONE;
        }

        /* Retrieve all useful information and print it. */
        r_clients = REG_RD(marb_bp, watch->instance, r_brk_clients);
        r_addr = REG_RD(marb_bp, watch->instance, r_brk_addr);
        r_op = REG_RD(marb_bp, watch->instance, r_brk_op);
        r_first = REG_RD(marb_bp, watch->instance, r_brk_first_client);
        r_size = REG_RD(marb_bp, watch->instance, r_brk_size);

        printk(KERN_INFO "Arbiter IRQ\n");
        printk(KERN_INFO "Clients %X addr %X op %X first %X size %X\n",
               REG_TYPE_CONV(int, reg_marb_bp_r_brk_clients, r_clients),
               REG_TYPE_CONV(int, reg_marb_bp_r_brk_addr, r_addr),
               REG_TYPE_CONV(int, reg_marb_bp_r_brk_op, r_op),
               REG_TYPE_CONV(int, reg_marb_bp_r_brk_first_client, r_first),
               REG_TYPE_CONV(int, reg_marb_bp_r_brk_size, r_size));

        REG_WR(marb_bp, watch->instance, rw_ack, ack);
        REG_WR(marb, regi_marb, rw_ack_intr, ack_intr);

        printk(KERN_INFO "IRQ occurred at %lX\n", get_irq_regs()->erp);

        if (watch->cb)
                watch->cb();

        return IRQ_HANDLED;
}