/* arch/powerpc/sysdev/dart_iommu.c */
  1. /*
  2. * arch/powerpc/sysdev/dart_iommu.c
  3. *
  4. * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
  5. * Copyright (C) 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>,
  6. * IBM Corporation
  7. *
  8. * Based on pSeries_iommu.c:
  9. * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
  10. * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
  11. *
  12. * Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu.
  13. *
  14. *
  15. * This program is free software; you can redistribute it and/or modify
  16. * it under the terms of the GNU General Public License as published by
  17. * the Free Software Foundation; either version 2 of the License, or
  18. * (at your option) any later version.
  19. *
  20. * This program is distributed in the hope that it will be useful,
  21. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  22. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  23. * GNU General Public License for more details.
  24. *
  25. * You should have received a copy of the GNU General Public License
  26. * along with this program; if not, write to the Free Software
  27. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  28. */
#include <linux/init.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/suspend.h>
#include <linux/memblock.h>
#include <linux/gfp.h>
#include <linux/kmemleak.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/cacheflush.h>
#include <asm/ppc-pci.h>

#include "dart.h"
/* DART table address and size (table entries are 32-bit words) */
static u32 *dart_tablebase;
static unsigned long dart_tablesize;

/* Mapped base address for the dart */
static unsigned int __iomem *dart;

/* Dummy val that entries are set to when unused */
static unsigned int dart_emptyval;

/* Single shared iommu table covering the whole DART window */
static struct iommu_table iommu_table_dart;
static int iommu_table_dart_inited;

/* U3 only: set when table entries changed but the TLB was not yet flushed */
static int dart_dirty;

/* Non-zero when the chipset is a U4 (vs. U3) DART */
static int dart_is_u4;

/* DMA offset used for the U4 64-bit direct-DMA bypass window */
#define DART_U4_BYPASS_BASE	0x8000000000ull

/* Debug output is compiled out by default */
#define DBG(...)

/* Serializes all TLB-invalidation register sequences on the DART */
static DEFINE_SPINLOCK(invalidate_lock);
/*
 * Flush the entire DART TLB.
 *
 * Sets the chip-appropriate FLUSHTLB bit in DART_CNTL and busy-polls
 * for the hardware to clear it.  On timeout the bit is cleared and set
 * again with an exponentially growing poll budget (up to 1 << 4
 * iterations) before panicking — see the "Gotcha" comment below.
 */
static inline void dart_tlb_invalidate_all(void)
{
	unsigned long l = 0;
	unsigned int reg, inv_bit;
	unsigned long limit;
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);

	DBG("dart: flush\n");

	/* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the
	 * control register and wait for it to clear.
	 *
	 * Gotcha: Sometimes, the DART won't detect that the bit gets
	 * set. If so, clear it and set it again.
	 */

	limit = 0;
	inv_bit = dart_is_u4 ? DART_CNTL_U4_FLUSHTLB : DART_CNTL_U3_FLUSHTLB;
retry:
	l = 0;
	reg = DART_IN(DART_CNTL);
	reg |= inv_bit;
	DART_OUT(DART_CNTL, reg);

	/* Poll until the hardware clears the bit or the budget runs out */
	while ((DART_IN(DART_CNTL) & inv_bit) && l < (1L << limit))
		l++;
	if (l == (1L << limit)) {
		if (limit < 4) {
			/* Timed out: clear the bit and retry with a
			 * longer poll budget.
			 */
			limit++;
			reg = DART_IN(DART_CNTL);
			reg &= ~inv_bit;
			DART_OUT(DART_CNTL, reg);
			goto retry;
		} else
			panic("DART: TLB did not flush after waiting a long "
			      "time. Buggy U3 ?");
	}

	spin_unlock_irqrestore(&invalidate_lock, flags);
}
/*
 * Invalidate a single DART TLB entry (U4 only — the register layout
 * written here uses the U4 IONE interface).
 *
 * @bus_rpn: bus-side page number of the entry to invalidate.
 *
 * Writes the IONE command to DART_CNTL and busy-polls for completion,
 * growing the poll budget up to 1 << 4 iterations before panicking,
 * mirroring the retry scheme of dart_tlb_invalidate_all().
 */
static inline void dart_tlb_invalidate_one(unsigned long bus_rpn)
{
	unsigned int reg;
	unsigned int l, limit;
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);

	reg = DART_CNTL_U4_ENABLE | DART_CNTL_U4_IONE |
		(bus_rpn & DART_CNTL_U4_IONE_MASK);
	DART_OUT(DART_CNTL, reg);

	limit = 0;
wait_more:
	l = 0;
	/* Poll for the hardware to clear IONE; rmb() orders the reads */
	while ((DART_IN(DART_CNTL) & DART_CNTL_U4_IONE) && l < (1L << limit)) {
		rmb();
		l++;
	}
	if (l == (1L << limit)) {
		if (limit < 4) {
			limit++;
			goto wait_more;
		} else
			panic("DART: TLB did not flush after waiting a long "
			      "time. Buggy U4 ?");
	}

	spin_unlock_irqrestore(&invalidate_lock, flags);
}
/*
 * Push @count DART table entries starting at @base out of the CPU
 * caches so the cache-incoherent DART hardware sees them.
 *
 * @base:  first table entry to flush (kernel virtual address).
 * @count: number of 32-bit entries written by the caller.
 */
static void dart_cache_sync(unsigned int *base, unsigned int count)
{
	/*
	 * We add 1 to the number of entries to flush, following a
	 * comment in Darwin indicating that the memory controller
	 * can prefetch unmapped memory under some circumstances.
	 */
	unsigned long start = (unsigned long)base;
	unsigned long end = start + (count + 1) * sizeof(unsigned int);
	unsigned int tmp;

	/* Perform a standard cache flush */
	flush_inval_dcache_range(start, end);

	/*
	 * Perform the sequence described in the CPC925 manual to
	 * ensure all the data gets to a point the cache incoherent
	 * DART hardware will see.
	 */
	/* tmp receives the dummy load result; the asm must not be
	 * reordered or simplified — the exact sync/dcbf/load sequence
	 * is mandated by the chipset manual.
	 */
	asm volatile(" sync;"
		     " isync;"
		     " dcbf 0,%1;"
		     " sync;"
		     " isync;"
		     " lwz %0,0(%1);"
		     " isync" : "=r" (tmp) : "r" (end) : "memory");
}
  149. static void dart_flush(struct iommu_table *tbl)
  150. {
  151. mb();
  152. if (dart_dirty) {
  153. dart_tlb_invalidate_all();
  154. dart_dirty = 0;
  155. }
  156. }
  157. static int dart_build(struct iommu_table *tbl, long index,
  158. long npages, unsigned long uaddr,
  159. enum dma_data_direction direction,
  160. unsigned long attrs)
  161. {
  162. unsigned int *dp, *orig_dp;
  163. unsigned int rpn;
  164. long l;
  165. DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);
  166. orig_dp = dp = ((unsigned int*)tbl->it_base) + index;
  167. /* On U3, all memory is contiguous, so we can move this
  168. * out of the loop.
  169. */
  170. l = npages;
  171. while (l--) {
  172. rpn = __pa(uaddr) >> DART_PAGE_SHIFT;
  173. *(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);
  174. uaddr += DART_PAGE_SIZE;
  175. }
  176. dart_cache_sync(orig_dp, npages);
  177. if (dart_is_u4) {
  178. rpn = index;
  179. while (npages--)
  180. dart_tlb_invalidate_one(rpn++);
  181. } else {
  182. dart_dirty = 1;
  183. }
  184. return 0;
  185. }
  186. static void dart_free(struct iommu_table *tbl, long index, long npages)
  187. {
  188. unsigned int *dp, *orig_dp;
  189. long orig_npages = npages;
  190. /* We don't worry about flushing the TLB cache. The only drawback of
  191. * not doing it is that we won't catch buggy device drivers doing
  192. * bad DMAs, but then no 32-bit architecture ever does either.
  193. */
  194. DBG("dart: free at: %lx, %lx\n", index, npages);
  195. orig_dp = dp = ((unsigned int *)tbl->it_base) + index;
  196. while (npages--)
  197. *(dp++) = dart_emptyval;
  198. dart_cache_sync(orig_dp, orig_npages);
  199. }
/*
 * Early-boot allocation of the DART translation table and of the spare
 * "empty" page that all unused entries point at.  Sets dart_tablebase,
 * dart_tablesize and dart_emptyval.  Failures inside memblock at this
 * stage are fatal (memblock panics), so no error is returned.
 *
 * NOTE(review): kmemleak_no_scan() is declared in <linux/kmemleak.h>,
 * which is not in this file's visible include list — verify it is
 * included (directly or transitively).
 */
static void allocate_dart(void)
{
	unsigned long tmp;

	/* 512 pages (2MB) is max DART tablesize. */
	dart_tablesize = 1UL << 21;

	/*
	 * 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we
	 * will blow up an entire large page anyway in the kernel mapping.
	 */
	dart_tablebase = __va(memblock_alloc_base(1UL<<24,
						  1UL<<24, 0x80000000L));

	/* There is no point scanning the DART space for leaks*/
	kmemleak_no_scan((void *)dart_tablebase);

	/* Allocate a spare page to map all invalid DART pages. We need to do
	 * that to work around what looks like a problem with the HT bridge
	 * prefetching into invalid pages and corrupting data
	 */
	tmp = memblock_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);

	/* "Empty" entry: valid bit set, RPN of the spare page */
	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
					 DARTMAP_RPNMASK);

	printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase);
}
/*
 * Probe, allocate and enable the DART described by @dart_node.
 *
 * Returns 0 on success, -ENODEV when the IOMMU is disabled by the user
 * or not needed (machine has <= 1GB RAM and iommu=on was not given).
 * Register-mapping failures panic: by this point the decision to use
 * the DART has been made and there is no fallback.
 */
static int __init dart_init(struct device_node *dart_node)
{
	unsigned int i;
	unsigned long base, size;
	struct resource r;

	/* IOMMU disabled by the user ? bail out */
	if (iommu_is_off)
		return -ENODEV;

	/*
	 * Only use the DART if the machine has more than 1GB of RAM
	 * or if requested with iommu=on on cmdline.
	 *
	 * 1GB of RAM is picked as limit because some default devices
	 * (i.e. Airport Extreme) have 30 bit address range limits.
	 */
	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
		return -ENODEV;

	/* Get DART registers */
	if (of_address_to_resource(dart_node, 0, &r))
		panic("DART: can't get register base ! ");

	/* Map in DART registers */
	dart = ioremap(r.start, resource_size(&r));
	if (dart == NULL)
		panic("DART: Cannot map registers!");

	/* Allocate the DART and dummy page */
	allocate_dart();

	/* Fill initial table: every entry maps the spare "empty" page */
	for (i = 0; i < dart_tablesize/4; i++)
		dart_tablebase[i] = dart_emptyval;

	/* Push to memory */
	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));

	/* Initialize DART with table base and enable it. */
	base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT;
	size = dart_tablesize >> DART_PAGE_SHIFT;
	if (dart_is_u4) {
		/* U4 has separate base/size registers */
		size &= DART_SIZE_U4_SIZE_MASK;
		DART_OUT(DART_BASE_U4, base);
		DART_OUT(DART_SIZE_U4, size);
		DART_OUT(DART_CNTL, DART_CNTL_U4_ENABLE);
	} else {
		/* U3 packs enable, base and size into DART_CNTL */
		size &= DART_CNTL_U3_SIZE_MASK;
		DART_OUT(DART_CNTL,
			 DART_CNTL_U3_ENABLE |
			 (base << DART_CNTL_U3_BASE_SHIFT) |
			 (size << DART_CNTL_U3_SIZE_SHIFT));
	}

	/* Invalidate DART to get rid of possible stale TLBs */
	dart_tlb_invalidate_all();

	printk(KERN_INFO "DART IOMMU initialized for %s type chipset\n",
	       dart_is_u4 ? "U4" : "U3");

	return 0;
}
/* Table operations wired into the common powerpc iommu code */
static struct iommu_table_ops iommu_dart_ops = {
	.set = dart_build,	/* install translations */
	.clear = dart_free,	/* reset entries to dart_emptyval */
	.flush = dart_flush,	/* deferred U3 TLB flush */
};
/*
 * Fill in the single shared iommu_table covering the DART window and
 * register it with the common IOMMU code.  Called once, lazily, from
 * pci_dma_bus_setup_dart().
 */
static void iommu_table_dart_setup(void)
{
	iommu_table_dart.it_busno = 0;
	iommu_table_dart.it_offset = 0;
	/* it_size is in number of entries */
	iommu_table_dart.it_size = dart_tablesize / sizeof(u32);
	iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K;

	/* Initialize the common IOMMU code */
	iommu_table_dart.it_base = (unsigned long)dart_tablebase;
	iommu_table_dart.it_index = 0;
	iommu_table_dart.it_blocksize = 1;
	iommu_table_dart.it_ops = &iommu_dart_ops;
	iommu_init_table(&iommu_table_dart, -1);

	/* Reserve the last page of the DART to avoid possible prefetch
	 * past the DART mapped area
	 */
	set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map);
}
  297. static void pci_dma_dev_setup_dart(struct pci_dev *dev)
  298. {
  299. if (dart_is_u4)
  300. set_dma_offset(&dev->dev, DART_U4_BYPASS_BASE);
  301. set_iommu_table_base(&dev->dev, &iommu_table_dart);
  302. }
  303. static void pci_dma_bus_setup_dart(struct pci_bus *bus)
  304. {
  305. if (!iommu_table_dart_inited) {
  306. iommu_table_dart_inited = 1;
  307. iommu_table_dart_setup();
  308. }
  309. }
  310. static bool dart_device_on_pcie(struct device *dev)
  311. {
  312. struct device_node *np = of_node_get(dev->of_node);
  313. while(np) {
  314. if (of_device_is_compatible(np, "U4-pcie") ||
  315. of_device_is_compatible(np, "u4-pcie")) {
  316. of_node_put(np);
  317. return true;
  318. }
  319. np = of_get_next_parent(np);
  320. }
  321. return false;
  322. }
  323. static int dart_dma_set_mask(struct device *dev, u64 dma_mask)
  324. {
  325. if (!dev->dma_mask || !dma_supported(dev, dma_mask))
  326. return -EIO;
  327. /* U4 supports a DART bypass, we use it for 64-bit capable
  328. * devices to improve performances. However, that only works
  329. * for devices connected to U4 own PCIe interface, not bridged
  330. * through hypertransport. We need the device to support at
  331. * least 40 bits of addresses.
  332. */
  333. if (dart_device_on_pcie(dev) && dma_mask >= DMA_BIT_MASK(40)) {
  334. dev_info(dev, "Using 64-bit DMA iommu bypass\n");
  335. set_dma_ops(dev, &dma_direct_ops);
  336. } else {
  337. dev_info(dev, "Using 32-bit DMA via iommu\n");
  338. set_dma_ops(dev, &dma_iommu_ops);
  339. }
  340. *dev->dma_mask = dma_mask;
  341. return 0;
  342. }
/*
 * Early boot entry point: locate the DART in the device tree,
 * initialize the hardware and wire up the controller/PCI DMA hooks.
 * If no DART node exists or init fails, the system falls back to
 * direct DMA ops.
 *
 * NOTE(review): the node returned by of_find_compatible_node() is
 * never of_node_put() on either path — presumably acceptable for a
 * boot-time singleton, but worth confirming.
 */
void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
{
	struct device_node *dn;

	/* Find the DART in the device-tree */
	dn = of_find_compatible_node(NULL, "dart", "u3-dart");
	if (dn == NULL) {
		dn = of_find_compatible_node(NULL, "dart", "u4-dart");
		if (dn == NULL)
			return;	/* use default direct_dma_ops */
		dart_is_u4 = 1;
	}

	/* Initialize the DART HW */
	if (dart_init(dn) != 0)
		goto bail;

	/* Setup bypass if supported */
	if (dart_is_u4)
		ppc_md.dma_set_mask = dart_dma_set_mask;

	controller_ops->dma_dev_setup = pci_dma_dev_setup_dart;
	controller_ops->dma_bus_setup = pci_dma_bus_setup_dart;

	/* Setup pci_dma ops */
	set_pci_dma_ops(&dma_iommu_ops);
	return;

bail:
	/* If init failed, use direct iommu and null setup functions */
	controller_ops->dma_dev_setup = NULL;
	controller_ops->dma_bus_setup = NULL;

	/* Setup pci_dma ops */
	set_pci_dma_ops(&dma_direct_ops);
}
  372. #ifdef CONFIG_PM
/*
 * Resume-from-suspend hook: re-push the whole DART table out of the
 * CPU caches and flush the DART TLB, since the hardware state may be
 * stale after a power transition.
 */
static void iommu_dart_restore(void)
{
	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
	dart_tlb_invalidate_all();
}
  378. static int __init iommu_init_late_dart(void)
  379. {
  380. if (!dart_tablebase)
  381. return 0;
  382. ppc_md.iommu_restore = iommu_dart_restore;
  383. return 0;
  384. }
  385. late_initcall(iommu_init_late_dart);
  386. #endif /* CONFIG_PM */