i915_gem_gtt.c

  1. /*
  2. * Copyright © 2010 Daniel Vetter
  3. * Copyright © 2011-2014 Intel Corporation
  4. *
  5. * Permission is hereby granted, free of charge, to any person obtaining a
  6. * copy of this software and associated documentation files (the "Software"),
  7. * to deal in the Software without restriction, including without limitation
  8. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. * and/or sell copies of the Software, and to permit persons to whom the
  10. * Software is furnished to do so, subject to the following conditions:
  11. *
  12. * The above copyright notice and this permission notice (including the next
  13. * paragraph) shall be included in all copies or substantial portions of the
  14. * Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22. * IN THE SOFTWARE.
  23. *
  24. */
  25. #include <linux/seq_file.h>
  26. #include <linux/stop_machine.h>
  27. #include <drm/drmP.h>
  28. #include <drm/i915_drm.h>
  29. #include "i915_drv.h"
  30. #include "i915_vgpu.h"
  31. #include "i915_trace.h"
  32. #include "intel_drv.h"
  33. #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
  34. /**
  35. * DOC: Global GTT views
  36. *
  37. * Background and previous state
  38. *
  39. * Historically, objects could exist (be bound) in global GTT space only as
  40. * singular instances, with a view representing all of the object's backing pages
  41. * in a linear fashion. This view will be called a normal view.
  42. *
  43. * To support multiple views of the same object, where the number of mapped
  44. * pages is not equal to the backing store, or where the layout of the pages
  45. * is not linear, the concept of a GGTT view was added.
  46. *
  47. * One example of an alternative view is a stereo display driven by a single
  48. * image. In this case we would have a framebuffer looking like this
  49. * (2x2 pages):
  50. *
  51. * 12
  52. * 34
  53. *
  54. * The above represents a normal GGTT view, as normally mapped for GPU or CPU
  55. * rendering. In contrast, the display engine would be fed an alternative
  56. * view which could look something like this:
  57. *
  58. * 1212
  59. * 3434
  60. *
  61. * In this example both the size and the layout of pages in the alternative view
  62. * are different from the normal view.
  63. *
  64. * Implementation and usage
  65. *
  66. * GGTT views are implemented using VMAs and are distinguished via enum
  67. * i915_ggtt_view_type and struct i915_ggtt_view.
  68. *
  69. * A new flavour of core GEM functions which work with GGTT bound objects was
  70. * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
  71. * renaming in large amounts of code. They take a struct i915_ggtt_view
  72. * parameter encapsulating all metadata required to implement a view.
  73. *
  74. * As a helper for callers which are only interested in the normal view, a
  75. * globally const i915_ggtt_view_normal singleton instance exists. All old core
  76. * GEM API functions, the ones not taking the view parameter, operate on,
  77. * or with, the normal GGTT view.
  78. *
  79. * Code wanting to add or use a new GGTT view needs to:
  80. *
  81. * 1. Add a new enum with a suitable name.
  82. * 2. Extend the metadata in the i915_ggtt_view structure if required.
  83. * 3. Add support to i915_get_vma_pages().
  84. *
  85. * New views are required to build a scatter-gather table from within the
  86. * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
  87. * exists for the lifetime of a VMA.
  88. *
  89. * The core API is designed to have copy semantics, which means that the passed-in
  90. * struct i915_ggtt_view does not need to be persistent (left around after
  91. * calling the core API functions).
  92. *
  93. */
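/*
 * Illustrative sketch (assumed caller code, not part of this file): thanks to
 * the copy semantics described above, a caller that wants a non-normal view
 * can build a struct i915_ggtt_view on the stack and pass it to the _ggtt_ /
 * _view flavoured helpers, e.g.:
 *
 *	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };
 *	// fill in whatever extra metadata the view type requires, then pass
 *	// &view (or &i915_ggtt_view_normal) to a _view flavoured helper;
 *	// the struct need not outlive the call.
 */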
  94. static inline struct i915_ggtt *
  95. i915_vm_to_ggtt(struct i915_address_space *vm)
  96. {
  97. GEM_BUG_ON(!i915_is_ggtt(vm));
  98. return container_of(vm, struct i915_ggtt, base);
  99. }
  100. static int
  101. i915_get_ggtt_vma_pages(struct i915_vma *vma);
  102. const struct i915_ggtt_view i915_ggtt_view_normal = {
  103. .type = I915_GGTT_VIEW_NORMAL,
  104. };
  105. const struct i915_ggtt_view i915_ggtt_view_rotated = {
  106. .type = I915_GGTT_VIEW_ROTATED,
  107. };
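/*
 * Sanitizes the requested enable_ppgtt value against what the hardware (and
 * any active vGPU) supports. The returned mode is what the rest of the driver
 * uses: 0 = PPGTT disabled, 1 = aliasing PPGTT, 2 = full PPGTT,
 * 3 = full 48bit PPGTT.
 */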
  108. int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
  109. int enable_ppgtt)
  110. {
  111. bool has_aliasing_ppgtt;
  112. bool has_full_ppgtt;
  113. bool has_full_48bit_ppgtt;
  114. has_aliasing_ppgtt = INTEL_GEN(dev_priv) >= 6;
  115. has_full_ppgtt = INTEL_GEN(dev_priv) >= 7;
  116. has_full_48bit_ppgtt =
  117. IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;
  118. if (intel_vgpu_active(dev_priv)) {
  119. /* emulation is too hard */
  120. has_full_ppgtt = false;
  121. has_full_48bit_ppgtt = false;
  122. }
  123. if (!has_aliasing_ppgtt)
  124. return 0;
  125. /*
  126. * We don't allow disabling PPGTT for gen9+ as it's a requirement for
  127. * execlists, the sole mechanism available to submit work.
  128. */
  129. if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
  130. return 0;
  131. if (enable_ppgtt == 1)
  132. return 1;
  133. if (enable_ppgtt == 2 && has_full_ppgtt)
  134. return 2;
  135. if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
  136. return 3;
  137. #ifdef CONFIG_INTEL_IOMMU
  138. /* Disable ppgtt on SNB if VT-d is on. */
  139. if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
  140. DRM_INFO("Disabling PPGTT because VT-d is on\n");
  141. return 0;
  142. }
  143. #endif
  144. /* Early VLV (pre-B3 stepping) doesn't have working PPGTT */
  145. if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
  146. DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
  147. return 0;
  148. }
  149. if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
  150. return has_full_48bit_ppgtt ? 3 : 2;
  151. else
  152. return has_aliasing_ppgtt ? 1 : 0;
  153. }
  154. static int ppgtt_bind_vma(struct i915_vma *vma,
  155. enum i915_cache_level cache_level,
  156. u32 unused)
  157. {
  158. u32 pte_flags = 0;
  159. vma->pages = vma->obj->pages;
  160. /* Currently applicable only to VLV */
  161. if (vma->obj->gt_ro)
  162. pte_flags |= PTE_READ_ONLY;
  163. vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
  164. cache_level, pte_flags);
  165. return 0;
  166. }
  167. static void ppgtt_unbind_vma(struct i915_vma *vma)
  168. {
  169. vma->vm->clear_range(vma->vm,
  170. vma->node.start,
  171. vma->size,
  172. true);
  173. }
  174. static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
  175. enum i915_cache_level level,
  176. bool valid)
  177. {
  178. gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
  179. pte |= addr;
  180. switch (level) {
  181. case I915_CACHE_NONE:
  182. pte |= PPAT_UNCACHED_INDEX;
  183. break;
  184. case I915_CACHE_WT:
  185. pte |= PPAT_DISPLAY_ELLC_INDEX;
  186. break;
  187. default:
  188. pte |= PPAT_CACHED_INDEX;
  189. break;
  190. }
  191. return pte;
  192. }
  193. static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
  194. const enum i915_cache_level level)
  195. {
  196. gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
  197. pde |= addr;
  198. if (level != I915_CACHE_NONE)
  199. pde |= PPAT_CACHED_PDE_INDEX;
  200. else
  201. pde |= PPAT_UNCACHED_INDEX;
  202. return pde;
  203. }
  204. #define gen8_pdpe_encode gen8_pde_encode
  205. #define gen8_pml4e_encode gen8_pde_encode
  206. static gen6_pte_t snb_pte_encode(dma_addr_t addr,
  207. enum i915_cache_level level,
  208. bool valid, u32 unused)
  209. {
  210. gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  211. pte |= GEN6_PTE_ADDR_ENCODE(addr);
  212. switch (level) {
  213. case I915_CACHE_L3_LLC:
  214. case I915_CACHE_LLC:
  215. pte |= GEN6_PTE_CACHE_LLC;
  216. break;
  217. case I915_CACHE_NONE:
  218. pte |= GEN6_PTE_UNCACHED;
  219. break;
  220. default:
  221. MISSING_CASE(level);
  222. }
  223. return pte;
  224. }
  225. static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
  226. enum i915_cache_level level,
  227. bool valid, u32 unused)
  228. {
  229. gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  230. pte |= GEN6_PTE_ADDR_ENCODE(addr);
  231. switch (level) {
  232. case I915_CACHE_L3_LLC:
  233. pte |= GEN7_PTE_CACHE_L3_LLC;
  234. break;
  235. case I915_CACHE_LLC:
  236. pte |= GEN6_PTE_CACHE_LLC;
  237. break;
  238. case I915_CACHE_NONE:
  239. pte |= GEN6_PTE_UNCACHED;
  240. break;
  241. default:
  242. MISSING_CASE(level);
  243. }
  244. return pte;
  245. }
  246. static gen6_pte_t byt_pte_encode(dma_addr_t addr,
  247. enum i915_cache_level level,
  248. bool valid, u32 flags)
  249. {
  250. gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  251. pte |= GEN6_PTE_ADDR_ENCODE(addr);
  252. if (!(flags & PTE_READ_ONLY))
  253. pte |= BYT_PTE_WRITEABLE;
  254. if (level != I915_CACHE_NONE)
  255. pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
  256. return pte;
  257. }
  258. static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
  259. enum i915_cache_level level,
  260. bool valid, u32 unused)
  261. {
  262. gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  263. pte |= HSW_PTE_ADDR_ENCODE(addr);
  264. if (level != I915_CACHE_NONE)
  265. pte |= HSW_WB_LLC_AGE3;
  266. return pte;
  267. }
  268. static gen6_pte_t iris_pte_encode(dma_addr_t addr,
  269. enum i915_cache_level level,
  270. bool valid, u32 unused)
  271. {
  272. gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  273. pte |= HSW_PTE_ADDR_ENCODE(addr);
  274. switch (level) {
  275. case I915_CACHE_NONE:
  276. break;
  277. case I915_CACHE_WT:
  278. pte |= HSW_WT_ELLC_LLC_AGE3;
  279. break;
  280. default:
  281. pte |= HSW_WB_ELLC_LLC_AGE3;
  282. break;
  283. }
  284. return pte;
  285. }
  286. static int __setup_page_dma(struct drm_device *dev,
  287. struct i915_page_dma *p, gfp_t flags)
  288. {
  289. struct device *kdev = &dev->pdev->dev;
  290. p->page = alloc_page(flags);
  291. if (!p->page)
  292. return -ENOMEM;
  293. p->daddr = dma_map_page(kdev,
  294. p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
  295. if (dma_mapping_error(kdev, p->daddr)) {
  296. __free_page(p->page);
  297. return -EINVAL;
  298. }
  299. return 0;
  300. }
  301. static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
  302. {
  303. return __setup_page_dma(dev, p, I915_GFP_DMA);
  304. }
  305. static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
  306. {
  307. struct pci_dev *pdev = dev->pdev;
  308. if (WARN_ON(!p->page))
  309. return;
  310. dma_unmap_page(&pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
  311. __free_page(p->page);
  312. memset(p, 0, sizeof(*p));
  313. }
  314. static void *kmap_page_dma(struct i915_page_dma *p)
  315. {
  316. return kmap_atomic(p->page);
  317. }
  318. /* We use the flushing unmap only with ppgtt structures:
  319. * page directories, page tables and scratch pages.
  320. */
  321. static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
  322. {
  323. /* There are only a few exceptions for gen >= 6: chv and bxt.
  324. * And we are not sure about the latter, so play safe for now.
  325. */
  326. if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
  327. drm_clflush_virt_range(vaddr, PAGE_SIZE);
  328. kunmap_atomic(vaddr);
  329. }
  330. #define kmap_px(px) kmap_page_dma(px_base(px))
  331. #define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
  332. #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
  333. #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
  334. #define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
  335. #define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
  336. static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
  337. const uint64_t val)
  338. {
  339. int i;
  340. uint64_t * const vaddr = kmap_page_dma(p);
  341. for (i = 0; i < 512; i++)
  342. vaddr[i] = val;
  343. kunmap_page_dma(dev, vaddr);
  344. }
  345. static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
  346. const uint32_t val32)
  347. {
  348. uint64_t v = val32;
  349. v = v << 32 | val32;
  350. fill_page_dma(dev, p, v);
  351. }
  352. static int
  353. setup_scratch_page(struct drm_device *dev,
  354. struct i915_page_dma *scratch,
  355. gfp_t gfp)
  356. {
  357. return __setup_page_dma(dev, scratch, gfp | __GFP_ZERO);
  358. }
  359. static void cleanup_scratch_page(struct drm_device *dev,
  360. struct i915_page_dma *scratch)
  361. {
  362. cleanup_page_dma(dev, scratch);
  363. }
  364. static struct i915_page_table *alloc_pt(struct drm_device *dev)
  365. {
  366. struct i915_page_table *pt;
  367. const size_t count = INTEL_INFO(dev)->gen >= 8 ?
  368. GEN8_PTES : GEN6_PTES;
  369. int ret = -ENOMEM;
  370. pt = kzalloc(sizeof(*pt), GFP_KERNEL);
  371. if (!pt)
  372. return ERR_PTR(-ENOMEM);
  373. pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
  374. GFP_KERNEL);
  375. if (!pt->used_ptes)
  376. goto fail_bitmap;
  377. ret = setup_px(dev, pt);
  378. if (ret)
  379. goto fail_page_m;
  380. return pt;
  381. fail_page_m:
  382. kfree(pt->used_ptes);
  383. fail_bitmap:
  384. kfree(pt);
  385. return ERR_PTR(ret);
  386. }
  387. static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
  388. {
  389. cleanup_px(dev, pt);
  390. kfree(pt->used_ptes);
  391. kfree(pt);
  392. }
  393. static void gen8_initialize_pt(struct i915_address_space *vm,
  394. struct i915_page_table *pt)
  395. {
  396. gen8_pte_t scratch_pte;
  397. scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
  398. I915_CACHE_LLC, true);
  399. fill_px(vm->dev, pt, scratch_pte);
  400. }
  401. static void gen6_initialize_pt(struct i915_address_space *vm,
  402. struct i915_page_table *pt)
  403. {
  404. gen6_pte_t scratch_pte;
  405. WARN_ON(vm->scratch_page.daddr == 0);
  406. scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
  407. I915_CACHE_LLC, true, 0);
  408. fill32_px(vm->dev, pt, scratch_pte);
  409. }
  410. static struct i915_page_directory *alloc_pd(struct drm_device *dev)
  411. {
  412. struct i915_page_directory *pd;
  413. int ret = -ENOMEM;
  414. pd = kzalloc(sizeof(*pd), GFP_KERNEL);
  415. if (!pd)
  416. return ERR_PTR(-ENOMEM);
  417. pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
  418. sizeof(*pd->used_pdes), GFP_KERNEL);
  419. if (!pd->used_pdes)
  420. goto fail_bitmap;
  421. ret = setup_px(dev, pd);
  422. if (ret)
  423. goto fail_page_m;
  424. return pd;
  425. fail_page_m:
  426. kfree(pd->used_pdes);
  427. fail_bitmap:
  428. kfree(pd);
  429. return ERR_PTR(ret);
  430. }
  431. static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
  432. {
  433. if (px_page(pd)) {
  434. cleanup_px(dev, pd);
  435. kfree(pd->used_pdes);
  436. kfree(pd);
  437. }
  438. }
  439. static void gen8_initialize_pd(struct i915_address_space *vm,
  440. struct i915_page_directory *pd)
  441. {
  442. gen8_pde_t scratch_pde;
  443. scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
  444. fill_px(vm->dev, pd, scratch_pde);
  445. }
  446. static int __pdp_init(struct drm_device *dev,
  447. struct i915_page_directory_pointer *pdp)
  448. {
  449. size_t pdpes = I915_PDPES_PER_PDP(dev);
  450. pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
  451. sizeof(unsigned long),
  452. GFP_KERNEL);
  453. if (!pdp->used_pdpes)
  454. return -ENOMEM;
  455. pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
  456. GFP_KERNEL);
  457. if (!pdp->page_directory) {
  458. kfree(pdp->used_pdpes);
  459. /* the PDP might be the statically allocated top level. Keep it
  460. * as clean as possible */
  461. pdp->used_pdpes = NULL;
  462. return -ENOMEM;
  463. }
  464. return 0;
  465. }
  466. static void __pdp_fini(struct i915_page_directory_pointer *pdp)
  467. {
  468. kfree(pdp->used_pdpes);
  469. kfree(pdp->page_directory);
  470. pdp->page_directory = NULL;
  471. }
  472. static struct
  473. i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
  474. {
  475. struct i915_page_directory_pointer *pdp;
  476. int ret = -ENOMEM;
  477. WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
  478. pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
  479. if (!pdp)
  480. return ERR_PTR(-ENOMEM);
  481. ret = __pdp_init(dev, pdp);
  482. if (ret)
  483. goto fail_bitmap;
  484. ret = setup_px(dev, pdp);
  485. if (ret)
  486. goto fail_page_m;
  487. return pdp;
  488. fail_page_m:
  489. __pdp_fini(pdp);
  490. fail_bitmap:
  491. kfree(pdp);
  492. return ERR_PTR(ret);
  493. }
  494. static void free_pdp(struct drm_device *dev,
  495. struct i915_page_directory_pointer *pdp)
  496. {
  497. __pdp_fini(pdp);
  498. if (USES_FULL_48BIT_PPGTT(dev)) {
  499. cleanup_px(dev, pdp);
  500. kfree(pdp);
  501. }
  502. }
  503. static void gen8_initialize_pdp(struct i915_address_space *vm,
  504. struct i915_page_directory_pointer *pdp)
  505. {
  506. gen8_ppgtt_pdpe_t scratch_pdpe;
  507. scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
  508. fill_px(vm->dev, pdp, scratch_pdpe);
  509. }
  510. static void gen8_initialize_pml4(struct i915_address_space *vm,
  511. struct i915_pml4 *pml4)
  512. {
  513. gen8_ppgtt_pml4e_t scratch_pml4e;
  514. scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
  515. I915_CACHE_LLC);
  516. fill_px(vm->dev, pml4, scratch_pml4e);
  517. }
  518. static void
  519. gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
  520. struct i915_page_directory_pointer *pdp,
  521. struct i915_page_directory *pd,
  522. int index)
  523. {
  524. gen8_ppgtt_pdpe_t *page_directorypo;
  525. if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
  526. return;
  527. page_directorypo = kmap_px(pdp);
  528. page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
  529. kunmap_px(ppgtt, page_directorypo);
  530. }
  531. static void
  532. gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
  533. struct i915_pml4 *pml4,
  534. struct i915_page_directory_pointer *pdp,
  535. int index)
  536. {
  537. gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
  538. WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
  539. pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
  540. kunmap_px(ppgtt, pagemap);
  541. }
  542. /* Broadwell Page Directory Pointer Descriptors */
  543. static int gen8_write_pdp(struct drm_i915_gem_request *req,
  544. unsigned entry,
  545. dma_addr_t addr)
  546. {
  547. struct intel_ring *ring = req->ring;
  548. struct intel_engine_cs *engine = req->engine;
  549. int ret;
  550. BUG_ON(entry >= 4);
  551. ret = intel_ring_begin(req, 6);
  552. if (ret)
  553. return ret;
  554. intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
  555. intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry));
  556. intel_ring_emit(ring, upper_32_bits(addr));
  557. intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
  558. intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry));
  559. intel_ring_emit(ring, lower_32_bits(addr));
  560. intel_ring_advance(ring);
  561. return 0;
  562. }
  563. static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
  564. struct drm_i915_gem_request *req)
  565. {
  566. int i, ret;
  567. for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
  568. const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
  569. ret = gen8_write_pdp(req, i, pd_daddr);
  570. if (ret)
  571. return ret;
  572. }
  573. return 0;
  574. }
  575. static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
  576. struct drm_i915_gem_request *req)
  577. {
  578. return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
  579. }
  580. static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
  581. struct i915_page_directory_pointer *pdp,
  582. uint64_t start,
  583. uint64_t length,
  584. gen8_pte_t scratch_pte)
  585. {
  586. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  587. gen8_pte_t *pt_vaddr;
  588. unsigned pdpe = gen8_pdpe_index(start);
  589. unsigned pde = gen8_pde_index(start);
  590. unsigned pte = gen8_pte_index(start);
  591. unsigned num_entries = length >> PAGE_SHIFT;
  592. unsigned last_pte, i;
  593. if (WARN_ON(!pdp))
  594. return;
  595. while (num_entries) {
  596. struct i915_page_directory *pd;
  597. struct i915_page_table *pt;
  598. if (WARN_ON(!pdp->page_directory[pdpe]))
  599. break;
  600. pd = pdp->page_directory[pdpe];
  601. if (WARN_ON(!pd->page_table[pde]))
  602. break;
  603. pt = pd->page_table[pde];
  604. if (WARN_ON(!px_page(pt)))
  605. break;
  606. last_pte = pte + num_entries;
  607. if (last_pte > GEN8_PTES)
  608. last_pte = GEN8_PTES;
  609. pt_vaddr = kmap_px(pt);
  610. for (i = pte; i < last_pte; i++) {
  611. pt_vaddr[i] = scratch_pte;
  612. num_entries--;
  613. }
  614. kunmap_px(ppgtt, pt_vaddr);
  615. pte = 0;
  616. if (++pde == I915_PDES) {
  617. if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
  618. break;
  619. pde = 0;
  620. }
  621. }
  622. }
  623. static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
  624. uint64_t start,
  625. uint64_t length,
  626. bool use_scratch)
  627. {
  628. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  629. gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
  630. I915_CACHE_LLC, use_scratch);
  631. if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
  632. gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
  633. scratch_pte);
  634. } else {
  635. uint64_t pml4e;
  636. struct i915_page_directory_pointer *pdp;
  637. gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
  638. gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
  639. scratch_pte);
  640. }
  641. }
  642. }
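/*
 * gen8_ppgtt_insert_pte_entries() below fills the 4KB PTE slots of one page
 * table (GEN8_PTES entries), then advances to the next PDE and, once a page
 * directory is exhausted, to the next PDPE. The sg iterator supplies the
 * backing pages while the start offset selects the initial pdpe/pde/pte
 * indices.
 */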
  643. static void
  644. gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
  645. struct i915_page_directory_pointer *pdp,
  646. struct sg_page_iter *sg_iter,
  647. uint64_t start,
  648. enum i915_cache_level cache_level)
  649. {
  650. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  651. gen8_pte_t *pt_vaddr;
  652. unsigned pdpe = gen8_pdpe_index(start);
  653. unsigned pde = gen8_pde_index(start);
  654. unsigned pte = gen8_pte_index(start);
  655. pt_vaddr = NULL;
  656. while (__sg_page_iter_next(sg_iter)) {
  657. if (pt_vaddr == NULL) {
  658. struct i915_page_directory *pd = pdp->page_directory[pdpe];
  659. struct i915_page_table *pt = pd->page_table[pde];
  660. pt_vaddr = kmap_px(pt);
  661. }
  662. pt_vaddr[pte] =
  663. gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
  664. cache_level, true);
  665. if (++pte == GEN8_PTES) {
  666. kunmap_px(ppgtt, pt_vaddr);
  667. pt_vaddr = NULL;
  668. if (++pde == I915_PDES) {
  669. if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
  670. break;
  671. pde = 0;
  672. }
  673. pte = 0;
  674. }
  675. }
  676. if (pt_vaddr)
  677. kunmap_px(ppgtt, pt_vaddr);
  678. }
  679. static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
  680. struct sg_table *pages,
  681. uint64_t start,
  682. enum i915_cache_level cache_level,
  683. u32 unused)
  684. {
  685. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  686. struct sg_page_iter sg_iter;
  687. __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
  688. if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
  689. gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
  690. cache_level);
  691. } else {
  692. struct i915_page_directory_pointer *pdp;
  693. uint64_t pml4e;
  694. uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
  695. gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
  696. gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
  697. start, cache_level);
  698. }
  699. }
  700. }
  701. static void gen8_free_page_tables(struct drm_device *dev,
  702. struct i915_page_directory *pd)
  703. {
  704. int i;
  705. if (!px_page(pd))
  706. return;
  707. for_each_set_bit(i, pd->used_pdes, I915_PDES) {
  708. if (WARN_ON(!pd->page_table[i]))
  709. continue;
  710. free_pt(dev, pd->page_table[i]);
  711. pd->page_table[i] = NULL;
  712. }
  713. }
  714. static int gen8_init_scratch(struct i915_address_space *vm)
  715. {
  716. struct drm_device *dev = vm->dev;
  717. int ret;
  718. ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
  719. if (ret)
  720. return ret;
  721. vm->scratch_pt = alloc_pt(dev);
  722. if (IS_ERR(vm->scratch_pt)) {
  723. ret = PTR_ERR(vm->scratch_pt);
  724. goto free_scratch_page;
  725. }
  726. vm->scratch_pd = alloc_pd(dev);
  727. if (IS_ERR(vm->scratch_pd)) {
  728. ret = PTR_ERR(vm->scratch_pd);
  729. goto free_pt;
  730. }
  731. if (USES_FULL_48BIT_PPGTT(dev)) {
  732. vm->scratch_pdp = alloc_pdp(dev);
  733. if (IS_ERR(vm->scratch_pdp)) {
  734. ret = PTR_ERR(vm->scratch_pdp);
  735. goto free_pd;
  736. }
  737. }
  738. gen8_initialize_pt(vm, vm->scratch_pt);
  739. gen8_initialize_pd(vm, vm->scratch_pd);
  740. if (USES_FULL_48BIT_PPGTT(dev))
  741. gen8_initialize_pdp(vm, vm->scratch_pdp);
  742. return 0;
  743. free_pd:
  744. free_pd(dev, vm->scratch_pd);
  745. free_pt:
  746. free_pt(dev, vm->scratch_pt);
  747. free_scratch_page:
  748. cleanup_scratch_page(dev, &vm->scratch_page);
  749. return ret;
  750. }
  751. static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
  752. {
  753. enum vgt_g2v_type msg;
  754. struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
  755. int i;
  756. if (USES_FULL_48BIT_PPGTT(dev_priv)) {
  757. u64 daddr = px_dma(&ppgtt->pml4);
  758. I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
  759. I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
  760. msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
  761. VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
  762. } else {
  763. for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
  764. u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
  765. I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
  766. I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
  767. }
  768. msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
  769. VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
  770. }
  771. I915_WRITE(vgtif_reg(g2v_notify), msg);
  772. return 0;
  773. }
  774. static void gen8_free_scratch(struct i915_address_space *vm)
  775. {
  776. struct drm_device *dev = vm->dev;
  777. if (USES_FULL_48BIT_PPGTT(dev))
  778. free_pdp(dev, vm->scratch_pdp);
  779. free_pd(dev, vm->scratch_pd);
  780. free_pt(dev, vm->scratch_pt);
  781. cleanup_scratch_page(dev, &vm->scratch_page);
  782. }
  783. static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
  784. struct i915_page_directory_pointer *pdp)
  785. {
  786. int i;
  787. for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
  788. if (WARN_ON(!pdp->page_directory[i]))
  789. continue;
  790. gen8_free_page_tables(dev, pdp->page_directory[i]);
  791. free_pd(dev, pdp->page_directory[i]);
  792. }
  793. free_pdp(dev, pdp);
  794. }
  795. static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
  796. {
  797. int i;
  798. for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
  799. if (WARN_ON(!ppgtt->pml4.pdps[i]))
  800. continue;
  801. gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
  802. }
  803. cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
  804. }
  805. static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
  806. {
  807. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  808. if (intel_vgpu_active(to_i915(vm->dev)))
  809. gen8_ppgtt_notify_vgt(ppgtt, false);
  810. if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
  811. gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
  812. else
  813. gen8_ppgtt_cleanup_4lvl(ppgtt);
  814. gen8_free_scratch(vm);
  815. }
  816. /**
  817. * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
  818. * @vm: Master vm structure.
  819. * @pd: Page directory for this address range.
  820. * @start: Starting virtual address to begin allocations.
  821. * @length: Size of the allocations.
  822. * @new_pts: Bitmap set by function with new allocations. Likely used by the
  823. * caller to free on error.
  824. *
  825. * Allocate the required number of page tables. Extremely similar to
  826. * gen8_ppgtt_alloc_page_directories(). The main difference is that here we are limited by
  827. * the page directory boundary (instead of the page directory pointer). That
  828. * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
  829. * possible, and likely, that the caller will need to make multiple calls to this
  830. * function to achieve the appropriate allocation.
  831. *
  832. * Return: 0 if success; negative error code otherwise.
  833. */
  834. static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
  835. struct i915_page_directory *pd,
  836. uint64_t start,
  837. uint64_t length,
  838. unsigned long *new_pts)
  839. {
  840. struct drm_device *dev = vm->dev;
  841. struct i915_page_table *pt;
  842. uint32_t pde;
  843. gen8_for_each_pde(pt, pd, start, length, pde) {
  844. /* Don't reallocate page tables */
  845. if (test_bit(pde, pd->used_pdes)) {
  846. /* Scratch is never allocated this way */
  847. WARN_ON(pt == vm->scratch_pt);
  848. continue;
  849. }
  850. pt = alloc_pt(dev);
  851. if (IS_ERR(pt))
  852. goto unwind_out;
  853. gen8_initialize_pt(vm, pt);
  854. pd->page_table[pde] = pt;
  855. __set_bit(pde, new_pts);
  856. trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
  857. }
  858. return 0;
  859. unwind_out:
  860. for_each_set_bit(pde, new_pts, I915_PDES)
  861. free_pt(dev, pd->page_table[pde]);
  862. return -ENOMEM;
  863. }
  864. /**
  865. * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
  866. * @vm: Master vm structure.
  867. * @pdp: Page directory pointer for this address range.
  868. * @start: Starting virtual address to begin allocations.
  869. * @length: Size of the allocations.
  870. * @new_pds: Bitmap set by function with new allocations. Likely used by the
  871. * caller to free on error.
  872. *
  873. * Allocate the required number of page directories starting at the pdpe index of
  874. * @start, and ending at the pdpe index of @start + @length. This function will skip
  875. * over already allocated page directories within the range, and only allocate
  876. * new ones, setting the appropriate pointer within the pdp as well as the
  877. * correct position in the bitmap @new_pds.
  878. *
  879. * The function will only allocate the pages within the range for a given page
  880. * directory pointer. In other words, if @start + @length straddles a virtually
  881. * addressed PDP boundary (512GB for 4k pages), there will be more allocations
  882. * required by the caller. This is not currently possible, and the BUG in the
  883. * code will prevent it.
  884. *
  885. * Return: 0 if success; negative error code otherwise.
  886. */
  887. static int
  888. gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
  889. struct i915_page_directory_pointer *pdp,
  890. uint64_t start,
  891. uint64_t length,
  892. unsigned long *new_pds)
  893. {
  894. struct drm_device *dev = vm->dev;
  895. struct i915_page_directory *pd;
  896. uint32_t pdpe;
  897. uint32_t pdpes = I915_PDPES_PER_PDP(dev);
  898. WARN_ON(!bitmap_empty(new_pds, pdpes));
  899. gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
  900. if (test_bit(pdpe, pdp->used_pdpes))
  901. continue;
  902. pd = alloc_pd(dev);
  903. if (IS_ERR(pd))
  904. goto unwind_out;
  905. gen8_initialize_pd(vm, pd);
  906. pdp->page_directory[pdpe] = pd;
  907. __set_bit(pdpe, new_pds);
  908. trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
  909. }
  910. return 0;
  911. unwind_out:
  912. for_each_set_bit(pdpe, new_pds, pdpes)
  913. free_pd(dev, pdp->page_directory[pdpe]);
  914. return -ENOMEM;
  915. }
  916. /**
  917. * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
  918. * @vm: Master vm structure.
  919. * @pml4: Page map level 4 for this address range.
  920. * @start: Starting virtual address to begin allocations.
  921. * @length: Size of the allocations.
  922. * @new_pdps: Bitmap set by function with new allocations. Likely used by the
  923. * caller to free on error.
  924. *
  925. * Allocate the required number of page directory pointers. Extremely similar to
  926. * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
  927. * The main difference is here we are limited by the pml4 boundary (instead of
  928. * the page directory pointer).
  929. *
  930. * Return: 0 if success; negative error code otherwise.
  931. */
  932. static int
  933. gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
  934. struct i915_pml4 *pml4,
  935. uint64_t start,
  936. uint64_t length,
  937. unsigned long *new_pdps)
  938. {
  939. struct drm_device *dev = vm->dev;
  940. struct i915_page_directory_pointer *pdp;
  941. uint32_t pml4e;
  942. WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
  943. gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
  944. if (!test_bit(pml4e, pml4->used_pml4es)) {
  945. pdp = alloc_pdp(dev);
  946. if (IS_ERR(pdp))
  947. goto unwind_out;
  948. gen8_initialize_pdp(vm, pdp);
  949. pml4->pdps[pml4e] = pdp;
  950. __set_bit(pml4e, new_pdps);
  951. trace_i915_page_directory_pointer_entry_alloc(vm,
  952. pml4e,
  953. start,
  954. GEN8_PML4E_SHIFT);
  955. }
  956. }
  957. return 0;
  958. unwind_out:
  959. for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
  960. free_pdp(dev, pml4->pdps[pml4e]);
  961. return -ENOMEM;
  962. }
  963. static void
  964. free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
  965. {
  966. kfree(new_pts);
  967. kfree(new_pds);
  968. }
  969. /* Fills in the page directory bitmap, and the array of page table bitmaps. Both
  970. * of these are sized according to the number of PDPEs in the system.
  971. */
  972. static
  973. int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
  974. unsigned long **new_pts,
  975. uint32_t pdpes)
  976. {
  977. unsigned long *pds;
  978. unsigned long *pts;
  979. pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
  980. if (!pds)
  981. return -ENOMEM;
  982. pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
  983. GFP_TEMPORARY);
  984. if (!pts)
  985. goto err_out;
  986. *new_pds = pds;
  987. *new_pts = pts;
  988. return 0;
  989. err_out:
  990. free_gen8_temp_bitmaps(pds, pts);
  991. return -ENOMEM;
  992. }
  993. /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
  994. * the page table structures, we mark them dirty so that
  995. * context switching/execlist queuing code takes extra steps
  996. * to ensure that tlbs are flushed.
  997. */
  998. static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
  999. {
  1000. ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
  1001. }
  1002. static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
  1003. struct i915_page_directory_pointer *pdp,
  1004. uint64_t start,
  1005. uint64_t length)
  1006. {
  1007. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  1008. unsigned long *new_page_dirs, *new_page_tables;
  1009. struct drm_device *dev = vm->dev;
  1010. struct i915_page_directory *pd;
  1011. const uint64_t orig_start = start;
  1012. const uint64_t orig_length = length;
  1013. uint32_t pdpe;
  1014. uint32_t pdpes = I915_PDPES_PER_PDP(dev);
  1015. int ret;
  1016. /* Wrap is never okay since we can only represent 48b, and we don't
  1017. * actually use the other side of the canonical address space.
  1018. */
  1019. if (WARN_ON(start + length < start))
  1020. return -ENODEV;
  1021. if (WARN_ON(start + length > vm->total))
  1022. return -ENODEV;
  1023. ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
  1024. if (ret)
  1025. return ret;
  1026. /* Do the allocations first so we can easily bail out */
  1027. ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
  1028. new_page_dirs);
  1029. if (ret) {
  1030. free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
  1031. return ret;
  1032. }
  1033. /* For every page directory referenced, allocate page tables */
  1034. gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
  1035. ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
  1036. new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
  1037. if (ret)
  1038. goto err_out;
  1039. }
  1040. start = orig_start;
  1041. length = orig_length;
  1042. /* Allocations have completed successfully, so set the bitmaps, and do
  1043. * the mappings. */
  1044. gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
  1045. gen8_pde_t *const page_directory = kmap_px(pd);
  1046. struct i915_page_table *pt;
  1047. uint64_t pd_len = length;
  1048. uint64_t pd_start = start;
  1049. uint32_t pde;
  1051. /* Every pd should be allocated; we just did that above. */
  1051. WARN_ON(!pd);
  1052. gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
  1053. /* Same reasoning as pd */
  1054. WARN_ON(!pt);
  1055. WARN_ON(!pd_len);
  1056. WARN_ON(!gen8_pte_count(pd_start, pd_len));
  1057. /* Set our used ptes within the page table */
  1058. bitmap_set(pt->used_ptes,
  1059. gen8_pte_index(pd_start),
  1060. gen8_pte_count(pd_start, pd_len));
  1061. /* Our pde is now pointing to the pagetable, pt */
  1062. __set_bit(pde, pd->used_pdes);
  1063. /* Map the PDE to the page table */
  1064. page_directory[pde] = gen8_pde_encode(px_dma(pt),
  1065. I915_CACHE_LLC);
  1066. trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
  1067. gen8_pte_index(start),
  1068. gen8_pte_count(start, length),
  1069. GEN8_PTES);
  1070. /* NB: We haven't yet mapped ptes to pages. At this
  1071. * point we're still relying on insert_entries() */
  1072. }
  1073. kunmap_px(ppgtt, page_directory);
  1074. __set_bit(pdpe, pdp->used_pdpes);
  1075. gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
  1076. }
  1077. free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
  1078. mark_tlbs_dirty(ppgtt);
  1079. return 0;
  1080. err_out:
  1081. while (pdpe--) {
  1082. unsigned long temp;
  1083. for_each_set_bit(temp, new_page_tables + pdpe *
  1084. BITS_TO_LONGS(I915_PDES), I915_PDES)
  1085. free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
  1086. }
  1087. for_each_set_bit(pdpe, new_page_dirs, pdpes)
  1088. free_pd(dev, pdp->page_directory[pdpe]);
  1089. free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
  1090. mark_tlbs_dirty(ppgtt);
  1091. return ret;
  1092. }
  1093. static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
  1094. struct i915_pml4 *pml4,
  1095. uint64_t start,
  1096. uint64_t length)
  1097. {
  1098. DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
  1099. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  1100. struct i915_page_directory_pointer *pdp;
  1101. uint64_t pml4e;
  1102. int ret = 0;
  1103. /* Do the pml4 allocations first, so we don't need to track the newly
  1104. * allocated tables below the pdp */
  1105. bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
  1106. /* The page directory and page table allocations are done in the shared 3
  1107. * and 4 level code. Just allocate the pdps.
  1108. */
  1109. ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
  1110. new_pdps);
  1111. if (ret)
  1112. return ret;
  1113. WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
  1114. "The allocation has spanned more than 512GB. "
  1115. "It is highly likely this is incorrect.");
  1116. gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
  1117. WARN_ON(!pdp);
  1118. ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
  1119. if (ret)
  1120. goto err_out;
  1121. gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
  1122. }
  1123. bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
  1124. GEN8_PML4ES_PER_PML4);
  1125. return 0;
  1126. err_out:
  1127. for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
  1128. gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
  1129. return ret;
  1130. }
  1131. static int gen8_alloc_va_range(struct i915_address_space *vm,
  1132. uint64_t start, uint64_t length)
  1133. {
  1134. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  1135. if (USES_FULL_48BIT_PPGTT(vm->dev))
  1136. return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
  1137. else
  1138. return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
  1139. }
  1140. static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
  1141. uint64_t start, uint64_t length,
  1142. gen8_pte_t scratch_pte,
  1143. struct seq_file *m)
  1144. {
  1145. struct i915_page_directory *pd;
  1146. uint32_t pdpe;
  1147. gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
  1148. struct i915_page_table *pt;
  1149. uint64_t pd_len = length;
  1150. uint64_t pd_start = start;
  1151. uint32_t pde;
  1152. if (!test_bit(pdpe, pdp->used_pdpes))
  1153. continue;
  1154. seq_printf(m, "\tPDPE #%d\n", pdpe);
  1155. gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
  1156. uint32_t pte;
  1157. gen8_pte_t *pt_vaddr;
  1158. if (!test_bit(pde, pd->used_pdes))
  1159. continue;
  1160. pt_vaddr = kmap_px(pt);
  1161. for (pte = 0; pte < GEN8_PTES; pte += 4) {
  1162. uint64_t va =
  1163. (pdpe << GEN8_PDPE_SHIFT) |
  1164. (pde << GEN8_PDE_SHIFT) |
  1165. (pte << GEN8_PTE_SHIFT);
  1166. int i;
  1167. bool found = false;
  1168. for (i = 0; i < 4; i++)
  1169. if (pt_vaddr[pte + i] != scratch_pte)
  1170. found = true;
  1171. if (!found)
  1172. continue;
  1173. seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
  1174. for (i = 0; i < 4; i++) {
  1175. if (pt_vaddr[pte + i] != scratch_pte)
  1176. seq_printf(m, " %llx", pt_vaddr[pte + i]);
  1177. else
  1178. seq_puts(m, " SCRATCH ");
  1179. }
  1180. seq_puts(m, "\n");
  1181. }
  1182. /* don't use kunmap_px, it could trigger
  1183. * an unnecessary flush.
  1184. */
  1185. kunmap_atomic(pt_vaddr);
  1186. }
  1187. }
  1188. }
  1189. static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
  1190. {
  1191. struct i915_address_space *vm = &ppgtt->base;
  1192. uint64_t start = ppgtt->base.start;
  1193. uint64_t length = ppgtt->base.total;
  1194. gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
  1195. I915_CACHE_LLC, true);
  1196. if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
  1197. gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
  1198. } else {
  1199. uint64_t pml4e;
  1200. struct i915_pml4 *pml4 = &ppgtt->pml4;
  1201. struct i915_page_directory_pointer *pdp;
  1202. gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
  1203. if (!test_bit(pml4e, pml4->used_pml4es))
  1204. continue;
  1205. seq_printf(m, " PML4E #%llu\n", pml4e);
  1206. gen8_dump_pdp(pdp, start, length, scratch_pte, m);
  1207. }
  1208. }
  1209. }
  1210. static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
  1211. {
  1212. unsigned long *new_page_dirs, *new_page_tables;
1213. uint32_t pdpes = I915_PDPES_PER_PDP(ppgtt->base.dev);
  1214. int ret;
1215. /* We allocate temp bitmaps for page tables for no gain,
1216. * but as this is for init only, let's keep things simple.
  1217. */
  1218. ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
  1219. if (ret)
  1220. return ret;
  1221. /* Allocate for all pdps regardless of how the ppgtt
  1222. * was defined.
  1223. */
  1224. ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
  1225. 0, 1ULL << 32,
  1226. new_page_dirs);
  1227. if (!ret)
  1228. *ppgtt->pdp.used_pdpes = *new_page_dirs;
  1229. free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
  1230. return ret;
  1231. }
  1232. /*
1233. * GEN8 legacy ppgtt programming is accomplished through a max of 4 PDP registers
  1234. * with a net effect resembling a 2-level page table in normal x86 terms. Each
1235. * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB of legacy 32b address
  1236. * space.
  1237. *
  1238. */
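/*
 * Editorial note, a worked version of the arithmetic above (not from the
 * original source):
 *   512 PTEs per page table * 4096 bytes mapped per PTE =  2 MB per page table
 *   512 PDEs per page directory * 2 MB                  =  1 GB per PDP entry
 *     4 PDP entries * 1 GB                              =  4 GB of 32b address space
 */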
  1239. static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
  1240. {
  1241. int ret;
  1242. ret = gen8_init_scratch(&ppgtt->base);
  1243. if (ret)
  1244. return ret;
  1245. ppgtt->base.start = 0;
  1246. ppgtt->base.cleanup = gen8_ppgtt_cleanup;
  1247. ppgtt->base.allocate_va_range = gen8_alloc_va_range;
  1248. ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
  1249. ppgtt->base.clear_range = gen8_ppgtt_clear_range;
  1250. ppgtt->base.unbind_vma = ppgtt_unbind_vma;
  1251. ppgtt->base.bind_vma = ppgtt_bind_vma;
  1252. ppgtt->debug_dump = gen8_dump_ppgtt;
  1253. if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
  1254. ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
  1255. if (ret)
  1256. goto free_scratch;
  1257. gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
  1258. ppgtt->base.total = 1ULL << 48;
  1259. ppgtt->switch_mm = gen8_48b_mm_switch;
  1260. } else {
  1261. ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
  1262. if (ret)
  1263. goto free_scratch;
  1264. ppgtt->base.total = 1ULL << 32;
  1265. ppgtt->switch_mm = gen8_legacy_mm_switch;
  1266. trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
  1267. 0, 0,
  1268. GEN8_PML4E_SHIFT);
  1269. if (intel_vgpu_active(to_i915(ppgtt->base.dev))) {
  1270. ret = gen8_preallocate_top_level_pdps(ppgtt);
  1271. if (ret)
  1272. goto free_scratch;
  1273. }
  1274. }
  1275. if (intel_vgpu_active(to_i915(ppgtt->base.dev)))
  1276. gen8_ppgtt_notify_vgt(ppgtt, true);
  1277. return 0;
  1278. free_scratch:
  1279. gen8_free_scratch(&ppgtt->base);
  1280. return ret;
  1281. }
  1282. static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
  1283. {
  1284. struct i915_address_space *vm = &ppgtt->base;
  1285. struct i915_page_table *unused;
  1286. gen6_pte_t scratch_pte;
  1287. uint32_t pd_entry;
  1288. uint32_t pte, pde;
  1289. uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
  1290. scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
  1291. I915_CACHE_LLC, true, 0);
  1292. gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
  1293. u32 expected;
  1294. gen6_pte_t *pt_vaddr;
  1295. const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
  1296. pd_entry = readl(ppgtt->pd_addr + pde);
  1297. expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
  1298. if (pd_entry != expected)
  1299. seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
  1300. pde,
  1301. pd_entry,
  1302. expected);
  1303. seq_printf(m, "\tPDE: %x\n", pd_entry);
  1304. pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
  1305. for (pte = 0; pte < GEN6_PTES; pte+=4) {
  1306. unsigned long va =
  1307. (pde * PAGE_SIZE * GEN6_PTES) +
  1308. (pte * PAGE_SIZE);
  1309. int i;
  1310. bool found = false;
  1311. for (i = 0; i < 4; i++)
  1312. if (pt_vaddr[pte + i] != scratch_pte)
  1313. found = true;
  1314. if (!found)
  1315. continue;
  1316. seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
  1317. for (i = 0; i < 4; i++) {
  1318. if (pt_vaddr[pte + i] != scratch_pte)
  1319. seq_printf(m, " %08x", pt_vaddr[pte + i]);
  1320. else
  1321. seq_puts(m, " SCRATCH ");
  1322. }
  1323. seq_puts(m, "\n");
  1324. }
  1325. kunmap_px(ppgtt, pt_vaddr);
  1326. }
  1327. }
1328. /* Write the PDE at index @pde in the page directory @pd so that it points at the page table @pt */
  1329. static void gen6_write_pde(struct i915_page_directory *pd,
  1330. const int pde, struct i915_page_table *pt)
  1331. {
  1332. /* Caller needs to make sure the write completes if necessary */
  1333. struct i915_hw_ppgtt *ppgtt =
  1334. container_of(pd, struct i915_hw_ppgtt, pd);
  1335. u32 pd_entry;
  1336. pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
  1337. pd_entry |= GEN6_PDE_VALID;
  1338. writel(pd_entry, ppgtt->pd_addr + pde);
  1339. }
1340. /* Write out a page directory entry for each page table found in the ppgtt
1341. * structure, at successive PDE slots. */
  1342. static void gen6_write_page_range(struct drm_i915_private *dev_priv,
  1343. struct i915_page_directory *pd,
  1344. uint32_t start, uint32_t length)
  1345. {
  1346. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  1347. struct i915_page_table *pt;
  1348. uint32_t pde;
  1349. gen6_for_each_pde(pt, pd, start, length, pde)
  1350. gen6_write_pde(pd, pde, pt);
  1351. /* Make sure write is complete before other code can use this page
1352. * table. Also required for WC mapped PTEs */
  1353. readl(ggtt->gsm);
  1354. }
  1355. static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
  1356. {
  1357. BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
  1358. return (ppgtt->pd.base.ggtt_offset / 64) << 16;
  1359. }
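/*
 * Editorial note: the BUG_ON above guarantees a 64-byte aligned offset, so
 * (ggtt_offset / 64) << 16 is just ggtt_offset << 10; e.g. a page directory
 * at GGTT offset 0x1000 encodes to 0x40 << 16 = 0x400000. This packed value
 * is what the mm_switch functions below write to RING_PP_DIR_BASE.
 */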
  1360. static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
  1361. struct drm_i915_gem_request *req)
  1362. {
  1363. struct intel_ring *ring = req->ring;
  1364. struct intel_engine_cs *engine = req->engine;
  1365. int ret;
  1366. /* NB: TLBs must be flushed and invalidated before a switch */
  1367. ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
  1368. if (ret)
  1369. return ret;
  1370. ret = intel_ring_begin(req, 6);
  1371. if (ret)
  1372. return ret;
  1373. intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
  1374. intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine));
  1375. intel_ring_emit(ring, PP_DIR_DCLV_2G);
  1376. intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine));
  1377. intel_ring_emit(ring, get_pd_offset(ppgtt));
  1378. intel_ring_emit(ring, MI_NOOP);
  1379. intel_ring_advance(ring);
  1380. return 0;
  1381. }
  1382. static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
  1383. struct drm_i915_gem_request *req)
  1384. {
  1385. struct intel_ring *ring = req->ring;
  1386. struct intel_engine_cs *engine = req->engine;
  1387. int ret;
  1388. /* NB: TLBs must be flushed and invalidated before a switch */
  1389. ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
  1390. if (ret)
  1391. return ret;
  1392. ret = intel_ring_begin(req, 6);
  1393. if (ret)
  1394. return ret;
  1395. intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
  1396. intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine));
  1397. intel_ring_emit(ring, PP_DIR_DCLV_2G);
  1398. intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine));
  1399. intel_ring_emit(ring, get_pd_offset(ppgtt));
  1400. intel_ring_emit(ring, MI_NOOP);
  1401. intel_ring_advance(ring);
  1402. /* XXX: RCS is the only one to auto invalidate the TLBs? */
  1403. if (engine->id != RCS) {
  1404. ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
  1405. if (ret)
  1406. return ret;
  1407. }
  1408. return 0;
  1409. }
  1410. static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
  1411. struct drm_i915_gem_request *req)
  1412. {
  1413. struct intel_engine_cs *engine = req->engine;
  1414. struct drm_i915_private *dev_priv = req->i915;
  1415. I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
  1416. I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
  1417. return 0;
  1418. }
  1419. static void gen8_ppgtt_enable(struct drm_device *dev)
  1420. {
  1421. struct drm_i915_private *dev_priv = to_i915(dev);
  1422. struct intel_engine_cs *engine;
  1423. for_each_engine(engine, dev_priv) {
  1424. u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
  1425. I915_WRITE(RING_MODE_GEN7(engine),
  1426. _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
  1427. }
  1428. }
  1429. static void gen7_ppgtt_enable(struct drm_device *dev)
  1430. {
  1431. struct drm_i915_private *dev_priv = to_i915(dev);
  1432. struct intel_engine_cs *engine;
  1433. uint32_t ecochk, ecobits;
  1434. ecobits = I915_READ(GAC_ECO_BITS);
  1435. I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
  1436. ecochk = I915_READ(GAM_ECOCHK);
  1437. if (IS_HASWELL(dev)) {
  1438. ecochk |= ECOCHK_PPGTT_WB_HSW;
  1439. } else {
  1440. ecochk |= ECOCHK_PPGTT_LLC_IVB;
  1441. ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
  1442. }
  1443. I915_WRITE(GAM_ECOCHK, ecochk);
  1444. for_each_engine(engine, dev_priv) {
  1445. /* GFX_MODE is per-ring on gen7+ */
  1446. I915_WRITE(RING_MODE_GEN7(engine),
  1447. _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
  1448. }
  1449. }
  1450. static void gen6_ppgtt_enable(struct drm_device *dev)
  1451. {
  1452. struct drm_i915_private *dev_priv = to_i915(dev);
  1453. uint32_t ecochk, gab_ctl, ecobits;
  1454. ecobits = I915_READ(GAC_ECO_BITS);
  1455. I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
  1456. ECOBITS_PPGTT_CACHE64B);
  1457. gab_ctl = I915_READ(GAB_CTL);
  1458. I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
  1459. ecochk = I915_READ(GAM_ECOCHK);
  1460. I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
  1461. I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
  1462. }
1463. /* PPGTT support for Sandybridge/Gen6 and later */
  1464. static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
  1465. uint64_t start,
  1466. uint64_t length,
  1467. bool use_scratch)
  1468. {
  1469. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  1470. gen6_pte_t *pt_vaddr, scratch_pte;
  1471. unsigned first_entry = start >> PAGE_SHIFT;
  1472. unsigned num_entries = length >> PAGE_SHIFT;
  1473. unsigned act_pt = first_entry / GEN6_PTES;
  1474. unsigned first_pte = first_entry % GEN6_PTES;
  1475. unsigned last_pte, i;
  1476. scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
  1477. I915_CACHE_LLC, true, 0);
  1478. while (num_entries) {
  1479. last_pte = first_pte + num_entries;
  1480. if (last_pte > GEN6_PTES)
  1481. last_pte = GEN6_PTES;
  1482. pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
  1483. for (i = first_pte; i < last_pte; i++)
  1484. pt_vaddr[i] = scratch_pte;
  1485. kunmap_px(ppgtt, pt_vaddr);
  1486. num_entries -= last_pte - first_pte;
  1487. first_pte = 0;
  1488. act_pt++;
  1489. }
  1490. }
  1491. static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
  1492. struct sg_table *pages,
  1493. uint64_t start,
  1494. enum i915_cache_level cache_level, u32 flags)
  1495. {
  1496. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  1497. unsigned first_entry = start >> PAGE_SHIFT;
  1498. unsigned act_pt = first_entry / GEN6_PTES;
  1499. unsigned act_pte = first_entry % GEN6_PTES;
  1500. gen6_pte_t *pt_vaddr = NULL;
  1501. struct sgt_iter sgt_iter;
  1502. dma_addr_t addr;
  1503. for_each_sgt_dma(addr, sgt_iter, pages) {
  1504. if (pt_vaddr == NULL)
  1505. pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
  1506. pt_vaddr[act_pte] =
  1507. vm->pte_encode(addr, cache_level, true, flags);
  1508. if (++act_pte == GEN6_PTES) {
  1509. kunmap_px(ppgtt, pt_vaddr);
  1510. pt_vaddr = NULL;
  1511. act_pt++;
  1512. act_pte = 0;
  1513. }
  1514. }
  1515. if (pt_vaddr)
  1516. kunmap_px(ppgtt, pt_vaddr);
  1517. }
  1518. static int gen6_alloc_va_range(struct i915_address_space *vm,
  1519. uint64_t start_in, uint64_t length_in)
  1520. {
  1521. DECLARE_BITMAP(new_page_tables, I915_PDES);
  1522. struct drm_device *dev = vm->dev;
  1523. struct drm_i915_private *dev_priv = to_i915(dev);
  1524. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  1525. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  1526. struct i915_page_table *pt;
  1527. uint32_t start, length, start_save, length_save;
  1528. uint32_t pde;
  1529. int ret;
  1530. if (WARN_ON(start_in + length_in > ppgtt->base.total))
  1531. return -ENODEV;
  1532. start = start_save = start_in;
  1533. length = length_save = length_in;
  1534. bitmap_zero(new_page_tables, I915_PDES);
  1535. /* The allocation is done in two stages so that we can bail out with
1536. * a minimal amount of pain. The first stage finds new page tables that
1537. * need allocation. The second stage marks the used PTEs within the page
  1538. * tables.
  1539. */
  1540. gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
  1541. if (pt != vm->scratch_pt) {
  1542. WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
  1543. continue;
  1544. }
  1545. /* We've already allocated a page table */
  1546. WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
  1547. pt = alloc_pt(dev);
  1548. if (IS_ERR(pt)) {
  1549. ret = PTR_ERR(pt);
  1550. goto unwind_out;
  1551. }
  1552. gen6_initialize_pt(vm, pt);
  1553. ppgtt->pd.page_table[pde] = pt;
  1554. __set_bit(pde, new_page_tables);
  1555. trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
  1556. }
  1557. start = start_save;
  1558. length = length_save;
  1559. gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
  1560. DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
  1561. bitmap_zero(tmp_bitmap, GEN6_PTES);
  1562. bitmap_set(tmp_bitmap, gen6_pte_index(start),
  1563. gen6_pte_count(start, length));
  1564. if (__test_and_clear_bit(pde, new_page_tables))
  1565. gen6_write_pde(&ppgtt->pd, pde, pt);
  1566. trace_i915_page_table_entry_map(vm, pde, pt,
  1567. gen6_pte_index(start),
  1568. gen6_pte_count(start, length),
  1569. GEN6_PTES);
  1570. bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
  1571. GEN6_PTES);
  1572. }
  1573. WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
  1574. /* Make sure write is complete before other code can use this page
1575. * table. Also required for WC mapped PTEs */
  1576. readl(ggtt->gsm);
  1577. mark_tlbs_dirty(ppgtt);
  1578. return 0;
  1579. unwind_out:
  1580. for_each_set_bit(pde, new_page_tables, I915_PDES) {
  1581. struct i915_page_table *pt = ppgtt->pd.page_table[pde];
  1582. ppgtt->pd.page_table[pde] = vm->scratch_pt;
  1583. free_pt(vm->dev, pt);
  1584. }
  1585. mark_tlbs_dirty(ppgtt);
  1586. return ret;
  1587. }
  1588. static int gen6_init_scratch(struct i915_address_space *vm)
  1589. {
  1590. struct drm_device *dev = vm->dev;
  1591. int ret;
  1592. ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
  1593. if (ret)
  1594. return ret;
  1595. vm->scratch_pt = alloc_pt(dev);
  1596. if (IS_ERR(vm->scratch_pt)) {
  1597. cleanup_scratch_page(dev, &vm->scratch_page);
  1598. return PTR_ERR(vm->scratch_pt);
  1599. }
  1600. gen6_initialize_pt(vm, vm->scratch_pt);
  1601. return 0;
  1602. }
  1603. static void gen6_free_scratch(struct i915_address_space *vm)
  1604. {
  1605. struct drm_device *dev = vm->dev;
  1606. free_pt(dev, vm->scratch_pt);
  1607. cleanup_scratch_page(dev, &vm->scratch_page);
  1608. }
  1609. static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
  1610. {
  1611. struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
  1612. struct i915_page_directory *pd = &ppgtt->pd;
  1613. struct drm_device *dev = vm->dev;
  1614. struct i915_page_table *pt;
  1615. uint32_t pde;
  1616. drm_mm_remove_node(&ppgtt->node);
  1617. gen6_for_all_pdes(pt, pd, pde)
  1618. if (pt != vm->scratch_pt)
  1619. free_pt(dev, pt);
  1620. gen6_free_scratch(vm);
  1621. }
  1622. static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
  1623. {
  1624. struct i915_address_space *vm = &ppgtt->base;
  1625. struct drm_device *dev = ppgtt->base.dev;
  1626. struct drm_i915_private *dev_priv = to_i915(dev);
  1627. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  1628. bool retried = false;
  1629. int ret;
1630. /* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
  1631. * allocator works in address space sizes, so it's multiplied by page
  1632. * size. We allocate at the top of the GTT to avoid fragmentation.
  1633. */
  1634. BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
  1635. ret = gen6_init_scratch(vm);
  1636. if (ret)
  1637. return ret;
  1638. alloc:
  1639. ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
  1640. &ppgtt->node, GEN6_PD_SIZE,
  1641. GEN6_PD_ALIGN, 0,
  1642. 0, ggtt->base.total,
  1643. DRM_MM_TOPDOWN);
  1644. if (ret == -ENOSPC && !retried) {
  1645. ret = i915_gem_evict_something(&ggtt->base,
  1646. GEN6_PD_SIZE, GEN6_PD_ALIGN,
  1647. I915_CACHE_NONE,
  1648. 0, ggtt->base.total,
  1649. 0);
  1650. if (ret)
  1651. goto err_out;
  1652. retried = true;
  1653. goto alloc;
  1654. }
  1655. if (ret)
  1656. goto err_out;
  1657. if (ppgtt->node.start < ggtt->mappable_end)
  1658. DRM_DEBUG("Forced to use aperture for PDEs\n");
  1659. return 0;
  1660. err_out:
  1661. gen6_free_scratch(vm);
  1662. return ret;
  1663. }
  1664. static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
  1665. {
  1666. return gen6_ppgtt_allocate_page_directories(ppgtt);
  1667. }
  1668. static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
  1669. uint64_t start, uint64_t length)
  1670. {
  1671. struct i915_page_table *unused;
  1672. uint32_t pde;
  1673. gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
  1674. ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
  1675. }
  1676. static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
  1677. {
  1678. struct drm_device *dev = ppgtt->base.dev;
  1679. struct drm_i915_private *dev_priv = to_i915(dev);
  1680. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  1681. int ret;
  1682. ppgtt->base.pte_encode = ggtt->base.pte_encode;
  1683. if (intel_vgpu_active(dev_priv) || IS_GEN6(dev))
  1684. ppgtt->switch_mm = gen6_mm_switch;
  1685. else if (IS_HASWELL(dev))
  1686. ppgtt->switch_mm = hsw_mm_switch;
  1687. else if (IS_GEN7(dev))
  1688. ppgtt->switch_mm = gen7_mm_switch;
  1689. else
  1690. BUG();
  1691. ret = gen6_ppgtt_alloc(ppgtt);
  1692. if (ret)
  1693. return ret;
  1694. ppgtt->base.allocate_va_range = gen6_alloc_va_range;
  1695. ppgtt->base.clear_range = gen6_ppgtt_clear_range;
  1696. ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
  1697. ppgtt->base.unbind_vma = ppgtt_unbind_vma;
  1698. ppgtt->base.bind_vma = ppgtt_bind_vma;
  1699. ppgtt->base.cleanup = gen6_ppgtt_cleanup;
  1700. ppgtt->base.start = 0;
  1701. ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
  1702. ppgtt->debug_dump = gen6_dump_ppgtt;
  1703. ppgtt->pd.base.ggtt_offset =
  1704. ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
  1705. ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
  1706. ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
  1707. gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
  1708. gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
  1709. DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
  1710. ppgtt->node.size >> 20,
  1711. ppgtt->node.start / PAGE_SIZE);
  1712. DRM_DEBUG("Adding PPGTT at offset %x\n",
  1713. ppgtt->pd.base.ggtt_offset << 10);
  1714. return 0;
  1715. }
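/*
 * Editorial note on the pd_addr computation above: on gen6/7 the PPGTT PDEs
 * occupy GGTT PTE slots, which is why a node is reserved in the GGTT address
 * space in gen6_ppgtt_allocate_page_directories(). ggtt_offset is the byte
 * offset of the first of those slots within the GSM, and pd_addr reduces to
 * ggtt->gsm + (node.start >> PAGE_SHIFT) viewed as a gen6_pte_t pointer,
 * which is exactly where gen6_write_pde() pokes each entry.
 */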
  1716. static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
  1717. struct drm_i915_private *dev_priv)
  1718. {
  1719. ppgtt->base.dev = &dev_priv->drm;
  1720. if (INTEL_INFO(dev_priv)->gen < 8)
  1721. return gen6_ppgtt_init(ppgtt);
  1722. else
  1723. return gen8_ppgtt_init(ppgtt);
  1724. }
  1725. static void i915_address_space_init(struct i915_address_space *vm,
  1726. struct drm_i915_private *dev_priv)
  1727. {
  1728. drm_mm_init(&vm->mm, vm->start, vm->total);
  1729. INIT_LIST_HEAD(&vm->active_list);
  1730. INIT_LIST_HEAD(&vm->inactive_list);
  1731. INIT_LIST_HEAD(&vm->unbound_list);
  1732. list_add_tail(&vm->global_link, &dev_priv->vm_list);
  1733. }
  1734. static void gtt_write_workarounds(struct drm_device *dev)
  1735. {
  1736. struct drm_i915_private *dev_priv = to_i915(dev);
1737. /* This function is for GTT-related workarounds. It is
  1738. * called on driver load and after a GPU reset, so you can place
  1739. * workarounds here even if they get overwritten by GPU reset.
  1740. */
  1741. /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
  1742. if (IS_BROADWELL(dev))
  1743. I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
  1744. else if (IS_CHERRYVIEW(dev))
  1745. I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
  1746. else if (IS_SKYLAKE(dev))
  1747. I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
  1748. else if (IS_BROXTON(dev))
  1749. I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
  1750. }
  1751. static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
  1752. struct drm_i915_private *dev_priv,
  1753. struct drm_i915_file_private *file_priv)
  1754. {
  1755. int ret;
  1756. ret = __hw_ppgtt_init(ppgtt, dev_priv);
  1757. if (ret == 0) {
  1758. kref_init(&ppgtt->ref);
  1759. i915_address_space_init(&ppgtt->base, dev_priv);
  1760. ppgtt->base.file = file_priv;
  1761. }
  1762. return ret;
  1763. }
  1764. int i915_ppgtt_init_hw(struct drm_device *dev)
  1765. {
  1766. gtt_write_workarounds(dev);
  1767. /* In the case of execlists, PPGTT is enabled by the context descriptor
  1768. * and the PDPs are contained within the context itself. We don't
  1769. * need to do anything here. */
  1770. if (i915.enable_execlists)
  1771. return 0;
  1772. if (!USES_PPGTT(dev))
  1773. return 0;
  1774. if (IS_GEN6(dev))
  1775. gen6_ppgtt_enable(dev);
  1776. else if (IS_GEN7(dev))
  1777. gen7_ppgtt_enable(dev);
  1778. else if (INTEL_INFO(dev)->gen >= 8)
  1779. gen8_ppgtt_enable(dev);
  1780. else
  1781. MISSING_CASE(INTEL_INFO(dev)->gen);
  1782. return 0;
  1783. }
  1784. struct i915_hw_ppgtt *
  1785. i915_ppgtt_create(struct drm_i915_private *dev_priv,
  1786. struct drm_i915_file_private *fpriv)
  1787. {
  1788. struct i915_hw_ppgtt *ppgtt;
  1789. int ret;
  1790. ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
  1791. if (!ppgtt)
  1792. return ERR_PTR(-ENOMEM);
  1793. ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv);
  1794. if (ret) {
  1795. kfree(ppgtt);
  1796. return ERR_PTR(ret);
  1797. }
  1798. trace_i915_ppgtt_create(&ppgtt->base);
  1799. return ppgtt;
  1800. }
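/*
 * Editorial note: an illustrative usage sketch only (the calling site and
 * error handling are hypothetical, not taken from this file):
 *
 *	struct i915_hw_ppgtt *ppgtt;
 *
 *	ppgtt = i915_ppgtt_create(dev_priv, file_priv);
 *	if (IS_ERR(ppgtt))
 *		return PTR_ERR(ppgtt);
 *
 *	... use ppgtt->base as the context's address space ...
 *
 *	i915_ppgtt_put(ppgtt);	(drops the kref taken in i915_ppgtt_init();
 *				 the last put ends up in i915_ppgtt_release()
 *				 below)
 */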
  1801. void i915_ppgtt_release(struct kref *kref)
  1802. {
  1803. struct i915_hw_ppgtt *ppgtt =
  1804. container_of(kref, struct i915_hw_ppgtt, ref);
  1805. trace_i915_ppgtt_release(&ppgtt->base);
  1806. /* vmas should already be unbound and destroyed */
  1807. WARN_ON(!list_empty(&ppgtt->base.active_list));
  1808. WARN_ON(!list_empty(&ppgtt->base.inactive_list));
  1809. WARN_ON(!list_empty(&ppgtt->base.unbound_list));
  1810. list_del(&ppgtt->base.global_link);
  1811. drm_mm_takedown(&ppgtt->base.mm);
  1812. ppgtt->base.cleanup(&ppgtt->base);
  1813. kfree(ppgtt);
  1814. }
1815. /* Certain Gen5 chipsets require idling the GPU before
  1816. * unmapping anything from the GTT when VT-d is enabled.
  1817. */
  1818. static bool needs_idle_maps(struct drm_i915_private *dev_priv)
  1819. {
  1820. #ifdef CONFIG_INTEL_IOMMU
  1821. /* Query intel_iommu to see if we need the workaround. Presumably that
  1822. * was loaded first.
  1823. */
  1824. if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped)
  1825. return true;
  1826. #endif
  1827. return false;
  1828. }
  1829. void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
  1830. {
  1831. struct intel_engine_cs *engine;
  1832. if (INTEL_INFO(dev_priv)->gen < 6)
  1833. return;
  1834. for_each_engine(engine, dev_priv) {
  1835. u32 fault_reg;
  1836. fault_reg = I915_READ(RING_FAULT_REG(engine));
  1837. if (fault_reg & RING_FAULT_VALID) {
  1838. DRM_DEBUG_DRIVER("Unexpected fault\n"
  1839. "\tAddr: 0x%08lx\n"
  1840. "\tAddress space: %s\n"
  1841. "\tSource ID: %d\n"
  1842. "\tType: %d\n",
  1843. fault_reg & PAGE_MASK,
  1844. fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
  1845. RING_FAULT_SRCID(fault_reg),
  1846. RING_FAULT_FAULT_TYPE(fault_reg));
  1847. I915_WRITE(RING_FAULT_REG(engine),
  1848. fault_reg & ~RING_FAULT_VALID);
  1849. }
  1850. }
  1851. POSTING_READ(RING_FAULT_REG(&dev_priv->engine[RCS]));
  1852. }
  1853. static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
  1854. {
  1855. if (INTEL_INFO(dev_priv)->gen < 6) {
  1856. intel_gtt_chipset_flush();
  1857. } else {
  1858. I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  1859. POSTING_READ(GFX_FLSH_CNTL_GEN6);
  1860. }
  1861. }
  1862. void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
  1863. {
  1864. struct drm_i915_private *dev_priv = to_i915(dev);
  1865. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  1866. /* Don't bother messing with faults pre GEN6 as we have little
  1867. * documentation supporting that it's a good idea.
  1868. */
  1869. if (INTEL_INFO(dev)->gen < 6)
  1870. return;
  1871. i915_check_and_clear_faults(dev_priv);
  1872. ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
  1873. true);
  1874. i915_ggtt_flush(dev_priv);
  1875. }
  1876. int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
  1877. {
  1878. if (!dma_map_sg(&obj->base.dev->pdev->dev,
  1879. obj->pages->sgl, obj->pages->nents,
  1880. PCI_DMA_BIDIRECTIONAL))
  1881. return -ENOSPC;
  1882. return 0;
  1883. }
  1884. static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
  1885. {
  1886. writeq(pte, addr);
  1887. }
  1888. static void gen8_ggtt_insert_page(struct i915_address_space *vm,
  1889. dma_addr_t addr,
  1890. uint64_t offset,
  1891. enum i915_cache_level level,
  1892. u32 unused)
  1893. {
  1894. struct drm_i915_private *dev_priv = to_i915(vm->dev);
  1895. gen8_pte_t __iomem *pte =
  1896. (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
  1897. (offset >> PAGE_SHIFT);
  1898. int rpm_atomic_seq;
  1899. rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
  1900. gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
  1901. I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  1902. POSTING_READ(GFX_FLSH_CNTL_GEN6);
  1903. assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
  1904. }
  1905. static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
  1906. struct sg_table *st,
  1907. uint64_t start,
  1908. enum i915_cache_level level, u32 unused)
  1909. {
  1910. struct drm_i915_private *dev_priv = to_i915(vm->dev);
  1911. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  1912. struct sgt_iter sgt_iter;
  1913. gen8_pte_t __iomem *gtt_entries;
  1914. gen8_pte_t gtt_entry;
  1915. dma_addr_t addr;
  1916. int rpm_atomic_seq;
  1917. int i = 0;
  1918. rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
  1919. gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
  1920. for_each_sgt_dma(addr, sgt_iter, st) {
  1921. gtt_entry = gen8_pte_encode(addr, level, true);
  1922. gen8_set_pte(&gtt_entries[i++], gtt_entry);
  1923. }
  1924. /*
  1925. * XXX: This serves as a posting read to make sure that the PTE has
  1926. * actually been updated. There is some concern that even though
1927. * registers and PTEs are within the same BAR, they are potentially subject
1928. * to NUMA-like access patterns. Therefore, even with the way we assume
  1929. * hardware should work, we must keep this posting read for paranoia.
  1930. */
  1931. if (i != 0)
  1932. WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
  1933. /* This next bit makes the above posting read even more important. We
  1934. * want to flush the TLBs only after we're certain all the PTE updates
  1935. * have finished.
  1936. */
  1937. I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  1938. POSTING_READ(GFX_FLSH_CNTL_GEN6);
  1939. assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
  1940. }
  1941. struct insert_entries {
  1942. struct i915_address_space *vm;
  1943. struct sg_table *st;
  1944. uint64_t start;
  1945. enum i915_cache_level level;
  1946. u32 flags;
  1947. };
  1948. static int gen8_ggtt_insert_entries__cb(void *_arg)
  1949. {
  1950. struct insert_entries *arg = _arg;
  1951. gen8_ggtt_insert_entries(arg->vm, arg->st,
  1952. arg->start, arg->level, arg->flags);
  1953. return 0;
  1954. }
  1955. static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
  1956. struct sg_table *st,
  1957. uint64_t start,
  1958. enum i915_cache_level level,
  1959. u32 flags)
  1960. {
  1961. struct insert_entries arg = { vm, st, start, level, flags };
  1962. stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
  1963. }
  1964. static void gen6_ggtt_insert_page(struct i915_address_space *vm,
  1965. dma_addr_t addr,
  1966. uint64_t offset,
  1967. enum i915_cache_level level,
  1968. u32 flags)
  1969. {
  1970. struct drm_i915_private *dev_priv = to_i915(vm->dev);
  1971. gen6_pte_t __iomem *pte =
  1972. (gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
  1973. (offset >> PAGE_SHIFT);
  1974. int rpm_atomic_seq;
  1975. rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
  1976. iowrite32(vm->pte_encode(addr, level, true, flags), pte);
  1977. I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  1978. POSTING_READ(GFX_FLSH_CNTL_GEN6);
  1979. assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
  1980. }
  1981. /*
  1982. * Binds an object into the global gtt with the specified cache level. The object
  1983. * will be accessible to the GPU via commands whose operands reference offsets
  1984. * within the global GTT as well as accessible by the GPU through the GMADR
  1985. * mapped BAR (dev_priv->mm.gtt->gtt).
  1986. */
  1987. static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
  1988. struct sg_table *st,
  1989. uint64_t start,
  1990. enum i915_cache_level level, u32 flags)
  1991. {
  1992. struct drm_i915_private *dev_priv = to_i915(vm->dev);
  1993. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  1994. struct sgt_iter sgt_iter;
  1995. gen6_pte_t __iomem *gtt_entries;
  1996. gen6_pte_t gtt_entry;
  1997. dma_addr_t addr;
  1998. int rpm_atomic_seq;
  1999. int i = 0;
  2000. rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
  2001. gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
  2002. for_each_sgt_dma(addr, sgt_iter, st) {
  2003. gtt_entry = vm->pte_encode(addr, level, true, flags);
  2004. iowrite32(gtt_entry, &gtt_entries[i++]);
  2005. }
  2006. /* XXX: This serves as a posting read to make sure that the PTE has
  2007. * actually been updated. There is some concern that even though
2008. * registers and PTEs are within the same BAR, they are potentially subject
2009. * to NUMA-like access patterns. Therefore, even with the way we assume
  2010. * hardware should work, we must keep this posting read for paranoia.
  2011. */
  2012. if (i != 0)
  2013. WARN_ON(readl(&gtt_entries[i-1]) != gtt_entry);
  2014. /* This next bit makes the above posting read even more important. We
  2015. * want to flush the TLBs only after we're certain all the PTE updates
  2016. * have finished.
  2017. */
  2018. I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  2019. POSTING_READ(GFX_FLSH_CNTL_GEN6);
  2020. assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
  2021. }
  2022. static void nop_clear_range(struct i915_address_space *vm,
  2023. uint64_t start,
  2024. uint64_t length,
  2025. bool use_scratch)
  2026. {
  2027. }
  2028. static void gen8_ggtt_clear_range(struct i915_address_space *vm,
  2029. uint64_t start,
  2030. uint64_t length,
  2031. bool use_scratch)
  2032. {
  2033. struct drm_i915_private *dev_priv = to_i915(vm->dev);
  2034. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  2035. unsigned first_entry = start >> PAGE_SHIFT;
  2036. unsigned num_entries = length >> PAGE_SHIFT;
  2037. gen8_pte_t scratch_pte, __iomem *gtt_base =
  2038. (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
  2039. const int max_entries = ggtt_total_entries(ggtt) - first_entry;
  2040. int i;
  2041. int rpm_atomic_seq;
  2042. rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
  2043. if (WARN(num_entries > max_entries,
  2044. "First entry = %d; Num entries = %d (max=%d)\n",
  2045. first_entry, num_entries, max_entries))
  2046. num_entries = max_entries;
  2047. scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
  2048. I915_CACHE_LLC,
  2049. use_scratch);
  2050. for (i = 0; i < num_entries; i++)
  2051. gen8_set_pte(&gtt_base[i], scratch_pte);
  2052. readl(gtt_base);
  2053. assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
  2054. }
  2055. static void gen6_ggtt_clear_range(struct i915_address_space *vm,
  2056. uint64_t start,
  2057. uint64_t length,
  2058. bool use_scratch)
  2059. {
  2060. struct drm_i915_private *dev_priv = to_i915(vm->dev);
  2061. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  2062. unsigned first_entry = start >> PAGE_SHIFT;
  2063. unsigned num_entries = length >> PAGE_SHIFT;
  2064. gen6_pte_t scratch_pte, __iomem *gtt_base =
  2065. (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
  2066. const int max_entries = ggtt_total_entries(ggtt) - first_entry;
  2067. int i;
  2068. int rpm_atomic_seq;
  2069. rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
  2070. if (WARN(num_entries > max_entries,
  2071. "First entry = %d; Num entries = %d (max=%d)\n",
  2072. first_entry, num_entries, max_entries))
  2073. num_entries = max_entries;
  2074. scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
  2075. I915_CACHE_LLC, use_scratch, 0);
  2076. for (i = 0; i < num_entries; i++)
  2077. iowrite32(scratch_pte, &gtt_base[i]);
  2078. readl(gtt_base);
  2079. assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
  2080. }
  2081. static void i915_ggtt_insert_page(struct i915_address_space *vm,
  2082. dma_addr_t addr,
  2083. uint64_t offset,
  2084. enum i915_cache_level cache_level,
  2085. u32 unused)
  2086. {
  2087. struct drm_i915_private *dev_priv = to_i915(vm->dev);
  2088. unsigned int flags = (cache_level == I915_CACHE_NONE) ?
  2089. AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
  2090. int rpm_atomic_seq;
  2091. rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
  2092. intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
  2093. assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
  2094. }
  2095. static void i915_ggtt_insert_entries(struct i915_address_space *vm,
  2096. struct sg_table *pages,
  2097. uint64_t start,
  2098. enum i915_cache_level cache_level, u32 unused)
  2099. {
  2100. struct drm_i915_private *dev_priv = to_i915(vm->dev);
  2101. unsigned int flags = (cache_level == I915_CACHE_NONE) ?
  2102. AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
  2103. int rpm_atomic_seq;
  2104. rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
  2105. intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
  2106. assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
  2107. }
  2108. static void i915_ggtt_clear_range(struct i915_address_space *vm,
  2109. uint64_t start,
  2110. uint64_t length,
  2111. bool unused)
  2112. {
  2113. struct drm_i915_private *dev_priv = to_i915(vm->dev);
  2114. unsigned first_entry = start >> PAGE_SHIFT;
  2115. unsigned num_entries = length >> PAGE_SHIFT;
  2116. int rpm_atomic_seq;
  2117. rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
  2118. intel_gtt_clear_range(first_entry, num_entries);
  2119. assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
  2120. }
  2121. static int ggtt_bind_vma(struct i915_vma *vma,
  2122. enum i915_cache_level cache_level,
  2123. u32 flags)
  2124. {
  2125. struct drm_i915_gem_object *obj = vma->obj;
  2126. u32 pte_flags = 0;
  2127. int ret;
  2128. ret = i915_get_ggtt_vma_pages(vma);
  2129. if (ret)
  2130. return ret;
  2131. /* Currently applicable only to VLV */
  2132. if (obj->gt_ro)
  2133. pte_flags |= PTE_READ_ONLY;
  2134. vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
  2135. cache_level, pte_flags);
  2136. /*
  2137. * Without aliasing PPGTT there's no difference between
2138. * GLOBAL/LOCAL_BIND; it's all the same PTEs. Hence unconditionally
  2139. * upgrade to both bound if we bind either to avoid double-binding.
  2140. */
  2141. vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
  2142. return 0;
  2143. }
  2144. static int aliasing_gtt_bind_vma(struct i915_vma *vma,
  2145. enum i915_cache_level cache_level,
  2146. u32 flags)
  2147. {
  2148. u32 pte_flags;
  2149. int ret;
  2150. ret = i915_get_ggtt_vma_pages(vma);
  2151. if (ret)
  2152. return ret;
  2153. /* Currently applicable only to VLV */
  2154. pte_flags = 0;
  2155. if (vma->obj->gt_ro)
  2156. pte_flags |= PTE_READ_ONLY;
  2157. if (flags & I915_VMA_GLOBAL_BIND) {
  2158. vma->vm->insert_entries(vma->vm,
  2159. vma->pages, vma->node.start,
  2160. cache_level, pte_flags);
  2161. }
  2162. if (flags & I915_VMA_LOCAL_BIND) {
  2163. struct i915_hw_ppgtt *appgtt =
  2164. to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
  2165. appgtt->base.insert_entries(&appgtt->base,
  2166. vma->pages, vma->node.start,
  2167. cache_level, pte_flags);
  2168. }
  2169. return 0;
  2170. }
  2171. static void ggtt_unbind_vma(struct i915_vma *vma)
  2172. {
  2173. struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt;
  2174. const u64 size = min(vma->size, vma->node.size);
  2175. if (vma->flags & I915_VMA_GLOBAL_BIND)
  2176. vma->vm->clear_range(vma->vm,
  2177. vma->node.start, size,
  2178. true);
  2179. if (vma->flags & I915_VMA_LOCAL_BIND && appgtt)
  2180. appgtt->base.clear_range(&appgtt->base,
  2181. vma->node.start, size,
  2182. true);
  2183. }
  2184. void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
  2185. {
  2186. struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
  2187. struct device *kdev = &dev_priv->drm.pdev->dev;
  2188. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  2189. if (unlikely(ggtt->do_idle_maps)) {
  2190. if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
  2191. DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
  2192. /* Wait a bit, in hopes it avoids the hang */
  2193. udelay(10);
  2194. }
  2195. }
  2196. dma_unmap_sg(kdev, obj->pages->sgl, obj->pages->nents,
  2197. PCI_DMA_BIDIRECTIONAL);
  2198. }
  2199. static void i915_gtt_color_adjust(struct drm_mm_node *node,
  2200. unsigned long color,
  2201. u64 *start,
  2202. u64 *end)
  2203. {
  2204. if (node->color != color)
  2205. *start += 4096;
  2206. node = list_first_entry_or_null(&node->node_list,
  2207. struct drm_mm_node,
  2208. node_list);
  2209. if (node && node->allocated && node->color != color)
  2210. *end -= 4096;
  2211. }
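/*
 * Editorial note: the drm_mm "color" used for the GGTT is the object's cache
 * level (see the !HAS_LLC(dev_priv) hook-up in i915_ggtt_init_hw() below), so
 * the adjustment above presumably keeps a one-page guard between neighbouring
 * objects of different cacheability, e.g. so that GPU prefetching past the
 * end of an uncached object never touches a snooped neighbour.
 */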
  2212. int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
  2213. {
2214. /* Let GEM manage all of the aperture.
  2215. *
  2216. * However, leave one page at the end still bound to the scratch page.
  2217. * There are a number of places where the hardware apparently prefetches
  2218. * past the end of the object, and we've seen multiple hangs with the
  2219. * GPU head pointer stuck in a batchbuffer bound at the last page of the
  2220. * aperture. One page should be enough to keep any prefetching inside
  2221. * of the aperture.
  2222. */
  2223. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  2224. unsigned long hole_start, hole_end;
  2225. struct drm_mm_node *entry;
  2226. int ret;
  2227. ret = intel_vgt_balloon(dev_priv);
  2228. if (ret)
  2229. return ret;
  2230. /* Clear any non-preallocated blocks */
  2231. drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
  2232. DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
  2233. hole_start, hole_end);
  2234. ggtt->base.clear_range(&ggtt->base, hole_start,
  2235. hole_end - hole_start, true);
  2236. }
  2237. /* And finally clear the reserved guard page */
  2238. ggtt->base.clear_range(&ggtt->base,
  2239. ggtt->base.total - PAGE_SIZE, PAGE_SIZE,
  2240. true);
  2241. if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
  2242. struct i915_hw_ppgtt *ppgtt;
  2243. ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
  2244. if (!ppgtt)
  2245. return -ENOMEM;
  2246. ret = __hw_ppgtt_init(ppgtt, dev_priv);
  2247. if (ret) {
  2248. kfree(ppgtt);
  2249. return ret;
  2250. }
  2251. if (ppgtt->base.allocate_va_range)
  2252. ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
  2253. ppgtt->base.total);
  2254. if (ret) {
  2255. ppgtt->base.cleanup(&ppgtt->base);
  2256. kfree(ppgtt);
  2257. return ret;
  2258. }
  2259. ppgtt->base.clear_range(&ppgtt->base,
  2260. ppgtt->base.start,
  2261. ppgtt->base.total,
  2262. true);
  2263. dev_priv->mm.aliasing_ppgtt = ppgtt;
  2264. WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
  2265. ggtt->base.bind_vma = aliasing_gtt_bind_vma;
  2266. }
  2267. return 0;
  2268. }
  2269. /**
  2270. * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
  2271. * @dev_priv: i915 device
  2272. */
  2273. void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
  2274. {
  2275. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  2276. if (dev_priv->mm.aliasing_ppgtt) {
  2277. struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
  2278. ppgtt->base.cleanup(&ppgtt->base);
  2279. kfree(ppgtt);
  2280. }
  2281. i915_gem_cleanup_stolen(&dev_priv->drm);
  2282. if (drm_mm_initialized(&ggtt->base.mm)) {
  2283. intel_vgt_deballoon(dev_priv);
  2284. drm_mm_takedown(&ggtt->base.mm);
  2285. list_del(&ggtt->base.global_link);
  2286. }
  2287. ggtt->base.cleanup(&ggtt->base);
  2288. arch_phys_wc_del(ggtt->mtrr);
  2289. io_mapping_fini(&ggtt->mappable);
  2290. }
  2291. static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
  2292. {
  2293. snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
  2294. snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
  2295. return snb_gmch_ctl << 20;
  2296. }
  2297. static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
  2298. {
  2299. bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
  2300. bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
  2301. if (bdw_gmch_ctl)
  2302. bdw_gmch_ctl = 1 << bdw_gmch_ctl;
  2303. #ifdef CONFIG_X86_32
  2304. /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
  2305. if (bdw_gmch_ctl > 4)
  2306. bdw_gmch_ctl = 4;
  2307. #endif
  2308. return bdw_gmch_ctl << 20;
  2309. }
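/*
 * Editorial worked example (not from the original source): a GGMS field of 3
 * gives 1 << 3 = 8, i.e. an 8 MB GSM. With 8-byte gen8 PTEs that is 1M
 * entries, which gen8_gmch_probe() below turns into
 * (size / sizeof(gen8_pte_t)) << PAGE_SHIFT = 4 GB of GGTT address space.
 */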
  2310. static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
  2311. {
  2312. gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
  2313. gmch_ctrl &= SNB_GMCH_GGMS_MASK;
  2314. if (gmch_ctrl)
  2315. return 1 << (20 + gmch_ctrl);
  2316. return 0;
  2317. }
  2318. static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
  2319. {
  2320. snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
  2321. snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
  2322. return snb_gmch_ctl << 25; /* 32 MB units */
  2323. }
  2324. static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
  2325. {
  2326. bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
  2327. bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
  2328. return bdw_gmch_ctl << 25; /* 32 MB units */
  2329. }
  2330. static size_t chv_get_stolen_size(u16 gmch_ctrl)
  2331. {
  2332. gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
  2333. gmch_ctrl &= SNB_GMCH_GMS_MASK;
  2334. /*
  2335. * 0x0 to 0x10: 32MB increments starting at 0MB
  2336. * 0x11 to 0x16: 4MB increments starting at 8MB
2337. * 0x17 to 0x1d: 4MB increments starting at 36MB
  2338. */
  2339. if (gmch_ctrl < 0x11)
  2340. return gmch_ctrl << 25;
  2341. else if (gmch_ctrl < 0x17)
  2342. return (gmch_ctrl - 0x11 + 2) << 22;
  2343. else
  2344. return (gmch_ctrl - 0x17 + 9) << 22;
  2345. }
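/*
 * Editorial worked examples for the decode above (not from the original
 * source):
 *   gmch_ctrl = 0x02: 0x02 << 25              =  64 MB
 *   gmch_ctrl = 0x11: (0x11 - 0x11 + 2) << 22 =   8 MB
 *   gmch_ctrl = 0x16: (0x16 - 0x11 + 2) << 22 =  28 MB
 *   gmch_ctrl = 0x17: (0x17 - 0x17 + 9) << 22 =  36 MB
 *   gmch_ctrl = 0x1d: (0x1d - 0x17 + 9) << 22 =  60 MB
 */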
  2346. static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
  2347. {
  2348. gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
  2349. gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
  2350. if (gen9_gmch_ctl < 0xf0)
  2351. return gen9_gmch_ctl << 25; /* 32 MB units */
  2352. else
2353. /* values >= 0xf0: 4MB increments starting at 4MB */
  2354. return (gen9_gmch_ctl - 0xf0 + 1) << 22;
  2355. }
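/*
 * Editorial worked examples (not from the original source):
 *   gen9_gmch_ctl = 0x01: 0x01 << 25              = 32 MB
 *   gen9_gmch_ctl = 0xf0: (0xf0 - 0xf0 + 1) << 22 =  4 MB
 *   gen9_gmch_ctl = 0xfe: (0xfe - 0xf0 + 1) << 22 = 60 MB
 */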
  2356. static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
  2357. {
  2358. struct pci_dev *pdev = ggtt->base.dev->pdev;
  2359. phys_addr_t phys_addr;
  2360. int ret;
  2361. /* For Modern GENs the PTEs and register space are split in the BAR */
  2362. phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
  2363. /*
2364. * On BXT, writes larger than 64 bits to the GTT page table range will be
  2365. * dropped. For WC mappings in general we have 64 byte burst writes
  2366. * when the WC buffer is flushed, so we can't use it, but have to
  2367. * resort to an uncached mapping. The WC issue is easily caught by the
  2368. * readback check when writing GTT PTE entries.
  2369. */
  2370. if (IS_BROXTON(ggtt->base.dev))
  2371. ggtt->gsm = ioremap_nocache(phys_addr, size);
  2372. else
  2373. ggtt->gsm = ioremap_wc(phys_addr, size);
  2374. if (!ggtt->gsm) {
  2375. DRM_ERROR("Failed to map the ggtt page table\n");
  2376. return -ENOMEM;
  2377. }
  2378. ret = setup_scratch_page(ggtt->base.dev,
  2379. &ggtt->base.scratch_page,
  2380. GFP_DMA32);
  2381. if (ret) {
  2382. DRM_ERROR("Scratch setup failed\n");
  2383. /* iounmap will also get called at remove, but meh */
  2384. iounmap(ggtt->gsm);
  2385. return ret;
  2386. }
  2387. return 0;
  2388. }
  2389. /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
  2390. * bits. When using advanced contexts each context stores its own PAT, but
  2391. * writing this data shouldn't be harmful even in those cases. */
  2392. static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
  2393. {
  2394. uint64_t pat;
  2395. pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
  2396. GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
  2397. GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
  2398. GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
  2399. GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
  2400. GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
  2401. GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
  2402. GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
  2403. if (!USES_PPGTT(dev_priv))
  2404. /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
  2405. * so RTL will always use the value corresponding to
  2406. * pat_sel = 000".
  2407. * So let's disable cache for GGTT to avoid screen corruptions.
  2408. * MOCS still can be used though.
  2409. * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
  2410. * before this patch, i.e. the same uncached + snooping access
  2411. * like on gen6/7 seems to be in effect.
  2412. * - So this just fixes blitter/render access. Again it looks
  2413. * like it's not just uncached access, but uncached + snooping.
  2414. * So we can still hold onto all our assumptions wrt cpu
  2415. * clflushing on LLC machines.
  2416. */
  2417. pat = GEN8_PPAT(0, GEN8_PPAT_UC);
  2418. /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
  2419. * write would work. */
  2420. I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
  2421. I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
  2422. }
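/*
 * Editorial note: the PAT is programmed as eight one-byte entries packed into
 * a single 64-bit value (GEN8_PPAT(i, x) is assumed to shift x into byte i),
 * so entries 0-3 end up in GEN8_PRIVATE_PAT_LO and entries 4-7 in
 * GEN8_PRIVATE_PAT_HI via the pat >> 32 write above.
 */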
  2423. static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
  2424. {
  2425. uint64_t pat;
  2426. /*
  2427. * Map WB on BDW to snooped on CHV.
  2428. *
  2429. * Only the snoop bit has meaning for CHV, the rest is
  2430. * ignored.
  2431. *
  2432. * The hardware will never snoop for certain types of accesses:
  2433. * - CPU GTT (GMADR->GGTT->no snoop->memory)
  2434. * - PPGTT page tables
  2435. * - some other special cycles
  2436. *
  2437. * As with BDW, we also need to consider the following for GT accesses:
  2438. * "For GGTT, there is NO pat_sel[2:0] from the entry,
  2439. * so RTL will always use the value corresponding to
  2440. * pat_sel = 000".
  2441. * Which means we must set the snoop bit in PAT entry 0
  2442. * in order to keep the global status page working.
  2443. */
  2444. pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
  2445. GEN8_PPAT(1, 0) |
  2446. GEN8_PPAT(2, 0) |
  2447. GEN8_PPAT(3, 0) |
  2448. GEN8_PPAT(4, CHV_PPAT_SNOOP) |
  2449. GEN8_PPAT(5, CHV_PPAT_SNOOP) |
  2450. GEN8_PPAT(6, CHV_PPAT_SNOOP) |
  2451. GEN8_PPAT(7, CHV_PPAT_SNOOP);
  2452. I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
  2453. I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
  2454. }
  2455. static void gen6_gmch_remove(struct i915_address_space *vm)
  2456. {
  2457. struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
  2458. iounmap(ggtt->gsm);
  2459. cleanup_scratch_page(vm->dev, &vm->scratch_page);
  2460. }
  2461. static int gen8_gmch_probe(struct i915_ggtt *ggtt)
  2462. {
  2463. struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
  2464. struct pci_dev *pdev = dev_priv->drm.pdev;
  2465. unsigned int size;
  2466. u16 snb_gmch_ctl;
  2467. /* TODO: We're not aware of mappable constraints on gen8 yet */
  2468. ggtt->mappable_base = pci_resource_start(pdev, 2);
  2469. ggtt->mappable_end = pci_resource_len(pdev, 2);
  2470. if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39)))
  2471. pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
  2472. pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
  2473. if (INTEL_GEN(dev_priv) >= 9) {
  2474. ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
  2475. size = gen8_get_total_gtt_size(snb_gmch_ctl);
  2476. } else if (IS_CHERRYVIEW(dev_priv)) {
  2477. ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
  2478. size = chv_get_total_gtt_size(snb_gmch_ctl);
  2479. } else {
  2480. ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
  2481. size = gen8_get_total_gtt_size(snb_gmch_ctl);
  2482. }
  2483. ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
  2484. if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
  2485. chv_setup_private_ppat(dev_priv);
  2486. else
  2487. bdw_setup_private_ppat(dev_priv);
  2488. ggtt->base.cleanup = gen6_gmch_remove;
  2489. ggtt->base.bind_vma = ggtt_bind_vma;
  2490. ggtt->base.unbind_vma = ggtt_unbind_vma;
  2491. ggtt->base.insert_page = gen8_ggtt_insert_page;
  2492. ggtt->base.clear_range = nop_clear_range;
  2493. if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
  2494. ggtt->base.clear_range = gen8_ggtt_clear_range;
  2495. ggtt->base.insert_entries = gen8_ggtt_insert_entries;
  2496. if (IS_CHERRYVIEW(dev_priv))
  2497. ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
  2498. return ggtt_probe_common(ggtt, size);
  2499. }
  2500. static int gen6_gmch_probe(struct i915_ggtt *ggtt)
  2501. {
  2502. struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
  2503. struct pci_dev *pdev = dev_priv->drm.pdev;
  2504. unsigned int size;
  2505. u16 snb_gmch_ctl;
  2506. ggtt->mappable_base = pci_resource_start(pdev, 2);
  2507. ggtt->mappable_end = pci_resource_len(pdev, 2);
  2508. /* 64/512MB is the current min/max we actually know of, but this is just
  2509. * a coarse sanity check.
  2510. */
  2511. if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
  2512. DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
  2513. return -ENXIO;
  2514. }
  2515. if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40)))
  2516. pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
  2517. pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
  2518. ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
  2519. size = gen6_get_total_gtt_size(snb_gmch_ctl);
  2520. ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
  2521. ggtt->base.clear_range = gen6_ggtt_clear_range;
  2522. ggtt->base.insert_page = gen6_ggtt_insert_page;
  2523. ggtt->base.insert_entries = gen6_ggtt_insert_entries;
  2524. ggtt->base.bind_vma = ggtt_bind_vma;
  2525. ggtt->base.unbind_vma = ggtt_unbind_vma;
  2526. ggtt->base.cleanup = gen6_gmch_remove;
  2527. if (HAS_EDRAM(dev_priv))
  2528. ggtt->base.pte_encode = iris_pte_encode;
  2529. else if (IS_HASWELL(dev_priv))
  2530. ggtt->base.pte_encode = hsw_pte_encode;
  2531. else if (IS_VALLEYVIEW(dev_priv))
  2532. ggtt->base.pte_encode = byt_pte_encode;
  2533. else if (INTEL_GEN(dev_priv) >= 7)
  2534. ggtt->base.pte_encode = ivb_pte_encode;
  2535. else
  2536. ggtt->base.pte_encode = snb_pte_encode;
  2537. return ggtt_probe_common(ggtt, size);
  2538. }
  2539. static void i915_gmch_remove(struct i915_address_space *vm)
  2540. {
  2541. intel_gmch_remove();
  2542. }
  2543. static int i915_gmch_probe(struct i915_ggtt *ggtt)
  2544. {
  2545. struct drm_i915_private *dev_priv = to_i915(ggtt->base.dev);
  2546. int ret;
  2547. ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
  2548. if (!ret) {
  2549. DRM_ERROR("failed to set up gmch\n");
  2550. return -EIO;
  2551. }
  2552. intel_gtt_get(&ggtt->base.total, &ggtt->stolen_size,
  2553. &ggtt->mappable_base, &ggtt->mappable_end);
  2554. ggtt->do_idle_maps = needs_idle_maps(dev_priv);
  2555. ggtt->base.insert_page = i915_ggtt_insert_page;
  2556. ggtt->base.insert_entries = i915_ggtt_insert_entries;
  2557. ggtt->base.clear_range = i915_ggtt_clear_range;
  2558. ggtt->base.bind_vma = ggtt_bind_vma;
  2559. ggtt->base.unbind_vma = ggtt_unbind_vma;
  2560. ggtt->base.cleanup = i915_gmch_remove;
  2561. if (unlikely(ggtt->do_idle_maps))
  2562. DRM_INFO("applying Ironlake quirks for intel_iommu\n");
  2563. return 0;
  2564. }
  2565. /**
  2566. * i915_ggtt_probe_hw - Probe GGTT hardware location
  2567. * @dev_priv: i915 device
  2568. */
  2569. int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
  2570. {
  2571. struct i915_ggtt *ggtt = &dev_priv->ggtt;
  2572. int ret;
  2573. ggtt->base.dev = &dev_priv->drm;
  2574. if (INTEL_GEN(dev_priv) <= 5)
  2575. ret = i915_gmch_probe(ggtt);
  2576. else if (INTEL_GEN(dev_priv) < 8)
  2577. ret = gen6_gmch_probe(ggtt);
  2578. else
  2579. ret = gen8_gmch_probe(ggtt);
  2580. if (ret)
  2581. return ret;
  2582. if ((ggtt->base.total - 1) >> 32) {
  2583. DRM_ERROR("We never expected a Global GTT with more than 32bits"
  2584. " of address space! Found %lldM!\n",
  2585. ggtt->base.total >> 20);
  2586. ggtt->base.total = 1ULL << 32;
  2587. ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
  2588. }
  2589. if (ggtt->mappable_end > ggtt->base.total) {
  2590. DRM_ERROR("mappable aperture extends past end of GGTT,"
  2591. " aperture=%llx, total=%llx\n",
  2592. ggtt->mappable_end, ggtt->base.total);
  2593. ggtt->mappable_end = ggtt->base.total;
  2594. }
  2595. /* GMADR is the PCI mmio aperture into the global GTT. */
  2596. DRM_INFO("Memory usable by graphics device = %lluM\n",
  2597. ggtt->base.total >> 20);
  2598. DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
  2599. DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", ggtt->stolen_size >> 20);
  2600. #ifdef CONFIG_INTEL_IOMMU
  2601. if (intel_iommu_gfx_mapped)
  2602. DRM_INFO("VT-d active for gfx access\n");
  2603. #endif
  2604. return 0;
  2605. }
/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @dev_priv: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
{
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
        int ret;

        INIT_LIST_HEAD(&dev_priv->vm_list);

        /* Subtract the guard page before address space initialization to
         * shrink the range used by drm_mm.
         */
        ggtt->base.total -= PAGE_SIZE;
        i915_address_space_init(&ggtt->base, dev_priv);
        ggtt->base.total += PAGE_SIZE;
        if (!HAS_LLC(dev_priv))
                ggtt->base.mm.color_adjust = i915_gtt_color_adjust;

        if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
                                dev_priv->ggtt.mappable_base,
                                dev_priv->ggtt.mappable_end)) {
                ret = -EIO;
                goto out_gtt_cleanup;
        }

        ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end);

        /*
         * Initialise stolen early so that we may reserve preallocated
         * objects for the BIOS to KMS transition.
         */
        ret = i915_gem_init_stolen(&dev_priv->drm);
        if (ret)
                goto out_gtt_cleanup;

        return 0;

out_gtt_cleanup:
        ggtt->base.cleanup(&ggtt->base);
        return ret;
}

int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
{
        if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
                return -EIO;

        return 0;
}

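/*
 * Usage sketch (illustrative only, not part of the original file): during
 * driver load the three GGTT helpers above are expected to run in
 * probe -> init -> enable order; the enclosing load function and the
 * intermediate load-time steps are assumed and omitted here:
 *
 *      ret = i915_ggtt_probe_hw(dev_priv);
 *      if (ret)
 *              return ret;
 *      ret = i915_ggtt_init_hw(dev_priv);
 *      if (ret)
 *              return ret;
 *      ret = i915_ggtt_enable_hw(dev_priv);
 *      if (ret)
 *              return ret;
 */
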
void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = to_i915(dev);
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
        struct drm_i915_gem_object *obj, *on;

        i915_check_and_clear_faults(dev_priv);

        /* First fill our portion of the GTT with scratch pages */
        ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total,
                               true);

        ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */

        /* clflush objects bound into the GGTT and rebind them. */
        list_for_each_entry_safe(obj, on,
                                 &dev_priv->mm.bound_list, global_list) {
                bool ggtt_bound = false;
                struct i915_vma *vma;

                list_for_each_entry(vma, &obj->vma_list, obj_link) {
                        if (vma->vm != &ggtt->base)
                                continue;

                        if (!i915_vma_unbind(vma))
                                continue;

                        WARN_ON(i915_vma_bind(vma, obj->cache_level,
                                              PIN_UPDATE));
                        ggtt_bound = true;
                }

                if (ggtt_bound)
                        WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
        }

        ggtt->base.closed = false;

        if (INTEL_INFO(dev)->gen >= 8) {
                if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
                        chv_setup_private_ppat(dev_priv);
                else
                        bdw_setup_private_ppat(dev_priv);

                return;
        }

        if (USES_PPGTT(dev)) {
                struct i915_address_space *vm;

                list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
                        /* TODO: Perhaps it shouldn't be gen6 specific */
                        struct i915_hw_ppgtt *ppgtt;

                        if (i915_is_ggtt(vm))
                                ppgtt = dev_priv->mm.aliasing_ppgtt;
                        else
                                ppgtt = i915_vm_to_ppgtt(vm);

                        gen6_write_page_range(dev_priv, &ppgtt->pd,
                                              0, ppgtt->base.total);
                }
        }

        i915_ggtt_flush(dev_priv);
}

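/*
 * Illustrative call site (assumed, not taken from this file): the restore
 * above is typically run from the driver's system-resume path, after the
 * GGTT PTE contents may have been lost across suspend, e.g.:
 *
 *      i915_gem_restore_gtt_mappings(&dev_priv->drm);
 *
 * followed by re-establishing the fence registers for tiled objects.
 */
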
static void
i915_vma_retire(struct i915_gem_active *active,
                struct drm_i915_gem_request *rq)
{
        const unsigned int idx = rq->engine->id;
        struct i915_vma *vma =
                container_of(active, struct i915_vma, last_read[idx]);

        GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));

        i915_vma_clear_active(vma, idx);
        if (i915_vma_is_active(vma))
                return;

        list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
        if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
                WARN_ON(i915_vma_unbind(vma));
}

void i915_vma_destroy(struct i915_vma *vma)
{
        GEM_BUG_ON(vma->node.allocated);
        GEM_BUG_ON(i915_vma_is_active(vma));
        GEM_BUG_ON(!i915_vma_is_closed(vma));
        GEM_BUG_ON(vma->fence);

        list_del(&vma->vm_link);
        if (!i915_vma_is_ggtt(vma))
                i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));

        kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
}

void i915_vma_close(struct i915_vma *vma)
{
        GEM_BUG_ON(i915_vma_is_closed(vma));
        vma->flags |= I915_VMA_CLOSED;

        list_del_init(&vma->obj_link);
        if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
                WARN_ON(i915_vma_unbind(vma));
}

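/*
 * Lifecycle note (editorial summary, not original source): a VMA created by
 * __i915_vma_create() below is first marked I915_VMA_CLOSED via
 * i915_vma_close() when its object or address space goes away, and is only
 * torn down by i915_vma_destroy() once it is idle, unpinned and unbound.
 * i915_vma_retire() above performs that deferred unbind for a closed VMA
 * whose last request has completed.
 */
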
static struct i915_vma *
__i915_vma_create(struct drm_i915_gem_object *obj,
                  struct i915_address_space *vm,
                  const struct i915_ggtt_view *view)
{
        struct i915_vma *vma;
        int i;

        GEM_BUG_ON(vm->closed);

        vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
        if (vma == NULL)
                return ERR_PTR(-ENOMEM);

        INIT_LIST_HEAD(&vma->exec_list);
        for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
                init_request_active(&vma->last_read[i], i915_vma_retire);
        init_request_active(&vma->last_fence, NULL);
        list_add(&vma->vm_link, &vm->unbound_list);
        vma->vm = vm;
        vma->obj = obj;
        vma->size = obj->base.size;

        if (view) {
                vma->ggtt_view = *view;
                if (view->type == I915_GGTT_VIEW_PARTIAL) {
                        vma->size = view->params.partial.size;
                        vma->size <<= PAGE_SHIFT;
                } else if (view->type == I915_GGTT_VIEW_ROTATED) {
                        vma->size =
                                intel_rotation_info_size(&view->params.rotated);
                        vma->size <<= PAGE_SHIFT;
                }
        }

        if (i915_is_ggtt(vm)) {
                vma->flags |= I915_VMA_GGTT;
        } else {
                i915_ppgtt_get(i915_vm_to_ppgtt(vm));
        }

        list_add_tail(&vma->obj_link, &obj->vma_list);
        return vma;
}

static inline bool vma_matches(struct i915_vma *vma,
                               struct i915_address_space *vm,
                               const struct i915_ggtt_view *view)
{
        if (vma->vm != vm)
                return false;

        if (!i915_vma_is_ggtt(vma))
                return true;

        if (!view)
                return vma->ggtt_view.type == 0;

        if (vma->ggtt_view.type != view->type)
                return false;

        return memcmp(&vma->ggtt_view.params,
                      &view->params,
                      sizeof(view->params)) == 0;
}

struct i915_vma *
i915_vma_create(struct drm_i915_gem_object *obj,
                struct i915_address_space *vm,
                const struct i915_ggtt_view *view)
{
        GEM_BUG_ON(view && !i915_is_ggtt(vm));
        GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view));

        return __i915_vma_create(obj, vm, view);
}

struct i915_vma *
i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
                    struct i915_address_space *vm,
                    const struct i915_ggtt_view *view)
{
        struct i915_vma *vma;

        list_for_each_entry_reverse(vma, &obj->vma_list, obj_link)
                if (vma_matches(vma, vm, view))
                        return vma;

        return NULL;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
                                  struct i915_address_space *vm,
                                  const struct i915_ggtt_view *view)
{
        struct i915_vma *vma;

        GEM_BUG_ON(view && !i915_is_ggtt(vm));

        vma = i915_gem_obj_to_vma(obj, vm, view);
        if (!vma)
                vma = __i915_vma_create(obj, vm, view);

        GEM_BUG_ON(i915_vma_is_closed(vma));
        return vma;
}

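/*
 * Usage sketch (illustrative only; "obj" and the surrounding error handling
 * are assumed): a caller wanting the default GGTT binding of an object looks
 * up or creates the VMA and checks for an allocation failure, e.g.:
 *
 *      struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
 *      struct i915_vma *vma;
 *
 *      vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL);
 *      if (IS_ERR(vma))
 *              return PTR_ERR(vma);
 *
 * A NULL view selects the normal (linear) GGTT view.
 */
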
static struct scatterlist *
rotate_pages(const dma_addr_t *in, unsigned int offset,
             unsigned int width, unsigned int height,
             unsigned int stride,
             struct sg_table *st, struct scatterlist *sg)
{
        unsigned int column, row;
        unsigned int src_idx;

        for (column = 0; column < width; column++) {
                src_idx = stride * (height - 1) + column;
                for (row = 0; row < height; row++) {
                        st->nents++;
                        /* We don't need the pages, but need to initialize
                         * the entries so the sg list can be happily traversed.
                         * The only thing we need are DMA addresses.
                         */
                        sg_set_page(sg, NULL, PAGE_SIZE, 0);
                        sg_dma_address(sg) = in[offset + src_idx];
                        sg_dma_len(sg) = PAGE_SIZE;
                        sg = sg_next(sg);
                        src_idx -= stride;
                }
        }

        return sg;
}

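/*
 * Worked example (editorial, not original source): for width = 2, height = 3
 * and stride = 2, the source page indices form the row-major grid
 *
 *      0 1
 *      2 3
 *      4 5
 *
 * rotate_pages() starts each column at stride * (height - 1) + column and
 * steps back by stride, so it emits 4, 2, 0 for column 0 and 5, 3, 1 for
 * column 1.  Each source column is thus written out bottom-to-top, which is
 * what makes a linear walk of the resulting sg list correspond to the
 * rotated view of the framebuffer.
 */
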
static struct sg_table *
intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
                          struct drm_i915_gem_object *obj)
{
        const size_t n_pages = obj->base.size / PAGE_SIZE;
        unsigned int size = intel_rotation_info_size(rot_info);
        struct sgt_iter sgt_iter;
        dma_addr_t dma_addr;
        unsigned long i;
        dma_addr_t *page_addr_list;
        struct sg_table *st;
        struct scatterlist *sg;
        int ret = -ENOMEM;

        /* Allocate a temporary list of source pages for random access. */
        page_addr_list = drm_malloc_gfp(n_pages,
                                        sizeof(dma_addr_t),
                                        GFP_TEMPORARY);
        if (!page_addr_list)
                return ERR_PTR(ret);

        /* Allocate target SG list. */
        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                goto err_st_alloc;

        ret = sg_alloc_table(st, size, GFP_KERNEL);
        if (ret)
                goto err_sg_alloc;

        /* Populate source page list from the object. */
        i = 0;
        for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
                page_addr_list[i++] = dma_addr;
        GEM_BUG_ON(i != n_pages);

        st->nents = 0;
        sg = st->sgl;

        for (i = 0; i < ARRAY_SIZE(rot_info->plane); i++) {
                sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
                                  rot_info->plane[i].width,
                                  rot_info->plane[i].height,
                                  rot_info->plane[i].stride, st, sg);
        }

        DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
                      obj->base.size, rot_info->plane[0].width,
                      rot_info->plane[0].height, size);

        drm_free_large(page_addr_list);

        return st;

err_sg_alloc:
        kfree(st);
err_st_alloc:
        drm_free_large(page_addr_list);

        DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
                      obj->base.size, rot_info->plane[0].width,
                      rot_info->plane[0].height, size);

        return ERR_PTR(ret);
}

static struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
                    struct drm_i915_gem_object *obj)
{
        struct sg_table *st;
        struct scatterlist *sg;
        struct sg_page_iter obj_sg_iter;
        int ret = -ENOMEM;

        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (!st)
                goto err_st_alloc;

        ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
        if (ret)
                goto err_sg_alloc;

        sg = st->sgl;
        st->nents = 0;
        for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
                         view->params.partial.offset) {
                if (st->nents >= view->params.partial.size)
                        break;

                sg_set_page(sg, NULL, PAGE_SIZE, 0);
                sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
                sg_dma_len(sg) = PAGE_SIZE;

                sg = sg_next(sg);
                st->nents++;
        }

        return st;

err_sg_alloc:
        kfree(st);
err_st_alloc:
        return ERR_PTR(ret);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
        int ret = 0;

        if (vma->pages)
                return 0;

        if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
                vma->pages = vma->obj->pages;
        else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
                vma->pages =
                        intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated,
                                                  vma->obj);
        else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
                vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
        else
                WARN_ONCE(1, "GGTT view %u not implemented!\n",
                          vma->ggtt_view.type);

        if (!vma->pages) {
                DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
                          vma->ggtt_view.type);
                ret = -EINVAL;
        } else if (IS_ERR(vma->pages)) {
                ret = PTR_ERR(vma->pages);
                vma->pages = NULL;
                DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
                          vma->ggtt_view.type, ret);
        }

        return ret;
}

/**
 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
 * @vma: VMA to map
 * @cache_level: mapping cache level
 * @flags: flags like global or local mapping
 *
 * DMA addresses are taken from the scatter-gather table of this object (or of
 * this VMA in case of non-default GGTT views) and PTE entries set up.
 * Note that DMA addresses are also the only part of the SG table we care about.
 */
int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
                  u32 flags)
{
        u32 bind_flags;
        u32 vma_flags;
        int ret;

        if (WARN_ON(flags == 0))
                return -EINVAL;

        bind_flags = 0;
        if (flags & PIN_GLOBAL)
                bind_flags |= I915_VMA_GLOBAL_BIND;
        if (flags & PIN_USER)
                bind_flags |= I915_VMA_LOCAL_BIND;

        vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
        if (flags & PIN_UPDATE)
                bind_flags |= vma_flags;
        else
                bind_flags &= ~vma_flags;
        if (bind_flags == 0)
                return 0;

        if (vma_flags == 0 && vma->vm->allocate_va_range) {
                trace_i915_va_alloc(vma);
                ret = vma->vm->allocate_va_range(vma->vm,
                                                 vma->node.start,
                                                 vma->node.size);
                if (ret)
                        return ret;
        }

        ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
        if (ret)
                return ret;

        vma->flags |= bind_flags;
        return 0;
}

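/*
 * Flag-handling example (editorial, not original source): a VMA that already
 * carries I915_VMA_GLOBAL_BIND and is then bound with flags = PIN_USER only
 * gains the local (ppGTT) binding, because bits already present in
 * vma->flags are masked out of bind_flags above.  Passing PIN_UPDATE instead
 * forces the existing bindings to be rewritten, which is how
 * i915_gem_restore_gtt_mappings() reinstates PTEs after resume.
 */
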
void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
{
        void __iomem *ptr;

        /* Access through the GTT requires the device to be awake. */
        assert_rpm_wakelock_held(to_i915(vma->vm->dev));

        lockdep_assert_held(&vma->vm->dev->struct_mutex);
        if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
                return IO_ERR_PTR(-ENODEV);

        GEM_BUG_ON(!i915_vma_is_ggtt(vma));
        GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0);

        ptr = vma->iomap;
        if (ptr == NULL) {
                ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable,
                                        vma->node.start,
                                        vma->node.size);
                if (ptr == NULL)
                        return IO_ERR_PTR(-ENOMEM);

                vma->iomap = ptr;
        }

        __i915_vma_pin(vma);
        return ptr;
}

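/*
 * Usage sketch (illustrative only; error handling abridged and the matching
 * unpin helper from the corresponding header is assumed): a caller that
 * needs CPU access through the GTT aperture pins the iomap, writes through
 * the returned WC pointer and then releases the pin, e.g.:
 *
 *      void __iomem *ptr = i915_vma_pin_iomap(vma);
 *
 *      if (IS_ERR(ptr))
 *              return PTR_ERR(ptr);
 *      writel(value, ptr + offset);
 *      i915_vma_unpin_iomap(vma);
 */
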
void i915_vma_unpin_and_release(struct i915_vma **p_vma)
{
        struct i915_vma *vma;

        vma = fetch_and_zero(p_vma);
        if (!vma)
                return;

        i915_vma_unpin(vma);
        i915_vma_put(vma);
}