delay.c 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. /*
  2. * Precise Delay Loops for i386
  3. *
  4. * Copyright (C) 1993 Linus Torvalds
  5. * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
  6. * Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
  7. *
  8. * The __delay function must _NOT_ be inlined as its execution time
  9. * depends wildly on alignment on many x86 processors. The additional
  10. * jump magic is needed to get the timing stable on all the CPU's
  11. * we have to worry about.
  12. */
  13. #include <linux/export.h>
  14. #include <linux/sched.h>
  15. #include <linux/timex.h>
  16. #include <linux/preempt.h>
  17. #include <linux/delay.h>
  18. #include <asm/processor.h>
  19. #include <asm/delay.h>
  20. #include <asm/timer.h>
  21. #include <asm/mwait.h>
  22. #ifdef CONFIG_SMP
  23. # include <asm/smp.h>
  24. #endif
  25. /* simple loop based delay: */
  26. static void delay_loop(unsigned long loops)
  27. {
  28. asm volatile(
  29. " test %0,%0 \n"
  30. " jz 3f \n"
  31. " jmp 1f \n"
  32. ".align 16 \n"
  33. "1: jmp 2f \n"
  34. ".align 16 \n"
  35. "2: dec %0 \n"
  36. " jnz 2b \n"
  37. "3: dec %0 \n"
  38. : /* we don't need output */
  39. :"a" (loops)
  40. );
  41. }
  42. /* TSC based delay: */
  43. static void delay_tsc(unsigned long __loops)
  44. {
  45. u64 bclock, now, loops = __loops;
  46. int cpu;
  47. preempt_disable();
  48. cpu = smp_processor_id();
  49. bclock = rdtsc_ordered();
  50. for (;;) {
  51. now = rdtsc_ordered();
  52. if ((now - bclock) >= loops)
  53. break;
  54. /* Allow RT tasks to run */
  55. preempt_enable();
  56. rep_nop();
  57. preempt_disable();
  58. /*
  59. * It is possible that we moved to another CPU, and
  60. * since TSC's are per-cpu we need to calculate
  61. * that. The delay must guarantee that we wait "at
  62. * least" the amount of time. Being moved to another
  63. * CPU could make the wait longer but we just need to
  64. * make sure we waited long enough. Rebalance the
  65. * counter for this CPU.
  66. */
  67. if (unlikely(cpu != smp_processor_id())) {
  68. loops -= (now - bclock);
  69. cpu = smp_processor_id();
  70. bclock = rdtsc_ordered();
  71. }
  72. }
  73. preempt_enable();
  74. }
  75. /*
  76. * On some AMD platforms, MWAITX has a configurable 32-bit timer, that
  77. * counts with TSC frequency. The input value is the loop of the
  78. * counter, it will exit when the timer expires.
  79. */
  80. static void delay_mwaitx(unsigned long __loops)
  81. {
  82. u64 start, end, delay, loops = __loops;
  83. /*
  84. * Timer value of 0 causes MWAITX to wait indefinitely, unless there
  85. * is a store on the memory monitored by MONITORX.
  86. */
  87. if (loops == 0)
  88. return;
  89. start = rdtsc_ordered();
  90. for (;;) {
  91. delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
  92. /*
  93. * Use cpu_tss as a cacheline-aligned, seldomly
  94. * accessed per-cpu variable as the monitor target.
  95. */
  96. __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
  97. /*
  98. * AMD, like Intel, supports the EAX hint and EAX=0xf
  99. * means, do not enter any deep C-state and we use it
  100. * here in delay() to minimize wakeup latency.
  101. */
  102. __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
  103. end = rdtsc_ordered();
  104. if (loops <= end - start)
  105. break;
  106. loops -= end - start;
  107. start = end;
  108. }
  109. }
  110. /*
  111. * Since we calibrate only once at boot, this
  112. * function should be set once at boot and not changed
  113. */
  114. static void (*delay_fn)(unsigned long) = delay_loop;
  115. void use_tsc_delay(void)
  116. {
  117. if (delay_fn == delay_loop)
  118. delay_fn = delay_tsc;
  119. }
  120. void use_mwaitx_delay(void)
  121. {
  122. delay_fn = delay_mwaitx;
  123. }
  124. int read_current_timer(unsigned long *timer_val)
  125. {
  126. if (delay_fn == delay_tsc) {
  127. *timer_val = rdtsc();
  128. return 0;
  129. }
  130. return -1;
  131. }
  132. void __delay(unsigned long loops)
  133. {
  134. delay_fn(loops);
  135. }
  136. EXPORT_SYMBOL(__delay);
  137. inline void __const_udelay(unsigned long xloops)
  138. {
  139. int d0;
  140. xloops *= 4;
  141. asm("mull %%edx"
  142. :"=d" (xloops), "=&a" (d0)
  143. :"1" (xloops), "0"
  144. (this_cpu_read(cpu_info.loops_per_jiffy) * (HZ/4)));
  145. __delay(++xloops);
  146. }
  147. EXPORT_SYMBOL(__const_udelay);
  148. void __udelay(unsigned long usecs)
  149. {
  150. __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
  151. }
  152. EXPORT_SYMBOL(__udelay);
  153. void __ndelay(unsigned long nsecs)
  154. {
  155. __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
  156. }
  157. EXPORT_SYMBOL(__ndelay);