tsc_sync.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. /*
  2. * check TSC synchronization.
  3. *
  4. * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
  5. *
  6. * We check whether all boot CPUs have their TSC's synchronized,
  7. * print a warning if not and turn off the TSC clock-source.
  8. *
  9. * The warp-check is point-to-point between two CPUs, the CPU
  10. * initiating the bootup is the 'source CPU', the freshly booting
  11. * CPU is the 'target CPU'.
  12. *
  13. * Only two CPUs may participate - they can enter in any order.
  14. * ( The serial nature of the boot logic and the CPU hotplug lock
  15. * protects against more than 2 CPUs entering this code. )
  16. */
  17. #include <linux/spinlock.h>
  18. #include <linux/kernel.h>
  19. #include <linux/smp.h>
  20. #include <linux/nmi.h>
  21. #include <asm/tsc.h>
  22. /*
  23. * Entry/exit counters that make sure that both CPUs
  24. * run the measurement code at once:
  25. */
  26. static atomic_t start_count;
  27. static atomic_t stop_count;
  28. /*
  29. * We use a raw spinlock in this exceptional case, because
  30. * we want to have the fastest, inlined, non-debug version
  31. * of a critical section, to be able to prove TSC time-warps:
  32. */
  33. static arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
  34. static cycles_t last_tsc;
  35. static cycles_t max_warp;
  36. static int nr_warps;
  37. /*
  38. * TSC-warp measurement loop running on both CPUs. This is not called
  39. * if there is no TSC.
  40. */
  41. static void check_tsc_warp(unsigned int timeout)
  42. {
  43. cycles_t start, now, prev, end;
  44. int i;
  45. start = rdtsc_ordered();
  46. /*
  47. * The measurement runs for 'timeout' msecs:
  48. */
  49. end = start + (cycles_t) tsc_khz * timeout;
  50. now = start;
  51. for (i = 0; ; i++) {
  52. /*
  53. * We take the global lock, measure TSC, save the
  54. * previous TSC that was measured (possibly on
  55. * another CPU) and update the previous TSC timestamp.
  56. */
  57. arch_spin_lock(&sync_lock);
  58. prev = last_tsc;
  59. now = rdtsc_ordered();
  60. last_tsc = now;
  61. arch_spin_unlock(&sync_lock);
  62. /*
  63. * Be nice every now and then (and also check whether
  64. * measurement is done [we also insert a 10 million
  65. * loops safety exit, so we dont lock up in case the
  66. * TSC readout is totally broken]):
  67. */
  68. if (unlikely(!(i & 7))) {
  69. if (now > end || i > 10000000)
  70. break;
  71. cpu_relax();
  72. touch_nmi_watchdog();
  73. }
  74. /*
  75. * Outside the critical section we can now see whether
  76. * we saw a time-warp of the TSC going backwards:
  77. */
  78. if (unlikely(prev > now)) {
  79. arch_spin_lock(&sync_lock);
  80. max_warp = max(max_warp, prev - now);
  81. nr_warps++;
  82. arch_spin_unlock(&sync_lock);
  83. }
  84. }
  85. WARN(!(now-start),
  86. "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
  87. now-start, end-start);
  88. }
  /*
   * loop_timeout - pick the duration (in msecs) of the TSC-warp measurement.
   * @cpu: the target CPU that is coming online
   *
   * When none of the target's core-siblings is online yet, the measurement
   * runs for 20 msecs. Otherwise 2 msecs suffice, because earlier checks
   * already told us something about this socket (and that knowledge grows
   * with every further logical sibling brought up in it).
   *
   * Ideally the check could be skipped altogether for the remaining
   * core-siblings once the first logical CPU of a socket has passed.
   * However, the TSC is per logical CPU and may have been modified wrongly
   * by the BIOS, so a shorter test is still run to catch such errors. It
   * also catches the case where the cores of a socket don't all get reset
   * at the same time.
   */
  static inline unsigned int loop_timeout(int cpu)
  {
  	if (cpumask_weight(topology_core_cpumask(cpu)) > 1)
  		return 2;

  	return 20;
  }
  107. /*
  108. * Source CPU calls into this - it waits for the freshly booted
  109. * target CPU to arrive and then starts the measurement:
  110. */
  111. void check_tsc_sync_source(int cpu)
  112. {
  113. int cpus = 2;
  114. /*
  115. * No need to check if we already know that the TSC is not
  116. * synchronized or if we have no TSC.
  117. */
  118. if (unsynchronized_tsc())
  119. return;
  120. if (tsc_clocksource_reliable) {
  121. if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
  122. pr_info(
  123. "Skipped synchronization checks as TSC is reliable.\n");
  124. return;
  125. }
  126. /*
  127. * Reset it - in case this is a second bootup:
  128. */
  129. atomic_set(&stop_count, 0);
  130. /*
  131. * Wait for the target to arrive:
  132. */
  133. while (atomic_read(&start_count) != cpus-1)
  134. cpu_relax();
  135. /*
  136. * Trigger the target to continue into the measurement too:
  137. */
  138. atomic_inc(&start_count);
  139. check_tsc_warp(loop_timeout(cpu));
  140. while (atomic_read(&stop_count) != cpus-1)
  141. cpu_relax();
  142. if (nr_warps) {
  143. pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
  144. smp_processor_id(), cpu);
  145. pr_warning("Measured %Ld cycles TSC warp between CPUs, "
  146. "turning off TSC clock.\n", max_warp);
  147. mark_tsc_unstable("check_tsc_sync_source failed");
  148. } else {
  149. pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
  150. smp_processor_id(), cpu);
  151. }
  152. /*
  153. * Reset it - just in case we boot another CPU later:
  154. */
  155. atomic_set(&start_count, 0);
  156. nr_warps = 0;
  157. max_warp = 0;
  158. last_tsc = 0;
  159. /*
  160. * Let the target continue with the bootup:
  161. */
  162. atomic_inc(&stop_count);
  163. }
  164. /*
  165. * Freshly booted CPUs call into this:
  166. */
  167. void check_tsc_sync_target(void)
  168. {
  169. int cpus = 2;
  170. /* Also aborts if there is no TSC. */
  171. if (unsynchronized_tsc() || tsc_clocksource_reliable)
  172. return;
  173. /*
  174. * Register this CPU's participation and wait for the
  175. * source CPU to start the measurement:
  176. */
  177. atomic_inc(&start_count);
  178. while (atomic_read(&start_count) != cpus)
  179. cpu_relax();
  180. check_tsc_warp(loop_timeout(smp_processor_id()));
  181. /*
  182. * Ok, we are done:
  183. */
  184. atomic_inc(&stop_count);
  185. /*
  186. * Wait for the source CPU to print stuff:
  187. */
  188. while (atomic_read(&stop_count) != cpus)
  189. cpu_relax();
  190. }