
// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC. If not, see <http://www.gnu.org/licenses/>.

// Structures representing coprocessors (e.g. GPUs);
// used in both client and server.
//
// Notes:
//
// 1) The use of "CUDA" is misleading; it really means "NVIDIA GPU".
// 2) The design treats each resource type as a pool of identical devices;
// for example, a scheduler request contains a request
// (#instances, instance-seconds) for CUDA jobs.
// In reality, the instances of a resource type can have different properties:
// in the case of CUDA, "compute capability", driver version, RAM, speed, etc.
// How to resolve this discrepancy?
//
// Prior to 21 Apr 09 we identified the fastest instance
// and pretended that the others were identical to it.
// This approach has a serious flaw:
// suppose that the fastest instance has characteristics
// (version, RAM etc.) that satisfy the project's requirements,
// but other instances do not.
// Then BOINC executes jobs on GPUs that can't handle them,
// the jobs fail, the host is punished, etc.
//
// We could treat each GPU as a separate resource,
// with its own backoffs, etc.
// However, this would imply tying jobs to instances,
// which is undesirable from a scheduling viewpoint.
// It would also be a big code change in both client and server.
//
// Instead, (as of 21 Apr 09) our approach is to identify a
// "most capable" instance, which in the case of CUDA is based on
// a) compute capability
// b) driver version
// c) RAM size
// d) est. FLOPS
// (in decreasing priority).
// We ignore and don't use any instances that are less capable
// on any of these axes.
//
// This design avoids running coprocessor apps on instances
// that are incapable of handling them, and it involves no server changes.
// Its drawback is that, on systems with multiple and differing GPUs,
// it may not use some GPUs that actually could be used.
//
// Modified (as of 23 July 14) to allow coprocessors (OpenCL GPUs and OpenCL
// accelerators) from vendors other than the original 3: NVIDIA, AMD and Intel.
// For these original 3 GPU vendors, we still use the above approach, and the
// COPROC::type field contains a standardized vendor name "NVIDIA", "ATI" or
// "intel_gpu". But for other, "new" vendors, we treat each device as a
// separate resource, creating an entry for each instance in the
// COPROCS::coprocs[] array and copying the device name COPROC::opencl_prop.name
// into the COPROC::type field (instead of the vendor name).

#ifndef BOINC_COPROC_H
#define BOINC_COPROC_H

#include <vector>
#include <string>

#ifdef _WIN32
#include "boinc_win.h"
#endif

#ifdef _USING_FCGI_
#include "boinc_fcgi.h"
#endif

#include "miofile.h"
#include "error_numbers.h"
#include "parse.h"
#include "cal_boinc.h"
#include "cl_boinc.h"
#include "opencl_boinc.h"

#define MAX_COPROC_INSTANCES 64
#define MAX_RSC 8
    // max # of processing resource types

#define GPU_MAX_PEAK_FLOPS 1.e15
    // sanity-check bound for peak FLOPS
    // for now (Feb 2019) 1000 TeraFLOPS.
    // As of now, the fastest GPU is 20 TeraFLOPS (NVIDIA).
    // May need to increase this at some point
#define GPU_DEFAULT_PEAK_FLOPS 100.e9
    // value to use if sanity check fails
    // as of now (Feb 2019) 100 GigaFLOPS is a typical low-end GPU

// arguments to proc_type_name() and proc_type_name_xml().
//
#define PROC_TYPE_CPU        0
#define PROC_TYPE_NVIDIA_GPU 1
#define PROC_TYPE_AMD_GPU    2
#define PROC_TYPE_INTEL_GPU  3
#define PROC_TYPE_MINER_ASIC 4
#define NPROC_TYPES          5

extern const char* proc_type_name(int);
    // user-readable name
extern const char* proc_type_name_xml(int);
    // name used in XML and COPROC::type
extern int coproc_type_name_to_num(const char* name);

// deprecated, but keep for simplicity
#define GPU_TYPE_NVIDIA proc_type_name_xml(PROC_TYPE_NVIDIA_GPU)
#define GPU_TYPE_ATI proc_type_name_xml(PROC_TYPE_AMD_GPU)
#define GPU_TYPE_INTEL proc_type_name_xml(PROC_TYPE_INTEL_GPU)

// represents a requirement for a coproc.
// This is a parsed version of the <coproc> elements in an <app_version>
// (used in client only)
//
struct COPROC_REQ {
    char type[256];     // must be unique
    double count;
    int parse(XML_PARSER&);
};

struct PCI_INFO {
    bool present;
    int bus_id;
    int device_id;
    int domain_id;

    void clear() {
        present = false;
        bus_id = 0;
        device_id = 0;
        domain_id = 0;
    }
    PCI_INFO() {
        clear();
    }
    void write(MIOFILE&);
    int parse(XML_PARSER&);
};

// represents a set of identical coprocessors on a particular computer.
// Abstract class;
// objects will always be a derived class (COPROC_CUDA, COPROC_ATI)
// Used in both client and server.
//
struct COPROC {
    char type[256];     // must be unique
    int count;          // how many are present
    bool non_gpu;       // coproc is not a GPU
    double peak_flops;
    double used;        // how many are in use (used by client)
    bool have_cuda;     // True if this GPU supports CUDA on this computer
    bool have_cal;      // True if this GPU supports CAL on this computer
    bool have_opencl;   // True if this GPU supports OpenCL on this computer
    double available_ram;
    bool specified_in_config;
        // If true, this coproc was listed in cc_config.xml
        // rather than being detected by the client.

    // the following are used in both client and server for work-fetch info
    //
    double req_secs;
        // how many instance-seconds of work requested
    double req_instances;
        // client is requesting enough jobs to use this many instances
    double estimated_delay;
        // resource will be saturated for this long

    // temps used in client (enforce_schedule())
    // to keep track of what fraction of each instance is in use
    // during instance assignment
    //
    double usage[MAX_COPROC_INSTANCES];
    double pending_usage[MAX_COPROC_INSTANCES];

    // the device number of each instance
    // These are not sequential if we omit instances (see above)
    //
    int device_nums[MAX_COPROC_INSTANCES];
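        // Illustrative example (not from the original source): if a host has
        // three GPUs of this type and device 1 is excluded (less capable, or
        // listed as ignored), then count == 2 and device_nums[] holds {0, 2}.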
    int device_num;     // temp used in scan process
    bool instance_has_opencl[MAX_COPROC_INSTANCES];
    cl_device_id opencl_device_ids[MAX_COPROC_INSTANCES];
    int opencl_device_count;
    int opencl_device_indexes[MAX_COPROC_INSTANCES];
    PCI_INFO pci_info;
    PCI_INFO pci_infos[MAX_COPROC_INSTANCES];
    bool running_graphics_app[MAX_COPROC_INSTANCES];
        // is this GPU running a graphics app (NVIDIA only)
    double last_print_time;
    OPENCL_DEVICE_PROP opencl_prop;
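
    // Note on the clear()/COPROC(int) idiom below: COPROC(int) does no
    // member initialization, so the function-local static const COPROC in
    // clear() keeps its static zero-initialization, and assigning from it
    // resets all members.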
    COPROC(int){}
    inline void clear() {
        static const COPROC x(0);
        *this = x;
    }
    COPROC(){
        clear();
    }

#ifndef _USING_FCGI_
    void write_xml(MIOFILE&, bool scheduler_rpc=false);
    void write_request(MIOFILE&);
#endif
    int parse(XML_PARSER&);

    inline void clear_usage() {
        for (int i=0; i<count; i++) {
            usage[i] = 0;
            pending_usage[i] = 0;
        }
    }
    int device_num_index(int n) {
        for (int i=0; i<count; i++) {
            if (device_nums[i] == n) return i;
        }
        return -1;
    }
    void merge_opencl(
        std::vector<OPENCL_DEVICE_PROP> &opencls,
        std::vector<int>& ignore_dev
    );
    void find_best_opencls(
        bool use_all,
        std::vector<OPENCL_DEVICE_PROP> &opencls,
        std::vector<int>& ignore_dev
    );

    // sanity check GPU peak FLOPS
    //
    inline bool bad_gpu_peak_flops(const char* source, std::string& msg) {
        if (peak_flops <= 0 || peak_flops > GPU_MAX_PEAK_FLOPS) {
            char buf[256];
            sprintf(buf, "%s reported bad GPU peak FLOPS %f; using %f",
                source, peak_flops, GPU_DEFAULT_PEAK_FLOPS
            );
            msg = buf;
            peak_flops = GPU_DEFAULT_PEAK_FLOPS;
            return true;
        }
        return false;
    }
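
    // Typical caller-side use (illustrative sketch; the detection context
    // and log_msg() are hypothetical, not part of this header):
    //   std::string msg;
    //   if (c.bad_gpu_peak_flops("CUDA", msg)) {
    //       log_msg(msg.c_str());    // peak_flops was reset to the default
    //   }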
};

// Based on cudaDeviceProp from /usr/local/cuda/include/driver_types.h
// doesn't have to match exactly since we get the attributes one at a time.
//
// This is used for 2 purposes:
// - it's exported via GUI RPC for GUIs or other tools
// - it's sent from client to scheduler, for use by app plan functions
// Properties not relevant to either of these can be omitted.
//
struct CUDA_DEVICE_PROP {
    char name[256];
    double totalGlobalMem;
    double sharedMemPerBlock;
    int regsPerBlock;
    int warpSize;
    double memPitch;
    int maxThreadsPerBlock;
    int maxThreadsDim[3];
    int maxGridSize[3];
    int clockRate;
    double totalConstMem;
    int major;      // compute capability
    int minor;
    double textureAlignment;
    int deviceOverlap;
    int multiProcessorCount;

    CUDA_DEVICE_PROP(int){}
    void clear() {
        static const CUDA_DEVICE_PROP x(0);
        *this = x;
    }
    CUDA_DEVICE_PROP() {
        clear();
    }
};

typedef int CUdevice;

struct COPROC_NVIDIA : public COPROC {
    int cuda_version;           // CUDA runtime version
    int display_driver_version;
    CUDA_DEVICE_PROP prop;
    COPROC_USAGE is_used;       // temp used in scan process

#ifndef _USING_FCGI_
    void write_xml(MIOFILE&, bool scheduler_rpc);
#endif
    COPROC_NVIDIA(): COPROC() {clear();}
    COPROC_NVIDIA(int): COPROC() {}
    void get(std::vector<std::string>& warnings);
    void correlate(
        bool use_all,
        std::vector<int>& ignore_devs
    );
    void description(char* buf, int buflen);
    void clear();
    int parse(XML_PARSER&);
    void set_peak_flops();
    void fake(int driver_version, double ram, double avail_ram, int count);
};

// encode a 3-part version as
// 10000000*major + 10000*minor + release
// Note: ATI release #s can exceed 1000
//
inline int ati_version_int(int major, int minor, int release) {
    return major*10000000 + minor*10000 + release;
}
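// For example, ati_version_int(1, 4, 1546) == 10041546; encoded versions
// compare in the expected order as long as minor < 1000 and release < 10000.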

struct COPROC_ATI : public COPROC {
    char name[256];
    char version[50];
    int version_num;
        // CAL version (not driver version) encoded as an int
    bool atirt_detected;
    bool amdrt_detected;
    CALdeviceattribs attribs;
    CALdeviceinfo info;
    COPROC_USAGE is_used;       // temp used in scan process

#ifndef _USING_FCGI_
    void write_xml(MIOFILE&, bool scheduler_rpc);
#endif
    COPROC_ATI(int): COPROC() {}
    COPROC_ATI(): COPROC() {clear();}
    void get(std::vector<std::string>& warnings);
    void correlate(
        bool use_all,
        std::vector<int>& ignore_devs
    );
    void description(char* buf, int buflen);
    void clear();
    int parse(XML_PARSER&);
    void set_peak_flops();
    void fake(double ram, double avail_ram, int);
};

struct COPROC_INTEL : public COPROC {
    char name[256];
    char version[50];
    double global_mem_size;
    COPROC_USAGE is_used;       // temp used in scan process

#ifndef _USING_FCGI_
    void write_xml(MIOFILE&, bool scheduler_rpc);
#endif
    COPROC_INTEL(int): COPROC() {}
    COPROC_INTEL(): COPROC() {clear();}
    void get(std::vector<std::string>& warnings);
    void correlate(
        bool use_all,
        std::vector<int>& ignore_devs
    );
    void clear();
    int parse(XML_PARSER&);
    void set_peak_flops();
    void fake(double ram, double avail_ram, int);
};

typedef std::vector<int> IGNORE_GPU_INSTANCE[NPROC_TYPES];
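    // For each processor type (indexed by PROC_TYPE_*), a list of
    // device numbers to ignore for that type.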

struct COPROCS {
    int n_rsc;
    COPROC coprocs[MAX_RSC];
        // array of processor types on this host.
        // element 0 always represents the CPU.
        // The remaining elements, if any, are GPUs or other coprocessors

    // The following contain vendor-specific info about GPUs.
    // (These GPUs are also represented by elements in the coprocs array)
    //
    COPROC_NVIDIA nvidia;
    COPROC_ATI ati;
    COPROC_INTEL intel_gpu;

    void write_xml(MIOFILE& out, bool scheduler_rpc);
    void get(
        bool use_all,
        std::vector<std::string> &descs,
        std::vector<std::string> &warnings,
        IGNORE_GPU_INSTANCE &ignore_gpu_instance
    );
    void detect_gpus(std::vector<std::string> &warnings);
    int launch_child_process_to_detect_gpus();
    void correlate_gpus(
        bool use_all,
        std::vector<std::string> &descs,
        IGNORE_GPU_INSTANCE &ignore_gpu_instance
    );
    void get_opencl(
        std::vector<std::string> &warnings
    );
    void correlate_opencl(
        bool use_all,
        IGNORE_GPU_INSTANCE& ignore_gpu_instance
    );
    cl_int get_opencl_info(
        OPENCL_DEVICE_PROP& prop,
        cl_uint device_index,
        std::vector<std::string>& warnings
    );
    int parse(XML_PARSER&);
    void set_path_to_client(char *path);
    int write_coproc_info_file(std::vector<std::string> &warnings);
    int read_coproc_info_file(std::vector<std::string> &warnings);
    int add_other_coproc_types();
#ifdef __APPLE__
    void opencl_get_ati_mem_size_from_opengl(std::vector<std::string> &warnings);
#endif
    void summary_string(char* buf, int len);

    // Copy a coproc set, possibly setting usage to zero.
    // used in round-robin simulator and CPU scheduler,
    // to avoid messing w/ master copy
    //
    void clone(COPROCS& c, bool copy_used) {
        n_rsc = c.n_rsc;
        for (int i=0; i<n_rsc; i++) {
            coprocs[i] = c.coprocs[i];
            if (!copy_used) {
                coprocs[i].used = 0;
            }
        }
    }
    void clear() {
        n_rsc = 0;
        for (int i=0; i<MAX_RSC; i++) {
            coprocs[i].clear();
        }
        nvidia.clear();
        ati.clear();
        intel_gpu.clear();
        COPROC c;
        strcpy(c.type, "CPU");
        c.clear_usage();
        add(c);
    }
    inline void clear_usage() {
        for (int i=0; i<n_rsc; i++) {
            coprocs[i].clear_usage();
        }
    }
    inline bool none() {
        return (n_rsc == 1);
    }
    inline int ndevs() {
        int n=0;
        for (int i=1; i<n_rsc; i++) {
            n += coprocs[i].count;
        }
        return n;
    }
    inline bool have_nvidia() {
        return (nvidia.count > 0);
    }
    inline bool have_ati() {
        return (ati.count > 0);
    }
    inline bool have_intel_gpu() {
        return (intel_gpu.count > 0);
    }
    int add(COPROC& c) {
        if (n_rsc >= MAX_RSC) return ERR_BUFFER_OVERFLOW;
        for (int i=1; i<n_rsc; i++) {
            if (!strcmp(c.type, coprocs[i].type)) {
                return ERR_DUP_NAME;
            }
        }
        coprocs[n_rsc++] = c;
        return 0;
    }
    void bound_counts();
        // make sure instance counts are within legal range
    COPROC* lookup_type(const char* t) {
        for (int i=1; i<n_rsc; i++) {
            if (!strcmp(t, coprocs[i].type)) {
                return &coprocs[i];
            }
        }
        return NULL;
    }
    COPROC* proc_type_to_coproc(int t) {
        switch(t) {
        case PROC_TYPE_NVIDIA_GPU: return &nvidia;
        case PROC_TYPE_AMD_GPU: return &ati;
        case PROC_TYPE_INTEL_GPU: return &intel_gpu;
        case PROC_TYPE_MINER_ASIC: return lookup_type("miner_asic");
        }
        return NULL;
    }
    COPROCS() {
        n_rsc = 0;
        nvidia.count = 0;
        ati.count = 0;
        intel_gpu.count = 0;
        COPROC c;
        strcpy(c.type, "CPU");
        c.clear_usage();
        add(c);
    }
};

extern void fake_opencl_gpu(char*);

#endif