123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527 |
- // This file is part of BOINC.
- // http://boinc.berkeley.edu
- // Copyright (C) 2008 University of California
- //
- // BOINC is free software; you can redistribute it and/or modify it
- // under the terms of the GNU Lesser General Public License
- // as published by the Free Software Foundation,
- // either version 3 of the License, or (at your option) any later version.
- //
- // BOINC is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- // See the GNU Lesser General Public License for more details.
- //
- // You should have received a copy of the GNU Lesser General Public License
- // along with BOINC. If not, see <http://www.gnu.org/licenses/>.
- // Structures representing coprocessors (e.g. GPUs);
- // used in both client and server.
- //
- // Notes:
- //
- // 1) The use of "CUDA" is misleading; it really means "NVIDIA GPU".
- // 2) The design treats each resource type as a pool of identical devices;
- // for example, a scheduler request contains a request
- // (#instances, instance-seconds) for CUDA jobs.
- // In reality, the instances of a resource type can have different properties:
- // In the case of CUDA, "compute capability", driver version, RAM, speed, etc.
- // How to resolve this discrepancy?
- //
- // Prior to 21 Apr 09 we identified the fastest instance
- // and pretended that the others were identical to it.
- // This approach has a serious flaw:
- // suppose that the fastest instance has characteristics
- // (version, RAM etc.) that satisfy the project's requirements,
- // but other instances to not.
- // Then BOINC executes jobs on GPUs that can't handle them,
- // the jobs fail, the host is punished, etc.
- //
- // We could treat each GPU has a separate resource,
- // with its own backoffs, etc.
- // However, this would imply tying jobs to instances,
- // which is undesirable from a scheduling viewpoint.
- // It would also be a big code change in both client and server.
- //
- // Instead, (as of 21 Apr 09) our approach is to identify a
- // "most capable" instance, which in the case of CUDA is based on
- // a) compute capability
- // b) driver version
- // c) RAM size
- // d) est. FLOPS
- // (in decreasing priority).
- // We ignore and don't use any instances that are less capable
- // on any of these axes.
- //
- // This design avoids running coprocessor apps on instances
- // that are incapable of handling them, and it involves no server changes.
- // Its drawback is that, on systems with multiple and differing GPUs,
- // it may not use some GPUs that actually could be used.
- //
- // Modified (as of 23 July 14) to allow coprocessors (OpenCL GPUs and OpenCL
- // accelerators) from vendors other than original 3: NVIDIA, AMD and Intel.
- // For these original 3 GPU vendors, we still use the above approach, and the
- // COPROC::type field contains a standardized vendor name "NVIDIA", "ATI" or
- // "intel_gpu". But for other, "new" vendors, we treat each device as a
- // separate resource, creating an entry for each instance in the
- // COPROCS::coprocs[] array and copying the device name COPROC::opencl_prop.name
- // into the COPROC::type field (instead of the vendor name.)
- #ifndef BOINC_COPROC_H
- #define BOINC_COPROC_H
- #include <vector>
- #include <string>
- #ifdef _WIN32
- #include "boinc_win.h"
- #endif
- #ifdef _USING_FCGI_
- #include "boinc_fcgi.h"
- #endif
- #include "miofile.h"
- #include "error_numbers.h"
- #include "parse.h"
- #include "cal_boinc.h"
- #include "cl_boinc.h"
- #include "opencl_boinc.h"
- #define MAX_COPROC_INSTANCES 64
- #define MAX_RSC 8
- // max # of processing resources types
- #define GPU_MAX_PEAK_FLOPS 1.e15
- // sanity-check bound for peak FLOPS
- // for now (Feb 2019) 1000 TeraFLOPS.
- // As of now, the fastest GPU is 20 TeraFLOPS (NVIDIA).
- // May need to increase this at some point
- #define GPU_DEFAULT_PEAK_FLOPS 100.e9
- // value to use if sanity check fails
- // as of now (Feb 2019) 100 GigaFLOPS is a typical low-end GPU
- // arguments to proc_type_name() and proc_type_name_xml().
- //
- #define PROC_TYPE_CPU 0
- #define PROC_TYPE_NVIDIA_GPU 1
- #define PROC_TYPE_AMD_GPU 2
- #define PROC_TYPE_INTEL_GPU 3
- #define PROC_TYPE_MINER_ASIC 4
- #define NPROC_TYPES 5
- extern const char* proc_type_name(int);
- // user-readable name
- extern const char* proc_type_name_xml(int);
- // name used in XML and COPROC::type
- extern int coproc_type_name_to_num(const char* name);
- // deprecated, but keep for simplicity
- #define GPU_TYPE_NVIDIA proc_type_name_xml(PROC_TYPE_NVIDIA_GPU)
- #define GPU_TYPE_ATI proc_type_name_xml(PROC_TYPE_AMD_GPU)
- #define GPU_TYPE_INTEL proc_type_name_xml(PROC_TYPE_INTEL_GPU)
- // represents a requirement for a coproc.
- // This is a parsed version of the <coproc> elements in an <app_version>
- // (used in client only)
- //
- struct COPROC_REQ {
- char type[256]; // must be unique
- double count;
- int parse(XML_PARSER&);
- };
- struct PCI_INFO {
- bool present;
- int bus_id;
- int device_id;
- int domain_id;
- void clear() {
- present = false;
- bus_id = 0;
- device_id = 0;
- domain_id = 0;
- }
- PCI_INFO() {
- clear();
- }
- void write(MIOFILE&);
- int parse(XML_PARSER&);
- };
- // represents a set of identical coprocessors on a particular computer.
- // Abstract class;
- // objects will always be a derived class (COPROC_CUDA, COPROC_ATI)
- // Used in both client and server.
- //
- struct COPROC {
- char type[256]; // must be unique
- int count; // how many are present
- bool non_gpu; // coproc is not a GPU
- double peak_flops;
- double used; // how many are in use (used by client)
- bool have_cuda; // True if this GPU supports CUDA on this computer
- bool have_cal; // True if this GPU supports CAL on this computer
- bool have_opencl; // True if this GPU supports openCL on this computer
- double available_ram;
- bool specified_in_config;
- // If true, this coproc was listed in cc_config.xml
- // rather than being detected by the client.
- // the following are used in both client and server for work-fetch info
- //
- double req_secs;
- // how many instance-seconds of work requested
- double req_instances;
- // client is requesting enough jobs to use this many instances
- double estimated_delay;
- // resource will be saturated for this long
- // temps used in client (enforce_schedule())
- // to keep track of what fraction of each instance is in use
- // during instance assignment
- //
- double usage[MAX_COPROC_INSTANCES];
- double pending_usage[MAX_COPROC_INSTANCES];
- // the device number of each instance
- // These are not sequential if we omit instances (see above)
- //
- int device_nums[MAX_COPROC_INSTANCES];
- int device_num; // temp used in scan process
- bool instance_has_opencl[MAX_COPROC_INSTANCES];
- cl_device_id opencl_device_ids[MAX_COPROC_INSTANCES];
- int opencl_device_count;
- int opencl_device_indexes[MAX_COPROC_INSTANCES];
- PCI_INFO pci_info;
- PCI_INFO pci_infos[MAX_COPROC_INSTANCES];
- bool running_graphics_app[MAX_COPROC_INSTANCES];
- // is this GPU running a graphics app (NVIDIA only)
- double last_print_time;
- OPENCL_DEVICE_PROP opencl_prop;
- COPROC(int){}
- inline void clear() {
- static const COPROC x(0);
- *this = x;
- }
- COPROC(){
- clear();
- }
- #ifndef _USING_FCGI_
- void write_xml(MIOFILE&, bool scheduler_rpc=false);
- void write_request(MIOFILE&);
- #endif
- int parse(XML_PARSER&);
- inline void clear_usage() {
- for (int i=0; i<count; i++) {
- usage[i] = 0;
- pending_usage[i] = 0;
- }
- }
- int device_num_index(int n) {
- for (int i=0; i<count; i++) {
- if (device_nums[i] == n) return i;
- }
- return -1;
- }
- void merge_opencl(
- std::vector<OPENCL_DEVICE_PROP> &opencls,
- std::vector<int>& ignore_dev
- );
- void find_best_opencls(
- bool use_all,
- std::vector<OPENCL_DEVICE_PROP> &opencls,
- std::vector<int>& ignore_dev
- );
- // sanity check GPU peak FLOPS
- //
- inline bool bad_gpu_peak_flops(const char* source, std::string& msg) {
- if (peak_flops <= 0 || peak_flops > GPU_MAX_PEAK_FLOPS) {
- char buf[256];
- sprintf(buf, "%s reported bad GPU peak FLOPS %f; using %f",
- source, peak_flops, GPU_DEFAULT_PEAK_FLOPS
- );
- msg = buf;
- peak_flops = GPU_DEFAULT_PEAK_FLOPS;
- return true;
- }
- return false;
- }
- };
- // Based on cudaDeviceProp from /usr/local/cuda/include/driver_types.h
- // doesn't have to match exactly since we get the attributes one at a time.
- //
- // This is used for 2 purposes:
- // - it's exported via GUI RPC for GUIs or other tools
- // - it's sent from client to scheduler, for use by app plan functions
- // Properties not relevant to either of these can be omitted.
- //
- struct CUDA_DEVICE_PROP {
- char name[256];
- double totalGlobalMem;
- double sharedMemPerBlock;
- int regsPerBlock;
- int warpSize;
- double memPitch;
- int maxThreadsPerBlock;
- int maxThreadsDim[3];
- int maxGridSize[3];
- int clockRate;
- double totalConstMem;
- int major; // compute capability
- int minor;
- double textureAlignment;
- int deviceOverlap;
- int multiProcessorCount;
- CUDA_DEVICE_PROP(int){}
- void clear() {
- static const CUDA_DEVICE_PROP x(0);
- *this = x;
- }
- CUDA_DEVICE_PROP() {
- clear();
- }
- };
- typedef int CUdevice;
- struct COPROC_NVIDIA : public COPROC {
- int cuda_version; // CUDA runtime version
- int display_driver_version;
- CUDA_DEVICE_PROP prop;
- COPROC_USAGE is_used; // temp used in scan process
- #ifndef _USING_FCGI_
- void write_xml(MIOFILE&, bool scheduler_rpc);
- #endif
- COPROC_NVIDIA(): COPROC() {clear();}
- COPROC_NVIDIA(int): COPROC() {}
- void get(std::vector<std::string>& warnings);
- void correlate(
- bool use_all,
- std::vector<int>& ignore_devs
- );
- void description(char* buf, int buflen);
- void clear();
- int parse(XML_PARSER&);
- void set_peak_flops();
- void fake(int driver_version, double ram, double avail_ram, int count);
- };
- // encode a 3-part version as // 10000000*major + 10000*minor + release
- // Note: ATI release #s can exceed 1000
- //
- inline int ati_version_int(int major, int minor, int release) {
- return major*10000000 + minor*10000 + release;
- }
- struct COPROC_ATI : public COPROC {
- char name[256];
- char version[50];
- int version_num;
- // CAL version (not driver version) encoded as an int
- bool atirt_detected;
- bool amdrt_detected;
- CALdeviceattribs attribs;
- CALdeviceinfo info;
- COPROC_USAGE is_used; // temp used in scan process
- #ifndef _USING_FCGI_
- void write_xml(MIOFILE&, bool scheduler_rpc);
- #endif
- COPROC_ATI(int): COPROC() {}
- COPROC_ATI(): COPROC() {clear();}
- void get(std::vector<std::string>& warnings);
- void correlate(
- bool use_all,
- std::vector<int>& ignore_devs
- );
- void description(char* buf, int buflen);
- void clear();
- int parse(XML_PARSER&);
- void set_peak_flops();
- void fake(double ram, double avail_ram, int);
- };
- struct COPROC_INTEL : public COPROC {
- char name[256];
- char version[50];
- double global_mem_size;
- COPROC_USAGE is_used; // temp used in scan process
- #ifndef _USING_FCGI_
- void write_xml(MIOFILE&, bool scheduler_rpc);
- #endif
- COPROC_INTEL(int): COPROC() {}
- COPROC_INTEL(): COPROC() {clear();}
- void get(std::vector<std::string>& warnings);
- void correlate(
- bool use_all,
- std::vector<int>& ignore_devs
- );
- void clear();
- int parse(XML_PARSER&);
- void set_peak_flops();
- void fake(double ram, double avail_ram, int);
- };
- typedef std::vector<int> IGNORE_GPU_INSTANCE[NPROC_TYPES];
- struct COPROCS {
- int n_rsc;
- COPROC coprocs[MAX_RSC];
- // array of processor types on this host.
- // element 0 always represents the CPU.
- // The remaining elements, if any, are GPUs or other coprocessors
- // The following contain vendor-specific info about GPUs.
- // (These GPUs are also represented by elements in the coprocs array)
- //
- COPROC_NVIDIA nvidia;
- COPROC_ATI ati;
- COPROC_INTEL intel_gpu;
- void write_xml(MIOFILE& out, bool scheduler_rpc);
- void get(
- bool use_all,
- std::vector<std::string> &descs,
- std::vector<std::string> &warnings,
- IGNORE_GPU_INSTANCE &ignore_gpu_instance
- );
- void detect_gpus(std::vector<std::string> &warnings);
- int launch_child_process_to_detect_gpus();
- void correlate_gpus(
- bool use_all,
- std::vector<std::string> &descs,
- IGNORE_GPU_INSTANCE &ignore_gpu_instance
- );
- void get_opencl(
- std::vector<std::string> &warnings
- );
- void correlate_opencl(
- bool use_all,
- IGNORE_GPU_INSTANCE& ignore_gpu_instance
- );
- cl_int get_opencl_info(
- OPENCL_DEVICE_PROP& prop,
- cl_uint device_index,
- std::vector<std::string>& warnings
- );
- int parse(XML_PARSER&);
- void set_path_to_client(char *path);
- int write_coproc_info_file(std::vector<std::string> &warnings);
- int read_coproc_info_file(std::vector<std::string> &warnings);
- int add_other_coproc_types();
-
- #ifdef __APPLE__
- void opencl_get_ati_mem_size_from_opengl(std::vector<std::string> &warnings);
- #endif
- void summary_string(char* buf, int len);
- // Copy a coproc set, possibly setting usage to zero.
- // used in round-robin simulator and CPU scheduler,
- // to avoid messing w/ master copy
- //
- void clone(COPROCS& c, bool copy_used) {
- n_rsc = c.n_rsc;
- for (int i=0; i<n_rsc; i++) {
- coprocs[i] = c.coprocs[i];
- if (!copy_used) {
- coprocs[i].used = 0;
- }
- }
- }
- void clear() {
- n_rsc = 0;
- for (int i=0; i<MAX_RSC; i++) {
- coprocs[i].clear();
- }
- nvidia.clear();
- ati.clear();
- intel_gpu.clear();
- COPROC c;
- strcpy(c.type, "CPU");
- c.clear_usage();
- add(c);
- }
- inline void clear_usage() {
- for (int i=0; i<n_rsc; i++) {
- coprocs[i].clear_usage();
- }
- }
- inline bool none() {
- return (n_rsc == 1);
- }
- inline int ndevs() {
- int n=0;
- for (int i=1; i<n_rsc; i++) {
- n += coprocs[i].count;
- }
- return n;
- }
- inline bool have_nvidia() {
- return (nvidia.count > 0);
- }
- inline bool have_ati() {
- return (ati.count > 0);
- }
- inline bool have_intel_gpu() {
- return (intel_gpu.count > 0);
- }
- int add(COPROC& c) {
- if (n_rsc >= MAX_RSC) return ERR_BUFFER_OVERFLOW;
- for (int i=1; i<n_rsc; i++) {
- if (!strcmp(c.type, coprocs[i].type)) {
- return ERR_DUP_NAME;
- }
- }
- coprocs[n_rsc++] = c;
- return 0;
- }
- void bound_counts();
- // make sure instance counts are within legal range
- COPROC* lookup_type(const char* t) {
- for (int i=1; i<n_rsc; i++) {
- if (!strcmp(t, coprocs[i].type)) {
- return &coprocs[i];
- }
- }
- return NULL;
- }
- COPROC* proc_type_to_coproc(int t) {
- switch(t) {
- case PROC_TYPE_NVIDIA_GPU: return &nvidia;
- case PROC_TYPE_AMD_GPU: return &ati;
- case PROC_TYPE_INTEL_GPU: return &intel_gpu;
- case PROC_TYPE_MINER_ASIC: return lookup_type("miner_asic");
- }
- return NULL;
- }
- COPROCS() {
- n_rsc = 0;
- nvidia.count = 0;
- ati.count = 0;
- intel_gpu.count = 0;
- COPROC c;
- strcpy(c.type, "CPU");
- c.clear_usage();
- add(c);
- }
- };
- extern void fake_opencl_gpu(char*);
- #endif
|