user_namespace.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
  1. /*
  2. * This program is free software; you can redistribute it and/or
  3. * modify it under the terms of the GNU General Public License as
  4. * published by the Free Software Foundation, version 2 of the
  5. * License.
  6. */
  7. #include <linux/export.h>
  8. #include <linux/nsproxy.h>
  9. #include <linux/slab.h>
  10. #include <linux/user_namespace.h>
  11. #include <linux/highuid.h>
  12. #include <linux/cred.h>
  13. #include <linux/proc_fs.h>
  14. #include <linux/securebits.h>
  15. #include <linux/keyctl.h>
  16. #include <linux/key-type.h>
  17. #include <keys/user-type.h>
  18. #include <linux/seq_file.h>
  19. #include <linux/fs.h>
  20. #include <linux/uaccess.h>
  21. #include <linux/ctype.h>
  /*
   * Slab cache used in this file to allocate and free struct user_namespace
   * objects; it is created by user_namespaces_init().
   */
  22. static struct kmem_cache *user_ns_cachep __read_mostly;
  /*
   * Forward declaration: map_write() calls new_idmap_permitted() before the
   * function is defined further down in the file.
   */
  23. static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
  24. struct uid_gid_map *map);
  25. /*
  26. * Create a new user namespace, deriving the creator from the user in the
  27. * passed credentials, and replacing that user with the new root user for the
  28. * new namespace.
  29. *
  30. * This is called by copy_creds(), which will finish setting the target task's
  31. * credentials.
  32. */
  33. int create_user_ns(struct cred *new)
  34. {
  35. struct user_namespace *ns, *parent_ns = new->user_ns;
  36. kuid_t owner = new->euid;
  37. kgid_t group = new->egid;
  38. int ret;
  39. /* The creator needs a mapping in the parent user namespace
  40. * or else we won't be able to reasonably tell userspace who
  41. * created a user_namespace.
  42. */
  43. if (!kuid_has_mapping(parent_ns, owner) ||
  44. !kgid_has_mapping(parent_ns, group))
  45. return -EPERM;
  46. ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
  47. if (!ns)
  48. return -ENOMEM;
  49. ret = proc_alloc_inum(&ns->proc_inum);
  50. if (ret) {
  51. kmem_cache_free(user_ns_cachep, ns);
  52. return ret;
  53. }
  54. kref_init(&ns->kref);
  55. ns->parent = parent_ns;
  56. ns->owner = owner;
  57. ns->group = group;
  58. /* Start with the same capabilities as init but useless for doing
  59. * anything as the capabilities are bound to the new user namespace.
  60. */
  61. new->securebits = SECUREBITS_DEFAULT;
  62. new->cap_inheritable = CAP_EMPTY_SET;
  63. new->cap_permitted = CAP_FULL_SET;
  64. new->cap_effective = CAP_FULL_SET;
  65. new->cap_bset = CAP_FULL_SET;
  66. #ifdef CONFIG_KEYS
  67. key_put(new->request_key_auth);
  68. new->request_key_auth = NULL;
  69. #endif
  70. /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
  71. /* Leave the new->user_ns reference with the new user namespace. */
  72. /* Leave the reference to our user_ns with the new cred. */
  73. new->user_ns = ns;
  74. return 0;
  75. }
  76. void free_user_ns(struct kref *kref)
  77. {
  78. struct user_namespace *parent, *ns =
  79. container_of(kref, struct user_namespace, kref);
  80. parent = ns->parent;
  81. proc_free_inum(ns->proc_inum);
  82. kmem_cache_free(user_ns_cachep, ns);
  83. put_user_ns(parent);
  84. }
  85. EXPORT_SYMBOL(free_user_ns);
  86. static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
  87. {
  88. unsigned idx, extents;
  89. u32 first, last, id2;
  90. id2 = id + count - 1;
  91. /* Find the matching extent */
  92. extents = map->nr_extents;
  93. smp_read_barrier_depends();
  94. for (idx = 0; idx < extents; idx++) {
  95. first = map->extent[idx].first;
  96. last = first + map->extent[idx].count - 1;
  97. if (id >= first && id <= last &&
  98. (id2 >= first && id2 <= last))
  99. break;
  100. }
  101. /* Map the id or note failure */
  102. if (idx < extents)
  103. id = (id - first) + map->extent[idx].lower_first;
  104. else
  105. id = (u32) -1;
  106. return id;
  107. }
  108. static u32 map_id_down(struct uid_gid_map *map, u32 id)
  109. {
  110. unsigned idx, extents;
  111. u32 first, last;
  112. /* Find the matching extent */
  113. extents = map->nr_extents;
  114. smp_read_barrier_depends();
  115. for (idx = 0; idx < extents; idx++) {
  116. first = map->extent[idx].first;
  117. last = first + map->extent[idx].count - 1;
  118. if (id >= first && id <= last)
  119. break;
  120. }
  121. /* Map the id or note failure */
  122. if (idx < extents)
  123. id = (id - first) + map->extent[idx].lower_first;
  124. else
  125. id = (u32) -1;
  126. return id;
  127. }
  128. static u32 map_id_up(struct uid_gid_map *map, u32 id)
  129. {
  130. unsigned idx, extents;
  131. u32 first, last;
  132. /* Find the matching extent */
  133. extents = map->nr_extents;
  134. smp_read_barrier_depends();
  135. for (idx = 0; idx < extents; idx++) {
  136. first = map->extent[idx].lower_first;
  137. last = first + map->extent[idx].count - 1;
  138. if (id >= first && id <= last)
  139. break;
  140. }
  141. /* Map the id or note failure */
  142. if (idx < extents)
  143. id = (id - first) + map->extent[idx].first;
  144. else
  145. id = (u32) -1;
  146. return id;
  147. }
  148. /**
  149. * make_kuid - Map a user-namespace uid pair into a kuid.
  150. * @ns: User namespace that the uid is in
  151. * @uid: User identifier
  152. *
  153. * Maps a user-namespace uid pair into a kernel internal kuid,
  154. * and returns that kuid.
  155. *
  156. * When there is no mapping defined for the user-namespace uid
  157. * pair INVALID_UID is returned. Callers are expected to test
  158. * for and handle handle INVALID_UID being returned. INVALID_UID
  159. * may be tested for using uid_valid().
  160. */
  161. kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
  162. {
  163. /* Map the uid to a global kernel uid */
  164. return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
  165. }
  166. EXPORT_SYMBOL(make_kuid);
  167. /**
  168. * from_kuid - Create a uid from a kuid user-namespace pair.
  169. * @targ: The user namespace we want a uid in.
  170. * @kuid: The kernel internal uid to start with.
  171. *
  172. * Map @kuid into the user-namespace specified by @targ and
  173. * return the resulting uid.
  174. *
  175. * There is always a mapping into the initial user_namespace.
  176. *
  177. * If @kuid has no mapping in @targ (uid_t)-1 is returned.
  178. */
  179. uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
  180. {
  181. /* Map the uid from a global kernel uid */
  182. return map_id_up(&targ->uid_map, __kuid_val(kuid));
  183. }
  184. EXPORT_SYMBOL(from_kuid);
  185. /**
  186. * from_kuid_munged - Create a uid from a kuid user-namespace pair.
  187. * @targ: The user namespace we want a uid in.
  188. * @kuid: The kernel internal uid to start with.
  189. *
  190. * Map @kuid into the user-namespace specified by @targ and
  191. * return the resulting uid.
  192. *
  193. * There is always a mapping into the initial user_namespace.
  194. *
  195. * Unlike from_kuid from_kuid_munged never fails and always
  196. * returns a valid uid. This makes from_kuid_munged appropriate
  197. * for use in syscalls like stat and getuid where failing the
  198. * system call and failing to provide a valid uid are not an
  199. * options.
  200. *
  201. * If @kuid has no mapping in @targ overflowuid is returned.
  202. */
  203. uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
  204. {
  205. uid_t uid;
  206. uid = from_kuid(targ, kuid);
  207. if (uid == (uid_t) -1)
  208. uid = overflowuid;
  209. return uid;
  210. }
  211. EXPORT_SYMBOL(from_kuid_munged);
  212. /**
  213. * make_kgid - Map a user-namespace gid pair into a kgid.
  214. * @ns: User namespace that the gid is in
  215. * @uid: group identifier
  216. *
  217. * Maps a user-namespace gid pair into a kernel internal kgid,
  218. * and returns that kgid.
  219. *
  220. * When there is no mapping defined for the user-namespace gid
  221. * pair INVALID_GID is returned. Callers are expected to test
  222. * for and handle INVALID_GID being returned. INVALID_GID may be
  223. * tested for using gid_valid().
  224. */
  225. kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
  226. {
  227. /* Map the gid to a global kernel gid */
  228. return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
  229. }
  230. EXPORT_SYMBOL(make_kgid);
  231. /**
  232. * from_kgid - Create a gid from a kgid user-namespace pair.
  233. * @targ: The user namespace we want a gid in.
  234. * @kgid: The kernel internal gid to start with.
  235. *
  236. * Map @kgid into the user-namespace specified by @targ and
  237. * return the resulting gid.
  238. *
  239. * There is always a mapping into the initial user_namespace.
  240. *
  241. * If @kgid has no mapping in @targ (gid_t)-1 is returned.
  242. */
  243. gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
  244. {
  245. /* Map the gid from a global kernel gid */
  246. return map_id_up(&targ->gid_map, __kgid_val(kgid));
  247. }
  248. EXPORT_SYMBOL(from_kgid);
  249. /**
  250. * from_kgid_munged - Create a gid from a kgid user-namespace pair.
  251. * @targ: The user namespace we want a gid in.
  252. * @kgid: The kernel internal gid to start with.
  253. *
  254. * Map @kgid into the user-namespace specified by @targ and
  255. * return the resulting gid.
  256. *
  257. * There is always a mapping into the initial user_namespace.
  258. *
  259. * Unlike from_kgid from_kgid_munged never fails and always
  260. * returns a valid gid. This makes from_kgid_munged appropriate
  261. * for use in syscalls like stat and getgid where failing the
  262. * system call and failing to provide a valid gid are not options.
  263. *
  264. * If @kgid has no mapping in @targ overflowgid is returned.
  265. */
  266. gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
  267. {
  268. gid_t gid;
  269. gid = from_kgid(targ, kgid);
  270. if (gid == (gid_t) -1)
  271. gid = overflowgid;
  272. return gid;
  273. }
  274. EXPORT_SYMBOL(from_kgid_munged);
  275. static int uid_m_show(struct seq_file *seq, void *v)
  276. {
  277. struct user_namespace *ns = seq->private;
  278. struct uid_gid_extent *extent = v;
  279. struct user_namespace *lower_ns;
  280. uid_t lower;
  281. lower_ns = current_user_ns();
  282. if ((lower_ns == ns) && lower_ns->parent)
  283. lower_ns = lower_ns->parent;
  284. lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
  285. seq_printf(seq, "%10u %10u %10u\n",
  286. extent->first,
  287. lower,
  288. extent->count);
  289. return 0;
  290. }
  291. static int gid_m_show(struct seq_file *seq, void *v)
  292. {
  293. struct user_namespace *ns = seq->private;
  294. struct uid_gid_extent *extent = v;
  295. struct user_namespace *lower_ns;
  296. gid_t lower;
  297. lower_ns = current_user_ns();
  298. if ((lower_ns == ns) && lower_ns->parent)
  299. lower_ns = lower_ns->parent;
  300. lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
  301. seq_printf(seq, "%10u %10u %10u\n",
  302. extent->first,
  303. lower,
  304. extent->count);
  305. return 0;
  306. }
  307. static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map)
  308. {
  309. struct uid_gid_extent *extent = NULL;
  310. loff_t pos = *ppos;
  311. if (pos < map->nr_extents)
  312. extent = &map->extent[pos];
  313. return extent;
  314. }
  315. static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
  316. {
  317. struct user_namespace *ns = seq->private;
  318. return m_start(seq, ppos, &ns->uid_map);
  319. }
  320. static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
  321. {
  322. struct user_namespace *ns = seq->private;
  323. return m_start(seq, ppos, &ns->gid_map);
  324. }
  325. static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
  326. {
  327. (*pos)++;
  328. return seq->op->start(seq, pos);
  329. }
  /*
   * seq_file ->stop callback shared by both maps. The ->start callbacks in
   * this file acquire nothing, so there is nothing to release.
   */
  static void m_stop(struct seq_file *seq, void *v)
  {
      /* Intentionally empty. */
  }
  334. struct seq_operations proc_uid_seq_operations = {
  335. .start = uid_m_start,
  336. .stop = m_stop,
  337. .next = m_next,
  338. .show = uid_m_show,
  339. };
  340. struct seq_operations proc_gid_seq_operations = {
  341. .start = gid_m_start,
  342. .stop = m_stop,
  343. .next = m_next,
  344. .show = gid_m_show,
  345. };
  346. static DEFINE_MUTEX(id_map_mutex);
  347. static ssize_t map_write(struct file *file, const char __user *buf,
  348. size_t count, loff_t *ppos,
  349. int cap_setid,
  350. struct uid_gid_map *map,
  351. struct uid_gid_map *parent_map)
  352. {
  353. struct seq_file *seq = file->private_data;
  354. struct user_namespace *ns = seq->private;
  355. struct uid_gid_map new_map;
  356. unsigned idx;
  357. struct uid_gid_extent *extent, *last = NULL;
  358. unsigned long page = 0;
  359. char *kbuf, *pos, *next_line;
  360. ssize_t ret = -EINVAL;
  361. /*
  362. * The id_map_mutex serializes all writes to any given map.
  363. *
  364. * Any map is only ever written once.
  365. *
  366. * An id map fits within 1 cache line on most architectures.
  367. *
  368. * On read nothing needs to be done unless you are on an
  369. * architecture with a crazy cache coherency model like alpha.
  370. *
  371. * There is a one time data dependency between reading the
  372. * count of the extents and the values of the extents. The
  373. * desired behavior is to see the values of the extents that
  374. * were written before the count of the extents.
  375. *
  376. * To achieve this smp_wmb() is used on guarantee the write
  377. * order and smp_read_barrier_depends() is guaranteed that we
  378. * don't have crazy architectures returning stale data.
  379. *
  380. */
  381. mutex_lock(&id_map_mutex);
  382. ret = -EPERM;
  383. /* Only allow one successful write to the map */
  384. if (map->nr_extents != 0)
  385. goto out;
  386. /* Require the appropriate privilege CAP_SETUID or CAP_SETGID
  387. * over the user namespace in order to set the id mapping.
  388. */
  389. if (!ns_capable(ns, cap_setid))
  390. goto out;
  391. /* Get a buffer */
  392. ret = -ENOMEM;
  393. page = __get_free_page(GFP_TEMPORARY);
  394. kbuf = (char *) page;
  395. if (!page)
  396. goto out;
  397. /* Only allow <= page size writes at the beginning of the file */
  398. ret = -EINVAL;
  399. if ((*ppos != 0) || (count >= PAGE_SIZE))
  400. goto out;
  401. /* Slurp in the user data */
  402. ret = -EFAULT;
  403. if (copy_from_user(kbuf, buf, count))
  404. goto out;
  405. kbuf[count] = '\0';
  406. /* Parse the user data */
  407. ret = -EINVAL;
  408. pos = kbuf;
  409. new_map.nr_extents = 0;
  410. for (;pos; pos = next_line) {
  411. extent = &new_map.extent[new_map.nr_extents];
  412. /* Find the end of line and ensure I don't look past it */
  413. next_line = strchr(pos, '\n');
  414. if (next_line) {
  415. *next_line = '\0';
  416. next_line++;
  417. if (*next_line == '\0')
  418. next_line = NULL;
  419. }
  420. pos = skip_spaces(pos);
  421. extent->first = simple_strtoul(pos, &pos, 10);
  422. if (!isspace(*pos))
  423. goto out;
  424. pos = skip_spaces(pos);
  425. extent->lower_first = simple_strtoul(pos, &pos, 10);
  426. if (!isspace(*pos))
  427. goto out;
  428. pos = skip_spaces(pos);
  429. extent->count = simple_strtoul(pos, &pos, 10);
  430. if (*pos && !isspace(*pos))
  431. goto out;
  432. /* Verify there is not trailing junk on the line */
  433. pos = skip_spaces(pos);
  434. if (*pos != '\0')
  435. goto out;
  436. /* Verify we have been given valid starting values */
  437. if ((extent->first == (u32) -1) ||
  438. (extent->lower_first == (u32) -1 ))
  439. goto out;
  440. /* Verify count is not zero and does not cause the extent to wrap */
  441. if ((extent->first + extent->count) <= extent->first)
  442. goto out;
  443. if ((extent->lower_first + extent->count) <= extent->lower_first)
  444. goto out;
  445. /* For now only accept extents that are strictly in order */
  446. if (last &&
  447. (((last->first + last->count) > extent->first) ||
  448. ((last->lower_first + last->count) > extent->lower_first)))
  449. goto out;
  450. new_map.nr_extents++;
  451. last = extent;
  452. /* Fail if the file contains too many extents */
  453. if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
  454. (next_line != NULL))
  455. goto out;
  456. }
  457. /* Be very certaint the new map actually exists */
  458. if (new_map.nr_extents == 0)
  459. goto out;
  460. ret = -EPERM;
  461. /* Validate the user is allowed to use user id's mapped to. */
  462. if (!new_idmap_permitted(ns, cap_setid, &new_map))
  463. goto out;
  464. /* Map the lower ids from the parent user namespace to the
  465. * kernel global id space.
  466. */
  467. for (idx = 0; idx < new_map.nr_extents; idx++) {
  468. u32 lower_first;
  469. extent = &new_map.extent[idx];
  470. lower_first = map_id_range_down(parent_map,
  471. extent->lower_first,
  472. extent->count);
  473. /* Fail if we can not map the specified extent to
  474. * the kernel global id space.
  475. */
  476. if (lower_first == (u32) -1)
  477. goto out;
  478. extent->lower_first = lower_first;
  479. }
  480. /* Install the map */
  481. memcpy(map->extent, new_map.extent,
  482. new_map.nr_extents*sizeof(new_map.extent[0]));
  483. smp_wmb();
  484. map->nr_extents = new_map.nr_extents;
  485. *ppos = count;
  486. ret = count;
  487. out:
  488. mutex_unlock(&id_map_mutex);
  489. if (page)
  490. free_page(page);
  491. return ret;
  492. }
  493. ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
  494. {
  495. struct seq_file *seq = file->private_data;
  496. struct user_namespace *ns = seq->private;
  497. if (!ns->parent)
  498. return -EPERM;
  499. return map_write(file, buf, size, ppos, CAP_SETUID,
  500. &ns->uid_map, &ns->parent->uid_map);
  501. }
  502. ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
  503. {
  504. struct seq_file *seq = file->private_data;
  505. struct user_namespace *ns = seq->private;
  506. if (!ns->parent)
  507. return -EPERM;
  508. return map_write(file, buf, size, ppos, CAP_SETGID,
  509. &ns->gid_map, &ns->parent->gid_map);
  510. }
  511. static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
  512. struct uid_gid_map *new_map)
  513. {
  514. /* Allow the specified ids if we have the appropriate capability
  515. * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
  516. */
  517. if (ns_capable(ns->parent, cap_setid))
  518. return true;
  519. return false;
  520. }
  521. static __init int user_namespaces_init(void)
  522. {
  523. user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
  524. return 0;
  525. }
  526. module_init(user_namespaces_init);