user_namespace.c

/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, version 2 of the
 * License.
 */

#include <linux/export.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/user_namespace.h>
#include <linux/proc_ns.h>
#include <linux/highuid.h>
#include <linux/cred.h>
#include <linux/securebits.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/projid.h>
#include <linux/fs_struct.h>

static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);

static bool new_idmap_permitted(const struct file *file,
                                struct user_namespace *ns, int cap_setid,
                                struct uid_gid_map *map);
static void free_user_ns(struct work_struct *work);

static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
{
        return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
}

static void dec_user_namespaces(struct ucounts *ucounts)
{
        return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
}

static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
{
        /* Start with the same capabilities as init but useless for doing
         * anything as the capabilities are bound to the new user namespace.
         */
        cred->securebits = SECUREBITS_DEFAULT;
        cred->cap_inheritable = CAP_EMPTY_SET;
        cred->cap_permitted = CAP_FULL_SET;
        cred->cap_effective = CAP_FULL_SET;
        cred->cap_ambient = CAP_EMPTY_SET;
        cred->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
        key_put(cred->request_key_auth);
        cred->request_key_auth = NULL;
#endif
        /* tgcred will be cleared in our caller because CLONE_THREAD won't be set */
        cred->user_ns = user_ns;
}

/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 */
int create_user_ns(struct cred *new)
{
        struct user_namespace *ns, *parent_ns = new->user_ns;
        kuid_t owner = new->euid;
        kgid_t group = new->egid;
        struct ucounts *ucounts;
        int ret, i;

        ret = -ENOSPC;
        if (parent_ns->level > 32)
                goto fail;

        ucounts = inc_user_namespaces(parent_ns, owner);
        if (!ucounts)
                goto fail;

        /*
         * Verify that we can not violate the policy of which files
         * may be accessed that is specified by the root directory,
         * by verifying that the root directory is at the root of the
         * mount namespace which allows all files to be accessed.
         */
        ret = -EPERM;
        if (current_chrooted())
                goto fail_dec;

        /* The creator needs a mapping in the parent user namespace
         * or else we won't be able to reasonably tell userspace who
         * created a user_namespace.
         */
        ret = -EPERM;
        if (!kuid_has_mapping(parent_ns, owner) ||
            !kgid_has_mapping(parent_ns, group))
                goto fail_dec;

        ret = -ENOMEM;
        ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
        if (!ns)
                goto fail_dec;

        ret = ns_alloc_inum(&ns->ns);
        if (ret)
                goto fail_free;
        ns->ns.ops = &userns_operations;

        atomic_set(&ns->count, 1);
        /* Leave the new->user_ns reference with the new user namespace. */
        ns->parent = parent_ns;
        ns->level = parent_ns->level + 1;
        ns->owner = owner;
        ns->group = group;
        INIT_WORK(&ns->work, free_user_ns);
        for (i = 0; i < UCOUNT_COUNTS; i++) {
                ns->ucount_max[i] = INT_MAX;
        }
        ns->ucounts = ucounts;

        /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
        mutex_lock(&userns_state_mutex);
        ns->flags = parent_ns->flags;
        mutex_unlock(&userns_state_mutex);

#ifdef CONFIG_PERSISTENT_KEYRINGS
        init_rwsem(&ns->persistent_keyring_register_sem);
#endif
        ret = -ENOMEM;
        if (!setup_userns_sysctls(ns))
                goto fail_keyring;

        set_cred_user_ns(new, ns);
        return 0;
fail_keyring:
#ifdef CONFIG_PERSISTENT_KEYRINGS
        key_put(ns->persistent_keyring_register);
#endif
        ns_free_inum(&ns->ns);
fail_free:
        kmem_cache_free(user_ns_cachep, ns);
fail_dec:
        dec_user_namespaces(ucounts);
fail:
        return ret;
}

int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
{
        struct cred *cred;
        int err = -ENOMEM;

        if (!(unshare_flags & CLONE_NEWUSER))
                return 0;

        cred = prepare_creds();
        if (cred) {
                err = create_user_ns(cred);
                if (err)
                        put_cred(cred);
                else
                        *new_cred = cred;
        }

        return err;
}

static void free_user_ns(struct work_struct *work)
{
        struct user_namespace *parent, *ns =
                container_of(work, struct user_namespace, work);

        do {
                struct ucounts *ucounts = ns->ucounts;
                parent = ns->parent;
                retire_userns_sysctls(ns);
#ifdef CONFIG_PERSISTENT_KEYRINGS
                key_put(ns->persistent_keyring_register);
#endif
                ns_free_inum(&ns->ns);
                kmem_cache_free(user_ns_cachep, ns);
                dec_user_namespaces(ucounts);
                ns = parent;
        } while (atomic_dec_and_test(&parent->count));
}

void __put_user_ns(struct user_namespace *ns)
{
        schedule_work(&ns->work);
}
EXPORT_SYMBOL(__put_user_ns);
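
/*
 * Helpers for translating ids through a uid_gid_map.  The "down"
 * helpers map an id as seen in a user namespace into the kernel's
 * global id space; map_id_up() performs the reverse translation.
 * An id that has no mapping is reported as (u32) -1.
 */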
static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
{
        unsigned idx, extents;
        u32 first, last, id2;

        id2 = id + count - 1;

        /* Find the matching extent */
        extents = map->nr_extents;
        smp_rmb();
        for (idx = 0; idx < extents; idx++) {
                first = map->extent[idx].first;
                last = first + map->extent[idx].count - 1;
                if (id >= first && id <= last &&
                    (id2 >= first && id2 <= last))
                        break;
        }
        /* Map the id or note failure */
        if (idx < extents)
                id = (id - first) + map->extent[idx].lower_first;
        else
                id = (u32) -1;

        return id;
}

static u32 map_id_down(struct uid_gid_map *map, u32 id)
{
        unsigned idx, extents;
        u32 first, last;

        /* Find the matching extent */
        extents = map->nr_extents;
        smp_rmb();
        for (idx = 0; idx < extents; idx++) {
                first = map->extent[idx].first;
                last = first + map->extent[idx].count - 1;
                if (id >= first && id <= last)
                        break;
        }
        /* Map the id or note failure */
        if (idx < extents)
                id = (id - first) + map->extent[idx].lower_first;
        else
                id = (u32) -1;

        return id;
}

static u32 map_id_up(struct uid_gid_map *map, u32 id)
{
        unsigned idx, extents;
        u32 first, last;

        /* Find the matching extent */
        extents = map->nr_extents;
        smp_rmb();
        for (idx = 0; idx < extents; idx++) {
                first = map->extent[idx].lower_first;
                last = first + map->extent[idx].count - 1;
                if (id >= first && id <= last)
                        break;
        }
        /* Map the id or note failure */
        if (idx < extents)
                id = (id - first) + map->extent[idx].first;
        else
                id = (u32) -1;

        return id;
}

/**
 * make_kuid - Map a user-namespace uid pair into a kuid.
 * @ns:  User namespace that the uid is in
 * @uid: User identifier
 *
 * Maps a user-namespace uid pair into a kernel internal kuid,
 * and returns that kuid.
 *
 * When there is no mapping defined for the user-namespace uid
 * pair INVALID_UID is returned.  Callers are expected to test
 * for and handle INVALID_UID being returned.  INVALID_UID
 * may be tested for using uid_valid().
 */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
        /* Map the uid to a global kernel uid */
        return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);

/**
 * from_kuid - Create a uid from a kuid user-namespace pair.
 * @targ: The user namespace we want a uid in.
 * @kuid: The kernel internal uid to start with.
 *
 * Map @kuid into the user-namespace specified by @targ and
 * return the resulting uid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kuid has no mapping in @targ (uid_t)-1 is returned.
 */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
        /* Map the uid from a global kernel uid */
        return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);

/**
 * from_kuid_munged - Create a uid from a kuid user-namespace pair.
 * @targ: The user namespace we want a uid in.
 * @kuid: The kernel internal uid to start with.
 *
 * Map @kuid into the user-namespace specified by @targ and
 * return the resulting uid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * Unlike from_kuid, from_kuid_munged never fails and always
 * returns a valid uid.  This makes from_kuid_munged appropriate
 * for use in syscalls like stat and getuid where failing the
 * system call and failing to provide a valid uid are not options.
 *
 * If @kuid has no mapping in @targ overflowuid is returned.
 */
uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
{
        uid_t uid;

        uid = from_kuid(targ, kuid);
        if (uid == (uid_t) -1)
                uid = overflowuid;
        return uid;
}
EXPORT_SYMBOL(from_kuid_munged);

/**
 * make_kgid - Map a user-namespace gid pair into a kgid.
 * @ns:  User namespace that the gid is in
 * @gid: Group identifier
 *
 * Maps a user-namespace gid pair into a kernel internal kgid,
 * and returns that kgid.
 *
 * When there is no mapping defined for the user-namespace gid
 * pair INVALID_GID is returned.  Callers are expected to test
 * for and handle INVALID_GID being returned.  INVALID_GID may be
 * tested for using gid_valid().
 */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
        /* Map the gid to a global kernel gid */
        return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);

/**
 * from_kgid - Create a gid from a kgid user-namespace pair.
 * @targ: The user namespace we want a gid in.
 * @kgid: The kernel internal gid to start with.
 *
 * Map @kgid into the user-namespace specified by @targ and
 * return the resulting gid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kgid has no mapping in @targ (gid_t)-1 is returned.
 */
gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
        /* Map the gid from a global kernel gid */
        return map_id_up(&targ->gid_map, __kgid_val(kgid));
}
EXPORT_SYMBOL(from_kgid);

/**
 * from_kgid_munged - Create a gid from a kgid user-namespace pair.
 * @targ: The user namespace we want a gid in.
 * @kgid: The kernel internal gid to start with.
 *
 * Map @kgid into the user-namespace specified by @targ and
 * return the resulting gid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * Unlike from_kgid, from_kgid_munged never fails and always
 * returns a valid gid.  This makes from_kgid_munged appropriate
 * for use in syscalls like stat and getgid where failing the
 * system call and failing to provide a valid gid are not options.
 *
 * If @kgid has no mapping in @targ overflowgid is returned.
 */
gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
{
        gid_t gid;

        gid = from_kgid(targ, kgid);
        if (gid == (gid_t) -1)
                gid = overflowgid;
        return gid;
}
EXPORT_SYMBOL(from_kgid_munged);

/**
 * make_kprojid - Map a user-namespace projid pair into a kprojid.
 * @ns:  User namespace that the projid is in
 * @projid: Project identifier
 *
 * Maps a user-namespace projid pair into a kernel internal kprojid,
 * and returns that kprojid.
 *
 * When there is no mapping defined for the user-namespace projid
 * pair INVALID_PROJID is returned.  Callers are expected to test
 * for and handle INVALID_PROJID being returned.  INVALID_PROJID
 * may be tested for using projid_valid().
 */
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
        /* Map the projid to a global kernel projid */
        return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
}
EXPORT_SYMBOL(make_kprojid);

/**
 * from_kprojid - Create a projid from a kprojid user-namespace pair.
 * @targ: The user namespace we want a projid in.
 * @kprojid: The kernel internal project identifier to start with.
 *
 * Map @kprojid into the user-namespace specified by @targ and
 * return the resulting projid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kprojid has no mapping in @targ (projid_t)-1 is returned.
 */
projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
{
        /* Map the projid from a global kernel projid */
        return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
}
EXPORT_SYMBOL(from_kprojid);

/**
 * from_kprojid_munged - Create a projid from a kprojid user-namespace pair.
 * @targ: The user namespace we want a projid in.
 * @kprojid: The kernel internal projid to start with.
 *
 * Map @kprojid into the user-namespace specified by @targ and
 * return the resulting projid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * Unlike from_kprojid, from_kprojid_munged never fails and always
 * returns a valid projid.  This makes from_kprojid_munged
 * appropriate for use in syscalls like stat where failing the
 * system call and failing to provide a valid projid are not options.
 *
 * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
 */
projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
{
        projid_t projid;

        projid = from_kprojid(targ, kprojid);
        if (projid == (projid_t) -1)
                projid = OVERFLOW_PROJID;
        return projid;
}
EXPORT_SYMBOL(from_kprojid_munged);
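
/*
 * seq_file show helpers backing /proc/<pid>/uid_map, gid_map and
 * projid_map.  Each extent is printed as "first lower_first count",
 * with the lower id translated into the opener's user namespace (or
 * its parent when the opener lives in the namespace being shown).
 */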
static int uid_m_show(struct seq_file *seq, void *v)
{
        struct user_namespace *ns = seq->private;
        struct uid_gid_extent *extent = v;
        struct user_namespace *lower_ns;
        uid_t lower;

        lower_ns = seq_user_ns(seq);
        if ((lower_ns == ns) && lower_ns->parent)
                lower_ns = lower_ns->parent;

        lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));

        seq_printf(seq, "%10u %10u %10u\n",
                   extent->first,
                   lower,
                   extent->count);

        return 0;
}

static int gid_m_show(struct seq_file *seq, void *v)
{
        struct user_namespace *ns = seq->private;
        struct uid_gid_extent *extent = v;
        struct user_namespace *lower_ns;
        gid_t lower;

        lower_ns = seq_user_ns(seq);
        if ((lower_ns == ns) && lower_ns->parent)
                lower_ns = lower_ns->parent;

        lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));

        seq_printf(seq, "%10u %10u %10u\n",
                   extent->first,
                   lower,
                   extent->count);

        return 0;
}

static int projid_m_show(struct seq_file *seq, void *v)
{
        struct user_namespace *ns = seq->private;
        struct uid_gid_extent *extent = v;
        struct user_namespace *lower_ns;
        projid_t lower;

        lower_ns = seq_user_ns(seq);
        if ((lower_ns == ns) && lower_ns->parent)
                lower_ns = lower_ns->parent;

        lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));

        seq_printf(seq, "%10u %10u %10u\n",
                   extent->first,
                   lower,
                   extent->count);

        return 0;
}

static void *m_start(struct seq_file *seq, loff_t *ppos,
                     struct uid_gid_map *map)
{
        struct uid_gid_extent *extent = NULL;
        loff_t pos = *ppos;

        if (pos < map->nr_extents)
                extent = &map->extent[pos];

        return extent;
}

static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
        struct user_namespace *ns = seq->private;

        return m_start(seq, ppos, &ns->uid_map);
}

static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
        struct user_namespace *ns = seq->private;

        return m_start(seq, ppos, &ns->gid_map);
}

static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
        struct user_namespace *ns = seq->private;

        return m_start(seq, ppos, &ns->projid_map);
}

static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
        (*pos)++;
        return seq->op->start(seq, pos);
}

static void m_stop(struct seq_file *seq, void *v)
{
        return;
}

const struct seq_operations proc_uid_seq_operations = {
        .start = uid_m_start,
        .stop = m_stop,
        .next = m_next,
        .show = uid_m_show,
};

const struct seq_operations proc_gid_seq_operations = {
        .start = gid_m_start,
        .stop = m_stop,
        .next = m_next,
        .show = gid_m_show,
};

const struct seq_operations proc_projid_seq_operations = {
        .start = projid_m_start,
        .stop = m_stop,
        .next = m_next,
        .show = projid_m_show,
};
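
/*
 * Check whether either the upper or the lower id range of a candidate
 * extent intersects an extent already accepted into the new map.
 */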
static bool mappings_overlap(struct uid_gid_map *new_map,
                             struct uid_gid_extent *extent)
{
        u32 upper_first, lower_first, upper_last, lower_last;
        unsigned idx;

        upper_first = extent->first;
        lower_first = extent->lower_first;
        upper_last = upper_first + extent->count - 1;
        lower_last = lower_first + extent->count - 1;

        for (idx = 0; idx < new_map->nr_extents; idx++) {
                u32 prev_upper_first, prev_lower_first;
                u32 prev_upper_last, prev_lower_last;
                struct uid_gid_extent *prev;

                prev = &new_map->extent[idx];

                prev_upper_first = prev->first;
                prev_lower_first = prev->lower_first;
                prev_upper_last = prev_upper_first + prev->count - 1;
                prev_lower_last = prev_lower_first + prev->count - 1;

                /* Does the upper range intersect a previous extent? */
                if ((prev_upper_first <= upper_last) &&
                    (prev_upper_last >= upper_first))
                        return true;

                /* Does the lower range intersect a previous extent? */
                if ((prev_lower_first <= lower_last) &&
                    (prev_lower_last >= lower_first))
                        return true;
        }
        return false;
}
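
/*
 * map_write - handle a write to /proc/<pid>/uid_map, gid_map or
 * projid_map.  The map may only be written once, is limited to
 * UID_GID_MAP_MAX_EXTENTS extents, and every lower id must itself be
 * mapped in the parent user namespace so it can be translated into
 * the kernel's global id space.
 */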
static ssize_t map_write(struct file *file, const char __user *buf,
                         size_t count, loff_t *ppos,
                         int cap_setid,
                         struct uid_gid_map *map,
                         struct uid_gid_map *parent_map)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        struct uid_gid_map new_map;
        unsigned idx;
        struct uid_gid_extent *extent = NULL;
        char *kbuf = NULL, *pos, *next_line;
        ssize_t ret;

        /* Only allow < page size writes at the beginning of the file */
        if ((*ppos != 0) || (count >= PAGE_SIZE))
                return -EINVAL;

        /* Slurp in the user data */
        kbuf = memdup_user_nul(buf, count);
        if (IS_ERR(kbuf))
                return PTR_ERR(kbuf);

        /*
         * The userns_state_mutex serializes all writes to any given map.
         *
         * Any map is only ever written once.
         *
         * An id map fits within 1 cache line on most architectures.
         *
         * On read nothing needs to be done unless you are on an
         * architecture with a crazy cache coherency model like alpha.
         *
         * There is a one time data dependency between reading the
         * count of the extents and the values of the extents.  The
         * desired behavior is to see the values of the extents that
         * were written before the count of the extents.
         *
         * To achieve this smp_wmb() is used to guarantee the write
         * order and smp_rmb() ensures that we don't have crazy
         * architectures returning stale data.
         */
        mutex_lock(&userns_state_mutex);

        ret = -EPERM;
        /* Only allow one successful write to the map */
        if (map->nr_extents != 0)
                goto out;

        /*
         * Adjusting namespace settings requires capabilities on the target.
         */
        if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
                goto out;

        /* Parse the user data */
        ret = -EINVAL;
        pos = kbuf;
        new_map.nr_extents = 0;
        for (; pos; pos = next_line) {
                extent = &new_map.extent[new_map.nr_extents];

                /* Find the end of line and ensure I don't look past it */
                next_line = strchr(pos, '\n');
                if (next_line) {
                        *next_line = '\0';
                        next_line++;
                        if (*next_line == '\0')
                                next_line = NULL;
                }

                pos = skip_spaces(pos);
                extent->first = simple_strtoul(pos, &pos, 10);
                if (!isspace(*pos))
                        goto out;

                pos = skip_spaces(pos);
                extent->lower_first = simple_strtoul(pos, &pos, 10);
                if (!isspace(*pos))
                        goto out;

                pos = skip_spaces(pos);
                extent->count = simple_strtoul(pos, &pos, 10);
                if (*pos && !isspace(*pos))
                        goto out;

                /* Verify there is no trailing junk on the line */
                pos = skip_spaces(pos);
                if (*pos != '\0')
                        goto out;

                /* Verify we have been given valid starting values */
                if ((extent->first == (u32) -1) ||
                    (extent->lower_first == (u32) -1))
                        goto out;

                /* Verify count is not zero and does not cause the
                 * extent to wrap
                 */
                if ((extent->first + extent->count) <= extent->first)
                        goto out;
                if ((extent->lower_first + extent->count) <=
                     extent->lower_first)
                        goto out;

                /* Do the ranges in extent overlap any previous extents? */
                if (mappings_overlap(&new_map, extent))
                        goto out;

                new_map.nr_extents++;

                /* Fail if the file contains too many extents */
                if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
                    (next_line != NULL))
                        goto out;
        }
        /* Be very certain the new map actually exists */
        if (new_map.nr_extents == 0)
                goto out;

        ret = -EPERM;
        /* Validate that the user is allowed to use the ids being mapped to. */
        if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
                goto out;

        /* Map the lower ids from the parent user namespace to the
         * kernel global id space.
         */
        for (idx = 0; idx < new_map.nr_extents; idx++) {
                u32 lower_first;
                extent = &new_map.extent[idx];

                lower_first = map_id_range_down(parent_map,
                                                extent->lower_first,
                                                extent->count);

                /* Fail if we can not map the specified extent to
                 * the kernel global id space.
                 */
                if (lower_first == (u32) -1)
                        goto out;

                extent->lower_first = lower_first;
        }

        /* Install the map */
        memcpy(map->extent, new_map.extent,
               new_map.nr_extents * sizeof(new_map.extent[0]));
        smp_wmb();
        map->nr_extents = new_map.nr_extents;

        *ppos = count;
        ret = count;
out:
        mutex_unlock(&userns_state_mutex);
        kfree(kbuf);
        return ret;
}

ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
                           size_t size, loff_t *ppos)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        struct user_namespace *seq_ns = seq_user_ns(seq);

        if (!ns->parent)
                return -EPERM;

        if ((seq_ns != ns) && (seq_ns != ns->parent))
                return -EPERM;

        return map_write(file, buf, size, ppos, CAP_SETUID,
                         &ns->uid_map, &ns->parent->uid_map);
}

ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
                           size_t size, loff_t *ppos)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        struct user_namespace *seq_ns = seq_user_ns(seq);

        if (!ns->parent)
                return -EPERM;

        if ((seq_ns != ns) && (seq_ns != ns->parent))
                return -EPERM;

        return map_write(file, buf, size, ppos, CAP_SETGID,
                         &ns->gid_map, &ns->parent->gid_map);
}

ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
                              size_t size, loff_t *ppos)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        struct user_namespace *seq_ns = seq_user_ns(seq);

        if (!ns->parent)
                return -EPERM;

        if ((seq_ns != ns) && (seq_ns != ns->parent))
                return -EPERM;

        /* Anyone can set any valid project id, no capability needed */
        return map_write(file, buf, size, ppos, -1,
                         &ns->projid_map, &ns->parent->projid_map);
}
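
/*
 * Decide whether the opener of the map file may install the requested
 * mapping.  Without privilege, only a single extent mapping the opener's
 * own id is permitted (for gids, only after setgroups has been denied).
 * Project id mappings need no capability.  Anything else requires that
 * both the caller and the opener of the file hold the relevant
 * capability (CAP_SETUID or CAP_SETGID) over the parent user namespace.
 */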
static bool new_idmap_permitted(const struct file *file,
                                struct user_namespace *ns, int cap_setid,
                                struct uid_gid_map *new_map)
{
        const struct cred *cred = file->f_cred;

        /* Don't allow mappings that would allow anything that wouldn't
         * be allowed without the establishment of unprivileged mappings.
         */
        if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
            uid_eq(ns->owner, cred->euid)) {
                u32 id = new_map->extent[0].lower_first;
                if (cap_setid == CAP_SETUID) {
                        kuid_t uid = make_kuid(ns->parent, id);
                        if (uid_eq(uid, cred->euid))
                                return true;
                } else if (cap_setid == CAP_SETGID) {
                        kgid_t gid = make_kgid(ns->parent, id);
                        if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
                            gid_eq(gid, cred->egid))
                                return true;
                }
        }

        /* Allow anyone to set a mapping that doesn't require privilege */
        if (!cap_valid(cap_setid))
                return true;

        /* Allow the specified ids if we have the appropriate capability
         * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
         * And the opener of the id file also had the appropriate capability.
         */
        if (ns_capable(ns->parent, cap_setid) &&
            file_ns_capable(file, ns->parent, cap_setid))
                return true;

        return false;
}
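
/*
 * /proc/<pid>/setgroups: reports whether setgroups(2) is permitted in
 * this user namespace and lets it be disabled, which is only possible
 * before a gid mapping has been written.
 */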
int proc_setgroups_show(struct seq_file *seq, void *v)
{
        struct user_namespace *ns = seq->private;
        unsigned long userns_flags = ACCESS_ONCE(ns->flags);

        seq_printf(seq, "%s\n",
                   (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
                   "allow" : "deny");
        return 0;
}

ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
                             size_t count, loff_t *ppos)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        char kbuf[8], *pos;
        bool setgroups_allowed;
        ssize_t ret;

        /* Only allow a very narrow range of strings to be written */
        ret = -EINVAL;
        if ((*ppos != 0) || (count >= sizeof(kbuf)))
                goto out;

        /* What was written? */
        ret = -EFAULT;
        if (copy_from_user(kbuf, buf, count))
                goto out;
        kbuf[count] = '\0';
        pos = kbuf;

        /* What is being requested? */
        ret = -EINVAL;
        if (strncmp(pos, "allow", 5) == 0) {
                pos += 5;
                setgroups_allowed = true;
        } else if (strncmp(pos, "deny", 4) == 0) {
                pos += 4;
                setgroups_allowed = false;
        } else
                goto out;

        /* Verify there is no trailing junk on the line */
        pos = skip_spaces(pos);
        if (*pos != '\0')
                goto out;

        ret = -EPERM;
        mutex_lock(&userns_state_mutex);
        if (setgroups_allowed) {
                /* Enabling setgroups after setgroups has been disabled
                 * is not allowed.
                 */
                if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
                        goto out_unlock;
        } else {
                /* Permanently disabling setgroups after setgroups has
                 * been enabled by writing the gid_map is not allowed.
                 */
                if (ns->gid_map.nr_extents != 0)
                        goto out_unlock;
                ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
        }
        mutex_unlock(&userns_state_mutex);

        /* Report a successful write */
        *ppos = count;
        ret = count;
out:
        return ret;
out_unlock:
        mutex_unlock(&userns_state_mutex);
        goto out;
}

bool userns_may_setgroups(const struct user_namespace *ns)
{
        bool allowed;

        mutex_lock(&userns_state_mutex);
        /* It is not safe to use setgroups until a gid mapping in
         * the user namespace has been established.
         */
        allowed = ns->gid_map.nr_extents != 0;
        /* Is setgroups allowed? */
        allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
        mutex_unlock(&userns_state_mutex);

        return allowed;
}

/*
 * Returns true if @child is the same namespace or a descendant of
 * @ancestor.
 */
bool in_userns(const struct user_namespace *ancestor,
               const struct user_namespace *child)
{
        const struct user_namespace *ns;

        for (ns = child; ns->level > ancestor->level; ns = ns->parent)
                ;
        return (ns == ancestor);
}

bool current_in_userns(const struct user_namespace *target_ns)
{
        return in_userns(target_ns, current_user_ns());
}
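
/* Hooks backing /proc/<pid>/ns/user and setns(2) on a user namespace. */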
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
        return container_of(ns, struct user_namespace, ns);
}

static struct ns_common *userns_get(struct task_struct *task)
{
        struct user_namespace *user_ns;

        rcu_read_lock();
        user_ns = get_user_ns(__task_cred(task)->user_ns);
        rcu_read_unlock();

        return user_ns ? &user_ns->ns : NULL;
}

static void userns_put(struct ns_common *ns)
{
        put_user_ns(to_user_ns(ns));
}

static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
        struct user_namespace *user_ns = to_user_ns(ns);
        struct cred *cred;

        /* Don't allow gaining capabilities by reentering
         * the same user namespace.
         */
        if (user_ns == current_user_ns())
                return -EINVAL;

        /* Tasks that share a thread group must share a user namespace */
        if (!thread_group_empty(current))
                return -EINVAL;

        if (current->fs->users != 1)
                return -EINVAL;

        if (!ns_capable(user_ns, CAP_SYS_ADMIN))
                return -EPERM;

        cred = prepare_creds();
        if (!cred)
                return -ENOMEM;

        put_user_ns(cred->user_ns);
        set_cred_user_ns(cred, get_user_ns(user_ns));

        return commit_creds(cred);
}

struct ns_common *ns_get_owner(struct ns_common *ns)
{
        struct user_namespace *my_user_ns = current_user_ns();
        struct user_namespace *owner, *p;

        /* See if the owner is in the current user namespace */
        owner = p = ns->ops->owner(ns);
        for (;;) {
                if (!p)
                        return ERR_PTR(-EPERM);
                if (p == my_user_ns)
                        break;
                p = p->parent;
        }

        return &get_user_ns(owner)->ns;
}

static struct user_namespace *userns_owner(struct ns_common *ns)
{
        return to_user_ns(ns)->parent;
}

const struct proc_ns_operations userns_operations = {
        .name = "user",
        .type = CLONE_NEWUSER,
        .get = userns_get,
        .put = userns_put,
        .install = userns_install,
        .owner = userns_owner,
        .get_parent = ns_get_owner,
};

static __init int user_namespaces_init(void)
{
        user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
        return 0;
}
subsys_initcall(user_namespaces_init);