Fri, 06 Jul 2018 18:50:13 +0000
8146115: Improve docker container detection and resource configuration usage
Reviewed-by: bobv, dbuck
1 /*
2 * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include <string.h>
26 #include <math.h>
27 #include <errno.h>
28 #include "utilities/globalDefinitions.hpp"
29 #include "memory/allocation.hpp"
30 #include "runtime/os.hpp"
31 #include "osContainer_linux.hpp"
/* Number of cpu.shares units that active_processor_count() counts as one CPU. */
33 #define PER_CPU_SHARES 1024
/* Set to true by OSContainer::init(), which asserts it was false on entry. */
35 bool OSContainer::_is_initialized = false;
/* Set to true only when OSContainer::init() runs to completion. */
36 bool OSContainer::_is_containerized = false;
/*
 * Threshold computed in OSContainer::init(): LONG_MAX rounded down to a
 * multiple of the VM page size. Memory limits read from the cgroup files
 * that are >= this value are reported as "unlimited" (-1).
 */
37 julong _unlimited_memory;
39 class CgroupSubsystem: CHeapObj<mtInternal> {
40 friend class OSContainer;
42 private:
43 /* mountinfo contents */
44 char *_root;
45 char *_mount_point;
47 /* Constructed subsystem directory */
48 char *_path;
50 public:
51 CgroupSubsystem(char *root, char *mountpoint) {
52 _root = os::strdup(root);
53 _mount_point = os::strdup(mountpoint);
54 _path = NULL;
55 }
57 /*
58 * Set directory to subsystem specific files based
59 * on the contents of the mountinfo and cgroup files.
60 */
61 void set_subsystem_path(char *cgroup_path) {
62 char buf[MAXPATHLEN+1];
63 if (_root != NULL && cgroup_path != NULL) {
64 if (strcmp(_root, "/") == 0) {
65 int buflen;
66 strncpy(buf, _mount_point, MAXPATHLEN);
67 buf[MAXPATHLEN-1] = '\0';
68 if (strcmp(cgroup_path,"/") != 0) {
69 buflen = strlen(buf);
70 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) {
71 return;
72 }
73 strncat(buf, cgroup_path, MAXPATHLEN-buflen);
74 buf[MAXPATHLEN-1] = '\0';
75 }
76 _path = os::strdup(buf);
77 } else {
78 if (strcmp(_root, cgroup_path) == 0) {
79 strncpy(buf, _mount_point, MAXPATHLEN);
80 buf[MAXPATHLEN-1] = '\0';
81 _path = os::strdup(buf);
82 } else {
83 char *p = strstr(_root, cgroup_path);
84 if (p != NULL && p == _root) {
85 if (strlen(cgroup_path) > strlen(_root)) {
86 int buflen;
87 strncpy(buf, _mount_point, MAXPATHLEN);
88 buf[MAXPATHLEN-1] = '\0';
89 buflen = strlen(buf);
90 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) {
91 return;
92 }
93 strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen);
94 buf[MAXPATHLEN-1] = '\0';
95 _path = os::strdup(buf);
96 }
97 }
98 }
99 }
100 }
101 }
103 char *subsystem_path() { return _path; }
104 };
/*
 * One CgroupSubsystem per cgroup controller read by this file. All start
 * out NULL; OSContainer::init() allocates them while scanning
 * /proc/self/mountinfo and the accessor functions pass them to
 * subsystem_file_contents().
 */
106 CgroupSubsystem* memory = NULL;
107 CgroupSubsystem* cpuset = NULL;
108 CgroupSubsystem* cpu = NULL;
109 CgroupSubsystem* cpuacct = NULL;
/*
 * Single-token name for "char *"; it is passed as the return_type
 * argument of GET_CONTAINER_INFO_CPTR, which uses it in a cast.
 */
111 typedef char * cptr;
113 PRAGMA_DIAG_PUSH
114 PRAGMA_FORMAT_NONLITERAL_IGNORED
115 template <typename T> int subsystem_file_contents(CgroupSubsystem* c,
116 const char *filename,
117 const char *scan_fmt,
118 T returnval) {
119 FILE *fp = NULL;
120 char *p;
121 char file[MAXPATHLEN+1];
122 char buf[MAXPATHLEN+1];
124 if (c == NULL) {
125 if (PrintContainerInfo) {
126 tty->print_cr("subsystem_file_contents: CgroupSubsytem* is NULL");
127 }
128 return OSCONTAINER_ERROR;
129 }
130 if (c->subsystem_path() == NULL) {
131 if (PrintContainerInfo) {
132 tty->print_cr("subsystem_file_contents: subsystem path is NULL");
133 }
134 return OSCONTAINER_ERROR;
135 }
137 strncpy(file, c->subsystem_path(), MAXPATHLEN);
138 file[MAXPATHLEN-1] = '\0';
139 int filelen = strlen(file);
140 if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) {
141 if (PrintContainerInfo) {
142 tty->print_cr("File path too long %s, %s", file, filename);
143 }
144 return OSCONTAINER_ERROR;
145 }
146 strncat(file, filename, MAXPATHLEN-filelen);
147 if (PrintContainerInfo) {
148 tty->print_cr("Path to %s is %s", filename, file);
149 }
150 fp = fopen(file, "r");
151 if (fp != NULL) {
152 p = fgets(buf, MAXPATHLEN, fp);
153 if (p != NULL) {
154 int matched = sscanf(p, scan_fmt, returnval);
155 if (matched == 1) {
156 fclose(fp);
157 return 0;
158 } else {
159 if (PrintContainerInfo) {
160 tty->print_cr("Type %s not found in file %s", scan_fmt, file);
161 }
162 }
163 } else {
164 if (PrintContainerInfo) {
165 tty->print_cr("Empty file %s", file);
166 }
167 }
168 } else {
169 if (PrintContainerInfo) {
170 tty->print_cr("Open of file %s failed, %s", file, strerror(errno));
171 }
172 }
173 if (fp != NULL)
174 fclose(fp);
175 return OSCONTAINER_ERROR;
176 }
177 PRAGMA_DIAG_POP
/*
 * GET_CONTAINER_INFO
 *
 * Declares "return_type variable" in the calling function and fills it
 * from the given subsystem file through subsystem_file_contents(). On
 * failure the calling function returns OSCONTAINER_ERROR cast to
 * return_type; on success the value is logged with logstring when
 * PrintContainerInfo is set.
 */
#define GET_CONTAINER_INFO(return_type, subsystem, filename,              \
                           logstring, scan_fmt, variable)                 \
  return_type variable;                                                   \
  {                                                                       \
    int err = subsystem_file_contents(subsystem, filename,                \
                                      scan_fmt, &variable);               \
    if (err != 0) {                                                       \
      return (return_type) OSCONTAINER_ERROR;                             \
    }                                                                     \
    if (PrintContainerInfo) {                                             \
      tty->print_cr(logstring, variable);                                 \
    }                                                                     \
  }
/*
 * GET_CONTAINER_INFO_CPTR
 *
 * String flavour of GET_CONTAINER_INFO: declares "char variable[bufsize]"
 * in the calling function and fills it from the given subsystem file. On
 * failure the calling function returns NULL cast to return_type; on
 * success the string is logged with logstring when PrintContainerInfo is
 * set. scan_fmt has to limit the conversion so that it fits bufsize.
 */
#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename,         \
                                logstring, scan_fmt, variable, bufsize)   \
  char variable[bufsize];                                                 \
  {                                                                       \
    int err = subsystem_file_contents(subsystem, filename,                \
                                      scan_fmt, variable);                \
    if (err != 0) {                                                       \
      return (return_type) NULL;                                          \
    }                                                                     \
    if (PrintContainerInfo) {                                             \
      tty->print_cr(logstring, variable);                                 \
    }                                                                     \
  }
211 /* init
212 *
213 * Initialize the container support and determine if
214 * we are running under cgroup control.
215 */
216 void OSContainer::init() {
217 int mountid;
218 int parentid;
219 int major;
220 int minor;
221 FILE *mntinfo = NULL;
222 FILE *cgroup = NULL;
223 char buf[MAXPATHLEN+1];
224 char tmproot[MAXPATHLEN+1];
225 char tmpmount[MAXPATHLEN+1];
226 char tmpbase[MAXPATHLEN+1];
227 char *p;
228 jlong mem_limit;
230 assert(!_is_initialized, "Initializing OSContainer more than once");
232 _is_initialized = true;
233 _is_containerized = false;
235 _unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();
237 if (PrintContainerInfo) {
238 tty->print_cr("OSContainer::init: Initializing Container Support");
239 }
240 if (!UseContainerSupport) {
241 if (PrintContainerInfo) {
242 tty->print_cr("Container Support not enabled");
243 }
244 return;
245 }
247 /*
248 * Find the cgroup mount point for memory and cpuset
249 * by reading /proc/self/mountinfo
250 *
251 * Example for docker:
252 * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
253 *
254 * Example for host:
255 * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
256 */
257 mntinfo = fopen("/proc/self/mountinfo", "r");
258 if (mntinfo == NULL) {
259 if (PrintContainerInfo) {
260 tty->print_cr("Can't open /proc/self/mountinfo, %s",
261 strerror(errno));
262 }
263 return;
264 }
266 while ( (p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
267 // Look for the filesystem type and see if it's cgroup
268 char fstype[MAXPATHLEN+1];
269 fstype[0] = '\0';
270 char *s = strstr(p, " - ");
271 if (s != NULL &&
272 sscanf(s, " - %s", fstype) == 1 &&
273 strcmp(fstype, "cgroup") == 0) {
275 if (strstr(p, "memory") != NULL) {
276 int matched = sscanf(p, "%d %d %d:%d %s %s",
277 &mountid,
278 &parentid,
279 &major,
280 &minor,
281 tmproot,
282 tmpmount);
283 if (matched == 6) {
284 memory = new CgroupSubsystem(tmproot, tmpmount);
285 }
286 else
287 if (PrintContainerInfo) {
288 tty->print_cr("Incompatible str containing cgroup and memory: %s", p);
289 }
290 } else if (strstr(p, "cpuset") != NULL) {
291 int matched = sscanf(p, "%d %d %d:%d %s %s",
292 &mountid,
293 &parentid,
294 &major,
295 &minor,
296 tmproot,
297 tmpmount);
298 if (matched == 6) {
299 cpuset = new CgroupSubsystem(tmproot, tmpmount);
300 }
301 else {
302 if (PrintContainerInfo) {
303 tty->print_cr("Incompatible str containing cgroup and cpuset: %s", p);
304 }
305 }
306 } else if (strstr(p, "cpu,cpuacct") != NULL || strstr(p, "cpuacct,cpu") != NULL) {
307 int matched = sscanf(p, "%d %d %d:%d %s %s",
308 &mountid,
309 &parentid,
310 &major,
311 &minor,
312 tmproot,
313 tmpmount);
314 if (matched == 6) {
315 cpu = new CgroupSubsystem(tmproot, tmpmount);
316 cpuacct = new CgroupSubsystem(tmproot, tmpmount);
317 }
318 else {
319 if (PrintContainerInfo) {
320 tty->print_cr("Incompatible str containing cgroup and cpu,cpuacct: %s", p);
321 }
322 }
323 } else if (strstr(p, "cpuacct") != NULL) {
324 int matched = sscanf(p, "%d %d %d:%d %s %s",
325 &mountid,
326 &parentid,
327 &major,
328 &minor,
329 tmproot,
330 tmpmount);
331 if (matched == 6) {
332 cpuacct = new CgroupSubsystem(tmproot, tmpmount);
333 }
334 else {
335 if (PrintContainerInfo) {
336 tty->print_cr("Incompatible str containing cgroup and cpuacct: %s", p);
337 }
338 }
339 } else if (strstr(p, "cpu") != NULL) {
340 int matched = sscanf(p, "%d %d %d:%d %s %s",
341 &mountid,
342 &parentid,
343 &major,
344 &minor,
345 tmproot,
346 tmpmount);
347 if (matched == 6) {
348 cpu = new CgroupSubsystem(tmproot, tmpmount);
349 }
350 else {
351 if (PrintContainerInfo) {
352 tty->print_cr("Incompatible str containing cgroup and cpu: %s", p);
353 }
354 }
355 }
356 }
357 }
359 fclose(mntinfo);
361 if (memory == NULL) {
362 if (PrintContainerInfo) {
363 tty->print_cr("Required cgroup memory subsystem not found");
364 }
365 return;
366 }
367 if (cpuset == NULL) {
368 if (PrintContainerInfo) {
369 tty->print_cr("Required cgroup cpuset subsystem not found");
370 }
371 return;
372 }
373 if (cpu == NULL) {
374 if (PrintContainerInfo) {
375 tty->print_cr("Required cgroup cpu subsystem not found");
376 }
377 return;
378 }
379 if (cpuacct == NULL) {
380 if (PrintContainerInfo) {
381 tty->print_cr("Required cgroup cpuacct subsystem not found");
382 }
383 return;
384 }
386 /*
387 * Read /proc/self/cgroup and map host mount point to
388 * local one via /proc/self/mountinfo content above
389 *
390 * Docker example:
391 * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
392 *
393 * Host example:
394 * 5:memory:/user.slice
395 *
396 * Construct a path to the process specific memory and cpuset
397 * cgroup directory.
398 *
399 * For a container running under Docker from memory example above
400 * the paths would be:
401 *
402 * /sys/fs/cgroup/memory
403 *
404 * For a Host from memory example above the path would be:
405 *
406 * /sys/fs/cgroup/memory/user.slice
407 *
408 */
409 cgroup = fopen("/proc/self/cgroup", "r");
410 if (cgroup == NULL) {
411 if (PrintContainerInfo) {
412 tty->print_cr("Can't open /proc/self/cgroup, %s",
413 strerror(errno));
414 }
415 return;
416 }
418 while ( (p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
419 int cgno;
420 int matched;
421 char *controller;
422 char *base;
424 /* Skip cgroup number */
425 strsep(&p, ":");
426 /* Get controller and base */
427 controller = strsep(&p, ":");
428 base = strsep(&p, "\n");
430 if (controller != NULL) {
431 if (strstr(controller, "memory") != NULL) {
432 memory->set_subsystem_path(base);
433 } else if (strstr(controller, "cpuset") != NULL) {
434 cpuset->set_subsystem_path(base);
435 } else if (strstr(controller, "cpu,cpuacct") != NULL || strstr(controller, "cpuacct,cpu") != NULL) {
436 cpu->set_subsystem_path(base);
437 cpuacct->set_subsystem_path(base);
438 } else if (strstr(controller, "cpuacct") != NULL) {
439 cpuacct->set_subsystem_path(base);
440 } else if (strstr(controller, "cpu") != NULL) {
441 cpu->set_subsystem_path(base);
442 }
443 }
444 }
446 fclose(cgroup);
448 // We need to update the amount of physical memory now that
449 // command line arguments have been processed.
450 if ((mem_limit = memory_limit_in_bytes()) > 0) {
451 os::Linux::set_physical_memory(mem_limit);
452 }
454 _is_containerized = true;
456 }
458 const char * OSContainer::container_type() {
459 if (is_containerized()) {
460 return "cgroupv1";
461 } else {
462 return NULL;
463 }
464 }
467 /* memory_limit_in_bytes
468 *
469 * Return the limit of available memory for this process.
470 *
471 * return:
472 * memory limit in bytes or
473 * -1 for unlimited
474 * OSCONTAINER_ERROR for not supported
475 */
476 jlong OSContainer::memory_limit_in_bytes() {
477 GET_CONTAINER_INFO(julong, memory, "/memory.limit_in_bytes",
478 "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit);
480 if (memlimit >= _unlimited_memory) {
481 if (PrintContainerInfo) {
482 tty->print_cr("Memory Limit is: Unlimited");
483 }
484 return (jlong)-1;
485 }
486 else {
487 return (jlong)memlimit;
488 }
489 }
491 jlong OSContainer::memory_and_swap_limit_in_bytes() {
492 GET_CONTAINER_INFO(julong, memory, "/memory.memsw.limit_in_bytes",
493 "Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit);
494 if (memswlimit >= _unlimited_memory) {
495 if (PrintContainerInfo) {
496 tty->print_cr("Memory and Swap Limit is: Unlimited");
497 }
498 return (jlong)-1;
499 } else {
500 return (jlong)memswlimit;
501 }
502 }
504 jlong OSContainer::memory_soft_limit_in_bytes() {
505 GET_CONTAINER_INFO(julong, memory, "/memory.soft_limit_in_bytes",
506 "Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit);
507 if (memsoftlimit >= _unlimited_memory) {
508 if (PrintContainerInfo) {
509 tty->print_cr("Memory Soft Limit is: Unlimited");
510 }
511 return (jlong)-1;
512 } else {
513 return (jlong)memsoftlimit;
514 }
515 }
517 /* memory_usage_in_bytes
518 *
519 * Return the amount of used memory for this process.
520 *
521 * return:
522 * memory usage in bytes or
523 * -1 for unlimited
524 * OSCONTAINER_ERROR for not supported
525 */
526 jlong OSContainer::memory_usage_in_bytes() {
527 GET_CONTAINER_INFO(jlong, memory, "/memory.usage_in_bytes",
528 "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage);
529 return memusage;
530 }
532 /* memory_max_usage_in_bytes
533 *
534 * Return the maximum amount of used memory for this process.
535 *
536 * return:
537 * max memory usage in bytes or
538 * OSCONTAINER_ERROR for not supported
539 */
540 jlong OSContainer::memory_max_usage_in_bytes() {
541 GET_CONTAINER_INFO(jlong, memory, "/memory.max_usage_in_bytes",
542 "Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage);
543 return memmaxusage;
544 }
546 /* active_processor_count
547 *
548 * Calculate an appropriate number of active processors for the
549 * VM to use based on these three inputs.
550 *
551 * cpu affinity
552 * cgroup cpu quota & cpu period
553 * cgroup cpu shares
554 *
555 * Algorithm:
556 *
557 * Determine the number of available CPUs from sched_getaffinity
558 *
559 * If user specified a quota (quota != -1), calculate the number of
560 * required CPUs by dividing quota by period.
561 *
562 * If shares are in effect (shares != -1), calculate the number
563 * of CPUs required for the shares by dividing the share value
564 * by PER_CPU_SHARES.
565 *
566 * All results of division are rounded up to the next whole number.
567 *
568 * If neither shares or quotas have been specified, return the
569 * number of active processors in the system.
570 *
571 * If both shares and quotas have been specified, the results are
572 * based on the flag PreferContainerQuotaForCPUCount. If true,
573 * return the quota value. If false return the smallest value
574 * between shares or quotas.
575 *
576 * If shares and/or quotas have been specified, the resulting number
577 * returned will never exceed the number of active processors.
578 *
579 * return:
580 * number of CPUs
581 */
582 int OSContainer::active_processor_count() {
583 int quota_count = 0, share_count = 0;
584 int cpu_count, limit_count;
585 int result;
587 cpu_count = limit_count = os::Linux::active_processor_count();
588 int quota = cpu_quota();
589 int period = cpu_period();
590 int share = cpu_shares();
592 if (quota > -1 && period > 0) {
593 quota_count = ceilf((float)quota / (float)period);
594 if (PrintContainerInfo) {
595 tty->print_cr("CPU Quota count based on quota/period: %d", quota_count);
596 }
597 }
598 if (share > -1) {
599 share_count = ceilf((float)share / (float)PER_CPU_SHARES);
600 if (PrintContainerInfo) {
601 tty->print_cr("CPU Share count based on shares: %d", share_count);
602 }
603 }
605 // If both shares and quotas are setup results depend
606 // on flag PreferContainerQuotaForCPUCount.
607 // If true, limit CPU count to quota
608 // If false, use minimum of shares and quotas
609 if (quota_count !=0 && share_count != 0) {
610 if (PreferContainerQuotaForCPUCount) {
611 limit_count = quota_count;
612 } else {
613 limit_count = MIN2(quota_count, share_count);
614 }
615 } else if (quota_count != 0) {
616 limit_count = quota_count;
617 } else if (share_count != 0) {
618 limit_count = share_count;
619 }
621 result = MIN2(cpu_count, limit_count);
622 if (PrintContainerInfo) {
623 tty->print_cr("OSContainer::active_processor_count: %d", result);
624 }
625 return result;
626 }
628 char * OSContainer::cpu_cpuset_cpus() {
629 GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.cpus",
630 "cpuset.cpus is: %s", "%1023s", cpus, 1024);
631 return os::strdup(cpus);
632 }
634 char * OSContainer::cpu_cpuset_memory_nodes() {
635 GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.mems",
636 "cpuset.mems is: %s", "%1023s", mems, 1024);
637 return os::strdup(mems);
638 }
640 /* cpu_quota
641 *
642 * Return the number of milliseconds per period
643 * process is guaranteed to run.
644 *
645 * return:
646 * quota time in milliseconds
647 * -1 for no quota
648 * OSCONTAINER_ERROR for not supported
649 */
650 int OSContainer::cpu_quota() {
651 GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_quota_us",
652 "CPU Quota is: %d", "%d", quota);
653 return quota;
654 }
656 int OSContainer::cpu_period() {
657 GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_period_us",
658 "CPU Period is: %d", "%d", period);
659 return period;
660 }
662 /* cpu_shares
663 *
664 * Return the amount of cpu shares available to the process
665 *
666 * return:
667 * Share number (typically a number relative to 1024)
668 * (2048 typically expresses 2 CPUs worth of processing)
669 * -1 for no share setup
670 * OSCONTAINER_ERROR for not supported
671 */
672 int OSContainer::cpu_shares() {
673 GET_CONTAINER_INFO(int, cpu, "/cpu.shares",
674 "CPU Shares is: %d", "%d", shares);
675 // Convert 1024 to no shares setup
676 if (shares == 1024) return -1;
678 return shares;
679 }