Skip to content

Commit

Permalink
dump and restore cpu affinity of each thread.
Browse files Browse the repository at this point in the history
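Add a thread_allowedcpus_entry field to thread_core_entry to save each thread's CPU affinity mask.
Restore the affinity after the threads have been restored but before they resume running.
Add the --with-cpu-affinity option to enable this feature; the mask is only dumped, and only restored, when the option is given.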
Signed-off-by: hdzhoujie <[email protected]>
Signed-off-by: He jingxian <[email protected]>
Signed-off-by: Sang Yan <[email protected]>
  • Loading branch information
hdzhoujie committed Dec 15, 2022
1 parent 50db2be commit 9a98e31
Show file tree
Hide file tree
Showing 15 changed files with 126 additions and 1 deletion.
1 change: 1 addition & 0 deletions compel/arch/arm/plugins/std/syscalls/syscall.def
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ fsopen 430 430 (char *fsname, unsigned int flags)
fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux)
fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags)
clone3 435 435 (struct clone_args *uargs, size_t size)
sched_setaffinity 122 241 (int fd, size_t cpusetsize, const cpu_set_t *mask)
pidfd_open 434 434 (pid_t pid, unsigned int flags)
openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size)
pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags)
Expand Down
1 change: 1 addition & 0 deletions compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
__NR_sched_setaffinity 222 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask)
__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
__NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size)
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
Expand Down
1 change: 1 addition & 0 deletions compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
__NR_sched_setaffinity 239 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask)
__NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
__NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size)
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
Expand Down
1 change: 1 addition & 0 deletions compel/arch/x86/plugins/std/syscalls/syscall_32.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ __NR_mincore 218 sys_mincore (void *addr, unsigned long size, unsigned char *
__NR_madvise 219 sys_madvise (unsigned long start, size_t len, int behavior)
__NR_gettid 224 sys_gettid (void)
__NR_futex 240 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3)
__NR_sched_setaffinity 241 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask)
__NR_set_thread_area 243 sys_set_thread_area (user_desc_t *info)
__NR_get_thread_area 244 sys_get_thread_area (user_desc_t *info)
__NR_io_setup 245 sys_io_setup (unsigned nr_reqs, aio_context_t *ctx32p)
Expand Down
1 change: 1 addition & 0 deletions compel/arch/x86/plugins/std/syscalls/syscall_64.tbl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ __NR_mount 165 sys_mount (char *dev_nmae, char *dir_name, char *type, unsign
__NR_umount2 166 sys_umount2 (char *name, int flags)
__NR_gettid 186 sys_gettid (void)
__NR_futex 202 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3)
__NR_sched_setaffinity 203 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask)
__NR_set_thread_area 205 sys_set_thread_area (user_desc_t *info)
__NR_io_setup 206 sys_io_setup (unsigned nr_events, aio_context_t *ctx)
__NR_io_getevents 208 sys_io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo)
Expand Down
1 change: 1 addition & 0 deletions criu/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
{ "cgroup-yard", required_argument, 0, 1096 },
{ "pre-dump-mode", required_argument, 0, 1097 },
{ "file-validation", required_argument, 0, 1098 },
BOOL_OPT("with-cpu-affinity", &opts.with_cpu_affinity),
BOOL_OPT("skip-file-rwx-check", &opts.skip_file_rwx_check),
{ "lsm-mount-context", required_argument, 0, 1099 },
{ "network-lock", required_argument, 0, 1100 },
Expand Down
12 changes: 12 additions & 0 deletions criu/cr-dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc)
{
int ret;
struct sched_param sp;
cpu_set_t cpumask;

BUILD_BUG_ON(SCHED_OTHER != 0); /* default in proto message */

Expand Down Expand Up @@ -185,6 +186,17 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc)
tc->has_sched_nice = true;
tc->sched_nice = ret;

if (opts.with_cpu_affinity) {
pr_info("\tdumping allowed cpus for %d\n", pid);
ret = syscall(__NR_sched_getaffinity, pid, sizeof(cpumask), &cpumask);
if (ret < 0) {
pr_perror("Can't get sched affinity for %d", pid);
return -1;
}
tc->allowed_cpus->has_cpumask = true;
memcpy(tc->allowed_cpus->cpumask, &cpumask, sizeof(cpu_set_t));
}

return 0;
}

Expand Down
35 changes: 34 additions & 1 deletion criu/cr-restore.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ static int prepare_restorer_blob(void);
static int prepare_rlimits(int pid, struct task_restore_args *, CoreEntry *core);
static int prepare_posix_timers(int pid, struct task_restore_args *ta, CoreEntry *core);
static int prepare_signals(int pid, struct task_restore_args *, CoreEntry *core);
static int prepare_allowed_cpus(int pid, struct task_restore_args *ta, CoreEntry *leader_core);

/*
* Architectures can overwrite this function to restore registers that are not
Expand Down Expand Up @@ -918,6 +919,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core)
if (prepare_signals(pid, ta, core))
return -1;

if (prepare_allowed_cpus(pid, ta, core))
return -1;

if (prepare_posix_timers(pid, ta, core))
return -1;

Expand Down Expand Up @@ -3290,6 +3294,34 @@ static int prepare_signals(int pid, struct task_restore_args *ta, CoreEntry *lea
return ret;
}

/*
 * Pack the per-thread CPU affinity data of the task being restored
 * (current) into RM_PRIVATE restorer memory.
 *
 * For each of current->nr_threads threads one bool flag is appended,
 * immediately followed by a cpu_set_t when that flag is true. Records
 * are laid back to back; ta->allowed_cpus receives the position of the
 * first one and is walked in the same order by restore_cpu_affinity()
 * in the restorer.
 *
 * @pid and @leader_core are not used here.
 *
 * Returns 0 on success (or when --with-cpu-affinity is off) and -1 if
 * restorer memory cannot be allocated.
 */
static int prepare_allowed_cpus(int pid, struct task_restore_args *ta, CoreEntry *leader_core)
{
	int i;

	if (!opts.with_cpu_affinity)
		return 0;

	ta->allowed_cpus = (char *)rst_mem_align_cpos(RM_PRIVATE);
	for (i = 0; i < current->nr_threads; i++) {
		bool *has_cpumask;
		cpu_set_t *cpumask;
		size_t avail = 0;

		has_cpumask = rst_mem_alloc(sizeof(bool), RM_PRIVATE);
		if (!has_cpumask)
			return -1;

		/*
		 * allowed_cpus is an optional image field and cpumask is a
		 * repeated one, so an image may carry no entry at all or
		 * fewer words than a cpu_set_t holds. Work out how many
		 * bytes are really there instead of trusting the image.
		 */
		if (current->core[i]->thread_core->allowed_cpus &&
		    current->core[i]->thread_core->allowed_cpus->has_cpumask &&
		    current->core[i]->thread_core->allowed_cpus->cpumask)
			avail = current->core[i]->thread_core->allowed_cpus->n_cpumask *
				sizeof(current->core[i]->thread_core->allowed_cpus->cpumask[0]);

		/* No usable mask: the restorer skips this thread. */
		*has_cpumask = avail > 0;
		if (!avail)
			continue;

		cpumask = rst_mem_alloc(sizeof(cpu_set_t), RM_PRIVATE);
		if (!cpumask)
			return -1;

		/* Never read past the image data; zero-fill a short mask. */
		if (avail > sizeof(cpu_set_t))
			avail = sizeof(cpu_set_t);
		memset(cpumask, 0, sizeof(cpu_set_t));
		memcpy(cpumask, current->core[i]->thread_core->allowed_cpus->cpumask, avail);
	}

	return 0;
}

extern void __gcov_flush(void) __attribute__((weak));
void __gcov_flush(void)
{
Expand Down Expand Up @@ -3740,6 +3772,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
RST_MEM_FIXUP_PPTR(task_args->timerfd);
RST_MEM_FIXUP_PPTR(task_args->posix_timers);
RST_MEM_FIXUP_PPTR(task_args->siginfo);
RST_MEM_FIXUP_PPTR(task_args->allowed_cpus);
RST_MEM_FIXUP_PPTR(task_args->rlims);
RST_MEM_FIXUP_PPTR(task_args->helpers);
RST_MEM_FIXUP_PPTR(task_args->zombies);
Expand Down Expand Up @@ -3900,7 +3933,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
task_args->thread_args = thread_args;

task_args->auto_dedup = opts.auto_dedup;

task_args->with_cpu_affinity = opts.with_cpu_affinity;
/*
* In the restorer we need to know if it is SELinux or not. For SELinux
* we must change the process context before creating threads. For
Expand Down
2 changes: 2 additions & 0 deletions criu/crtools.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,8 @@ int main(int argc, char *argv[], char *envp[])
" --file-validation METHOD\n"
" pass the validation method to be used; argument\n"
" can be 'filesize' or 'buildid' (default).\n"
" --with-cpu-affinity Allow to restore cpu affinity. Only for hosts with\n"
" same cpu quantity.\n"
" --skip-file-rwx-check\n"
" Skip checking file permissions\n"
" (r/w/x for u/g/o) on restore.\n"
Expand Down
3 changes: 3 additions & 0 deletions criu/include/cr_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,9 @@ struct cr_options {
* explicitly request it as it comes with many limitations.
*/
int unprivileged;

/* restore cpu affinity */
int with_cpu_affinity;
};

extern struct cr_options opts;
Expand Down
5 changes: 5 additions & 0 deletions criu/include/restorer.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <signal.h>
#include <limits.h>
#include <sched.h>
#include <sys/resource.h>
#include <linux/filter.h>

Expand Down Expand Up @@ -171,6 +172,8 @@ struct task_restore_args {
siginfo_t *siginfo;
unsigned int siginfo_n;

char *allowed_cpus;

struct rst_tcp_sock *tcp_socks;
unsigned int tcp_socks_n;

Expand Down Expand Up @@ -240,6 +243,8 @@ struct task_restore_args {

uid_t uid;
u32 cap_eff[CR_CAP_SIZE];

bool with_cpu_affinity;
} __aligned(64);

/*
Expand Down
38 changes: 38 additions & 0 deletions criu/pie/restorer.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,40 @@ static int restore_signals(siginfo_t *ptr, int nr, bool group)
return 0;
}

/*
 * Apply the saved CPU affinity to every thread described in @args.
 *
 * args->allowed_cpus is read as a packed sequence with one record per
 * thread: a bool flag, followed by a cpu_set_t only when the flag is
 * true. For each flagged thread sys_sched_setaffinity() is called with
 * the pid taken from args->thread_args[].
 *
 * Returns 0 when affinity restore is disabled or every call succeeded,
 * otherwise the non-zero result of the first failing call.
 */
static int restore_cpu_affinity(struct task_restore_args *args)
{
	char *cursor;
	int idx;

	if (!args->with_cpu_affinity)
		return 0;

	cursor = args->allowed_cpus;
	for (idx = 0; idx < args->nr_threads; idx++) {
		bool present = *(bool *)cursor;

		/* Step over the flag; a mask follows only if it was set. */
		cursor += sizeof(bool);

		if (present) {
			int tid = args->thread_args[idx].pid;
			int err = sys_sched_setaffinity(tid, sizeof(cpu_set_t), (cpu_set_t *)cursor);

			if (err) {
				pr_err("\t Restore %d cpumask failed.\n", tid);
				return err;
			}

			cursor += sizeof(cpu_set_t);
		}
	}

	return 0;
}

static int restore_rseq(struct rst_rseq_param *rseq)
{
int ret;
Expand Down Expand Up @@ -1968,6 +2002,10 @@ long __export_restore_task(struct task_restore_args *args)

pr_info("%ld: Restored\n", sys_getpid());

ret = restore_cpu_affinity(args);
if (ret)
goto core_restore_end;

restore_finish_stage(task_entries_local, CR_STATE_RESTORE);

if (wait_helpers(args) < 0)
Expand Down
19 changes: 19 additions & 0 deletions criu/pstree.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,13 @@ CoreEntry *core_entry_alloc(int th, int tsk)
CredsEntry *ce = NULL;

sz += sizeof(ThreadCoreEntry) + sizeof(ThreadSasEntry) + sizeof(CredsEntry);
sz += sizeof(ThreadAllowedcpusEntry);

sz += CR_CAP_SIZE * sizeof(ce->cap_inh[0]);
sz += CR_CAP_SIZE * sizeof(ce->cap_prm[0]);
sz += CR_CAP_SIZE * sizeof(ce->cap_eff[0]);
sz += CR_CAP_SIZE * sizeof(ce->cap_bnd[0]);
sz += sizeof(cpu_set_t);
/*
* @groups are dynamic and allocated
* on demand.
Expand Down Expand Up @@ -127,6 +129,11 @@ CoreEntry *core_entry_alloc(int th, int tsk)
ce->cap_eff = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_eff[0]));
ce->cap_bnd = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_bnd[0]));

core->thread_core->allowed_cpus = xptr_pull(&m, ThreadAllowedcpusEntry);
thread_allowedcpus_entry__init(core->thread_core->allowed_cpus);
core->thread_core->allowed_cpus->n_cpumask = sizeof(cpu_set_t) / sizeof(uint64_t);
core->thread_core->allowed_cpus->cpumask = xptr_pull_s(&m, sizeof(cpu_set_t));

if (arch_alloc_thread_info(core)) {
xfree(core);
core = NULL;
Expand Down Expand Up @@ -278,6 +285,7 @@ int dump_pstree(struct pstree_item *root_item)
PstreeEntry e = PSTREE_ENTRY__INIT;
int ret = -1, i;
struct cr_img *img;
unsigned int nr_cpus;

pr_info("\n");
pr_info("Dumping pstree (pid: %d)\n", root_item->pid->real);
Expand All @@ -301,6 +309,7 @@ int dump_pstree(struct pstree_item *root_item)
}
}

nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
img = open_image(CR_FD_PSTREE, O_DUMP);
if (!img)
return -1;
Expand All @@ -313,6 +322,7 @@ int dump_pstree(struct pstree_item *root_item)
e.pgid = item->pgid;
e.sid = item->sid;
e.n_threads = item->nr_threads;
e.nr_cpus = nr_cpus;

e.threads = xmalloc(sizeof(e.threads[0]) * e.n_threads);
if (!e.threads)
Expand Down Expand Up @@ -532,6 +542,7 @@ static int read_one_pstree_item(struct cr_img *img, pid_t *pid_max)
struct pstree_item *pi;
PstreeEntry *e;
int ret, i;
unsigned int nr_cpus;

ret = pb_read_one_eof(img, &e, PB_PSTREE);
if (ret <= 0)
Expand All @@ -543,6 +554,14 @@ static int read_one_pstree_item(struct cr_img *img, pid_t *pid_max)
goto err;
BUG_ON(pi->pid->state != TASK_UNDEF);

if (opts.with_cpu_affinity) {
nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
if (e->nr_cpus > nr_cpus) {
pr_err("different number of cpus in cpu affinity restore\n");
goto err;
}
}

/*
* All pids should be added in the tree to be able to find
* free pid-s for helpers. pstree_item for these pid-s will
Expand Down
6 changes: 6 additions & 0 deletions images/core.proto
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ message thread_sas_entry {
required uint32 ss_flags = 3;
}

// Per-thread CPU affinity saved at dump time when --with-cpu-affinity is set.
message thread_allowedcpus_entry {
// True when cpumask below carries a mask captured from the thread.
required bool has_cpumask = 1 [default=false];
// Affinity mask as 64-bit words; the dump side copies a raw cpu_set_t
// into it, so sizeof(cpu_set_t) / 8 words are expected.
repeated uint64 cpumask = 2 [packed=true];
}

message thread_core_entry {
required uint64 futex_rla = 1;
required uint32 futex_rla_len = 2;
Expand All @@ -107,6 +112,7 @@ message thread_core_entry {
optional uint64 blk_sigset_extended = 14;
optional rseq_entry rseq_entry = 15;
required uint32 cg_set = 16;
optional thread_allowedcpus_entry allowed_cpus = 17;
}

message task_rlimits_entry {
Expand Down
1 change: 1 addition & 0 deletions images/pstree.proto
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ message pstree_entry {
required uint32 pgid = 3;
required uint32 sid = 4;
repeated uint32 threads = 5;
optional uint32 nr_cpus = 6;
}

0 comments on commit 9a98e31

Please sign in to comment.