dirtylimit.c (15403B)
1 /* 2 * Dirty page rate limit implementation code 3 * 4 * Copyright (c) 2022 CHINA TELECOM CO.,LTD. 5 * 6 * Authors: 7 * Hyman Huang(黄勇) <huangy81@chinatelecom.cn> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "qapi/error.h" 15 #include "qemu/main-loop.h" 16 #include "qapi/qapi-commands-migration.h" 17 #include "qapi/qmp/qdict.h" 18 #include "qapi/error.h" 19 #include "sysemu/dirtyrate.h" 20 #include "sysemu/dirtylimit.h" 21 #include "monitor/hmp.h" 22 #include "monitor/monitor.h" 23 #include "exec/memory.h" 24 #include "hw/boards.h" 25 #include "sysemu/kvm.h" 26 #include "trace.h" 27 28 /* 29 * Dirtylimit stop working if dirty page rate error 30 * value less than DIRTYLIMIT_TOLERANCE_RANGE 31 */ 32 #define DIRTYLIMIT_TOLERANCE_RANGE 25 /* MB/s */ 33 /* 34 * Plus or minus vcpu sleep time linearly if dirty 35 * page rate error value percentage over 36 * DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT. 37 * Otherwise, plus or minus a fixed vcpu sleep time. 38 */ 39 #define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT 50 40 /* 41 * Max vcpu sleep time percentage during a cycle 42 * composed of dirty ring full and sleep time. 43 */ 44 #define DIRTYLIMIT_THROTTLE_PCT_MAX 99 45 46 struct { 47 VcpuStat stat; 48 bool running; 49 QemuThread thread; 50 } *vcpu_dirty_rate_stat; 51 52 typedef struct VcpuDirtyLimitState { 53 int cpu_index; 54 bool enabled; 55 /* 56 * Quota dirty page rate, unit is MB/s 57 * zero if not enabled. 58 */ 59 uint64_t quota; 60 } VcpuDirtyLimitState; 61 62 struct { 63 VcpuDirtyLimitState *states; 64 /* Max cpus number configured by user */ 65 int max_cpus; 66 /* Number of vcpu under dirtylimit */ 67 int limited_nvcpu; 68 } *dirtylimit_state; 69 70 /* protect dirtylimit_state */ 71 static QemuMutex dirtylimit_mutex; 72 73 /* dirtylimit thread quit if dirtylimit_quit is true */ 74 static bool dirtylimit_quit; 75 76 static void vcpu_dirty_rate_stat_collect(void) 77 { 78 VcpuStat stat; 79 int i = 0; 80 81 /* calculate vcpu dirtyrate */ 82 vcpu_calculate_dirtyrate(DIRTYLIMIT_CALC_TIME_MS, 83 &stat, 84 GLOBAL_DIRTY_LIMIT, 85 false); 86 87 for (i = 0; i < stat.nvcpu; i++) { 88 vcpu_dirty_rate_stat->stat.rates[i].id = i; 89 vcpu_dirty_rate_stat->stat.rates[i].dirty_rate = 90 stat.rates[i].dirty_rate; 91 } 92 93 free(stat.rates); 94 } 95 96 static void *vcpu_dirty_rate_stat_thread(void *opaque) 97 { 98 rcu_register_thread(); 99 100 /* start log sync */ 101 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true); 102 103 while (qatomic_read(&vcpu_dirty_rate_stat->running)) { 104 vcpu_dirty_rate_stat_collect(); 105 if (dirtylimit_in_service()) { 106 dirtylimit_process(); 107 } 108 } 109 110 /* stop log sync */ 111 global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false); 112 113 rcu_unregister_thread(); 114 return NULL; 115 } 116 117 int64_t vcpu_dirty_rate_get(int cpu_index) 118 { 119 DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates; 120 return qatomic_read_i64(&rates[cpu_index].dirty_rate); 121 } 122 123 void vcpu_dirty_rate_stat_start(void) 124 { 125 if (qatomic_read(&vcpu_dirty_rate_stat->running)) { 126 return; 127 } 128 129 qatomic_set(&vcpu_dirty_rate_stat->running, 1); 130 qemu_thread_create(&vcpu_dirty_rate_stat->thread, 131 "dirtyrate-stat", 132 vcpu_dirty_rate_stat_thread, 133 NULL, 134 QEMU_THREAD_JOINABLE); 135 } 136 137 void vcpu_dirty_rate_stat_stop(void) 138 { 139 qatomic_set(&vcpu_dirty_rate_stat->running, 0); 140 dirtylimit_state_unlock(); 141 qemu_mutex_unlock_iothread(); 142 qemu_thread_join(&vcpu_dirty_rate_stat->thread); 143 qemu_mutex_lock_iothread(); 144 dirtylimit_state_lock(); 145 } 146 147 void vcpu_dirty_rate_stat_initialize(void) 148 { 149 MachineState *ms = MACHINE(qdev_get_machine()); 150 int max_cpus = ms->smp.max_cpus; 151 152 vcpu_dirty_rate_stat = 153 g_malloc0(sizeof(*vcpu_dirty_rate_stat)); 154 155 vcpu_dirty_rate_stat->stat.nvcpu = max_cpus; 156 vcpu_dirty_rate_stat->stat.rates = 157 g_new0(DirtyRateVcpu, max_cpus); 158 159 vcpu_dirty_rate_stat->running = false; 160 } 161 162 void vcpu_dirty_rate_stat_finalize(void) 163 { 164 free(vcpu_dirty_rate_stat->stat.rates); 165 vcpu_dirty_rate_stat->stat.rates = NULL; 166 167 free(vcpu_dirty_rate_stat); 168 vcpu_dirty_rate_stat = NULL; 169 } 170 171 void dirtylimit_state_lock(void) 172 { 173 qemu_mutex_lock(&dirtylimit_mutex); 174 } 175 176 void dirtylimit_state_unlock(void) 177 { 178 qemu_mutex_unlock(&dirtylimit_mutex); 179 } 180 181 static void 182 __attribute__((__constructor__)) dirtylimit_mutex_init(void) 183 { 184 qemu_mutex_init(&dirtylimit_mutex); 185 } 186 187 static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index) 188 { 189 return &dirtylimit_state->states[cpu_index]; 190 } 191 192 void dirtylimit_state_initialize(void) 193 { 194 MachineState *ms = MACHINE(qdev_get_machine()); 195 int max_cpus = ms->smp.max_cpus; 196 int i; 197 198 dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state)); 199 200 dirtylimit_state->states = 201 g_new0(VcpuDirtyLimitState, max_cpus); 202 203 for (i = 0; i < max_cpus; i++) { 204 dirtylimit_state->states[i].cpu_index = i; 205 } 206 207 dirtylimit_state->max_cpus = max_cpus; 208 trace_dirtylimit_state_initialize(max_cpus); 209 } 210 211 void dirtylimit_state_finalize(void) 212 { 213 free(dirtylimit_state->states); 214 dirtylimit_state->states = NULL; 215 216 free(dirtylimit_state); 217 dirtylimit_state = NULL; 218 219 trace_dirtylimit_state_finalize(); 220 } 221 222 bool dirtylimit_in_service(void) 223 { 224 return !!dirtylimit_state; 225 } 226 227 bool dirtylimit_vcpu_index_valid(int cpu_index) 228 { 229 MachineState *ms = MACHINE(qdev_get_machine()); 230 231 return !(cpu_index < 0 || 232 cpu_index >= ms->smp.max_cpus); 233 } 234 235 static inline int64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate) 236 { 237 static uint64_t max_dirtyrate; 238 uint32_t dirty_ring_size = kvm_dirty_ring_size(); 239 uint64_t dirty_ring_size_meory_MB = 240 dirty_ring_size * TARGET_PAGE_SIZE >> 20; 241 242 if (max_dirtyrate < dirtyrate) { 243 max_dirtyrate = dirtyrate; 244 } 245 246 return dirty_ring_size_meory_MB * 1000000 / max_dirtyrate; 247 } 248 249 static inline bool dirtylimit_done(uint64_t quota, 250 uint64_t current) 251 { 252 uint64_t min, max; 253 254 min = MIN(quota, current); 255 max = MAX(quota, current); 256 257 return ((max - min) <= DIRTYLIMIT_TOLERANCE_RANGE) ? true : false; 258 } 259 260 static inline bool 261 dirtylimit_need_linear_adjustment(uint64_t quota, 262 uint64_t current) 263 { 264 uint64_t min, max; 265 266 min = MIN(quota, current); 267 max = MAX(quota, current); 268 269 return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT; 270 } 271 272 static void dirtylimit_set_throttle(CPUState *cpu, 273 uint64_t quota, 274 uint64_t current) 275 { 276 int64_t ring_full_time_us = 0; 277 uint64_t sleep_pct = 0; 278 uint64_t throttle_us = 0; 279 280 if (current == 0) { 281 cpu->throttle_us_per_full = 0; 282 return; 283 } 284 285 ring_full_time_us = dirtylimit_dirty_ring_full_time(current); 286 287 if (dirtylimit_need_linear_adjustment(quota, current)) { 288 if (quota < current) { 289 sleep_pct = (current - quota) * 100 / current; 290 throttle_us = 291 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); 292 cpu->throttle_us_per_full += throttle_us; 293 } else { 294 sleep_pct = (quota - current) * 100 / quota; 295 throttle_us = 296 ring_full_time_us * sleep_pct / (double)(100 - sleep_pct); 297 cpu->throttle_us_per_full -= throttle_us; 298 } 299 300 trace_dirtylimit_throttle_pct(cpu->cpu_index, 301 sleep_pct, 302 throttle_us); 303 } else { 304 if (quota < current) { 305 cpu->throttle_us_per_full += ring_full_time_us / 10; 306 } else { 307 cpu->throttle_us_per_full -= ring_full_time_us / 10; 308 } 309 } 310 311 /* 312 * TODO: in the big kvm_dirty_ring_size case (eg: 65536, or other scenario), 313 * current dirty page rate may never reach the quota, we should stop 314 * increasing sleep time? 315 */ 316 cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full, 317 ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX); 318 319 cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0); 320 } 321 322 static void dirtylimit_adjust_throttle(CPUState *cpu) 323 { 324 uint64_t quota = 0; 325 uint64_t current = 0; 326 int cpu_index = cpu->cpu_index; 327 328 quota = dirtylimit_vcpu_get_state(cpu_index)->quota; 329 current = vcpu_dirty_rate_get(cpu_index); 330 331 if (!dirtylimit_done(quota, current)) { 332 dirtylimit_set_throttle(cpu, quota, current); 333 } 334 335 return; 336 } 337 338 void dirtylimit_process(void) 339 { 340 CPUState *cpu; 341 342 if (!qatomic_read(&dirtylimit_quit)) { 343 dirtylimit_state_lock(); 344 345 if (!dirtylimit_in_service()) { 346 dirtylimit_state_unlock(); 347 return; 348 } 349 350 CPU_FOREACH(cpu) { 351 if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) { 352 continue; 353 } 354 dirtylimit_adjust_throttle(cpu); 355 } 356 dirtylimit_state_unlock(); 357 } 358 } 359 360 void dirtylimit_change(bool start) 361 { 362 if (start) { 363 qatomic_set(&dirtylimit_quit, 0); 364 } else { 365 qatomic_set(&dirtylimit_quit, 1); 366 } 367 } 368 369 void dirtylimit_set_vcpu(int cpu_index, 370 uint64_t quota, 371 bool enable) 372 { 373 trace_dirtylimit_set_vcpu(cpu_index, quota); 374 375 if (enable) { 376 dirtylimit_state->states[cpu_index].quota = quota; 377 if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) { 378 dirtylimit_state->limited_nvcpu++; 379 } 380 } else { 381 dirtylimit_state->states[cpu_index].quota = 0; 382 if (dirtylimit_state->states[cpu_index].enabled) { 383 dirtylimit_state->limited_nvcpu--; 384 } 385 } 386 387 dirtylimit_state->states[cpu_index].enabled = enable; 388 } 389 390 void dirtylimit_set_all(uint64_t quota, 391 bool enable) 392 { 393 MachineState *ms = MACHINE(qdev_get_machine()); 394 int max_cpus = ms->smp.max_cpus; 395 int i; 396 397 for (i = 0; i < max_cpus; i++) { 398 dirtylimit_set_vcpu(i, quota, enable); 399 } 400 } 401 402 void dirtylimit_vcpu_execute(CPUState *cpu) 403 { 404 if (dirtylimit_in_service() && 405 dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled && 406 cpu->throttle_us_per_full) { 407 trace_dirtylimit_vcpu_execute(cpu->cpu_index, 408 cpu->throttle_us_per_full); 409 usleep(cpu->throttle_us_per_full); 410 } 411 } 412 413 static void dirtylimit_init(void) 414 { 415 dirtylimit_state_initialize(); 416 dirtylimit_change(true); 417 vcpu_dirty_rate_stat_initialize(); 418 vcpu_dirty_rate_stat_start(); 419 } 420 421 static void dirtylimit_cleanup(void) 422 { 423 vcpu_dirty_rate_stat_stop(); 424 vcpu_dirty_rate_stat_finalize(); 425 dirtylimit_change(false); 426 dirtylimit_state_finalize(); 427 } 428 429 void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index, 430 int64_t cpu_index, 431 Error **errp) 432 { 433 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { 434 return; 435 } 436 437 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { 438 error_setg(errp, "incorrect cpu index specified"); 439 return; 440 } 441 442 if (!dirtylimit_in_service()) { 443 return; 444 } 445 446 dirtylimit_state_lock(); 447 448 if (has_cpu_index) { 449 dirtylimit_set_vcpu(cpu_index, 0, false); 450 } else { 451 dirtylimit_set_all(0, false); 452 } 453 454 if (!dirtylimit_state->limited_nvcpu) { 455 dirtylimit_cleanup(); 456 } 457 458 dirtylimit_state_unlock(); 459 } 460 461 void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 462 { 463 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); 464 Error *err = NULL; 465 466 qmp_cancel_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, &err); 467 if (err) { 468 hmp_handle_error(mon, err); 469 return; 470 } 471 472 monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " 473 "dirty limit for virtual CPU]\n"); 474 } 475 476 void qmp_set_vcpu_dirty_limit(bool has_cpu_index, 477 int64_t cpu_index, 478 uint64_t dirty_rate, 479 Error **errp) 480 { 481 if (!kvm_enabled() || !kvm_dirty_ring_enabled()) { 482 error_setg(errp, "dirty page limit feature requires KVM with" 483 " accelerator property 'dirty-ring-size' set'"); 484 return; 485 } 486 487 if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) { 488 error_setg(errp, "incorrect cpu index specified"); 489 return; 490 } 491 492 if (!dirty_rate) { 493 qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp); 494 return; 495 } 496 497 dirtylimit_state_lock(); 498 499 if (!dirtylimit_in_service()) { 500 dirtylimit_init(); 501 } 502 503 if (has_cpu_index) { 504 dirtylimit_set_vcpu(cpu_index, dirty_rate, true); 505 } else { 506 dirtylimit_set_all(dirty_rate, true); 507 } 508 509 dirtylimit_state_unlock(); 510 } 511 512 void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 513 { 514 int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate"); 515 int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1); 516 Error *err = NULL; 517 518 qmp_set_vcpu_dirty_limit(!!(cpu_index != -1), cpu_index, dirty_rate, &err); 519 if (err) { 520 hmp_handle_error(mon, err); 521 return; 522 } 523 524 monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query " 525 "dirty limit for virtual CPU]\n"); 526 } 527 528 static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index) 529 { 530 DirtyLimitInfo *info = NULL; 531 532 info = g_malloc0(sizeof(*info)); 533 info->cpu_index = cpu_index; 534 info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota; 535 info->current_rate = vcpu_dirty_rate_get(cpu_index); 536 537 return info; 538 } 539 540 static struct DirtyLimitInfoList *dirtylimit_query_all(void) 541 { 542 int i, index; 543 DirtyLimitInfo *info = NULL; 544 DirtyLimitInfoList *head = NULL, **tail = &head; 545 546 dirtylimit_state_lock(); 547 548 if (!dirtylimit_in_service()) { 549 dirtylimit_state_unlock(); 550 return NULL; 551 } 552 553 for (i = 0; i < dirtylimit_state->max_cpus; i++) { 554 index = dirtylimit_state->states[i].cpu_index; 555 if (dirtylimit_vcpu_get_state(index)->enabled) { 556 info = dirtylimit_query_vcpu(index); 557 QAPI_LIST_APPEND(tail, info); 558 } 559 } 560 561 dirtylimit_state_unlock(); 562 563 return head; 564 } 565 566 struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp) 567 { 568 if (!dirtylimit_in_service()) { 569 return NULL; 570 } 571 572 return dirtylimit_query_all(); 573 } 574 575 void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict) 576 { 577 DirtyLimitInfoList *limit, *head, *info = NULL; 578 Error *err = NULL; 579 580 if (!dirtylimit_in_service()) { 581 monitor_printf(mon, "Dirty page limit not enabled!\n"); 582 return; 583 } 584 585 info = qmp_query_vcpu_dirty_limit(&err); 586 if (err) { 587 hmp_handle_error(mon, err); 588 return; 589 } 590 591 head = info; 592 for (limit = head; limit != NULL; limit = limit->next) { 593 monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s)," 594 " current rate %"PRIi64 " (MB/s)\n", 595 limit->value->cpu_index, 596 limit->value->limit_rate, 597 limit->value->current_rate); 598 } 599 600 g_free(info); 601 }