PIXEventsCommon.h (20982B)
// Copyright (c) Microsoft Corporation. All rights reserved.

/*==========================================================================;
 *
 *  Copyright (C) Microsoft Corporation.  All Rights Reserved.
 *
 *  File:       PIXEventsCommon.h
 *  Content:    PIX include file
 *              Don't include this file directly - use pix3.h
 *
 ****************************************************************************/
#pragma once

#ifndef _PIXEventsCommon_H_
#define _PIXEventsCommon_H_

// PIX_XBOX is defined when any of the Xbox platform macros below is present.
#if defined(XBOX) || defined(_XBOX_ONE) || defined(_DURANGO) || defined(_GAMING_XBOX) || defined(_GAMING_XBOX_SCARLETT)
#define PIX_XBOX
#endif

#include <cstdint>

// SSE2 intrinsics are only pulled in for MSVC-style x86/x64 targets.
#if defined(_M_X64) || defined(_M_IX86)
#include <emmintrin.h>
#endif

//
// The PIXBeginEvent and PIXSetMarker functions have an optimized path for
// copying strings that works by copying 128 bits or 64 bits at a time. In some
// circumstances this may result in PIX logging the remaining memory after the
// null terminator.
//
// By default this optimization is enabled unless Address Sanitizer is enabled,
// since this optimization can trigger a global-buffer-overflow when copying
// string literals.
//
// The PIX_ENABLE_BLOCK_ARGUMENT_COPY macro controls whether or not this
// optimization is enabled. Applications may also explicitly set this macro to 0
// to disable the optimization if necessary.
40 // 41 42 // Check for Address Sanitizer on either Clang or MSVC 43 44 #if defined(__has_feature) 45 #if __has_feature(address_sanitizer) 46 #define PIX_ASAN_ENABLED 47 #endif 48 #elif defined(__SANITIZE_ADDRESS__) 49 #define PIX_ASAN_ENABLED 50 #endif 51 52 #if defined(PIX_ENABLE_BLOCK_ARGUMENT_COPY) 53 // Previously set values override everything 54 # define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 0 55 #elif defined(PIX_ASAN_ENABLED) 56 // Disable block argument copy when address sanitizer is enabled 57 #define PIX_ENABLE_BLOCK_ARGUMENT_COPY 0 58 #define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 1 59 #endif 60 61 #if !defined(PIX_ENABLE_BLOCK_ARGUMENT_COPY) 62 // Default to enabled. 63 #define PIX_ENABLE_BLOCK_ARGUMENT_COPY 1 64 #define PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 1 65 #endif 66 67 struct PIXEventsBlockInfo; 68 69 struct PIXEventsThreadInfo 70 { 71 PIXEventsBlockInfo* block; 72 UINT64* biasedLimit; 73 UINT64* destination; 74 }; 75 76 #ifdef PIX_XBOX 77 extern "C" UINT64 WINAPI PIXEventsReplaceBlock(bool getEarliestTime) noexcept; 78 #else 79 extern "C" UINT64 WINAPI PIXEventsReplaceBlock(PIXEventsThreadInfo * threadInfo, bool getEarliestTime) noexcept; 80 #endif 81 82 enum PIXEventType 83 { 84 PIXEvent_EndEvent = 0x000, 85 PIXEvent_BeginEvent_VarArgs = 0x001, 86 PIXEvent_BeginEvent_NoArgs = 0x002, 87 PIXEvent_SetMarker_VarArgs = 0x007, 88 PIXEvent_SetMarker_NoArgs = 0x008, 89 90 PIXEvent_EndEvent_OnContext = 0x010, 91 PIXEvent_BeginEvent_OnContext_VarArgs = 0x011, 92 PIXEvent_BeginEvent_OnContext_NoArgs = 0x012, 93 PIXEvent_SetMarker_OnContext_VarArgs = 0x017, 94 PIXEvent_SetMarker_OnContext_NoArgs = 0x018, 95 }; 96 97 static const UINT64 PIXEventsReservedRecordSpaceQwords = 64; 98 //this is used to make sure SSE string copy always will end 16-byte write in the current block 99 //this way only a check if destination < limit can be performed, instead of destination < limit - 1 100 //since both these are UINT64* and SSE writes in 16 byte chunks, 8 bytes are kept in 
reserve 101 //so even if SSE overwrites 8 extra bytes, those will still belong to the correct block 102 //on next iteration check destination will be greater than limit 103 //this is used as well for fixed size UMD events and PIXEndEvent since these require less space 104 //than other variable length user events and do not need big reserved space 105 static const UINT64 PIXEventsReservedTailSpaceQwords = 2; 106 static const UINT64 PIXEventsSafeFastCopySpaceQwords = PIXEventsReservedRecordSpaceQwords - PIXEventsReservedTailSpaceQwords; 107 static const UINT64 PIXEventsGraphicsRecordSpaceQwords = 64; 108 109 //Bits 7-19 (13 bits) 110 static const UINT64 PIXEventsBlockEndMarker = 0x00000000000FFF80; 111 112 //Bits 10-19 (10 bits) 113 static const UINT64 PIXEventsTypeReadMask = 0x00000000000FFC00; 114 static const UINT64 PIXEventsTypeWriteMask = 0x00000000000003FF; 115 static const UINT64 PIXEventsTypeBitShift = 10; 116 117 //Bits 20-63 (44 bits) 118 static const UINT64 PIXEventsTimestampReadMask = 0xFFFFFFFFFFF00000; 119 static const UINT64 PIXEventsTimestampWriteMask = 0x00000FFFFFFFFFFF; 120 static const UINT64 PIXEventsTimestampBitShift = 20; 121 122 inline UINT64 PIXEncodeEventInfo(UINT64 timestamp, PIXEventType eventType) 123 { 124 return ((timestamp & PIXEventsTimestampWriteMask) << PIXEventsTimestampBitShift) | 125 (((UINT64)eventType & PIXEventsTypeWriteMask) << PIXEventsTypeBitShift); 126 } 127 128 //Bits 60-63 (4) 129 static const UINT64 PIXEventsStringAlignmentWriteMask = 0x000000000000000F; 130 static const UINT64 PIXEventsStringAlignmentReadMask = 0xF000000000000000; 131 static const UINT64 PIXEventsStringAlignmentBitShift = 60; 132 133 //Bits 55-59 (5) 134 static const UINT64 PIXEventsStringCopyChunkSizeWriteMask = 0x000000000000001F; 135 static const UINT64 PIXEventsStringCopyChunkSizeReadMask = 0x0F80000000000000; 136 static const UINT64 PIXEventsStringCopyChunkSizeBitShift = 55; 137 138 //Bit 54 139 static const UINT64 PIXEventsStringIsANSIWriteMask = 
0x0000000000000001; 140 static const UINT64 PIXEventsStringIsANSIReadMask = 0x0040000000000000; 141 static const UINT64 PIXEventsStringIsANSIBitShift = 54; 142 143 //Bit 53 144 static const UINT64 PIXEventsStringIsShortcutWriteMask = 0x0000000000000001; 145 static const UINT64 PIXEventsStringIsShortcutReadMask = 0x0020000000000000; 146 static const UINT64 PIXEventsStringIsShortcutBitShift = 53; 147 148 inline UINT64 PIXEncodeStringInfo(UINT64 alignment, UINT64 copyChunkSize, BOOL isANSI, BOOL isShortcut) 149 { 150 return ((alignment & PIXEventsStringAlignmentWriteMask) << PIXEventsStringAlignmentBitShift) | 151 ((copyChunkSize & PIXEventsStringCopyChunkSizeWriteMask) << PIXEventsStringCopyChunkSizeBitShift) | 152 (((UINT64)isANSI & PIXEventsStringIsANSIWriteMask) << PIXEventsStringIsANSIBitShift) | 153 (((UINT64)isShortcut & PIXEventsStringIsShortcutWriteMask) << PIXEventsStringIsShortcutBitShift); 154 } 155 156 template<UINT alignment, class T> 157 inline bool PIXIsPointerAligned(T* pointer) 158 { 159 return !(((UINT64)pointer) & (alignment - 1)); 160 } 161 162 // Generic template version slower because of the additional clear write 163 template<class T> 164 inline void PIXCopyEventArgument(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, T argument) 165 { 166 if (destination < limit) 167 { 168 *destination = 0ull; 169 *((T*)destination) = argument; 170 ++destination; 171 } 172 } 173 174 // int32 specialization to avoid slower double memory writes 175 template<> 176 inline void PIXCopyEventArgument<INT32>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, INT32 argument) 177 { 178 if (destination < limit) 179 { 180 *reinterpret_cast<INT64*>(destination) = static_cast<INT64>(argument); 181 ++destination; 182 } 183 } 184 185 // unsigned int32 specialization to avoid slower double memory writes 186 template<> 187 inline void PIXCopyEventArgument<UINT32>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const 
UINT64* limit, UINT32 argument) 188 { 189 if (destination < limit) 190 { 191 *destination = static_cast<UINT64>(argument); 192 ++destination; 193 } 194 } 195 196 // int64 specialization to avoid slower double memory writes 197 template<> 198 inline void PIXCopyEventArgument<INT64>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, INT64 argument) 199 { 200 if (destination < limit) 201 { 202 *reinterpret_cast<INT64*>(destination) = argument; 203 ++destination; 204 } 205 } 206 207 // unsigned int64 specialization to avoid slower double memory writes 208 template<> 209 inline void PIXCopyEventArgument<UINT64>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, UINT64 argument) 210 { 211 if (destination < limit) 212 { 213 *destination = argument; 214 ++destination; 215 } 216 } 217 218 //floats must be cast to double during writing the data to be properly printed later when reading the data 219 //this is needed because when float is passed to varargs function it's cast to double 220 template<> 221 inline void PIXCopyEventArgument<float>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, float argument) 222 { 223 if (destination < limit) 224 { 225 *reinterpret_cast<double*>(destination) = static_cast<double>(argument); 226 ++destination; 227 } 228 } 229 230 //char has to be cast to a longer signed integer type 231 //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier 232 template<> 233 inline void PIXCopyEventArgument<char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, char argument) 234 { 235 if (destination < limit) 236 { 237 *reinterpret_cast<INT64*>(destination) = static_cast<INT64>(argument); 238 ++destination; 239 } 240 } 241 242 //unsigned char has to be cast to a longer unsigned integer type 243 //this is due to printf not ignoring correctly the upper bits of unsigned long long for a char format specifier 
244 template<> 245 inline void PIXCopyEventArgument<unsigned char>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, unsigned char argument) 246 { 247 if (destination < limit) 248 { 249 *destination = static_cast<UINT64>(argument); 250 ++destination; 251 } 252 } 253 254 //bool has to be cast to an integer since it's not explicitly supported by string format routines 255 //there's no format specifier for bool type, but it should work with integer format specifiers 256 template<> 257 inline void PIXCopyEventArgument<bool>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, bool argument) 258 { 259 if (destination < limit) 260 { 261 *destination = static_cast<UINT64>(argument); 262 ++destination; 263 } 264 } 265 266 inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument) 267 { 268 *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE); 269 while (destination < limit) 270 { 271 UINT64 c = static_cast<uint8_t>(argument[0]); 272 if (!c) 273 { 274 *destination++ = 0; 275 return; 276 } 277 UINT64 x = c; 278 c = static_cast<uint8_t>(argument[1]); 279 if (!c) 280 { 281 *destination++ = x; 282 return; 283 } 284 x |= c << 8; 285 c = static_cast<uint8_t>(argument[2]); 286 if (!c) 287 { 288 *destination++ = x; 289 return; 290 } 291 x |= c << 16; 292 c = static_cast<uint8_t>(argument[3]); 293 if (!c) 294 { 295 *destination++ = x; 296 return; 297 } 298 x |= c << 24; 299 c = static_cast<uint8_t>(argument[4]); 300 if (!c) 301 { 302 *destination++ = x; 303 return; 304 } 305 x |= c << 32; 306 c = static_cast<uint8_t>(argument[5]); 307 if (!c) 308 { 309 *destination++ = x; 310 return; 311 } 312 x |= c << 40; 313 c = static_cast<uint8_t>(argument[6]); 314 if (!c) 315 { 316 *destination++ = x; 317 return; 318 } 319 x |= c << 48; 320 c = static_cast<uint8_t>(argument[7]); 321 if (!c) 322 { 323 *destination++ = x; 324 return; 325 } 326 x |= c << 56; 327 
*destination++ = x; 328 argument += 8; 329 } 330 } 331 332 inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument) 333 { 334 #if PIX_ENABLE_BLOCK_ARGUMENT_COPY 335 if (PIXIsPointerAligned<8>(argument)) 336 { 337 *destination++ = PIXEncodeStringInfo(0, 8, TRUE, FALSE); 338 UINT64* source = (UINT64*)argument; 339 while (destination < limit) 340 { 341 UINT64 qword = *source++; 342 *destination++ = qword; 343 //check if any of the characters is a terminating zero 344 if (!((qword & 0xFF00000000000000) && 345 (qword & 0xFF000000000000) && 346 (qword & 0xFF0000000000) && 347 (qword & 0xFF00000000) && 348 (qword & 0xFF000000) && 349 (qword & 0xFF0000) && 350 (qword & 0xFF00) && 351 (qword & 0xFF))) 352 { 353 break; 354 } 355 } 356 } 357 else 358 #endif // PIX_ENABLE_BLOCK_ARGUMENT_COPY 359 { 360 PIXCopyEventArgumentSlowest(destination, limit, argument); 361 } 362 } 363 364 template<> 365 inline void PIXCopyEventArgument<PCSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCSTR argument) 366 { 367 if (destination < limit) 368 { 369 if (argument != nullptr) 370 { 371 #if (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY 372 if (PIXIsPointerAligned<16>(argument)) 373 { 374 *destination++ = PIXEncodeStringInfo(0, 16, TRUE, FALSE); 375 __m128i zero = _mm_setzero_si128(); 376 if (PIXIsPointerAligned<16>(destination)) 377 { 378 while (destination < limit) 379 { 380 __m128i mem = _mm_load_si128((__m128i*)argument); 381 _mm_store_si128((__m128i*)destination, mem); 382 //check if any of the characters is a terminating zero 383 __m128i res = _mm_cmpeq_epi8(mem, zero); 384 destination += 2; 385 if (_mm_movemask_epi8(res)) 386 break; 387 argument += 16; 388 } 389 } 390 else 391 { 392 while (destination < limit) 393 { 394 __m128i mem = _mm_load_si128((__m128i*)argument); 395 _mm_storeu_si128((__m128i*)destination, mem); 396 //check if any of the 
characters is a terminating zero 397 __m128i res = _mm_cmpeq_epi8(mem, zero); 398 destination += 2; 399 if (_mm_movemask_epi8(res)) 400 break; 401 argument += 16; 402 } 403 } 404 } 405 else 406 #endif // (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY 407 { 408 PIXCopyEventArgumentSlow(destination, limit, argument); 409 } 410 } 411 else 412 { 413 *destination++ = 0ull; 414 } 415 } 416 } 417 418 template<> 419 inline void PIXCopyEventArgument<PSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PSTR argument) 420 { 421 PIXCopyEventArgument(destination, limit, (PCSTR)argument); 422 } 423 424 inline void PIXCopyEventArgumentSlowest(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument) 425 { 426 *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE); 427 while (destination < limit) 428 { 429 UINT64 c = static_cast<uint16_t>(argument[0]); 430 if (!c) 431 { 432 *destination++ = 0; 433 return; 434 } 435 UINT64 x = c; 436 c = static_cast<uint16_t>(argument[1]); 437 if (!c) 438 { 439 *destination++ = x; 440 return; 441 } 442 x |= c << 16; 443 c = static_cast<uint16_t>(argument[2]); 444 if (!c) 445 { 446 *destination++ = x; 447 return; 448 } 449 x |= c << 32; 450 c = static_cast<uint16_t>(argument[3]); 451 if (!c) 452 { 453 *destination++ = x; 454 return; 455 } 456 x |= c << 48; 457 *destination++ = x; 458 argument += 4; 459 } 460 } 461 462 inline void PIXCopyEventArgumentSlow(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument) 463 { 464 #if PIX_ENABLE_BLOCK_ARGUMENT_COPY 465 if (PIXIsPointerAligned<8>(argument)) 466 { 467 *destination++ = PIXEncodeStringInfo(0, 8, FALSE, FALSE); 468 UINT64* source = (UINT64*)argument; 469 while (destination < limit) 470 { 471 UINT64 qword = *source++; 472 *destination++ = qword; 473 //check if any of the characters is a terminating zero 474 //TODO: check if reversed condition is faster 475 if 
(!((qword & 0xFFFF000000000000) && 476 (qword & 0xFFFF00000000) && 477 (qword & 0xFFFF0000) && 478 (qword & 0xFFFF))) 479 { 480 break; 481 } 482 } 483 } 484 else 485 #endif // PIX_ENABLE_BLOCK_ARGUMENT_COPY 486 { 487 PIXCopyEventArgumentSlowest(destination, limit, argument); 488 } 489 } 490 491 template<> 492 inline void PIXCopyEventArgument<PCWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PCWSTR argument) 493 { 494 if (destination < limit) 495 { 496 if (argument != nullptr) 497 { 498 #if (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY 499 if (PIXIsPointerAligned<16>(argument)) 500 { 501 *destination++ = PIXEncodeStringInfo(0, 16, FALSE, FALSE); 502 __m128i zero = _mm_setzero_si128(); 503 if (PIXIsPointerAligned<16>(destination)) 504 { 505 while (destination < limit) 506 { 507 __m128i mem = _mm_load_si128((__m128i*)argument); 508 _mm_store_si128((__m128i*)destination, mem); 509 //check if any of the characters is a terminating zero 510 __m128i res = _mm_cmpeq_epi16(mem, zero); 511 destination += 2; 512 if (_mm_movemask_epi8(res)) 513 break; 514 argument += 8; 515 } 516 } 517 else 518 { 519 while (destination < limit) 520 { 521 __m128i mem = _mm_load_si128((__m128i*)argument); 522 _mm_storeu_si128((__m128i*)destination, mem); 523 //check if any of the characters is a terminating zero 524 __m128i res = _mm_cmpeq_epi16(mem, zero); 525 destination += 2; 526 if (_mm_movemask_epi8(res)) 527 break; 528 argument += 8; 529 } 530 } 531 } 532 else 533 #endif // (defined(_M_X64) || defined(_M_IX86)) && PIX_ENABLE_BLOCK_ARGUMENT_COPY 534 { 535 PIXCopyEventArgumentSlow(destination, limit, argument); 536 } 537 } 538 else 539 { 540 *destination++ = 0ull; 541 } 542 } 543 } 544 545 template<> 546 inline void PIXCopyEventArgument<PWSTR>(_Out_writes_to_ptr_(limit) UINT64*& destination, _In_ const UINT64* limit, _In_ PWSTR argument) 547 { 548 PIXCopyEventArgument(destination, limit, (PCWSTR)argument); 549 }; 550 551 #if 
defined(__d3d12_x_h__) || defined(__d3d12_xs_h__) || defined(__d3d12_h__) 552 553 inline void PIXSetGPUMarkerOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size) 554 { 555 commandList->SetMarker(D3D12_EVENT_METADATA, data, size); 556 } 557 558 inline void PIXSetGPUMarkerOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size) 559 { 560 commandQueue->SetMarker(D3D12_EVENT_METADATA, data, size); 561 } 562 563 inline void PIXBeginGPUEventOnContext(_In_ ID3D12GraphicsCommandList* commandList, _In_reads_bytes_(size) void* data, UINT size) 564 { 565 commandList->BeginEvent(D3D12_EVENT_METADATA, data, size); 566 } 567 568 inline void PIXBeginGPUEventOnContext(_In_ ID3D12CommandQueue* commandQueue, _In_reads_bytes_(size) void* data, UINT size) 569 { 570 commandQueue->BeginEvent(D3D12_EVENT_METADATA, data, size); 571 } 572 573 inline void PIXEndGPUEventOnContext(_In_ ID3D12GraphicsCommandList* commandList) 574 { 575 commandList->EndEvent(); 576 } 577 578 inline void PIXEndGPUEventOnContext(_In_ ID3D12CommandQueue* commandQueue) 579 { 580 commandQueue->EndEvent(); 581 } 582 583 #endif //__d3d12_h__ 584 585 template<class T> struct PIXInferScopedEventType { typedef T Type; }; 586 template<class T> struct PIXInferScopedEventType<const T> { typedef T Type; }; 587 template<class T> struct PIXInferScopedEventType<T*> { typedef T Type; }; 588 template<class T> struct PIXInferScopedEventType<T* const> { typedef T Type; }; 589 template<> struct PIXInferScopedEventType<UINT64> { typedef void Type; }; 590 template<> struct PIXInferScopedEventType<const UINT64> { typedef void Type; }; 591 template<> struct PIXInferScopedEventType<INT64> { typedef void Type; }; 592 template<> struct PIXInferScopedEventType<const INT64> { typedef void Type; }; 593 template<> struct PIXInferScopedEventType<UINT> { typedef void Type; }; 594 template<> struct PIXInferScopedEventType<const UINT> { typedef void Type; }; 595 
template<> struct PIXInferScopedEventType<INT> { typedef void Type; }; 596 template<> struct PIXInferScopedEventType<const INT> { typedef void Type; }; 597 598 599 #if PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 600 #undef PIX_ENABLE_BLOCK_ARGUMENT_COPY 601 #endif 602 603 #undef PIX_ENABLE_BLOCK_ARGUMENT_COPY_SET 604 605 #endif //_PIXEventsCommon_H_