sdl

FORK: Simple DirectMedia Layer
git clone https://git.neptards.moe/neptards/sdl.git
Log | Files | Refs

SDL_render_metal.m (73233B)


      1 /*
      2   Simple DirectMedia Layer
      3   Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
      4 
      5   This software is provided 'as-is', without any express or implied
      6   warranty.  In no event will the authors be held liable for any damages
      7   arising from the use of this software.
      8 
      9   Permission is granted to anyone to use this software for any purpose,
     10   including commercial applications, and to alter it and redistribute it
     11   freely, subject to the following restrictions:
     12 
     13   1. The origin of this software must not be misrepresented; you must not
     14      claim that you wrote the original software. If you use this software
     15      in a product, an acknowledgment in the product documentation would be
     16      appreciated but is not required.
     17   2. Altered source versions must be plainly marked as such, and must not be
     18      misrepresented as being the original software.
     19   3. This notice may not be removed or altered from any source distribution.
     20 */
     21 #include "../../SDL_internal.h"
     22 
     23 #if SDL_VIDEO_RENDER_METAL && !SDL_RENDER_DISABLED
     24 
     25 #include "SDL_hints.h"
     26 #include "SDL_syswm.h"
     27 #include "SDL_metal.h"
     28 #include "../SDL_sysrender.h"
     29 
     30 #include <Availability.h>
     31 #import <Metal/Metal.h>
     32 #import <QuartzCore/CAMetalLayer.h>
     33 
     34 #ifdef __MACOSX__
     35 #import <AppKit/NSView.h>
     36 #endif
     37 
     38 /* Regenerate these with build-metal-shaders.sh */
     39 #ifdef __MACOSX__
     40 #include "SDL_shaders_metal_osx.h"
     41 #elif defined(__TVOS__)
     42 #if TARGET_OS_SIMULATOR
     43 #include "SDL_shaders_metal_tvsimulator.h"
     44 #else
     45 #include "SDL_shaders_metal_tvos.h"
     46 #endif
     47 #else
     48 #if TARGET_OS_SIMULATOR
     49 #include "SDL_shaders_metal_iphonesimulator.h"
     50 #else
     51 #include "SDL_shaders_metal_ios.h"
     52 #endif
     53 #endif
     54 
     55 /* Apple Metal renderer implementation */
     56 
     57 /* Used to re-create the window with Metal capability */
     58 extern int SDL_RecreateWindow(SDL_Window * window, Uint32 flags);
     59 
     60 /* macOS requires constants in a buffer to have a 256 byte alignment. */
     61 /* Use native type alignments from https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf */
     62 #if defined(__MACOSX__) || TARGET_OS_SIMULATOR
     63 #define CONSTANT_ALIGN(x) (256)
     64 #else
     65 #define CONSTANT_ALIGN(x) (x < 4 ? 4 : x)
     66 #endif
     67 
     68 #define DEVICE_ALIGN(x) (x < 4 ? 4 : x)
     69 
     70 #define ALIGN_CONSTANTS(align, size) ((size + CONSTANT_ALIGN(align) - 1) & (~(CONSTANT_ALIGN(align) - 1)))
     71 
     72 static const size_t CONSTANTS_OFFSET_INVALID = 0xFFFFFFFF;
     73 static const size_t CONSTANTS_OFFSET_IDENTITY = 0;
     74 static const size_t CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM = ALIGN_CONSTANTS(16, CONSTANTS_OFFSET_IDENTITY + sizeof(float) * 16);
     75 static const size_t CONSTANTS_OFFSET_DECODE_JPEG = ALIGN_CONSTANTS(16, CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM + sizeof(float) * 16);
     76 static const size_t CONSTANTS_OFFSET_DECODE_BT601 = ALIGN_CONSTANTS(16, CONSTANTS_OFFSET_DECODE_JPEG + sizeof(float) * 4 * 4);
     77 static const size_t CONSTANTS_OFFSET_DECODE_BT709 = ALIGN_CONSTANTS(16, CONSTANTS_OFFSET_DECODE_BT601 + sizeof(float) * 4 * 4);
     78 static const size_t CONSTANTS_LENGTH = CONSTANTS_OFFSET_DECODE_BT709 + sizeof(float) * 4 * 4;
     79 
     80 typedef enum SDL_MetalVertexFunction
     81 {
     82     SDL_METAL_VERTEX_SOLID,
     83     SDL_METAL_VERTEX_COPY,
     84 } SDL_MetalVertexFunction;
     85 
     86 typedef enum SDL_MetalFragmentFunction
     87 {
     88     SDL_METAL_FRAGMENT_SOLID = 0,
     89     SDL_METAL_FRAGMENT_COPY,
     90     SDL_METAL_FRAGMENT_YUV,
     91     SDL_METAL_FRAGMENT_NV12,
     92     SDL_METAL_FRAGMENT_NV21,
     93     SDL_METAL_FRAGMENT_COUNT,
     94 } SDL_MetalFragmentFunction;
     95 
     96 typedef struct METAL_PipelineState
     97 {
     98     SDL_BlendMode blendMode;
     99     void *pipe;
    100 } METAL_PipelineState;
    101 
    102 typedef struct METAL_PipelineCache
    103 {
    104     METAL_PipelineState *states;
    105     int count;
    106     SDL_MetalVertexFunction vertexFunction;
    107     SDL_MetalFragmentFunction fragmentFunction;
    108     MTLPixelFormat renderTargetFormat;
    109     const char *label;
    110 } METAL_PipelineCache;
    111 
    112 /* Each shader combination used by drawing functions has a separate pipeline
    113  * cache, and we have a separate list of caches for each render target pixel
    114  * format. This is more efficient than iterating over a global cache to find
    115  * the pipeline based on the specified shader combination and RT pixel format,
    116  * since we know what the RT pixel format is when we set the render target, and
    117  * we know what the shader combination is inside each drawing function's code. */
    118 typedef struct METAL_ShaderPipelines
    119 {
    120     MTLPixelFormat renderTargetFormat;
    121     METAL_PipelineCache caches[SDL_METAL_FRAGMENT_COUNT];
    122 } METAL_ShaderPipelines;
    123 
    124 @interface METAL_RenderData : NSObject
    125     @property (nonatomic, retain) id<MTLDevice> mtldevice;
    126     @property (nonatomic, retain) id<MTLCommandQueue> mtlcmdqueue;
    127     @property (nonatomic, retain) id<MTLCommandBuffer> mtlcmdbuffer;
    128     @property (nonatomic, retain) id<MTLRenderCommandEncoder> mtlcmdencoder;
    129     @property (nonatomic, retain) id<MTLLibrary> mtllibrary;
    130     @property (nonatomic, retain) id<CAMetalDrawable> mtlbackbuffer;
    131     @property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest;
    132     @property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear;
    133     @property (nonatomic, retain) id<MTLBuffer> mtlbufconstants;
    134     @property (nonatomic, retain) id<MTLBuffer> mtlbufquadindices;
    135     @property (nonatomic, assign) SDL_MetalView mtlview;
    136     @property (nonatomic, retain) CAMetalLayer *mtllayer;
    137     @property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc;
    138     @property (nonatomic, assign) METAL_ShaderPipelines *activepipelines;
    139     @property (nonatomic, assign) METAL_ShaderPipelines *allpipelines;
    140     @property (nonatomic, assign) int pipelinescount;
    141 @end
    142 
    143 @implementation METAL_RenderData
    144 #if !__has_feature(objc_arc)
    145 - (void)dealloc
    146 {
    147     [_mtldevice release];
    148     [_mtlcmdqueue release];
    149     [_mtlcmdbuffer release];
    150     [_mtlcmdencoder release];
    151     [_mtllibrary release];
    152     [_mtlbackbuffer release];
    153     [_mtlsamplernearest release];
    154     [_mtlsamplerlinear release];
    155     [_mtlbufconstants release];
    156     [_mtlbufquadindices release];
    157     [_mtllayer release];
    158     [_mtlpassdesc release];
    159     [super dealloc];
    160 }
    161 #endif
    162 @end
    163 
    164 @interface METAL_TextureData : NSObject
    165     @property (nonatomic, retain) id<MTLTexture> mtltexture;
    166     @property (nonatomic, retain) id<MTLTexture> mtltexture_uv;
    167     @property (nonatomic, retain) id<MTLSamplerState> mtlsampler;
    168     @property (nonatomic, assign) SDL_MetalFragmentFunction fragmentFunction;
    169     @property (nonatomic, assign) BOOL yuv;
    170     @property (nonatomic, assign) BOOL nv12;
    171     @property (nonatomic, assign) size_t conversionBufferOffset;
    172     @property (nonatomic, assign) BOOL hasdata;
    173 
    174     @property (nonatomic, retain) id<MTLBuffer> lockedbuffer;
    175     @property (nonatomic, assign) SDL_Rect lockedrect;
    176 @end
    177 
    178 @implementation METAL_TextureData
    179 #if !__has_feature(objc_arc)
    180 - (void)dealloc
    181 {
    182     [_mtltexture release];
    183     [_mtltexture_uv release];
    184     [_mtlsampler release];
    185     [_lockedbuffer release];
    186     [super dealloc];
    187 }
    188 #endif
    189 @end
    190 
    191 static int
    192 IsMetalAvailable(const SDL_SysWMinfo *syswm)
    193 {
    194     if (syswm->subsystem != SDL_SYSWM_COCOA && syswm->subsystem != SDL_SYSWM_UIKIT) {
    195         return SDL_SetError("Metal render target only supports Cocoa and UIKit video targets at the moment.");
    196     }
    197 
    198     // this checks a weak symbol.
    199 #if (defined(__MACOSX__) && (MAC_OS_X_VERSION_MIN_REQUIRED < 101100))
    200     if (MTLCreateSystemDefaultDevice == NULL) {  // probably on 10.10 or lower.
    201         return SDL_SetError("Metal framework not available on this system");
    202     }
    203 #endif
    204 
    205     return 0;
    206 }
    207 
    208 static const MTLBlendOperation invalidBlendOperation = (MTLBlendOperation)0xFFFFFFFF;
    209 static const MTLBlendFactor invalidBlendFactor = (MTLBlendFactor)0xFFFFFFFF;
    210 
    211 static MTLBlendOperation
    212 GetBlendOperation(SDL_BlendOperation operation)
    213 {
    214     switch (operation) {
    215         case SDL_BLENDOPERATION_ADD: return MTLBlendOperationAdd;
    216         case SDL_BLENDOPERATION_SUBTRACT: return MTLBlendOperationSubtract;
    217         case SDL_BLENDOPERATION_REV_SUBTRACT: return MTLBlendOperationReverseSubtract;
    218         case SDL_BLENDOPERATION_MINIMUM: return MTLBlendOperationMin;
    219         case SDL_BLENDOPERATION_MAXIMUM: return MTLBlendOperationMax;
    220         default: return invalidBlendOperation;
    221     }
    222 }
    223 
    224 static MTLBlendFactor
    225 GetBlendFactor(SDL_BlendFactor factor)
    226 {
    227     switch (factor) {
    228         case SDL_BLENDFACTOR_ZERO: return MTLBlendFactorZero;
    229         case SDL_BLENDFACTOR_ONE: return MTLBlendFactorOne;
    230         case SDL_BLENDFACTOR_SRC_COLOR: return MTLBlendFactorSourceColor;
    231         case SDL_BLENDFACTOR_ONE_MINUS_SRC_COLOR: return MTLBlendFactorOneMinusSourceColor;
    232         case SDL_BLENDFACTOR_SRC_ALPHA: return MTLBlendFactorSourceAlpha;
    233         case SDL_BLENDFACTOR_ONE_MINUS_SRC_ALPHA: return MTLBlendFactorOneMinusSourceAlpha;
    234         case SDL_BLENDFACTOR_DST_COLOR: return MTLBlendFactorDestinationColor;
    235         case SDL_BLENDFACTOR_ONE_MINUS_DST_COLOR: return MTLBlendFactorOneMinusDestinationColor;
    236         case SDL_BLENDFACTOR_DST_ALPHA: return MTLBlendFactorDestinationAlpha;
    237         case SDL_BLENDFACTOR_ONE_MINUS_DST_ALPHA: return MTLBlendFactorOneMinusDestinationAlpha;
    238         default: return invalidBlendFactor;
    239     }
    240 }
    241 
    242 static NSString *
    243 GetVertexFunctionName(SDL_MetalVertexFunction function)
    244 {
    245     switch (function) {
    246         case SDL_METAL_VERTEX_SOLID: return @"SDL_Solid_vertex";
    247         case SDL_METAL_VERTEX_COPY: return @"SDL_Copy_vertex";
    248         default: return nil;
    249     }
    250 }
    251 
    252 static NSString *
    253 GetFragmentFunctionName(SDL_MetalFragmentFunction function)
    254 {
    255     switch (function) {
    256         case SDL_METAL_FRAGMENT_SOLID: return @"SDL_Solid_fragment";
    257         case SDL_METAL_FRAGMENT_COPY: return @"SDL_Copy_fragment";
    258         case SDL_METAL_FRAGMENT_YUV: return @"SDL_YUV_fragment";
    259         case SDL_METAL_FRAGMENT_NV12: return @"SDL_NV12_fragment";
    260         case SDL_METAL_FRAGMENT_NV21: return @"SDL_NV21_fragment";
    261         default: return nil;
    262     }
    263 }
    264 
    265 static id<MTLRenderPipelineState>
    266 MakePipelineState(METAL_RenderData *data, METAL_PipelineCache *cache,
    267                   NSString *blendlabel, SDL_BlendMode blendmode)
    268 {
    269     id<MTLFunction> mtlvertfn = [data.mtllibrary newFunctionWithName:GetVertexFunctionName(cache->vertexFunction)];
    270     id<MTLFunction> mtlfragfn = [data.mtllibrary newFunctionWithName:GetFragmentFunctionName(cache->fragmentFunction)];
    271     SDL_assert(mtlvertfn != nil);
    272     SDL_assert(mtlfragfn != nil);
    273 
    274     MTLRenderPipelineDescriptor *mtlpipedesc = [[MTLRenderPipelineDescriptor alloc] init];
    275     mtlpipedesc.vertexFunction = mtlvertfn;
    276     mtlpipedesc.fragmentFunction = mtlfragfn;
    277 
    278     MTLVertexDescriptor *vertdesc = [MTLVertexDescriptor vertexDescriptor];
    279 
    280     switch (cache->vertexFunction) {
    281         case SDL_METAL_VERTEX_SOLID:
    282             /* position (float2) */
    283             vertdesc.layouts[0].stride = sizeof(float) * 2;
    284             vertdesc.layouts[0].stepFunction = MTLVertexStepFunctionPerVertex;
    285 
    286             vertdesc.attributes[0].format = MTLVertexFormatFloat2;
    287             vertdesc.attributes[0].offset = 0;
    288             vertdesc.attributes[0].bufferIndex = 0;
    289             break;
    290         case SDL_METAL_VERTEX_COPY:
    291             /* position (float2), texcoord (float2) */
    292             vertdesc.layouts[0].stride = sizeof(float) * 4;
    293             vertdesc.layouts[0].stepFunction = MTLVertexStepFunctionPerVertex;
    294 
    295             vertdesc.attributes[0].format = MTLVertexFormatFloat2;
    296             vertdesc.attributes[0].offset = 0;
    297             vertdesc.attributes[0].bufferIndex = 0;
    298 
    299             vertdesc.attributes[1].format = MTLVertexFormatFloat2;
    300             vertdesc.attributes[1].offset = sizeof(float) * 2;
    301             vertdesc.attributes[1].bufferIndex = 0;
    302             break;
    303     }
    304 
    305     mtlpipedesc.vertexDescriptor = vertdesc;
    306 
    307     MTLRenderPipelineColorAttachmentDescriptor *rtdesc = mtlpipedesc.colorAttachments[0];
    308     rtdesc.pixelFormat = cache->renderTargetFormat;
    309 
    310     if (blendmode != SDL_BLENDMODE_NONE) {
    311         rtdesc.blendingEnabled = YES;
    312         rtdesc.sourceRGBBlendFactor = GetBlendFactor(SDL_GetBlendModeSrcColorFactor(blendmode));
    313         rtdesc.destinationRGBBlendFactor = GetBlendFactor(SDL_GetBlendModeDstColorFactor(blendmode));
    314         rtdesc.rgbBlendOperation = GetBlendOperation(SDL_GetBlendModeColorOperation(blendmode));
    315         rtdesc.sourceAlphaBlendFactor = GetBlendFactor(SDL_GetBlendModeSrcAlphaFactor(blendmode));
    316         rtdesc.destinationAlphaBlendFactor = GetBlendFactor(SDL_GetBlendModeDstAlphaFactor(blendmode));
    317         rtdesc.alphaBlendOperation = GetBlendOperation(SDL_GetBlendModeAlphaOperation(blendmode));
    318     } else {
    319         rtdesc.blendingEnabled = NO;
    320     }
    321 
    322     mtlpipedesc.label = [@(cache->label) stringByAppendingString:blendlabel];
    323 
    324     NSError *err = nil;
    325     id<MTLRenderPipelineState> state = [data.mtldevice newRenderPipelineStateWithDescriptor:mtlpipedesc error:&err];
    326     SDL_assert(err == nil);
    327 
    328     METAL_PipelineState pipeline;
    329     pipeline.blendMode = blendmode;
    330     pipeline.pipe = (void *)CFBridgingRetain(state);
    331 
    332     METAL_PipelineState *states = SDL_realloc(cache->states, (cache->count + 1) * sizeof(pipeline));
    333 
    334 #if !__has_feature(objc_arc)
    335     [mtlpipedesc release];  // !!! FIXME: can these be reused for each creation, or does the pipeline obtain it?
    336     [mtlvertfn release];
    337     [mtlfragfn release];
    338     [state release];
    339 #endif
    340 
    341     if (states) {
    342         states[cache->count++] = pipeline;
    343         cache->states = states;
    344         return (__bridge id<MTLRenderPipelineState>)pipeline.pipe;
    345     } else {
    346         CFBridgingRelease(pipeline.pipe);
    347         SDL_OutOfMemory();
    348         return NULL;
    349     }
    350 }
    351 
    352 static void
    353 MakePipelineCache(METAL_RenderData *data, METAL_PipelineCache *cache, const char *label,
    354                   MTLPixelFormat rtformat, SDL_MetalVertexFunction vertfn, SDL_MetalFragmentFunction fragfn)
    355 {
    356     SDL_zerop(cache);
    357 
    358     cache->vertexFunction = vertfn;
    359     cache->fragmentFunction = fragfn;
    360     cache->renderTargetFormat = rtformat;
    361     cache->label = label;
    362 
    363     /* Create pipeline states for the default blend modes. Custom blend modes
    364      * will be added to the cache on-demand. */
    365     MakePipelineState(data, cache, @" (blend=none)", SDL_BLENDMODE_NONE);
    366     MakePipelineState(data, cache, @" (blend=blend)", SDL_BLENDMODE_BLEND);
    367     MakePipelineState(data, cache, @" (blend=add)", SDL_BLENDMODE_ADD);
    368     MakePipelineState(data, cache, @" (blend=mod)", SDL_BLENDMODE_MOD);
    369     MakePipelineState(data, cache, @" (blend=mul)", SDL_BLENDMODE_MUL);
    370 }
    371 
    372 static void
    373 DestroyPipelineCache(METAL_PipelineCache *cache)
    374 {
    375     if (cache != NULL) {
    376         for (int i = 0; i < cache->count; i++) {
    377             CFBridgingRelease(cache->states[i].pipe);
    378         }
    379 
    380         SDL_free(cache->states);
    381     }
    382 }
    383 
    384 void
    385 MakeShaderPipelines(METAL_RenderData *data, METAL_ShaderPipelines *pipelines, MTLPixelFormat rtformat)
    386 {
    387     SDL_zerop(pipelines);
    388 
    389     pipelines->renderTargetFormat = rtformat;
    390 
    391     MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_SOLID], "SDL primitives pipeline", rtformat, SDL_METAL_VERTEX_SOLID, SDL_METAL_FRAGMENT_SOLID);
    392     MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_COPY], "SDL copy pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_COPY);
    393     MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_YUV], "SDL YUV pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_YUV);
    394     MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_NV12], "SDL NV12 pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_NV12);
    395     MakePipelineCache(data, &pipelines->caches[SDL_METAL_FRAGMENT_NV21], "SDL NV21 pipeline", rtformat, SDL_METAL_VERTEX_COPY, SDL_METAL_FRAGMENT_NV21);
    396 }
    397 
    398 static METAL_ShaderPipelines *
    399 ChooseShaderPipelines(METAL_RenderData *data, MTLPixelFormat rtformat)
    400 {
    401     METAL_ShaderPipelines *allpipelines = data.allpipelines;
    402     int count = data.pipelinescount;
    403 
    404     for (int i = 0; i < count; i++) {
    405         if (allpipelines[i].renderTargetFormat == rtformat) {
    406             return &allpipelines[i];
    407         }
    408     }
    409 
    410     allpipelines = SDL_realloc(allpipelines, (count + 1) * sizeof(METAL_ShaderPipelines));
    411 
    412     if (allpipelines == NULL) {
    413         SDL_OutOfMemory();
    414         return NULL;
    415     }
    416 
    417     MakeShaderPipelines(data, &allpipelines[count], rtformat);
    418 
    419     data.allpipelines = allpipelines;
    420     data.pipelinescount = count + 1;
    421 
    422     return &data.allpipelines[count];
    423 }
    424 
    425 static void
    426 DestroyAllPipelines(METAL_ShaderPipelines *allpipelines, int count)
    427 {
    428     if (allpipelines != NULL) {
    429         for (int i = 0; i < count; i++) {
    430             for (int cache = 0; cache < SDL_METAL_FRAGMENT_COUNT; cache++) {
    431                 DestroyPipelineCache(&allpipelines[i].caches[cache]);
    432             }
    433         }
    434 
    435         SDL_free(allpipelines);
    436     }
    437 }
    438 
    439 static inline id<MTLRenderPipelineState>
    440 ChoosePipelineState(METAL_RenderData *data, METAL_ShaderPipelines *pipelines, SDL_MetalFragmentFunction fragfn, SDL_BlendMode blendmode)
    441 {
    442     METAL_PipelineCache *cache = &pipelines->caches[fragfn];
    443 
    444     for (int i = 0; i < cache->count; i++) {
    445         if (cache->states[i].blendMode == blendmode) {
    446             return (__bridge id<MTLRenderPipelineState>)cache->states[i].pipe;
    447         }
    448     }
    449 
    450     return MakePipelineState(data, cache, [NSString stringWithFormat:@" (blend=custom 0x%x)", blendmode], blendmode);
    451 }
    452 
    453 static void
    454 METAL_ActivateRenderCommandEncoder(SDL_Renderer * renderer, MTLLoadAction load, MTLClearColor *clear_color, id<MTLBuffer> vertex_buffer)
    455 {
    456     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    457 
    458     /* Our SetRenderTarget just signals that the next render operation should
    459      * set up a new render pass. This is where that work happens. */
    460     if (data.mtlcmdencoder == nil) {
    461         id<MTLTexture> mtltexture = nil;
    462 
    463         if (renderer->target != NULL) {
    464             METAL_TextureData *texdata = (__bridge METAL_TextureData *)renderer->target->driverdata;
    465             mtltexture = texdata.mtltexture;
    466         } else {
    467             if (data.mtlbackbuffer == nil) {
    468                 /* The backbuffer's contents aren't guaranteed to persist after
    469                  * presenting, so we can leave it undefined when loading it. */
    470                 data.mtlbackbuffer = [data.mtllayer nextDrawable];
    471                 if (load == MTLLoadActionLoad) {
    472                     load = MTLLoadActionDontCare;
    473                 }
    474             }
    475             mtltexture = data.mtlbackbuffer.texture;
    476         }
    477 
    478         SDL_assert(mtltexture);
    479 
    480         if (load == MTLLoadActionClear) {
    481             SDL_assert(clear_color != NULL);
    482             data.mtlpassdesc.colorAttachments[0].clearColor = *clear_color;
    483         }
    484 
    485         data.mtlpassdesc.colorAttachments[0].loadAction = load;
    486         data.mtlpassdesc.colorAttachments[0].texture = mtltexture;
    487 
    488         data.mtlcmdbuffer = [data.mtlcmdqueue commandBuffer];
    489         data.mtlcmdencoder = [data.mtlcmdbuffer renderCommandEncoderWithDescriptor:data.mtlpassdesc];
    490 
    491         if (data.mtlbackbuffer != nil && mtltexture == data.mtlbackbuffer.texture) {
    492             data.mtlcmdencoder.label = @"SDL metal renderer backbuffer";
    493         } else {
    494             data.mtlcmdencoder.label = @"SDL metal renderer render target";
    495         }
    496 
    497         /* Set up buffer bindings for positions, texcoords, and color once here,
    498          * the offsets are adjusted in the code that uses them. */
    499         if (vertex_buffer != nil) {
    500             [data.mtlcmdencoder setVertexBuffer:vertex_buffer offset:0 atIndex:0];
    501             [data.mtlcmdencoder setFragmentBuffer:vertex_buffer offset:0 atIndex:0];
    502         }
    503 
    504         data.activepipelines = ChooseShaderPipelines(data, mtltexture.pixelFormat);
    505 
    506         // make sure this has a definite place in the queue. This way it will
    507         //  execute reliably whether the app tries to make its own command buffers
    508         //  or whatever. This means we can _always_ batch rendering commands!
    509         [data.mtlcmdbuffer enqueue];
    510     }
    511 }
    512 
    513 static void
    514 METAL_WindowEvent(SDL_Renderer * renderer, const SDL_WindowEvent *event)
    515 {
    516     if (event->event == SDL_WINDOWEVENT_SHOWN ||
    517         event->event == SDL_WINDOWEVENT_HIDDEN) {
    518         // !!! FIXME: write me
    519     }
    520 }
    521 
    522 static int
    523 METAL_GetOutputSize(SDL_Renderer * renderer, int *w, int *h)
    524 { @autoreleasepool {
    525     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    526     if (w) {
    527         *w = (int)data.mtllayer.drawableSize.width;
    528     }
    529     if (h) {
    530         *h = (int)data.mtllayer.drawableSize.height;
    531     }
    532     return 0;
    533 }}
    534 
    535 static SDL_bool
    536 METAL_SupportsBlendMode(SDL_Renderer * renderer, SDL_BlendMode blendMode)
    537 {
    538     SDL_BlendFactor srcColorFactor = SDL_GetBlendModeSrcColorFactor(blendMode);
    539     SDL_BlendFactor srcAlphaFactor = SDL_GetBlendModeSrcAlphaFactor(blendMode);
    540     SDL_BlendOperation colorOperation = SDL_GetBlendModeColorOperation(blendMode);
    541     SDL_BlendFactor dstColorFactor = SDL_GetBlendModeDstColorFactor(blendMode);
    542     SDL_BlendFactor dstAlphaFactor = SDL_GetBlendModeDstAlphaFactor(blendMode);
    543     SDL_BlendOperation alphaOperation = SDL_GetBlendModeAlphaOperation(blendMode);
    544 
    545     if (GetBlendFactor(srcColorFactor) == invalidBlendFactor ||
    546         GetBlendFactor(srcAlphaFactor) == invalidBlendFactor ||
    547         GetBlendOperation(colorOperation) == invalidBlendOperation ||
    548         GetBlendFactor(dstColorFactor) == invalidBlendFactor ||
    549         GetBlendFactor(dstAlphaFactor) == invalidBlendFactor ||
    550         GetBlendOperation(alphaOperation) == invalidBlendOperation) {
    551         return SDL_FALSE;
    552     }
    553     return SDL_TRUE;
    554 }
    555 
    556 static int
    557 METAL_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
    558 { @autoreleasepool {
    559     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    560     MTLPixelFormat pixfmt;
    561 
    562     switch (texture->format) {
    563         case SDL_PIXELFORMAT_ABGR8888:
    564             pixfmt = MTLPixelFormatRGBA8Unorm;
    565             break;
    566         case SDL_PIXELFORMAT_ARGB8888:
    567             pixfmt = MTLPixelFormatBGRA8Unorm;
    568             break;
    569         case SDL_PIXELFORMAT_IYUV:
    570         case SDL_PIXELFORMAT_YV12:
    571         case SDL_PIXELFORMAT_NV12:
    572         case SDL_PIXELFORMAT_NV21:
    573             pixfmt = MTLPixelFormatR8Unorm;
    574             break;
    575         default:
    576             return SDL_SetError("Texture format %s not supported by Metal", SDL_GetPixelFormatName(texture->format));
    577     }
    578 
    579     MTLTextureDescriptor *mtltexdesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:pixfmt
    580                                             width:(NSUInteger)texture->w height:(NSUInteger)texture->h mipmapped:NO];
    581 
    582     /* Not available in iOS 8. */
    583     if ([mtltexdesc respondsToSelector:@selector(usage)]) {
    584         if (texture->access == SDL_TEXTUREACCESS_TARGET) {
    585             mtltexdesc.usage = MTLTextureUsageShaderRead | MTLTextureUsageRenderTarget;
    586         } else {
    587             mtltexdesc.usage = MTLTextureUsageShaderRead;
    588         }
    589     }
    590     
    591     id<MTLTexture> mtltexture = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
    592     if (mtltexture == nil) {
    593         return SDL_SetError("Texture allocation failed");
    594     }
    595 
    596     id<MTLTexture> mtltexture_uv = nil;
    597 
    598     BOOL yuv = (texture->format == SDL_PIXELFORMAT_IYUV) || (texture->format == SDL_PIXELFORMAT_YV12);
    599     BOOL nv12 = (texture->format == SDL_PIXELFORMAT_NV12) || (texture->format == SDL_PIXELFORMAT_NV21);
    600 
    601     if (yuv) {
    602         mtltexdesc.pixelFormat = MTLPixelFormatR8Unorm;
    603         mtltexdesc.width = (texture->w + 1) / 2;
    604         mtltexdesc.height = (texture->h + 1) / 2;
    605         mtltexdesc.textureType = MTLTextureType2DArray;
    606         mtltexdesc.arrayLength = 2;
    607     } else if (nv12) {
    608         mtltexdesc.pixelFormat = MTLPixelFormatRG8Unorm;
    609         mtltexdesc.width = (texture->w + 1) / 2;
    610         mtltexdesc.height = (texture->h + 1) / 2;
    611     }
    612 
    613     if (yuv || nv12) {
    614         mtltexture_uv = [data.mtldevice newTextureWithDescriptor:mtltexdesc];
    615         if (mtltexture_uv == nil) {
    616 #if !__has_feature(objc_arc)
    617             [mtltexture release];
    618 #endif
    619             return SDL_SetError("Texture allocation failed");
    620         }
    621     }
    622 
    623     METAL_TextureData *texturedata = [[METAL_TextureData alloc] init];
    624     if (texture->scaleMode == SDL_ScaleModeNearest) {
    625         texturedata.mtlsampler = data.mtlsamplernearest;
    626     } else {
    627         texturedata.mtlsampler = data.mtlsamplerlinear;
    628     }
    629     texturedata.mtltexture = mtltexture;
    630     texturedata.mtltexture_uv = mtltexture_uv;
    631 
    632     texturedata.yuv = yuv;
    633     texturedata.nv12 = nv12;
    634 
    635     if (yuv) {
    636         texturedata.fragmentFunction = SDL_METAL_FRAGMENT_YUV;
    637     } else if (texture->format == SDL_PIXELFORMAT_NV12) {
    638         texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV12;
    639     } else if (texture->format == SDL_PIXELFORMAT_NV21) {
    640         texturedata.fragmentFunction = SDL_METAL_FRAGMENT_NV21;
    641     } else {
    642         texturedata.fragmentFunction = SDL_METAL_FRAGMENT_COPY;
    643     }
    644 
    645     if (yuv || nv12) {
    646         size_t offset = 0;
    647         SDL_YUV_CONVERSION_MODE mode = SDL_GetYUVConversionModeForResolution(texture->w, texture->h);
    648         switch (mode) {
    649             case SDL_YUV_CONVERSION_JPEG: offset = CONSTANTS_OFFSET_DECODE_JPEG; break;
    650             case SDL_YUV_CONVERSION_BT601: offset = CONSTANTS_OFFSET_DECODE_BT601; break;
    651             case SDL_YUV_CONVERSION_BT709: offset = CONSTANTS_OFFSET_DECODE_BT709; break;
    652             default: offset = 0; break;
    653         }
    654         texturedata.conversionBufferOffset = offset;
    655     }
    656 
    657     texture->driverdata = (void*)CFBridgingRetain(texturedata);
    658 
    659 #if !__has_feature(objc_arc)
    660     [texturedata release];
    661     [mtltexture release];
    662     [mtltexture_uv release];
    663 #endif
    664 
    665     return 0;
    666 }}
    667 
    668 static void
    669 METAL_UploadTextureData(id<MTLTexture> texture, SDL_Rect rect, int slice,
    670                         const void * pixels, int pitch)
    671 {
    672     [texture replaceRegion:MTLRegionMake2D(rect.x, rect.y, rect.w, rect.h)
    673                mipmapLevel:0
    674                      slice:slice
    675                  withBytes:pixels
    676                bytesPerRow:pitch
    677              bytesPerImage:0];
    678 }
    679 
    680 static MTLStorageMode
    681 METAL_GetStorageMode(id<MTLResource> resource)
    682 {
    683     /* iOS 8 does not have this method. */
    684     if ([resource respondsToSelector:@selector(storageMode)]) {
    685         return resource.storageMode;
    686     }
    687     return MTLStorageModeShared;
    688 }
    689 
    690 static int
    691 METAL_UpdateTextureInternal(SDL_Renderer * renderer, METAL_TextureData *texturedata,
    692                             id<MTLTexture> texture, SDL_Rect rect, int slice,
    693                             const void * pixels, int pitch)
    694 {
    695     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    696     SDL_Rect stagingrect = {0, 0, rect.w, rect.h};
    697     MTLTextureDescriptor *desc;
    698 
    699     /* If the texture is managed or shared and this is the first upload, we can
    700      * use replaceRegion to upload to it directly. Otherwise we upload the data
    701      * to a staging texture and copy that over. */
    702     if (!texturedata.hasdata && METAL_GetStorageMode(texture) != MTLStorageModePrivate) {
    703         METAL_UploadTextureData(texture, rect, slice, pixels, pitch);
    704         return 0;
    705     }
    706 
    707     desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:texture.pixelFormat
    708                                                               width:rect.w
    709                                                              height:rect.h
    710                                                           mipmapped:NO];
    711 
    712     if (desc == nil) {
    713         return SDL_OutOfMemory();
    714     }
    715 
    716     /* TODO: We could have a pool of textures or a MTLHeap we allocate from,
    717      * and release a staging texture back to the pool in the command buffer's
    718      * completion handler. */
    719     id<MTLTexture> stagingtex = [data.mtldevice newTextureWithDescriptor:desc];
    720     if (stagingtex == nil) {
    721         return SDL_OutOfMemory();
    722     }
    723 
    724 #if !__has_feature(objc_arc)
    725     [stagingtex autorelease];
    726 #endif
    727 
    728     METAL_UploadTextureData(stagingtex, stagingrect, 0, pixels, pitch);
    729 
    730     if (data.mtlcmdencoder != nil) {
    731         [data.mtlcmdencoder endEncoding];
    732         data.mtlcmdencoder = nil;
    733     }
    734 
    735     if (data.mtlcmdbuffer == nil) {
    736         data.mtlcmdbuffer = [data.mtlcmdqueue commandBuffer];
    737     }
    738 
    739     id<MTLBlitCommandEncoder> blitcmd = [data.mtlcmdbuffer blitCommandEncoder];
    740 
    741     [blitcmd copyFromTexture:stagingtex
    742                  sourceSlice:0
    743                  sourceLevel:0
    744                 sourceOrigin:MTLOriginMake(0, 0, 0)
    745                   sourceSize:MTLSizeMake(rect.w, rect.h, 1)
    746                    toTexture:texture
    747             destinationSlice:slice
    748             destinationLevel:0
    749            destinationOrigin:MTLOriginMake(rect.x, rect.y, 0)];
    750 
    751     [blitcmd endEncoding];
    752 
    753     /* TODO: This isn't very efficient for the YUV formats, which call
    754      * UpdateTextureInternal multiple times in a row. */
    755     [data.mtlcmdbuffer commit];
    756     data.mtlcmdbuffer = nil;
    757 
    758     return 0;
    759 }
    760 
    761 static int
    762 METAL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
    763                     const SDL_Rect * rect, const void *pixels, int pitch)
    764 { @autoreleasepool {
    765     METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
    766 
    767     if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture, *rect, 0, pixels, pitch) < 0) {
    768         return -1;
    769     }
    770 
    771     if (texturedata.yuv) {
    772         int Uslice = texture->format == SDL_PIXELFORMAT_YV12 ? 1 : 0;
    773         int Vslice = texture->format == SDL_PIXELFORMAT_YV12 ? 0 : 1;
    774         int UVpitch = (pitch + 1) / 2;
    775         SDL_Rect UVrect = {rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2};
    776 
    777         /* Skip to the correct offset into the next texture */
    778         pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
    779         if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture_uv, UVrect, Uslice, pixels, UVpitch) < 0) {
    780             return -1;
    781         }
    782 
    783         /* Skip to the correct offset into the next texture */
    784         pixels = (const void*)((const Uint8*)pixels + UVrect.h * UVpitch);
    785         if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture_uv, UVrect, Vslice, pixels, UVpitch) < 0) {
    786             return -1;
    787         }
    788     }
    789 
    790     if (texturedata.nv12) {
    791         SDL_Rect UVrect = {rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2};
    792         int UVpitch = 2 * ((pitch + 1) / 2);
    793 
    794         /* Skip to the correct offset into the next texture */
    795         pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
    796         if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture_uv, UVrect, 0, pixels, UVpitch) < 0) {
    797             return -1;
    798         }
    799     }
    800 
    801     texturedata.hasdata = YES;
    802 
    803     return 0;
    804 }}
    805 
    806 static int
    807 METAL_UpdateTextureYUV(SDL_Renderer * renderer, SDL_Texture * texture,
    808                     const SDL_Rect * rect,
    809                     const Uint8 *Yplane, int Ypitch,
    810                     const Uint8 *Uplane, int Upitch,
    811                     const Uint8 *Vplane, int Vpitch)
    812 { @autoreleasepool {
    813     METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
    814     const int Uslice = 0;
    815     const int Vslice = 1;
    816     SDL_Rect UVrect = {rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2};
    817 
    818     /* Bail out if we're supposed to update an empty rectangle */
    819     if (rect->w <= 0 || rect->h <= 0) {
    820         return 0;
    821     }
    822 
    823     if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture, *rect, 0, Yplane, Ypitch) < 0) {
    824         return -1;
    825     }
    826     if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture_uv, UVrect, Uslice, Uplane, Upitch)) {
    827         return -1;
    828     }
    829     if (METAL_UpdateTextureInternal(renderer, texturedata, texturedata.mtltexture_uv, UVrect, Vslice, Vplane, Vpitch)) {
    830         return -1;
    831     }
    832 
    833     texturedata.hasdata = YES;
    834 
    835     return 0;
    836 }}
    837 
    838 static int
    839 METAL_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
    840                const SDL_Rect * rect, void **pixels, int *pitch)
    841 { @autoreleasepool {
    842     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    843     METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
    844     int buffersize = 0;
    845     id<MTLBuffer> lockedbuffer = nil;
    846 
    847     if (rect->w <= 0 || rect->h <= 0) {
    848         return SDL_SetError("Invalid rectangle dimensions for LockTexture.");
    849     }
    850 
    851     *pitch = SDL_BYTESPERPIXEL(texture->format) * rect->w;
    852 
    853     if (texturedata.yuv || texturedata.nv12) {
    854         buffersize = ((*pitch) * rect->h) + (2 * (*pitch + 1) / 2) * ((rect->h + 1) / 2);
    855     } else {
    856         buffersize = (*pitch) * rect->h;
    857     }
    858 
    859     lockedbuffer = [data.mtldevice newBufferWithLength:buffersize options:MTLResourceStorageModeShared];
    860     if (lockedbuffer == nil) {
    861         return SDL_OutOfMemory();
    862     }
    863 
    864     texturedata.lockedrect = *rect;
    865     texturedata.lockedbuffer = lockedbuffer;
    866     *pixels = [lockedbuffer contents];
    867 
    868     /* METAL_TextureData.lockedbuffer retains. */
    869 #if !__has_feature(objc_arc)
    870     [lockedbuffer release];
    871 #endif
    872 
    873     return 0;
    874 }}
    875 
    876 static void
    877 METAL_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
    878 { @autoreleasepool {
    879     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    880     METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
    881     SDL_Rect rect = texturedata.lockedrect;
    882     int pitch = SDL_BYTESPERPIXEL(texture->format) * rect.w;
    883     SDL_Rect UVrect = {rect.x / 2, rect.y / 2, (rect.w + 1) / 2, (rect.h + 1) / 2};
    884 
    885     if (texturedata.lockedbuffer == nil) {
    886         return;
    887     }
    888 
    889     if (data.mtlcmdencoder != nil) {
    890         [data.mtlcmdencoder endEncoding];
    891         data.mtlcmdencoder = nil;
    892     }
    893 
    894     if (data.mtlcmdbuffer == nil) {
    895         data.mtlcmdbuffer = [data.mtlcmdqueue commandBuffer];
    896     }
    897 
    898     id<MTLBlitCommandEncoder> blitcmd = [data.mtlcmdbuffer blitCommandEncoder];
    899 
    900     [blitcmd copyFromBuffer:texturedata.lockedbuffer
    901                sourceOffset:0
    902           sourceBytesPerRow:pitch
    903         sourceBytesPerImage:0
    904                  sourceSize:MTLSizeMake(rect.w, rect.h, 1)
    905                   toTexture:texturedata.mtltexture
    906            destinationSlice:0
    907            destinationLevel:0
    908           destinationOrigin:MTLOriginMake(rect.x, rect.y, 0)];
    909 
    910     if (texturedata.yuv) {
    911         int Uslice = texture->format == SDL_PIXELFORMAT_YV12 ? 1 : 0;
    912         int Vslice = texture->format == SDL_PIXELFORMAT_YV12 ? 0 : 1;
    913         int UVpitch = (pitch + 1) / 2;
    914 
    915         [blitcmd copyFromBuffer:texturedata.lockedbuffer
    916                    sourceOffset:rect.h * pitch
    917               sourceBytesPerRow:UVpitch
    918             sourceBytesPerImage:UVpitch * UVrect.h
    919                      sourceSize:MTLSizeMake(UVrect.w, UVrect.h, 1)
    920                       toTexture:texturedata.mtltexture_uv
    921                destinationSlice:Uslice
    922                destinationLevel:0
    923               destinationOrigin:MTLOriginMake(UVrect.x, UVrect.y, 0)];
    924 
    925         [blitcmd copyFromBuffer:texturedata.lockedbuffer
    926                    sourceOffset:(rect.h * pitch) + UVrect.h * UVpitch
    927               sourceBytesPerRow:UVpitch
    928             sourceBytesPerImage:UVpitch * UVrect.h
    929                      sourceSize:MTLSizeMake(UVrect.w, UVrect.h, 1)
    930                       toTexture:texturedata.mtltexture_uv
    931                destinationSlice:Vslice
    932                destinationLevel:0
    933               destinationOrigin:MTLOriginMake(UVrect.x, UVrect.y, 0)];
    934     }
    935 
    936     if (texturedata.nv12) {
    937         int UVpitch = 2 * ((pitch + 1) / 2);
    938 
    939         [blitcmd copyFromBuffer:texturedata.lockedbuffer
    940                    sourceOffset:rect.h * pitch
    941               sourceBytesPerRow:UVpitch
    942             sourceBytesPerImage:0
    943                      sourceSize:MTLSizeMake(UVrect.w, UVrect.h, 1)
    944                       toTexture:texturedata.mtltexture_uv
    945                destinationSlice:0
    946                destinationLevel:0
    947               destinationOrigin:MTLOriginMake(UVrect.x, UVrect.y, 0)];
    948     }
    949 
    950     [blitcmd endEncoding];
    951 
    952     [data.mtlcmdbuffer commit];
    953     data.mtlcmdbuffer = nil;
    954 
    955     texturedata.lockedbuffer = nil; /* Retained property, so it calls release. */
    956     texturedata.hasdata = YES;
    957 }}
    958 
    959 static void
    960 METAL_SetTextureScaleMode(SDL_Renderer * renderer, SDL_Texture * texture, SDL_ScaleMode scaleMode)
    961 { @autoreleasepool {
    962     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    963     METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
    964 
    965     if (scaleMode == SDL_ScaleModeNearest) {
    966         texturedata.mtlsampler = data.mtlsamplernearest;
    967     } else {
    968         texturedata.mtlsampler = data.mtlsamplerlinear;
    969     }
    970 }}
    971 
    972 static int
    973 METAL_SetRenderTarget(SDL_Renderer * renderer, SDL_Texture * texture)
    974 { @autoreleasepool {
    975     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
    976 
    977     if (data.mtlcmdencoder) {
    978         /* End encoding for the previous render target so we can set up a new
    979          * render pass for this one. */
    980         [data.mtlcmdencoder endEncoding];
    981         [data.mtlcmdbuffer commit];
    982 
    983         data.mtlcmdencoder = nil;
    984         data.mtlcmdbuffer = nil;
    985     }
    986 
    987     /* We don't begin a new render pass right away - we delay it until an actual
    988      * draw or clear happens. That way we can use hardware clears when possible,
    989      * which are only available when beginning a new render pass. */
    990     return 0;
    991 }}
    992 
    993 
    994 // normalize a value from 0.0f to len into 0.0f to 1.0f.
    995 static inline float
    996 normtex(const float _val, const float len)
    997 {
    998     return _val / len;
    999 }
   1000 
   1001 static int
   1002 METAL_QueueSetViewport(SDL_Renderer * renderer, SDL_RenderCommand *cmd)
   1003 {
   1004     float projection[4][4];    /* Prepare an orthographic projection */
   1005     const int w = cmd->data.viewport.rect.w;
   1006     const int h = cmd->data.viewport.rect.h;
   1007     const size_t matrixlen = sizeof (projection);
   1008     float *matrix = (float *) SDL_AllocateRenderVertices(renderer, matrixlen, CONSTANT_ALIGN(16), &cmd->data.viewport.first);
   1009     if (!matrix) {
   1010         return -1;
   1011     }
   1012 
   1013     SDL_memset(projection, '\0', matrixlen);
   1014     if (w && h) {
   1015         projection[0][0] = 2.0f / w;
   1016         projection[1][1] = -2.0f / h;
   1017         projection[3][0] = -1.0f;
   1018         projection[3][1] = 1.0f;
   1019         projection[3][3] = 1.0f;
   1020     }
   1021     SDL_memcpy(matrix, projection, matrixlen);
   1022 
   1023     return 0;
   1024 }
   1025 
   1026 static int
   1027 METAL_QueueSetDrawColor(SDL_Renderer *renderer, SDL_RenderCommand *cmd)
   1028 {
   1029     const size_t vertlen = sizeof (float) * 4;
   1030     float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, DEVICE_ALIGN(16), &cmd->data.color.first);
   1031     if (!verts) {
   1032         return -1;
   1033     }
   1034     *(verts++) = ((float)cmd->data.color.r) / 255.0f;
   1035     *(verts++) = ((float)cmd->data.color.g) / 255.0f;
   1036     *(verts++) = ((float)cmd->data.color.b) / 255.0f;
   1037     *(verts++) = ((float)cmd->data.color.a) / 255.0f;
   1038     return 0;
   1039 }
   1040 
   1041 static int
   1042 METAL_QueueDrawPoints(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FPoint * points, int count)
   1043 {
   1044     const size_t vertlen = (sizeof (float) * 2) * count;
   1045     float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, DEVICE_ALIGN(8), &cmd->data.draw.first);
   1046     if (!verts) {
   1047         return -1;
   1048     }
   1049     cmd->data.draw.count = count;
   1050     SDL_memcpy(verts, points, vertlen);
   1051     return 0;
   1052 }
   1053 
   1054 static int
   1055 METAL_QueueDrawLines(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FPoint * points, int count)
   1056 {
   1057     SDL_assert(count >= 2);  /* should have been checked at the higher level. */
   1058 
   1059     const size_t vertlen = (sizeof (float) * 2) * count;
   1060     float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, DEVICE_ALIGN(8), &cmd->data.draw.first);
   1061     if (!verts) {
   1062         return -1;
   1063     }
   1064     cmd->data.draw.count = count;
   1065     SDL_memcpy(verts, points, vertlen);
   1066 
   1067     /* If the line segment is completely horizontal or vertical,
   1068        make it one pixel longer, to satisfy the diamond-exit rule.
   1069        We should probably do this for diagonal lines too, but we'd have to
   1070        do some trigonometry to figure out the correct pixel and generally
   1071        when we have problems with pixel perfection, it's for straight lines
   1072        that are missing a pixel that frames something and not arbitrary
   1073        angles. Maybe !!! FIXME for later, though. */
   1074 
   1075     points += count - 2;  /* update the last line. */
   1076     verts += (count * 2) - 2;
   1077 
   1078     const float xstart = points[0].x;
   1079     const float ystart = points[0].y;
   1080     const float xend = points[1].x;
   1081     const float yend = points[1].y;
   1082 
   1083     if (ystart == yend) {  /* horizontal line */
   1084         verts[0] += (xend > xstart) ? 1.0f : -1.0f;
   1085     } else if (xstart == xend) {  /* vertical line */
   1086         verts[1] += (yend > ystart) ? 1.0f : -1.0f;
   1087     }
   1088 
   1089     return 0;
   1090 }
   1091 
   1092 static int
   1093 METAL_QueueFillRects(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FRect * rects, int count)
   1094 {
   1095     const size_t vertlen = (sizeof (float) * 8) * count;
   1096     float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, DEVICE_ALIGN(8), &cmd->data.draw.first);
   1097     if (!verts) {
   1098         return -1;
   1099     }
   1100 
   1101     cmd->data.draw.count = count;
   1102 
   1103     /* Quads in the following vertex order (matches the quad index buffer):
   1104      * 1---3
   1105      * | \ |
   1106      * 0---2
   1107      */
   1108     for (int i = 0; i < count; i++, rects++) {
   1109         if ((rects->w <= 0.0f) || (rects->h <= 0.0f)) {
   1110             cmd->data.draw.count--;
   1111         } else {
   1112             *(verts++) = rects->x;
   1113             *(verts++) = rects->y + rects->h;
   1114             *(verts++) = rects->x;
   1115             *(verts++) = rects->y;
   1116             *(verts++) = rects->x + rects->w;
   1117             *(verts++) = rects->y + rects->h;
   1118             *(verts++) = rects->x + rects->w;
   1119             *(verts++) = rects->y;
   1120         }
   1121     }
   1122 
   1123     if (cmd->data.draw.count == 0) {
   1124         cmd->command = SDL_RENDERCMD_NO_OP;  // nothing to do, just skip this one later.
   1125     }
   1126 
   1127     return 0;
   1128 }
   1129 
   1130 static int
   1131 METAL_QueueCopy(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture * texture,
   1132                 const SDL_Rect * srcrect, const SDL_FRect * dstrect)
   1133 {
   1134     const float texw = (float) texture->w;
   1135     const float texh = (float) texture->h;
   1136     // !!! FIXME: use an index buffer
   1137     const size_t vertlen = (sizeof (float) * 16);
   1138     float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, DEVICE_ALIGN(8), &cmd->data.draw.first);
   1139     if (!verts) {
   1140         return -1;
   1141     }
   1142 
   1143     cmd->data.draw.count = 1;
   1144 
   1145     /* Interleaved positions and texture coordinates */
   1146     *(verts++) = dstrect->x;
   1147     *(verts++) = dstrect->y + dstrect->h;
   1148     *(verts++) = normtex(srcrect->x, texw);
   1149     *(verts++) = normtex(srcrect->y + srcrect->h, texh);
   1150 
   1151     *(verts++) = dstrect->x;
   1152     *(verts++) = dstrect->y;
   1153     *(verts++) = normtex(srcrect->x, texw);
   1154     *(verts++) = normtex(srcrect->y, texh);
   1155 
   1156     *(verts++) = dstrect->x + dstrect->w;
   1157     *(verts++) = dstrect->y + dstrect->h;
   1158     *(verts++) = normtex(srcrect->x + srcrect->w, texw);
   1159     *(verts++) = normtex(srcrect->y + srcrect->h, texh);
   1160 
   1161     *(verts++) = dstrect->x + dstrect->w;
   1162     *(verts++) = dstrect->y;
   1163     *(verts++) = normtex(srcrect->x + srcrect->w, texw);
   1164     *(verts++) = normtex(srcrect->y, texh);
   1165 
   1166     return 0;
   1167 }
   1168 
   1169 static int
   1170 METAL_QueueCopyEx(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture * texture,
   1171                   const SDL_Rect * srcquad, const SDL_FRect * dstrect,
   1172                   const double angle, const SDL_FPoint *center, const SDL_RendererFlip flip)
   1173 {
   1174     const float texw = (float) texture->w;
   1175     const float texh = (float) texture->h;
   1176     const float rads = (float)(M_PI * (float) angle / 180.0f);
   1177     const float c = cosf(rads), s = sinf(rads);
   1178     float minu, maxu, minv, maxv;
   1179     const size_t vertlen = (sizeof (float) * 32);
   1180     float *verts;
   1181 
   1182     // cheat and store this offset in (count) because it needs to be aligned in ways other fields don't and we aren't using count otherwise.
   1183     verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, CONSTANT_ALIGN(16), &cmd->data.draw.count);
   1184     if (!verts) {
   1185         return -1;
   1186     }
   1187 
   1188     // transform matrix
   1189     SDL_memset(verts, '\0', sizeof (*verts) * 16);
   1190     verts[10] = verts[15] = 1.0f;
   1191     // rotation
   1192     verts[0] = c;
   1193     verts[1] = s;
   1194     verts[4] = -s;
   1195     verts[5] = c;
   1196 
   1197     // translation
   1198     verts[12] = dstrect->x + center->x;
   1199     verts[13] = dstrect->y + center->y;
   1200 
   1201     // rest of the vertices don't need the aggressive alignment. Pack them in.
   1202     verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, DEVICE_ALIGN(8), &cmd->data.draw.first);
   1203     if (!verts) {
   1204         return -1;
   1205     }
   1206 
   1207     minu = normtex(srcquad->x, texw);
   1208     maxu = normtex(srcquad->x + srcquad->w, texw);
   1209     minv = normtex(srcquad->y, texh);
   1210     maxv = normtex(srcquad->y + srcquad->h, texh);
   1211 
   1212     if (flip & SDL_FLIP_HORIZONTAL) {
   1213         float tmp = maxu;
   1214         maxu = minu;
   1215         minu = tmp;
   1216     }
   1217     if (flip & SDL_FLIP_VERTICAL) {
   1218         float tmp = maxv;
   1219         maxv = minv;
   1220         minv = tmp;
   1221     }
   1222 
   1223     /* Interleaved positions and texture coordinates */
   1224     *(verts++) = -center->x;
   1225     *(verts++) = dstrect->h - center->y;
   1226     *(verts++) = minu;
   1227     *(verts++) = maxv;
   1228 
   1229     *(verts++) = -center->x;
   1230     *(verts++) = -center->y;
   1231     *(verts++) = minu;
   1232     *(verts++) = minv;
   1233 
   1234     *(verts++) = dstrect->w - center->x;
   1235     *(verts++) = dstrect->h - center->y;
   1236     *(verts++) = maxu;
   1237     *(verts++) = maxv;
   1238 
   1239     *(verts++) = dstrect->w - center->x;
   1240     *(verts++) = -center->y;
   1241     *(verts++) = maxu;
   1242     *(verts++) = minv;
   1243 
   1244     return 0;
   1245 }
   1246 
   1247 
   1248 typedef struct
   1249 {
   1250     #if __has_feature(objc_arc)
   1251     __unsafe_unretained id<MTLRenderPipelineState> pipeline;
   1252     __unsafe_unretained id<MTLBuffer> vertex_buffer;
   1253     #else
   1254     id<MTLRenderPipelineState> pipeline;
   1255     id<MTLBuffer> vertex_buffer;
   1256     #endif
   1257     size_t constants_offset;
   1258     SDL_Texture *texture;
   1259     SDL_bool cliprect_dirty;
   1260     SDL_bool cliprect_enabled;
   1261     SDL_Rect cliprect;
   1262     SDL_bool viewport_dirty;
   1263     SDL_Rect viewport;
   1264     size_t projection_offset;
   1265     SDL_bool color_dirty;
   1266     size_t color_offset;
   1267 } METAL_DrawStateCache;
   1268 
   1269 static void
   1270 SetDrawState(SDL_Renderer *renderer, const SDL_RenderCommand *cmd, const SDL_MetalFragmentFunction shader,
   1271              const size_t constants_offset, id<MTLBuffer> mtlbufvertex, METAL_DrawStateCache *statecache)
   1272 {
   1273     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
   1274     const SDL_BlendMode blend = cmd->data.draw.blend;
   1275     size_t first = cmd->data.draw.first;
   1276     id<MTLRenderPipelineState> newpipeline;
   1277 
   1278     METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionLoad, NULL, statecache->vertex_buffer);
   1279 
   1280     if (statecache->viewport_dirty) {
   1281         MTLViewport viewport;
   1282         viewport.originX = statecache->viewport.x;
   1283         viewport.originY = statecache->viewport.y;
   1284         viewport.width = statecache->viewport.w;
   1285         viewport.height = statecache->viewport.h;
   1286         viewport.znear = 0.0;
   1287         viewport.zfar = 1.0;
   1288         [data.mtlcmdencoder setViewport:viewport];
   1289         [data.mtlcmdencoder setVertexBuffer:mtlbufvertex offset:statecache->projection_offset atIndex:2];  // projection
   1290         statecache->viewport_dirty = SDL_FALSE;
   1291     }
   1292 
   1293     if (statecache->cliprect_dirty) {
   1294         MTLScissorRect mtlrect;
   1295         if (statecache->cliprect_enabled) {
   1296             const SDL_Rect *rect = &statecache->cliprect;
   1297             mtlrect.x = statecache->viewport.x + rect->x;
   1298             mtlrect.y = statecache->viewport.y + rect->y;
   1299             mtlrect.width = rect->w;
   1300             mtlrect.height = rect->h;
   1301         } else {
   1302             mtlrect.x = statecache->viewport.x;
   1303             mtlrect.y = statecache->viewport.y;
   1304             mtlrect.width = statecache->viewport.w;
   1305             mtlrect.height = statecache->viewport.h;
   1306         }
   1307         if (mtlrect.width > 0 && mtlrect.height > 0) {
   1308             [data.mtlcmdencoder setScissorRect:mtlrect];
   1309         }
   1310         statecache->cliprect_dirty = SDL_FALSE;
   1311     }
   1312 
   1313     if (statecache->color_dirty) {
   1314         [data.mtlcmdencoder setFragmentBufferOffset:statecache->color_offset atIndex:0];
   1315         statecache->color_dirty = SDL_FALSE;
   1316     }
   1317 
   1318     newpipeline = ChoosePipelineState(data, data.activepipelines, shader, blend);
   1319     if (newpipeline != statecache->pipeline) {
   1320         [data.mtlcmdencoder setRenderPipelineState:newpipeline];
   1321         statecache->pipeline = newpipeline;
   1322     }
   1323 
   1324     if (constants_offset != statecache->constants_offset) {
   1325         if (constants_offset != CONSTANTS_OFFSET_INVALID) {
   1326             [data.mtlcmdencoder setVertexBuffer:data.mtlbufconstants offset:constants_offset atIndex:3];
   1327         }
   1328         statecache->constants_offset = constants_offset;
   1329     }
   1330 
   1331     [data.mtlcmdencoder setVertexBufferOffset:first atIndex:0]; /* position/texcoords */
   1332 }
   1333 
   1334 static void
   1335 SetCopyState(SDL_Renderer *renderer, const SDL_RenderCommand *cmd, const size_t constants_offset,
   1336              id<MTLBuffer> mtlbufvertex, METAL_DrawStateCache *statecache)
   1337 {
   1338     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
   1339     SDL_Texture *texture = cmd->data.draw.texture;
   1340     METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
   1341 
   1342     SetDrawState(renderer, cmd, texturedata.fragmentFunction, constants_offset, mtlbufvertex, statecache);
   1343 
   1344     if (texture != statecache->texture) {
   1345         METAL_TextureData *oldtexturedata = NULL;
   1346         if (statecache->texture) {
   1347             oldtexturedata = (__bridge METAL_TextureData *) statecache->texture->driverdata;
   1348         }
   1349         if (!oldtexturedata || (texturedata.mtlsampler != oldtexturedata.mtlsampler)) {
   1350             [data.mtlcmdencoder setFragmentSamplerState:texturedata.mtlsampler atIndex:0];
   1351         }
   1352 
   1353         [data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture atIndex:0];
   1354         if (texturedata.yuv || texturedata.nv12) {
   1355             [data.mtlcmdencoder setFragmentTexture:texturedata.mtltexture_uv atIndex:1];
   1356             [data.mtlcmdencoder setFragmentBuffer:data.mtlbufconstants offset:texturedata.conversionBufferOffset atIndex:1];
   1357         }
   1358         statecache->texture = texture;
   1359     }
   1360 }
   1361 
   1362 static int
   1363 METAL_RunCommandQueue(SDL_Renderer * renderer, SDL_RenderCommand *cmd, void *vertices, size_t vertsize)
   1364 { @autoreleasepool {
   1365     METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
   1366     METAL_DrawStateCache statecache;
   1367     SDL_zero(statecache);
   1368 
   1369     id<MTLBuffer> mtlbufvertex = nil;
   1370 
   1371     statecache.pipeline = nil;
   1372     statecache.vertex_buffer = nil;
   1373     statecache.constants_offset = CONSTANTS_OFFSET_INVALID;
   1374     statecache.texture = NULL;
   1375     statecache.color_dirty = SDL_TRUE;
   1376     statecache.cliprect_dirty = SDL_TRUE;
   1377     statecache.viewport_dirty = SDL_TRUE;
   1378     statecache.projection_offset = 0;
   1379     statecache.color_offset = 0;
   1380 
   1381     // !!! FIXME: have a ring of pre-made MTLBuffers we cycle through? How expensive is creation?
   1382     if (vertsize > 0) {
   1383         /* We can memcpy to a shared buffer from the CPU and read it from the GPU
   1384          * without any extra copying. It's a bit slower on macOS to read shared
   1385          * data from the GPU than to read managed/private data, but we avoid the
   1386          * cost of copying the data and the code's simpler. Apple's best
   1387          * practices guide recommends this approach for streamed vertex data.
   1388          * TODO: this buffer is also used for constants. Is performance still
   1389          * good for those, or should we have a managed buffer for them? */
   1390         mtlbufvertex = [data.mtldevice newBufferWithLength:vertsize options:MTLResourceStorageModeShared];
   1391         #if !__has_feature(objc_arc)
   1392         [mtlbufvertex autorelease];
   1393         #endif
   1394         mtlbufvertex.label = @"SDL vertex data";
   1395         SDL_memcpy([mtlbufvertex contents], vertices, vertsize);
   1396 
   1397         statecache.vertex_buffer = mtlbufvertex;
   1398     }
   1399 
   1400     // If there's a command buffer here unexpectedly (app requested one?). Commit it so we can start fresh.
   1401     [data.mtlcmdencoder endEncoding];
   1402     [data.mtlcmdbuffer commit];
   1403     data.mtlcmdencoder = nil;
   1404     data.mtlcmdbuffer = nil;
   1405 
   1406     while (cmd) {
   1407         switch (cmd->command) {
   1408             case SDL_RENDERCMD_SETVIEWPORT: {
   1409                 SDL_memcpy(&statecache.viewport, &cmd->data.viewport.rect, sizeof (statecache.viewport));
   1410                 statecache.projection_offset = cmd->data.viewport.first;
   1411                 statecache.viewport_dirty = SDL_TRUE;
   1412                 statecache.cliprect_dirty = SDL_TRUE;
   1413                 break;
   1414             }
   1415 
   1416             case SDL_RENDERCMD_SETCLIPRECT: {
   1417                 SDL_memcpy(&statecache.cliprect, &cmd->data.cliprect.rect, sizeof (statecache.cliprect));
   1418                 statecache.cliprect_enabled = cmd->data.cliprect.enabled;
   1419                 statecache.cliprect_dirty = SDL_TRUE;
   1420                 break;
   1421             }
   1422 
   1423             case SDL_RENDERCMD_SETDRAWCOLOR: {
   1424                 statecache.color_offset = cmd->data.color.first;
   1425                 statecache.color_dirty = SDL_TRUE;
   1426                 break;
   1427             }
   1428 
   1429             case SDL_RENDERCMD_CLEAR: {
   1430                 /* If we're already encoding a command buffer, dump it without committing it. We'd just
   1431                     clear all its work anyhow, and starting a new encoder will let us use a hardware clear
   1432                     operation via MTLLoadActionClear. */
   1433                 if (data.mtlcmdencoder != nil) {
   1434                     [data.mtlcmdencoder endEncoding];
   1435 
   1436                     // !!! FIXME: have to commit, or an uncommitted but enqueued buffer will prevent the frame from finishing.
   1437                     [data.mtlcmdbuffer commit];
   1438                     data.mtlcmdencoder = nil;
   1439                     data.mtlcmdbuffer = nil;
   1440                 }
   1441 
   1442                 // force all this state to be reconfigured on next command buffer.
   1443                 statecache.pipeline = nil;
   1444                 statecache.constants_offset = CONSTANTS_OFFSET_INVALID;
   1445                 statecache.texture = NULL;
   1446                 statecache.color_dirty = SDL_TRUE;
   1447                 statecache.cliprect_dirty = SDL_TRUE;
   1448                 statecache.viewport_dirty = SDL_TRUE;
   1449 
   1450                 const Uint8 r = cmd->data.color.r;
   1451                 const Uint8 g = cmd->data.color.g;
   1452                 const Uint8 b = cmd->data.color.b;
   1453                 const Uint8 a = cmd->data.color.a;
   1454                 MTLClearColor color = MTLClearColorMake(r / 255.0f, g / 255.0f, b / 255.0f, a / 255.0f);
   1455 
   1456                 // get new command encoder, set up with an initial clear operation.
   1457                 METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionClear, &color, mtlbufvertex);
   1458                 break;
   1459             }
   1460 
   1461             case SDL_RENDERCMD_DRAW_POINTS:
   1462             case SDL_RENDERCMD_DRAW_LINES: {
   1463                 const size_t count = cmd->data.draw.count;
   1464                 const MTLPrimitiveType primtype = (cmd->command == SDL_RENDERCMD_DRAW_POINTS) ? MTLPrimitiveTypePoint : MTLPrimitiveTypeLineStrip;
   1465                 SetDrawState(renderer, cmd, SDL_METAL_FRAGMENT_SOLID, CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM, mtlbufvertex, &statecache);
   1466                 [data.mtlcmdencoder drawPrimitives:primtype vertexStart:0 vertexCount:count];
   1467                 break;
   1468             }
   1469 
   1470             case SDL_RENDERCMD_FILL_RECTS: {
   1471                 const size_t count = cmd->data.draw.count;
   1472                 const size_t maxcount = UINT16_MAX / 4;
   1473                 SetDrawState(renderer, cmd, SDL_METAL_FRAGMENT_SOLID, CONSTANTS_OFFSET_IDENTITY, mtlbufvertex, &statecache);
   1474                 if (count == 1) {
   1475                     [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
   1476                 } else {
   1477                     /* Our index buffer has 16 bit indices, so we can only draw
   1478                      * 65k vertices (16k rects) at a time. */
   1479                     for (size_t i = 0; i < count; i += maxcount) {
   1480                         /* Set the vertex buffer offset for our current positions.
   1481                          * The vertex buffer itself was bound in SetDrawState. */
   1482                         [data.mtlcmdencoder setVertexBufferOffset:cmd->data.draw.first + i*sizeof(float)*8 atIndex:0];
   1483                         [data.mtlcmdencoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
   1484                                                        indexCount:SDL_min(maxcount, count - i) * 6
   1485                                                         indexType:MTLIndexTypeUInt16
   1486                                                       indexBuffer:data.mtlbufquadindices
   1487                                                 indexBufferOffset:0];
   1488                     }
   1489                 }
   1490                 break;
   1491             }
   1492 
   1493             case SDL_RENDERCMD_COPY: {
   1494                 SetCopyState(renderer, cmd, CONSTANTS_OFFSET_IDENTITY, mtlbufvertex, &statecache);
   1495                 [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
   1496                 break;
   1497             }
   1498 
   1499             case SDL_RENDERCMD_COPY_EX: {
   1500                 SetCopyState(renderer, cmd, CONSTANTS_OFFSET_INVALID, mtlbufvertex, &statecache);
   1501                 [data.mtlcmdencoder setVertexBuffer:mtlbufvertex offset:cmd->data.draw.count atIndex:3];  // transform
   1502                 [data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:0 vertexCount:4];
   1503                 break;
   1504             }
   1505 
   1506             case SDL_RENDERCMD_NO_OP:
   1507                 break;
   1508         }
   1509         cmd = cmd->next;
   1510     }
   1511 
   1512     return 0;
   1513 }}
   1514 
   /* Read a rectangle of the current render target back into caller memory.
    *
    * renderer:     renderer whose current color attachment is read.
    * rect:         region to read, in pixels.
    * pixel_format: SDL pixel format the caller wants `pixels` filled with.
    * pixels:       destination buffer, filled by SDL_ConvertPixels().
    * pitch:        length in bytes of one destination row.
    *
    * Returns the SDL_ConvertPixels() status, or the SDL error value if there
    * is no texture to read from or the temporary buffer can't be allocated.
    *
    * This flushes and blocks on the in-flight command buffer, so it stalls
    * the CPU until the GPU has finished the work submitted so far. */
   static int
   METAL_RenderReadPixels(SDL_Renderer * renderer, const SDL_Rect * rect,
                       Uint32 pixel_format, void * pixels, int pitch)
   { @autoreleasepool {
       METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;
       METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionLoad, NULL, nil);

       [data.mtlcmdencoder endEncoding];
       id<MTLTexture> mtltexture = data.mtlpassdesc.colorAttachments[0].texture;

   #ifdef __MACOSX__
       /* on macOS with managed-storage textures, we need to tell the driver to
        * update the CPU-side copy of the texture data.
        * NOTE: Currently all of our textures are managed on macOS. We'll need some
        * extra copying for any private textures. */
       if (METAL_GetStorageMode(mtltexture) == MTLStorageModeManaged) {
           id<MTLBlitCommandEncoder> blit = [data.mtlcmdbuffer blitCommandEncoder];
           [blit synchronizeResource:mtltexture];
           [blit endEncoding];
       }
   #endif

       /* Commit the current command buffer and wait until it's completed, to make
        * sure the GPU has finished rendering to it by the time we read it. */
       [data.mtlcmdbuffer commit];
       [data.mtlcmdbuffer waitUntilCompleted];
       data.mtlcmdencoder = nil;
       data.mtlcmdbuffer = nil;

       /* With no attachment texture the getBytes message below would go to nil
        * and silently do nothing, and we'd hand uninitialized heap memory back
        * to the caller as if it were pixel data. Fail loudly instead. The check
        * sits after the commit so the encoder/command buffer state is already
        * reset when we bail out. */
       if (mtltexture == nil) {
           return SDL_SetError("No Metal texture is available to read pixels from");
       }

       MTLRegion mtlregion = MTLRegionMake2D(rect->x, rect->y, rect->w, rect->h);

       // we only do BGRA8 or RGBA8 at the moment, so 4 will do.
       const int temp_pitch = rect->w * 4;
       /* Do the size math in size_t so a large rect can't overflow int. */
       const size_t temp_size = (size_t) temp_pitch * (size_t) rect->h;
       void *temp_pixels = SDL_malloc(temp_size);
       if (!temp_pixels) {
           return SDL_OutOfMemory();
       }

       [mtltexture getBytes:temp_pixels bytesPerRow:temp_pitch fromRegion:mtlregion mipmapLevel:0];

       /* Anything that isn't BGRA8 is treated as RGBA8 (see the note above). */
       const Uint32 temp_format = (mtltexture.pixelFormat == MTLPixelFormatBGRA8Unorm) ? SDL_PIXELFORMAT_ARGB8888 : SDL_PIXELFORMAT_ABGR8888;
       const int status = SDL_ConvertPixels(rect->w, rect->h, temp_format, temp_pixels, temp_pitch, pixel_format, pixels, pitch);
       SDL_free(temp_pixels);
       return status;
   }}
   1560 
   /* Finish the current frame: end the active render pass, schedule the
    * backbuffer drawable for presentation and commit the command buffer.
    *
    * renderer: the Metal renderer being presented.
    *
    * On return the renderer holds no encoder, command buffer or backbuffer,
    * so the next draw starts from a fresh drawable. */
   static void
   METAL_RenderPresent(SDL_Renderer * renderer)
   { @autoreleasepool {
       METAL_RenderData *data = (__bridge METAL_RenderData *) renderer->driverdata;

       // If we don't have a command buffer, we can't present, so activate to get one.
       if (data.mtlcmdencoder == nil) {
           // We haven't even gotten a backbuffer yet? Clear it to black. Otherwise, load the existing data.
           if (data.mtlbackbuffer == nil) {
               MTLClearColor color = MTLClearColorMake(0.0f, 0.0f, 0.0f, 1.0f);
               METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionClear, &color, nil);
           } else {
               METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionLoad, NULL, nil);
           }
       }

       [data.mtlcmdencoder endEncoding];

       /* presentDrawable: requires a real drawable, so don't call it when we
        * have none. NOTE(review): presumably that happens when the layer could
        * not vend a drawable during activation -- confirm against
        * METAL_ActivateRenderCommandEncoder. We still commit below so work that
        * is already encoded isn't left dangling. */
       if (data.mtlbackbuffer != nil) {
           [data.mtlcmdbuffer presentDrawable:data.mtlbackbuffer];
       }
       [data.mtlcmdbuffer commit];

       data.mtlcmdencoder = nil;
       data.mtlcmdbuffer = nil;
       data.mtlbackbuffer = nil;
   }}
   1585 
   /* Drop the backend data attached to `texture`.
    *
    * renderer: unused here.
    * texture:  texture whose driverdata is released and cleared. */
   static void
   METAL_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture)
   { @autoreleasepool {
       void *texturedata = texture->driverdata;

       /* Detach the pointer first, then transfer ownership of the bridged
        * reference back to Objective-C so it gets released. */
       texture->driverdata = NULL;
       CFBridgingRelease(texturedata);
   }}
   1592 
   /* Tear down a renderer created by METAL_CreateRenderer().
    *
    * renderer: renderer to destroy; the struct itself is freed as well.
    *
    * If backend data is attached, any open render pass is ended, the cached
    * pipelines are destroyed and the Metal view is released before the
    * SDL_Renderer allocation is freed. */
   static void
   METAL_DestroyRenderer(SDL_Renderer * renderer)
   { @autoreleasepool {
       void *driverdata = renderer->driverdata;

       if (driverdata != NULL) {
           /* Take back the reference that was handed to driverdata. */
           METAL_RenderData *renderdata = CFBridgingRelease(driverdata);

           /* Messaging a nil encoder is a no-op, so no explicit guard is needed. */
           [renderdata.mtlcmdencoder endEncoding];

           DestroyAllPipelines(renderdata.allpipelines, renderdata.pipelinescount);

           SDL_Metal_DestroyView(renderdata.mtlview);
       }

       SDL_free(renderer);
   }}
   1610 
   /* Return the renderer's CAMetalLayer as an opaque pointer.
    *
    * renderer: the Metal renderer to query.
    *
    * The pointer is bridged without transferring ownership; the renderer
    * keeps owning the layer. */
   static void *
   METAL_GetMetalLayer(SDL_Renderer * renderer)
   { @autoreleasepool {
       METAL_RenderData *renderdata = (__bridge METAL_RenderData *) renderer->driverdata;
       CAMetalLayer *metallayer = renderdata.mtllayer;

       return (__bridge void*)metallayer;
   }}
   1617 
   /* Return the renderer's current render command encoder as an opaque pointer.
    *
    * renderer: the Metal renderer to query.
    *
    * The encoder is activated first (loading the existing target contents),
    * so callers get whatever encoder the renderer has after that call. The
    * pointer is bridged without transferring ownership. */
   static void *
   METAL_GetMetalCommandEncoder(SDL_Renderer * renderer)
   { @autoreleasepool {
       METAL_RenderData *renderdata = (__bridge METAL_RenderData *) renderer->driverdata;

       METAL_ActivateRenderCommandEncoder(renderer, MTLLoadActionLoad, NULL, nil);

       id<MTLRenderCommandEncoder> encoder = renderdata.mtlcmdencoder;
       return (__bridge void*)encoder;
   }}
   1625 
   /* Create and initialize a Metal-backed SDL_Renderer for `window`.
    *
    * window: window to render into. If it was not created with
    *         SDL_WINDOW_METAL it is recreated with that flag (and restored to
    *         its original flags if renderer creation fails afterwards).
    * flags:  SDL_RENDERER_* creation flags; only SDL_RENDERER_PRESENTVSYNC is
    *         inspected here.
    *
    * Returns the new renderer, or NULL on failure.
    *
    * Setup performed: Metal device + view/layer, command queue, the embedded
    * shader library, nearest/linear samplers, and two GPU-private static
    * buffers (shader constants and the quad index buffer) that are filled
    * through shared staging buffers and a blit pass. */
   static SDL_Renderer *
   METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
   { @autoreleasepool {
       SDL_Renderer *renderer = NULL;
       METAL_RenderData *data = NULL;
       id<MTLDevice> mtldevice = nil;
       SDL_MetalView view = NULL;
       CAMetalLayer *layer = nil;
       SDL_SysWMinfo syswm;
       Uint32 window_flags;
       SDL_bool changed_window = SDL_FALSE;

       SDL_VERSION(&syswm.version);
       if (!SDL_GetWindowWMInfo(window, &syswm)) {
           return NULL;
       }

       if (IsMetalAvailable(&syswm) == -1) {
           return NULL;
       }

       /* Metal needs a window created for it; swap out any GL/Vulkan flavor. */
       window_flags = SDL_GetWindowFlags(window);
       if (!(window_flags & SDL_WINDOW_METAL)) {
           changed_window = SDL_TRUE;
           if (SDL_RecreateWindow(window, (window_flags & ~(SDL_WINDOW_VULKAN | SDL_WINDOW_OPENGL)) | SDL_WINDOW_METAL) < 0) {
               return NULL;
           }
       }

       renderer = (SDL_Renderer *) SDL_calloc(1, sizeof(*renderer));
       if (!renderer) {
           SDL_OutOfMemory();
           if (changed_window) {
               SDL_RecreateWindow(window, window_flags);
           }
           return NULL;
       }

       // !!! FIXME: MTLCopyAllDevices() can find other GPUs on macOS...
       mtldevice = MTLCreateSystemDefaultDevice();

       if (mtldevice == nil) {
           SDL_free(renderer);
           SDL_SetError("Failed to obtain Metal device");
           if (changed_window) {
               SDL_RecreateWindow(window, window_flags);
           }
           return NULL;
       }

       view = SDL_Metal_CreateView(window);

       if (view == NULL) {
   #if !__has_feature(objc_arc)
           [mtldevice release];
   #endif
           SDL_free(renderer);
           if (changed_window) {
               SDL_RecreateWindow(window, window_flags);
           }
           return NULL;
       }

       // !!! FIXME: error checking on all of this.
       data = [[METAL_RenderData alloc] init];

       if (data == nil) {
   #if !__has_feature(objc_arc)
           [mtldevice release];
   #endif
           SDL_Metal_DestroyView(view);
           SDL_free(renderer);
           if (changed_window) {
               SDL_RecreateWindow(window, window_flags);
           }
           return NULL;
       }

       /* driverdata holds its own retained reference to the backend object;
        * METAL_DestroyRenderer() gives it back with CFBridgingRelease(). */
       renderer->driverdata = (void*)CFBridgingRetain(data);
       renderer->window = window;

       data.mtlview = view;

   #ifdef __MACOSX__
       layer = (CAMetalLayer *)[(NSView *)view layer];
   #else
       layer = (CAMetalLayer *)[(__bridge UIView *)view layer];
   #endif

       layer.device = mtldevice;

       /* Necessary for RenderReadPixels. */
       layer.framebufferOnly = NO;

       data.mtldevice = layer.device;
       data.mtllayer = layer;
       id<MTLCommandQueue> mtlcmdqueue = [data.mtldevice newCommandQueue];
       data.mtlcmdqueue = mtlcmdqueue;
       data.mtlcmdqueue.label = @"SDL Metal Renderer";
       data.mtlpassdesc = [MTLRenderPassDescriptor renderPassDescriptor];

       NSError *err = nil;

       // The compiled .metallib is embedded in a static array in a header file
       // but the original shader source code is in SDL_shaders_metal.metal.
       dispatch_data_t mtllibdata = dispatch_data_create(sdl_metallib, sdl_metallib_len, dispatch_get_global_queue(0, 0), ^{});
       id<MTLLibrary> mtllibrary = [data.mtldevice newLibraryWithData:mtllibdata error:&err];
   #if !__has_feature(objc_arc)
       dispatch_release(mtllibdata);
   #endif

       /* Judge success by the returned library, not by the error pointer, and
        * fail cleanly instead of continuing with no shaders (the old assert
        * vanished in release builds). The cleanup mirrors the releases done on
        * the success path for everything created so far. */
       if (mtllibrary == nil) {
           SDL_SetError("Failed to load Metal shader library: %s",
                        (err != nil) ? [[err localizedDescription] UTF8String] : "unknown error");
   #if !__has_feature(objc_arc)
           [mtlcmdqueue release];
           [data release];
           [mtldevice release];
   #endif
           CFBridgingRelease(renderer->driverdata);
           SDL_Metal_DestroyView(view);
           SDL_free(renderer);
           if (changed_window) {
               SDL_RecreateWindow(window, window_flags);
           }
           return NULL;
       }

       data.mtllibrary = mtllibrary;
       data.mtllibrary.label = @"SDL Metal renderer shader library";

       /* Do some shader pipeline state loading up-front rather than on demand. */
       data.pipelinescount = 0;
       data.allpipelines = NULL;
       ChooseShaderPipelines(data, MTLPixelFormatBGRA8Unorm);

       /* One descriptor is reused for both samplers; only the filters differ. */
       MTLSamplerDescriptor *samplerdesc = [[MTLSamplerDescriptor alloc] init];

       samplerdesc.minFilter = MTLSamplerMinMagFilterNearest;
       samplerdesc.magFilter = MTLSamplerMinMagFilterNearest;
       id<MTLSamplerState> mtlsamplernearest = [data.mtldevice newSamplerStateWithDescriptor:samplerdesc];
       data.mtlsamplernearest = mtlsamplernearest;

       samplerdesc.minFilter = MTLSamplerMinMagFilterLinear;
       samplerdesc.magFilter = MTLSamplerMinMagFilterLinear;
       id<MTLSamplerState> mtlsamplerlinear = [data.mtldevice newSamplerStateWithDescriptor:samplerdesc];
       data.mtlsamplerlinear = mtlsamplerlinear;

       /* Note: matrices are column major. */
       float identitytransform[16] = {
           1.0f, 0.0f, 0.0f, 0.0f,
           0.0f, 1.0f, 0.0f, 0.0f,
           0.0f, 0.0f, 1.0f, 0.0f,
           0.0f, 0.0f, 0.0f, 1.0f,
       };

       /* Identity plus a (0.5, 0.5) translation in the last column. */
       float halfpixeltransform[16] = {
           1.0f, 0.0f, 0.0f, 0.0f,
           0.0f, 1.0f, 0.0f, 0.0f,
           0.0f, 0.0f, 1.0f, 0.0f,
           0.5f, 0.5f, 0.0f, 1.0f,
       };

       /* Metal pads float3s to 16 bytes. */
       float decodetransformJPEG[4*4] = {
           0.0, -0.501960814, -0.501960814, 0.0, /* offset */
           1.0000,  0.0000,  1.4020, 0.0,        /* Rcoeff */
           1.0000, -0.3441, -0.7141, 0.0,        /* Gcoeff */
           1.0000,  1.7720,  0.0000, 0.0,        /* Bcoeff */
       };

       float decodetransformBT601[4*4] = {
           -0.0627451017, -0.501960814, -0.501960814, 0.0, /* offset */
           1.1644,  0.0000,  1.5960, 0.0,                  /* Rcoeff */
           1.1644, -0.3918, -0.8130, 0.0,                  /* Gcoeff */
           1.1644,  2.0172,  0.0000, 0.0,                  /* Bcoeff */
       };

       /* Limited-range BT.709. This table used to be a copy of the JPEG one,
        * which decoded BT.709 content with the wrong offset and coefficients. */
       float decodetransformBT709[4*4] = {
           -0.0627451017, -0.501960814, -0.501960814, 0.0, /* offset */
           1.1644,  0.0000,  1.7927, 0.0,                  /* Rcoeff */
           1.1644, -0.2132, -0.5329, 0.0,                  /* Gcoeff */
           1.1644,  2.1124,  0.0000, 0.0,                  /* Bcoeff */
       };

       /* CPU-visible staging copy of the constants; blitted to a private buffer below. */
       id<MTLBuffer> mtlbufconstantstaging = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModeShared];
       #if !__has_feature(objc_arc)
       [mtlbufconstantstaging autorelease];
       #endif

       char *constantdata = [mtlbufconstantstaging contents];
       SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform));
       SDL_memcpy(constantdata + CONSTANTS_OFFSET_HALF_PIXEL_TRANSFORM, halfpixeltransform, sizeof(halfpixeltransform));
       SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_JPEG, decodetransformJPEG, sizeof(decodetransformJPEG));
       SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT601, decodetransformBT601, sizeof(decodetransformBT601));
       SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT709, decodetransformBT709, sizeof(decodetransformBT709));

       /* 16-bit indices can address UINT16_MAX vertices, i.e. this many 4-vertex quads. */
       int quadcount = UINT16_MAX / 4;
       size_t indicessize = sizeof(UInt16) * quadcount * 6;
       id<MTLBuffer> mtlbufquadindicesstaging = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModeShared];
   #if !__has_feature(objc_arc)
       [mtlbufquadindicesstaging autorelease];
   #endif

       /* Quads in the following vertex order (matches the FillRects vertices):
        * 1---3
        * | \ |
        * 0---2
        */
       UInt16 *indexdata = [mtlbufquadindicesstaging contents];
       for (int i = 0; i < quadcount; i++) {
           indexdata[i * 6 + 0] = i * 4 + 0;
           indexdata[i * 6 + 1] = i * 4 + 1;
           indexdata[i * 6 + 2] = i * 4 + 2;

           indexdata[i * 6 + 3] = i * 4 + 2;
           indexdata[i * 6 + 4] = i * 4 + 1;
           indexdata[i * 6 + 5] = i * 4 + 3;
       }

       id<MTLBuffer> mtlbufconstants = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModePrivate];
       data.mtlbufconstants = mtlbufconstants;
       data.mtlbufconstants.label = @"SDL constant data";

       id<MTLBuffer> mtlbufquadindices = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModePrivate];
       data.mtlbufquadindices = mtlbufquadindices;
       data.mtlbufquadindices.label = @"SDL quad index buffer";

       /* Private buffers can't be written by the CPU, so upload via a blit pass. */
       id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
       id<MTLBlitCommandEncoder> blitcmd = [cmdbuffer blitCommandEncoder];

       [blitcmd copyFromBuffer:mtlbufconstantstaging sourceOffset:0 toBuffer:mtlbufconstants destinationOffset:0 size:CONSTANTS_LENGTH];
       [blitcmd copyFromBuffer:mtlbufquadindicesstaging sourceOffset:0 toBuffer:mtlbufquadindices destinationOffset:0 size:indicessize];

       [blitcmd endEncoding];
       [cmdbuffer commit];

       // !!! FIXME: force more clears here so all the drawables are sane to start, and our static buffers are definitely flushed.

       renderer->WindowEvent = METAL_WindowEvent;
       renderer->GetOutputSize = METAL_GetOutputSize;
       renderer->SupportsBlendMode = METAL_SupportsBlendMode;
       renderer->CreateTexture = METAL_CreateTexture;
       renderer->UpdateTexture = METAL_UpdateTexture;
       renderer->UpdateTextureYUV = METAL_UpdateTextureYUV;
       renderer->LockTexture = METAL_LockTexture;
       renderer->UnlockTexture = METAL_UnlockTexture;
       renderer->SetTextureScaleMode = METAL_SetTextureScaleMode;
       renderer->SetRenderTarget = METAL_SetRenderTarget;
       renderer->QueueSetViewport = METAL_QueueSetViewport;
       renderer->QueueSetDrawColor = METAL_QueueSetDrawColor;
       renderer->QueueDrawPoints = METAL_QueueDrawPoints;
       renderer->QueueDrawLines = METAL_QueueDrawLines;
       renderer->QueueFillRects = METAL_QueueFillRects;
       renderer->QueueCopy = METAL_QueueCopy;
       renderer->QueueCopyEx = METAL_QueueCopyEx;
       renderer->RunCommandQueue = METAL_RunCommandQueue;
       renderer->RenderReadPixels = METAL_RenderReadPixels;
       renderer->RenderPresent = METAL_RenderPresent;
       renderer->DestroyTexture = METAL_DestroyTexture;
       renderer->DestroyRenderer = METAL_DestroyRenderer;
       renderer->GetMetalLayer = METAL_GetMetalLayer;
       renderer->GetMetalCommandEncoder = METAL_GetMetalCommandEncoder;

       /* Start from the driver's static info, then rebuild the flags below
        * depending on what this window/OS can actually do. */
       renderer->info = METAL_RenderDriver.info;
       renderer->info.flags = (SDL_RENDERER_ACCELERATED | SDL_RENDERER_TARGETTEXTURE);

       renderer->always_batch = SDL_TRUE;

   #if defined(__MACOSX__) && defined(MAC_OS_X_VERSION_10_13)
       if (@available(macOS 10.13, *)) {
           /* Vsync is only switchable where displaySyncEnabled exists. */
           data.mtllayer.displaySyncEnabled = (flags & SDL_RENDERER_PRESENTVSYNC) != 0;
           if (data.mtllayer.displaySyncEnabled) {
               renderer->info.flags |= SDL_RENDERER_PRESENTVSYNC;
           }
       } else
   #endif
       {
           /* Everywhere else vsync is reported as always on. */
           renderer->info.flags |= SDL_RENDERER_PRESENTVSYNC;
       }

       /* https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
       int maxtexsize = 4096;
   #if defined(__MACOSX__)
       maxtexsize = 16384;
   #elif defined(__TVOS__)
       maxtexsize = 8192;
   #ifdef __TVOS_11_0
       if (@available(tvOS 11.0, *)) {
           if ([mtldevice supportsFeatureSet:MTLFeatureSet_tvOS_GPUFamily2_v1]) {
               maxtexsize = 16384;
           }
       }
   #endif
   #else
   #ifdef __IPHONE_11_0
   #pragma clang diagnostic push
   #pragma clang diagnostic ignored "-Wunguarded-availability-new"
       if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
           maxtexsize = 16384;
       } else
   #pragma clang diagnostic pop
   #endif
   #ifdef __IPHONE_10_0
       if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v1]) {
           maxtexsize = 16384;
       } else
   #endif
       if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily2_v2] || [mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily1_v2]) {
           maxtexsize = 8192;
       } else {
           maxtexsize = 4096;
       }
   #endif

       renderer->info.max_texture_width = maxtexsize;
       renderer->info.max_texture_height = maxtexsize;

       /* Without ARC, drop the local +1 references from alloc/new/create. */
   #if !__has_feature(objc_arc)
       [mtlcmdqueue release];
       [mtllibrary release];
       [samplerdesc release];
       [mtlsamplernearest release];
       [mtlsamplerlinear release];
       [mtlbufconstants release];
       [mtlbufquadindices release];
       [data release];
       [mtldevice release];
   #endif

       return renderer;
   }}
   1941 
   /* Driver record for the Metal backend: the renderer factory followed by
    * the static renderer info. METAL_CreateRenderer() copies this info into
    * each renderer and then overrides the flags and maximum texture size. */
   SDL_RenderDriver METAL_RenderDriver = {
       METAL_CreateRenderer,
       {
           "metal",
           (SDL_RENDERER_ACCELERATED |
            SDL_RENDERER_PRESENTVSYNC |
            SDL_RENDERER_TARGETTEXTURE),
           6,  /* number of entries in the format list below */
           {
               /* packed 32-bit RGB formats */
               SDL_PIXELFORMAT_ARGB8888, SDL_PIXELFORMAT_ABGR8888,
               /* planar YUV formats */
               SDL_PIXELFORMAT_YV12, SDL_PIXELFORMAT_IYUV,
               SDL_PIXELFORMAT_NV12, SDL_PIXELFORMAT_NV21
           },
           0,  /* presumably max texture width; set per device at creation time */
           0,  /* presumably max texture height; set per device at creation time */
       }
   };
   1959 
   1960 #endif /* SDL_VIDEO_RENDER_METAL && !SDL_RENDER_DISABLED */
   1961 
   1962 /* vi: set ts=4 sw=4 expandtab: */