You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

993 lines
29 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#define HEAVYTM
#include "tm.h"
#ifdef RADUSETM3
tm_api * g_tm_api;
//#define PROFILE_MODE
#endif
#include <math.h>
// Compiler abstraction layer:
//   stop()        - halt right at the failure point (debugger break on MSVC,
//                   trap instruction on other compilers)
//   int64/uint64  - names for the signed/unsigned 64-bit integer types
#ifdef _MSC_VER
#define stop() __debugbreak()
#include <windows.h>
#define int64 __int64
#define uint64 unsigned __int64
#else
#define stop() __builtin_trap()
#define int64 long long
#define uint64 unsigned long long
#endif
#ifdef _MSC_VER
// C4127 is MSVC's "conditional expression is constant" warning
#pragma warning(disable:4127)
#endif
//#define NOCOMP
//#define PROFILE_NEW_ONLY
//#define PROFILE_MODE
// __cycles(): raw 64-bit tick counter used for the timing printouts.
//   x86/x64 : the time stamp counter (rdtsc)
//   ARM64   : the virtual counter register (cntvct_el0)
// The values are only meaningful as differences taken on the same machine.
#if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(__SSE2__) || defined(STBIR_SSE) || defined( _M_IX86_FP ) || defined(__i386) || defined( __i386__ ) || defined( _M_IX86 ) || defined( _X86_ )
#ifdef _MSC_VER
uint64 __rdtsc();
#define __cycles() __rdtsc()
#else // non msvc
static inline uint64 __cycles(void)
{
  unsigned int lo, hi;
  // rdtsc leaves the low half in EAX and the high half in EDX.
  // __asm__/__volatile__ are used instead of asm/volatile so this also
  // compiles in strict ISO modes (e.g. -std=c11), where "asm" is not a keyword.
  __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi) );
  return ( ( (uint64) hi ) << 32 ) | ( (uint64) lo );
}
#endif // msvc
#elif defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || defined(__ARM_NEON__)
#ifdef _MSC_VER
#define __cycles() _ReadStatusReg(ARM64_CNTVCT)
#else
static inline uint64 __cycles(void)
{
  uint64 tsc;
  // read the virtual count register; see the note above about __asm__
  __asm__ __volatile__ ("mrs %0, cntvct_el0" : "=r" (tsc));
  return tsc;
}
#endif
#else // x64, arm
#error Unknown platform for timing.
#endif //x64 and
// When profiling, compile the resizer's asserts out entirely.
#ifdef PROFILE_MODE
#define STBIR_ASSERT(cond)
#endif
// In debug builds, (re)define the assert so a failure halts via stop().
#ifdef _DEBUG
#undef STBIR_ASSERT
#define STBIR_ASSERT(cond) { if (!(cond)) stop(); }
#endif
// Scale factors main() applies to the input size when filling szws/szhs
// (input/SHRINKBY* for the shrink size, input*ZOOMBY* for the zoom size).
#define SHRINKBYW 2
#define ZOOMBYW 2
#define SHRINKBYH 2
#define ZOOMBYH 2
// Number of live allocations made through the guard-page wmalloc()/wfree();
// main() calls stop() if this is non-zero after a resize.
int mem_count = 0;
#ifdef TEST_WITH_VALLOC
#define STBIR__SEPARATE_ALLOCATIONS
#if TEST_WITH_LIMIT_AT_FRONT
// Page-granular allocator (TEST_WITH_LIMIT_AT_FRONT variant).
// Rounds `size` up to a whole number of pages and returns the base address
// that VirtualAlloc hands back, so the block always starts on a page boundary.
// No guard pages or hidden header are used in this variant.
// Returns 0 if VirtualAlloc fails (including a zero-byte request).
void * wmalloc(SIZE_T size)
{
  // kept as SIZE_T so that ~(pagesize-1) is a full-width mask; with a 32-bit
  // unsigned the mask would clear the upper half of a 64-bit size
  static SIZE_T pagesize=0;
  void* p;
  SIZE_T s;

  // get the page size, if we haven't yet
  if (pagesize==0)
  {
    SYSTEM_INFO si;
    GetSystemInfo(&si);
    pagesize=si.dwPageSize;
  }

  // round the request up to a multiple of the page size
  s=(size+(pagesize-1))&~(pagesize-1);

  // reserve and commit the pages in one call
  p=VirtualAlloc(0,s,MEM_RESERVE|MEM_COMMIT,PAGE_READWRITE);

  return p;
}
// Releases a block obtained from the limit-at-front wmalloc().
// A null pointer is ignored.  Stops if the pointer is not 4 KiB aligned or if
// VirtualFree reports failure.
void wfree(void * ptr)
{
  if (!ptr)
    return;

  // blocks from this allocator always start on a page boundary
  if ( ( ((ptrdiff_t)ptr) & 4095 ) != 0 )
    stop();

  if ( !VirtualFree(ptr,0,MEM_RELEASE) )
    stop();
}
#else
// Guard-page allocator.
// Layout of one allocation:
//   [ no-access page ][ s bytes of usable pages ][ no-access page ]
// where s is size+16 rounded up to a whole number of pages.  By default the
// returned pointer is placed so that ptr+size is the first byte of the
// trailing guard page, so overruns fault immediately.  With ERROR_ON_FRONT
// defined the pointer sits 16 bytes after the leading guard page instead.
// A validation word and the original VirtualAlloc address are stored just
// below the returned pointer for wfree().  Increments mem_count on success.
// Returns 0 if VirtualAlloc fails.
void * wmalloc(SIZE_T size)
{
  // kept as SIZE_T so that ~(pagesize-1) is a full-width mask; with a 32-bit
  // unsigned the mask would clear the upper half of a 64-bit size
  static SIZE_T pagesize=0;
  void* p;
  SIZE_T s;

  // get the page size, if we haven't yet
  if (pagesize==0)
  {
    SYSTEM_INFO si;
    GetSystemInfo(&si);
    pagesize=si.dwPageSize;
  }

  // we need room for the size, 8 bytes to hide the original pointer and a
  // validation dword, and enough data to completely fill one page
  s=(size+16+(pagesize-1))&~(pagesize-1);

  // allocate the usable pages plus one guard page on each side
  p=VirtualAlloc(0,(SIZE_T)(s+pagesize+pagesize),MEM_RESERVE|MEM_COMMIT,PAGE_READWRITE);

  if (p)
  {
    DWORD oldprot;
    void* orig=p;

    // protect the first page
    VirtualProtect(((char*)p),pagesize,PAGE_NOACCESS,&oldprot);

    // protect the final page
    VirtualProtect(((char*)p)+s+pagesize,pagesize,PAGE_NOACCESS,&oldprot);

    // now move the returned pointer so that it bumps right up against the
    // the next (protected) page (this may result in unaligned return
    // addresses - pre-align the sizes if you always want aligned ptrs)
//#define ERROR_ON_FRONT
#ifdef ERROR_ON_FRONT
    p=((char*)p)+pagesize+16;
#else
    p=((char*)p)+(s-size)+pagesize;
#endif

    // hide the validation value and the original pointer (which we'll
    // need for freeing) right behind the returned pointer
    ((unsigned int*)p)[-1]=0x98765432;
    ((void**)p)[-2]=orig;

    ++mem_count;

    //printf("aloc: %p bytes: %d\n",p,(int)size);
    return(p);
  }

  return 0;
}
// Releases a block obtained from the guard-page wmalloc().
// A null pointer is ignored.  If the hidden header just below `ptr` does not
// carry the validation value and a non-null original address, the debugger
// is triggered and nothing is freed.  Decrements mem_count on success.
void wfree(void * ptr)
{
  void* base;

  if (!ptr)
    return;

  // is this one of our allocations?
  if ( ( ((unsigned int*)ptr)[-1] != 0x98765432 ) || ( ((void**)ptr)[-2] == 0 ) )
  {
    __debugbreak();
    return;
  }

  // back up to find the original pointer (known non-null after the check above)
  base=((void**)ptr)[-2];

  // clear the validation value and the original pointer
  ((unsigned int*)ptr)[-1]=0;
  ((void**)ptr)[-2]=0;

  //printf("free: %p\n",ptr);
  --mem_count;

  // now free the pages
  VirtualFree(base,0,MEM_RELEASE);
}
#endif
// Allocation hooks defined ahead of the stb_image_resize2.h include below, so
// that the resizer's allocations go through wmalloc()/wfree().  The user_data
// argument is evaluated and discarded.
#define STBIR_MALLOC(size,user_data) ((void)(user_data), wmalloc(size))
#define STBIR_FREE(ptr,user_data) ((void)(user_data), wfree(ptr))
#endif
#define STBIR_PROFILE
//#define STBIR_NO_SIMD
//#define STBIR_AVX
//#define STBIR_AVX2
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#include "stb_image_resize2.h" // new one!
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
// Lookup tables indexed by the test loop variables in main().
// The 5-entry tables are indexed by data type, the 4-entry tables by edge
// mode, and the 20-entry tables by pixel layout; fltstrs/flts by filter.

// bytes per channel for each data type (used to compute row strides)
int tsizes[5] = { 1, 1, 2, 4, 2 };
// stb_image_resize2 data type for each type index
int ttypes[5] = { STBIR_TYPE_UINT8, STBIR_TYPE_UINT8_SRGB, STBIR_TYPE_UINT16, STBIR_TYPE_FLOAT, STBIR_TYPE_HALF_FLOAT };
// edge modes and filters passed to stbir_set_edgemodes / stbir_set_filters
int cedges[4] = { STBIR_EDGE_CLAMP, STBIR_EDGE_REFLECT, STBIR_EDGE_ZERO, STBIR_EDGE_WRAP };
int flts[5] = { STBIR_FILTER_BOX, STBIR_FILTER_TRIANGLE, STBIR_FILTER_CUBICBSPLINE, STBIR_FILTER_CATMULLROM, STBIR_FILTER_MITCHELL };
// input pixel layout for each layout index
// NOTE(review): entry 4 is STBIR_BGRA while its label in buffstrs is "RGBA" - confirm this is intended
int buffers[20] = { STBIR_1CHANNEL, STBIR_2CHANNEL, STBIR_RGB, STBIR_4CHANNEL,
STBIR_BGRA, STBIR_ARGB, STBIR_RA, STBIR_AR,
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
STBIR_RGBA, STBIR_ARGB, STBIR_RA, STBIR_AR,
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
};
// output pixel layout for each layout index; differs from buffers[] only in
// entries 12-19, where exactly one side uses the _PM layouts
int obuffers[20] = { STBIR_1CHANNEL, STBIR_2CHANNEL, STBIR_RGB, STBIR_4CHANNEL,
STBIR_BGRA, STBIR_ARGB, STBIR_RA, STBIR_AR,
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
STBIR_RGBA, STBIR_ARGB, STBIR_RA, STBIR_AR,
};
// channel count for each layout index
int bchannels[20] = { 1, 2, 3, 4, 4,4, 2,2, 4,4, 2,2, 4,4, 2,2, 4,4, 2,2 };
// index of the alpha channel within a pixel, or -1 when main() should treat alpha as 1.0
int alphapos[20] = { -1, -1, -1, -1, 3,0, 1,0, 3,0, 1,0, 3,0, 1,0,3,0, 1,0 };
// printable names for the layouts, types, edge modes and filters
char const * buffstrs[20] = { "1ch", "2ch", "3ch", "4ch", "RGBA", "ARGB", "RA", "AR", "RGBA_both_pre", "ARGB_both_pre", "RA_both_pre", "AR_both_pre", "RGBA_out_pre", "ARGB_out_pre", "RA_out_pre", "AR_out_pre", "RGBA_in_pre", "ARGB_in_pre", "RA_in_pre", "AR_in_pre" };
char const * typestrs[5] = { "Bytes", "BytesSRGB", "Shorts", "Floats", "Half Floats"};
char const * edgestrs[4] = { "Clamp", "Reflect", "Zero", "Wrap" };
char const * fltstrs[5] = { "Box", "Triangle", "Cubic", "Catmullrom", "Mitchell" };
#ifdef STBIR_PROFILE
// Emits one accumulation zone per profile entry that has a non-zero clock
// count, labelled with that entry's description.
static void do_acc_zones( STBIR_PROFILE_INFO * profile )
{
  stbir_uint32 zone;
  stbir_uint64 start = tmGetAccumulationStart( tm_mask );

  // keep `start` referenced even if the emit call below compiles to nothing
  (void) start;

  for( zone = 0 ; zone < profile->count ; zone++ )
  {
    if ( profile->clocks[zone] )
    {
      tmEmitAccumulationZone( 0, 0, (tm_uint64*)&start, 0, profile->clocks[zone], profile->descriptions[zone] );
    }
  }
}
#else
#define do_acc_zones(...)
#endif
// profile.clocks[1] from the most recent resize; main() tracks its minimum
// and prints it as "v:" next to the new resizer's timing.
int64 vert;
//#define WINTHREADTEST
#ifdef WINTHREADTEST
// State shared between nresize() and the resize_shim() worker threads.
static STBIR_RESIZE * thread_resize; // resize the workers should run splits of
static LONG which;                   // next split index, claimed with InterlockedAdd
static int threads_started = 0;      // count of worker threads created so far
static HANDLE threads[32];           // worker thread handles
static HANDLE starts,stops;          // semaphores: `starts` wakes workers, `stops` is released as each finishes
static DWORD resize_shim( LPVOID p )
{
for(;;)
{
LONG wh;
WaitForSingleObject( starts, INFINITE );
wh = InterlockedAdd( &which, 1 ) - 1;
ENTER( "Split %d", wh );
stbir_resize_split( thread_resize, wh, 1 );
#ifdef STBIR_PROFILE
{ STBIR_PROFILE_INFO profile; stbir_resize_split_profile_info( &profile, thread_resize, wh, 1 ); do_acc_zones( &profile ); vert = profile.clocks[1]; }
#endif
LEAVE();
ReleaseSemaphore( stops, 1, 0 );
}
}
#endif
// Runs one resize with the new resizer (stb_image_resize2).
//   o, ox, oy, op : output pixels, output width/height, output row stride
//   i, ix, iy, ip : input pixels, input width/height, input row stride
//   buf           : layout index into buffers[] / obuffers[]
//   type          : data type index into ttypes[]
//   edg           : edge mode index into cedges[] (used for both axes)
//   flt           : filter index into flts[] (used for both axes)
// Stops if the single-threaded resize reports failure.  With STBIR_PROFILE
// the per-phase clocks are emitted through do_acc_zones() and `vert` is
// updated.  With WINTHREADTEST the resize is split (up to 4 ways) across
// worker threads running resize_shim().
void nresize( void * o, int ox, int oy, int op, void * i, int ix, int iy, int ip, int buf, int type, int edg, int flt )
{
  STBIR_RESIZE resize;

  stbir_resize_init( &resize, i, ix, iy, ip, o, ox, oy, op, buffers[buf], ttypes[type] );
  stbir_set_pixel_layouts( &resize, buffers[buf], obuffers[buf] );
  stbir_set_edgemodes( &resize, cedges[edg], cedges[edg] );
  stbir_set_filters( &resize, flts[flt], /*STBIR_FILTER_POINT_SAMPLE */ flts[flt] );
  //stbir_set_input_subrect( &resize, 0.55f,0.333f,0.75f,0.50f);
  //stbir_set_output_pixel_subrect( &resize, 00, 00, ox/2,oy/2);
  //stbir_set_pixel_subrect(&resize, 1430,1361,30,30);

  ENTER( "Resize" );

#ifndef WINTHREADTEST

  ENTER( "Filters" );
  stbir_build_samplers_with_splits( &resize, 1 );
#ifdef STBIR_PROFILE
  { STBIR_PROFILE_INFO profile; stbir_resize_build_profile_info( &profile, &resize ); do_acc_zones( &profile ); }
#endif
  LEAVE();

  ENTER( "Resize" );
  if(!stbir_resize_extended( &resize ) )
    stop();
#ifdef STBIR_PROFILE
  { STBIR_PROFILE_INFO profile; stbir_resize_extended_profile_info( &profile, &resize ); do_acc_zones( &profile ); vert = profile.clocks[1]; }
#endif
  LEAVE();

#else
  {
    int c, cnt;

    ENTER( "Filters" );
    cnt = stbir_build_samplers_with_splits( &resize, 4 );
#ifdef STBIR_PROFILE
    { STBIR_PROFILE_INFO profile; stbir_resize_build_profile_info( &profile, &resize ); do_acc_zones( &profile ); }
#endif
    LEAVE();

    ENTER( "Thread start" );
    if ( threads_started == 0 )
    {
      starts = CreateSemaphore( 0, 0, 32, 0 );
      stops = CreateSemaphore( 0, 0, 32, 0 );
    }

    // create only the workers we don't have yet
    for( c = threads_started ; c < cnt ; c++ )
      threads[ c ] = CreateThread( 0, 2048*1024, resize_shim, 0, 0, 0 );

    // the pool only ever grows: lowering the count after a resize with fewer
    // splits would make a later, larger resize create duplicate workers and
    // overwrite handles that are still live
    if ( cnt > threads_started )
      threads_started = cnt;

    thread_resize = &resize;
    which = 0;
    LEAVE();

    // starts the threads
    ReleaseSemaphore( starts, cnt, 0 );

    ENTER( "Wait" );
    for( c = 0 ; c < cnt; c++ )
      WaitForSingleObject( stops, INFINITE );
    LEAVE();
  }
#endif

  ENTER( "Free" );
  stbir_free_samplers( &resize );
  LEAVE();
  LEAVE();
}
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
// The "old" resizer that main() times and compares against nresize().  It is
// defined outside the code visible here and takes the same arguments, in the
// same order, as nresize().
extern void oresize( void * o, int ox, int oy, int op, void * i, int ix, int iy, int ip, int buf, int type, int edg, int flt );
// Inclusive index ranges for the test sweeps in main().  Each START/END pair
// bounds one loop variable; several of the loops in main() are currently
// switched (via #if) to hard-coded ranges instead.
#define TYPESTART 0
#define TYPEEND 4
#define LAYOUTSTART 0
#define LAYOUTEND 19
#define SIZEWSTART 0
#define SIZEWEND 2
#define SIZEHSTART 0
#define SIZEHEND 2
#define EDGESTART 0
#define EDGEEND 3
#define FILTERSTART 0
#define FILTEREND 4
#define HEIGHTSTART 0
#define HEIGHTEND 2
#define WIDTHSTART 0
#define WIDTHEND 2
// Expands 8-bit samples to 16-bit by replicating each byte into both halves
// (0x00 -> 0x0000, 0xAB -> 0xABAB, 0xFF -> 0xFFFF).
//   i       : w*h*c input bytes
//   w, h, c : width, height and channels per pixel
// Returns a malloc'd array of w*h*c unsigned shorts that the caller owns.
// Prints a message and exits if the input is null or the allocation fails.
static void * convert8to16( unsigned char * i, int w, int h, int c )
{
  // count in size_t so large images don't overflow int arithmetic
  size_t count = (size_t)w * (size_t)h * (size_t)c;
  size_t p;
  unsigned short * ret;

  if ( i == NULL )
  {
    fprintf( stderr, "convert8to16: no input image\n" );
    exit(1);
  }

  // never ask for zero bytes, so a null return always means failure
  ret = malloc( ( count ? count : 1 ) * sizeof *ret );
  if ( ret == NULL )
  {
    fprintf( stderr, "convert8to16: out of memory\n" );
    exit(1);
  }

  for( p = 0 ; p < count ; p++ )
  {
    ret[p] = (unsigned short)( ( ( (unsigned int) i[p] ) << 8 ) | i[p] );
  }

  return ret;
}
// Converts 8-bit samples to floats in [0,1] by scaling with 1/255.
//   i       : w*h*c input bytes
//   w, h, c : width, height and channels per pixel
// Returns a malloc'd array of w*h*c floats that the caller owns.
// Prints a message and exits if the input is null or the allocation fails.
static void * convert8tof( unsigned char * i, int w, int h, int c )
{
  // count in size_t so large images don't overflow int arithmetic
  size_t count = (size_t)w * (size_t)h * (size_t)c;
  size_t p;
  float * ret;

  if ( i == NULL )
  {
    fprintf( stderr, "convert8tof: no input image\n" );
    exit(1);
  }

  // never ask for zero bytes, so a null return always means failure
  ret = malloc( ( count ? count : 1 ) * sizeof *ret );
  if ( ret == NULL )
  {
    fprintf( stderr, "convert8tof: out of memory\n" );
    exit(1);
  }

  for( p = 0 ; p < count ; p++ )
  {
    ret[p] = ( (float) i[p] ) * ( 1.0f / 255.0f );
  }

  return ret;
}
// Converts 8-bit samples to half floats: each byte is scaled by 1/255 and
// passed through stbir__float_to_half.
//   i       : w*h*c input bytes
//   w, h, c : width, height and channels per pixel
// Returns a malloc'd array of w*h*c stbir__FP16 values that the caller owns.
// Prints a message and exits if the input is null or the allocation fails.
static void * convert8tohf( unsigned char * i, int w, int h, int c )
{
  // count in size_t so large images don't overflow int arithmetic
  size_t count = (size_t)w * (size_t)h * (size_t)c;
  size_t p;
  stbir__FP16 * ret;

  if ( i == NULL )
  {
    fprintf( stderr, "convert8tohf: no input image\n" );
    exit(1);
  }

  // never ask for zero bytes, so a null return always means failure
  ret = malloc( ( count ? count : 1 ) * sizeof *ret );
  if ( ret == NULL )
  {
    fprintf( stderr, "convert8tohf: out of memory\n" );
    exit(1);
  }

  for( p = 0 ; p < count ; p++ )
  {
    ret[p] = stbir__float_to_half( ( (float) i[p] ) * ( 1.0f / 255.0f ) );
  }

  return ret;
}
// Converts 8-bit samples to floats that have been rounded through half
// precision: byte -> float (scaled by 1/255) -> half -> float.  main() feeds
// this to the old resizer when the new resizer is tested with half floats.
//   i       : w*h*c input bytes
//   w, h, c : width, height and channels per pixel
// Returns a malloc'd array of w*h*c floats that the caller owns.
// Prints a message and exits if the input is null or the allocation fails.
static void * convert8tohff( unsigned char * i, int w, int h, int c )
{
  // count in size_t so large images don't overflow int arithmetic
  size_t count = (size_t)w * (size_t)h * (size_t)c;
  size_t p;
  float * ret;

  if ( i == NULL )
  {
    fprintf( stderr, "convert8tohff: no input image\n" );
    exit(1);
  }

  // never ask for zero bytes, so a null return always means failure
  ret = malloc( ( count ? count : 1 ) * sizeof *ret );
  if ( ret == NULL )
  {
    fprintf( stderr, "convert8tohff: out of memory\n" );
    exit(1);
  }

  for( p = 0 ; p < count ; p++ )
  {
    ret[p] = stbir__half_to_float( stbir__float_to_half( ( (float) i[p] ) * ( 1.0f / 255.0f ) ) );
  }

  return ret;
}
// Returns 1 if v is prime, 0 otherwise (including for v <= 1 and negatives).
// After ruling out multiples of 2 and 3, trial-divides by the candidates
// 6k-1 and 6k+1 (5,7, 11,13, ...).
static int isprime( int v )
{
  int i;

  if ( v <= 3 )
    return ( v > 1 );
  if ( ( v & 1 ) == 0 )
    return 0;
  if ( ( v % 3 ) == 0 )
    return 0;

  // "i <= v / i" is the overflow-free form of "i*i <= v": squaring i would
  // overflow a signed int once v gets close to INT_MAX
  for( i = 5 ; i <= v / i ; i += 6 )
  {
    if ( ( v % i ) == 0 )
      return 0;
    if ( ( v % ( i + 2 ) ) == 0 )
      return 0;
  }

  return 1;
}
// Finds a prime close to v by searching outward: at each distance d it tries
// v-d first and then v+d.  If no prime turns up for any d below v, v itself
// is returned unchanged.
static int getprime( int v )
{
  int dist;

  for( dist = 0 ; dist < v ; dist++ )
  {
    if ( isprime( v - dist ) )
      return v - dist;
    if ( isprime( v + dist ) )
      return v + dist;
  }

  // can't find any, just return orig
  return v;
}
// Test driver: stbirtest [imagefile]
//
// Loads the image, builds 16-bit / float / half-float copies of it, and then
// for every data type, pixel layout and output size (plus the currently
// selected edge mode and filter) it:
//   1. fills both output buffers with known values (192/255 in the pixel
//      area, 79 in the 60-byte right-hand margin of every row),
//   2. times the old resizer (oresize) into ir1 and the new one (nresize)
//      into ir2, printing the best cycle counts,
//   3. calls stop() if mem_count shows an allocation is still outstanding,
//   4. compares the two results channel by channel with a per-type tolerance
//      and checks that the row margins still hold 79.
// On any pixel mismatch it writes old.png / new.png and exits with status 1.
// Exits with status 1 as well if the arguments are wrong, the image cannot
// be loaded, or the output buffers cannot be allocated.
int main( int argc, char ** argv )
{
  int ix, iy, ic;
  unsigned char * input[6];
  char * ir1;
  char * ir2;
  int szhs[3];
  int szws[3];
  int layouts, types, heights, widths, edges, filters;

  if ( argc != 2 )
  {
    printf("command: stbirtest [imagefile]\n");
    exit(1);
  }

  SetupTM( "127.0.0.1" );

  input[0] = stbi_load( argv[1], &ix, &iy, &ic, 0 );
  if ( input[0] == 0 )
  {
    // ix/iy/ic are not valid here, so nothing below can run
    printf("Could not load image: %s\n", argv[1]);
    exit(1);
  }

  // input[] is indexed by data type; slot 5 holds the half-rounded floats
  // that the old resizer is given when the new one is tested with half floats
  input[1] = input[0];
  input[2] = convert8to16( input[0], ix, iy, ic );
  input[3] = convert8tof( input[0], ix, iy, ic );
  input[4] = convert8tohf( input[0], ix, iy, ic );
  input[5] = convert8tohff( input[0], ix, iy, ic );

  printf("Input %dx%d (%d channels)\n",ix,iy,ic);

  // output buffers: ir1 for the old resizer, ir2 for the new one
  ir1 = malloc( 4 * 4 * 3000 * 3000ULL );
  ir2 = malloc( 4 * 4 * 3000 * 3000ULL );
  if ( ( ir1 == 0 ) || ( ir2 == 0 ) )
  {
    printf("Could not allocate the output buffers\n");
    exit(1);
  }

  szhs[0] = getprime( iy/SHRINKBYH );
  szhs[1] = iy;
  szhs[2] = getprime( iy*ZOOMBYH );

  szws[0] = getprime( ix/SHRINKBYW );
  szws[1] = ix;
  szws[2] = getprime( ix*ZOOMBYW );

#if 1
  for( types = TYPESTART ; types <= TYPEEND ; types++ )
#else
  for( types = 1 ; types <= 1 ; types++ )
#endif
  {
    ENTER( "Test type: %s",typestrs[types]);
#if 1
    for( layouts = LAYOUTSTART ; layouts <= LAYOUTEND ; layouts++ )
#else
    for( layouts = 16; layouts <= 16 ; layouts++ )
#endif
    {
      ENTER( "Test layout: %s",buffstrs[layouts]);

#if 0
      for( heights = HEIGHTSTART ; heights <= HEIGHTEND ; heights++ )
      {
        int w, h = szhs[heights];
#else
      for( heights = 0 ; heights <= 11 ; heights++ )
      {
        static int szhsz[12]={32, 200, 350, 400, 450, 509, 532, 624, 700, 824, 1023, 2053 };
        int w, h = szhsz[heights];
#endif

        ENTER( "Test height: %d %s %d",iy,(h<iy)?"Down":((h>iy)?"Up":"Same"),h);

#if 0
        for( widths = WIDTHSTART ; widths <= WIDTHEND ; widths++ )
        {
          w = szws[widths];
#else
        for( widths = 0 ; widths <= 12 ; widths++ )
        {
          static int szwsz[13]={2, 32, 200, 350, 400, 450, 509, 532, 624, 700, 824, 1023, 2053 };
          w = szwsz[widths];
#endif

          ENTER( "Test width: %d %s %d",ix, (w<ix)?"Down":((w>ix)?"Up":"Same"), w);

#if 0
          for( edges = EDGESTART ; edges <= EDGEEND ; edges++ )
#else
          for( edges = 0 ; edges <= 0 ; edges++ )
#endif
          {
            ENTER( "Test edge: %s",edgestrs[edges]);
#if 0
            for( filters = FILTERSTART ; filters <= FILTEREND ; filters++ )
#else
            for( filters = 3 ; filters <= 3 ; filters++ )
#endif
            {
              int op, opw, np,npw, c, a;
              // the old resizer is run in float when the new one runs in half float
#ifdef COMPARE_SAME
              int oldtypes = types;
#else
              int oldtypes = (types==4)?3:types;
#endif

              ENTER( "Test filter: %s",fltstrs[filters]);
              {
                c = bchannels[layouts];
                a = alphapos[layouts];

                // row strides carry 60 extra bytes so writes past the row end can be detected
                op = w*tsizes[oldtypes]*c + 60;
                opw = w*tsizes[oldtypes]*c;

                np = w*tsizes[types]*c + 60;
                npw = w*tsizes[types]*c;

                printf( "%s:layout: %s w: %d h: %d edge: %s filt: %s\n", typestrs[types],buffstrs[layouts], w, h, edgestrs[edges], fltstrs[filters] );

                // clear pixel area to different, right edge to zero
#ifndef NOCLEAR
                ENTER( "Test clear padding" );
                {
                  int d;
                  for( d = 0 ; d < h ; d++ )
                  {
                    int oofs = d * op;
                    int nofs = d * np;
                    memset( ir1 + oofs, 192, opw );
                    memset( ir1 + oofs+opw, 79, op-opw );
                    memset( ir2 + nofs, 255, npw );
                    memset( ir2 + nofs+npw, 79, np-npw );
                  }
                }
                LEAVE();
#endif

#ifdef COMPARE_SAME
#define TIMINGS 1
#else
#define TIMINGS 1
#endif
                ENTER( "Test both" );
                {
#ifndef PROFILE_NEW_ONLY
                  {
                    // `max` holds the smallest cycle count seen (truncated to int)
                    int ttt, max = 0x7fffffff;
                    ENTER( "Test old" );
                    for( ttt = 0 ; ttt < TIMINGS ; ttt++ )
                    {
                      int64 m = __cycles();

                      oresize( ir1, w, h, op,
#ifdef COMPARE_SAME
                               input[types],
#else
                               input[(types==4)?5:types],
#endif
                               ix, iy, ix*ic*tsizes[oldtypes], layouts, oldtypes, edges, filters );

                      m = __cycles() - m;
                      if ( ( (int)m ) < max )
                        max = (int) m;
                    }
                    LEAVE();
                    printf("old: %d\n", max );
                  }
#endif

                  {
                    int ttt, max = 0x7fffffff, maxv = 0x7fffffff;
                    ENTER( "Test new" );
                    for( ttt = 0 ; ttt < TIMINGS ; ttt++ )
                    {
                      int64 m = __cycles();

                      nresize( ir2, w, h, np, input[types], ix, iy, ix*ic*tsizes[types], layouts, types, edges, filters );

                      m = __cycles() - m;
                      if ( ( (int)m ) < max )
                        max = (int) m;
                      if ( ( (int)vert ) < maxv )
                        maxv = (int) vert;
                    }
                    LEAVE(); // test new
                    printf("new: %d (v: %d)\n", max, maxv );
                  }
                }
                LEAVE(); // test both

                // every allocation made during the resize must have been freed
                if ( mem_count!= 0 )
                  stop();

#ifndef NOCOMP
                ENTER( "Test compare" );
                {
                  int x,y,ch;
                  int nums = 0;
                  for( y = 0 ; y < h ; y++ )
                  {
                    for( x = 0 ; x < w ; x++ )
                    {
                      switch(types)
                      {
                        case 0:
                        case 1: //SRGB
                        {
                          unsigned char * p1 = (unsigned char *)&ir1[y*op+x*c];
                          unsigned char * p2 = (unsigned char *)&ir2[y*np+x*c];
                          for( ch = 0 ; ch < c ; ch++ )
                          {
                            float pp1,pp2,d;
                            float av = (a==-1)?1.0f:((float)p1[a]/255.0f);

                            pp1 = p1[ch];
                            pp2 = p2[ch];

                            // compare in premult space
#ifndef COMPARE_SAME
                            if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >=16 ) && ( layouts <= 19 ) ) )
                            {
                              pp1 *= av;
                              pp2 *= av;
                            }
#endif

                            d = pp1 - pp2;
                            if ( d < 0 ) d = -d;

#ifdef COMPARE_SAME
                            if ( d > 0 )
#else
                            if ( d > 1 )
#endif
                            {
                              printf("Error at %d x %d (chan %d) (d: %g a: %g) [%d %d %d %d] [%d %d %d %d]\n",x,y,ch, d,av, p1[0],p1[1],p1[2],p1[3], p2[0],p2[1],p2[2],p2[3]);
                              ++nums;
                              if ( nums > 16 ) goto ex;
                              //if (d) exit(1);
                              //goto ex;
                            }
                          }
                        }
                        break;

                        case 2:
                        {
                          unsigned short * p1 = (unsigned short *)&ir1[y*op+x*c*sizeof(short)];
                          unsigned short * p2 = (unsigned short *)&ir2[y*np+x*c*sizeof(short)];
                          for( ch = 0 ; ch < c ; ch++ )
                          {
                            float thres,pp1,pp2,d;
                            float av = (a==-1)?1.0f:((float)p1[a]/65535.0f);

                            pp1 = p1[ch];
                            pp2 = p2[ch];

                            // compare in premult space
#ifndef COMPARE_SAME
                            if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) )
                            {
                              pp1 *= av;
                              pp2 *= av;
                            }
#endif

                            d = pp1 - pp2;
                            if ( d < 0 ) d = -d;

                            thres=((float)p1[ch]*0.007f)+2.0f;
                            if (thres<4) thres = 4;

#ifdef COMPARE_SAME
                            if ( d > 0 )
#else
                            if ( d > thres)
#endif
                            {
                              printf("Error at %d x %d (chan %d) %d %d [df: %g th: %g al: %g] (%d %d %d %d) (%d %d %d %d)\n",x,y,ch, p1[ch],p2[ch],d,thres,av,p1[0],p1[1],p1[2],p1[3],p2[0],p2[1],p2[2],p2[3]);
                              ++nums;
                              if ( nums > 16 ) goto ex;
                              //if (d) exit(1);
                              //goto ex;
                            }
                          }
                        }
                        break;

                        case 3:
                        {
                          float * p1 = (float *)&ir1[y*op+x*c*sizeof(float)];
                          float * p2 = (float *)&ir2[y*np+x*c*sizeof(float)];
                          for( ch = 0 ; ch < c ; ch++ )
                          {
                            float pp1 = p1[ch], pp2 = p2[ch];
                            float av = (a==-1)?1.0f:p1[a];
                            float thres, d;

                            // clamp
                            if (pp1<=0.0f) pp1 = 0;
                            if (pp2<=0.0f) pp2 = 0;
                            if (av<=0.0f) av = 0;
                            if (pp1>1.0f) pp1 = 1.0f;
                            if (pp2>1.0f) pp2 = 1.0f;
                            if (av>1.0f) av = 1.0f;

                            // compare in premult space
#ifndef COMPARE_SAME
                            if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) )
                            {
                              pp1 *= av;
                              pp2 *= av;
                            }
#endif

                            d = pp1 - pp2;
                            if ( d < 0 ) d = -d;

                            thres=(p1[ch]*0.002f)+0.0002f;
                            if ( thres < 0 ) thres = -thres;

#ifdef COMPARE_SAME
                            if ( d != 0.0f )
#else
                            if ( d > thres )
#endif
                            {
                              printf("Error at %d x %d (chan %d) %g %g [df: %g th: %g al: %g] (%g %g %g %g) (%g %g %g %g)\n",x,y,ch, p1[ch],p2[ch],d,thres,av,p1[0],p1[1],p1[2],p1[3],p2[0],p2[1],p2[2],p2[3]);
                              ++nums;
                              if ( nums > 16 ) goto ex;
                              //if (d) exit(1);
                              //goto ex;
                            }
                          }
                        }
                        break;

                        case 4:
                        {
                          // without COMPARE_SAME the old result is float and the new one is half float
#ifdef COMPARE_SAME
                          stbir__FP16 * p1 = (stbir__FP16 *)&ir1[y*op+x*c*sizeof(stbir__FP16)];
#else
                          float * p1 = (float *)&ir1[y*op+x*c*sizeof(float)];
#endif
                          stbir__FP16 * p2 = (stbir__FP16 *)&ir2[y*np+x*c*sizeof(stbir__FP16)];
                          for( ch = 0 ; ch < c ; ch++ )
                          {
#ifdef COMPARE_SAME
                            float pp1 = stbir__half_to_float(p1[ch]);
                            float av = (a==-1)?1.0f:stbir__half_to_float(p1[a]);
#else
                            float pp1 = stbir__half_to_float(stbir__float_to_half(p1[ch]));
                            float av = (a==-1)?1.0f:stbir__half_to_float(stbir__float_to_half(p1[a]));
#endif
                            float pp2 = stbir__half_to_float(p2[ch]);
                            float d, thres;

                            // clamp
                            if (pp1<=0.0f) pp1 = 0;
                            if (pp2<=0.0f) pp2 = 0;
                            if (av<=0.0f) av = 0;
                            if (pp1>1.0f) pp1 = 1.0f;
                            if (pp2>1.0f) pp2 = 1.0f;
                            if (av>1.0f) av = 1.0f;

                            thres=(pp1*0.002f)+0.0002f;

                            // compare in premult space
#ifndef COMPARE_SAME
                            if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) )
                            {
                              pp1 *= av;
                              pp2 *= av;
                            }
#endif

                            d = pp1 - pp2;
                            if ( d < 0 ) d = -d;

#ifdef COMPARE_SAME
                            if ( d != 0.0f )
#else
                            if ( d > thres )
#endif
                            {
                              printf("Error at %d x %d (chan %d) %g %g [df: %g th: %g al: %g] (%g %g %g %g) (%g %g %g %g)\n",x,y,ch,
#ifdef COMPARE_SAME
                                     stbir__half_to_float(p1[ch]),
#else
                                     p1[ch],
#endif
                                     stbir__half_to_float(p2[ch]),
                                     d,thres,av,
#ifdef COMPARE_SAME
                                     stbir__half_to_float(p1[0]),stbir__half_to_float(p1[1]),stbir__half_to_float(p1[2]),stbir__half_to_float(p1[3]),
#else
                                     p1[0],p1[1],p1[2],p1[3],
#endif
                                     stbir__half_to_float(p2[0]),stbir__half_to_float(p2[1]),stbir__half_to_float(p2[2]),stbir__half_to_float(p2[3]) );
                              ++nums;
                              if ( nums > 16 ) goto ex;
                              //if (d) exit(1);
                              //goto ex;
                            }
                          }
                        }
                        break;
                      }
                    }

                    // the margin after each row must still hold the fill value
                    for( x = (w*c)*tsizes[oldtypes]; x < op; x++ )
                    {
                      if ( ir1[y*op+x] != 79 )
                      {
                        printf("Margin error at %d x %d %d (should be 79) OLD!\n",x,y,(unsigned char)ir1[y*op+x]);
                        goto ex;
                      }
                    }

                    for( x = (w*c)*tsizes[types]; x < np; x++ )
                    {
                      if ( ir2[y*np+x] != 79 )
                      {
                        printf("Margin error at %d x %d %d (should be 79) NEW\n",x,y,(unsigned char)ir2[y*np+x]);
                        goto ex;
                      }
                    }
                  }

                 ex:
                  ENTER( "OUTPUT IMAGES" );
                  printf(" tot pix: %d, errs: %d\n", w*h*c,nums );

                  if (nums)
                  {
                    stbi_write_png("old.png", w, h, c, ir1, op);
                    stbi_write_png("new.png", w, h, c, ir2, np);
                    exit(1);
                  }

                  LEAVE(); // output images
                }
                LEAVE(); //test compare
#endif
              }
              LEAVE(); // test filter
            }
            LEAVE(); // test edge
          }
          LEAVE(); // test width
        }
        LEAVE(); // test height
      }
      LEAVE(); // test layout
    }
    LEAVE(); // test type
  }

  // release the buffers this function allocated (directly or through the
  // convert8to* helpers); the stb_image buffer in input[0] is left to process exit
  free( ir2 );
  free( ir1 );
  free( input[5] );
  free( input[4] );
  free( input[3] );
  free( input[2] );

  CloseTM();
  return 0;
}