#include #include #include #include #if !defined(M_PI) # define M_PI (3.1415926535897932384626433832795) #endif /*This program computes a set of quantization matrices using the same CSF functions that were used to generate the CSF filters in the psychovisual model. The main difference is that instead of a full FIR filter, only a single scale factor for each coefficient is derived. The same quantization matrices are used for both inter and intra modes. The reason is that human sensitivity to errors is the same regardless of what mode happened to be used to code a macro block. This assumption ignores statistical correlation between the use of intra mode and scene changes or other high frequency temporal effects which could actually have an effect on perception, because our current psychovisual model does not take these into account.*/ /*The number of phases to sample at each frequency.*/ #define NPHASES (16) /*The number of frequencies to sample (in the interval [0,pi)).*/ #define NFREQS (1024) /*The magnitude of each DCT basis vector. For a non-orthogonal transform, one per coefficient will be needed. The Theora DCT function scales its ouput by 2 in each dimension.*/ #define DCT_SCALE (2.0) /*The relative sensitivity to changes in each color channel. This is derived by making small changes in each color channel individually and measuring the distance to the new color in L*a*b space. A linear regression with an intercept of 0 was then fit to the measured distances. These are the slopes of the fit lines. See colorreg.c for details. These values depend on the particular color space one is using, but we'd like to derive generic quantization matrices. Therefore we use the geometric average over all color spaces (currently only two). Minor mismatches in the fit of the matrices to actual CSF curves will not harm the encoder, since it uses adaptive thresholding and amplitude reduction based on per-coefficient sensitivity measurements.*/ const float OC_YCbCr_SCALE[3]={ 0.62660577006129702908501F, 1.33082085846485050021660F,1.08759046924374083944404F /*JND values: 1.59589976310332425729864F, 0.75141593523980064237395F,0.91946373959616699828994F */ }; /*cos(n*pi/2) (resp. sin(m*pi/2)) accurate to 16 bits.*/ #define OC_C1S7D (64277.0/65536) #define OC_C2S6D (60547.0/65536) #define OC_C3S5D (54491.0/65536) #define OC_C4S4D (46341.0/65536) #define OC_C5S3D (36410.0/65536) #define OC_C6S2D (25080.0/65536) #define OC_C7S1D (12785.0/65536) /*A floating-point implementation of the Theora DCT. Here we pretend that the transform is linear, and ignore the non-linearities introduced by rounding and truncation in the integer version.*/ static void fdct8d(double _y[8],double _x[8]){ double t[8]; double u; double v; t[0]=_x[0]+_x[7]; t[1]=_x[1]+_x[2]; t[2]=_x[3]+_x[4]; t[3]=_x[5]+_x[6]; t[4]=_x[5]-_x[6]; t[5]=_x[3]-_x[4]; t[6]=_x[1]-_x[2]; t[7]=_x[0]-_x[7]; /*Butterflies for ouputs 0 and 4.*/ u=t[0]+t[2]; v=t[1]+t[3]; _y[0]=OC_C4S4D*(u+v); _y[4]=OC_C4S4D*(u-v); /*Apply rotation for outputs 2 and 6.*/ u=t[0]-t[2]; v=t[6]-t[4]; _y[2]=OC_C2S6D*u+OC_C6S2D*v; _y[6]=OC_C6S2D*u-OC_C2S6D*v; /*Compute some common terms.*/ t[3]=OC_C4S4D*(t[1]-t[3]); t[4]=-OC_C4S4D*(t[6]+t[4]); /*Apply rotation for outputs 1 and 7.*/ u=t[7]+t[3]; v=t[4]-t[5]; _y[1]=OC_C1S7D*u-OC_C7S1D*v; _y[7]=OC_C7S1D*u+OC_C1S7D*v; /*Apply rotation for outputs 3 and 5.*/ u=t[7]-t[3]; v=t[4]+t[5]; _y[3]=OC_C3S5D*u-OC_C5S3D*v; _y[5]=OC_C5S3D*u+OC_C3S5D*v; } /*A Contrast Sensitivity Function.*/ typedef double (*csf_func)(const void *_ctx,double _f); /*A set of CSF filters, one for each DCT coefficient.*/ typedef double csf_filter_bank[8]; /*Finds a filter bank for a given CSF function. _filters: Will contain the optimal filter bank. _csf: The CSF function. _csf_ctx: The parameters of the CSF function. _scratch: An NFREQS*NPHASES*8 array of scratch space.*/ static void find_csf_filters(csf_filter_bank _filters,csf_func _csf, const void *_csf_ctx){ double ata[8]; double atb[8]; double x[8]; double y[8]; int row; int f; int p; int c; int i; memset(ata,0,sizeof(ata)); memset(atb,0,sizeof(atb)); /*Find the response of each coefficient for each frequency and phase, and the expected response of CSF-sensitive coefficients.*/ for(row=f=0;fpeakf?1: _ctx->s*(_ctx->a1*(_f*_f)*exp(_ctx->b1*pow(_f,_ctx->c1))+ _ctx->a2*exp(_ctx->b2*pow(_f,_ctx->c2))); } /*The actual luma CSF parameters.*/ static const csfl_ctx Y_CTX={ 3.74695975597140362, 0.999941996555815748, 0.9973,0.221, -0.9699,-0.800, 0.7578,1.999 }; /*The chroma CSF function context.*/ struct csfc_ctx{ /*The actual parameters from Table 5.4 on page 83 of Nadenau's thesis.*/ double a1; double b1; double c1; }; /*The chroma CSF function.*/ static double csfc(csfc_ctx *_ctx,double _f){ return _ctx->a1*exp(_ctx->b1*pow(_f,_ctx->c1)); } /*The actual Cr CSF parameters.*/ static const csfc_ctx CR_CTX={ 1.000,-0.1521,0.893 }; /*The actual Cb CSF parameters.*/ static const csfc_ctx CB_CTX={ 1.000,-0.2041,0.900 }; /*A general contrast-dependent CSF function context.*/ struct csf_ctx{ /*The threshold-level CSF function.*/ csf_func c0csf; /*The threshold-level CSF function parameters.*/ const void *c0csf_ctx; /*The Nyquist frequency of this channel.*/ double fmax; /*The frequency where the threshold-level CSF funtion attains its maximum.*/ double peakf; /*A scale parameter used to normalize the curve to have a maximum of 1.0.*/ double scale; /*The contrast level.*/ double contrast; }; /*The exponent parameter in the supra-threshold model. Values between 0.4 and 0.6 are reasonable.*/ #define CONTRAST_EXP (0.5) /*The scale parameter of the contrast level. Cannon does not report values for this parameter, and it would differ for us anyway since we do not use Michelson contrast ((L_max-L_min)/(L_max+L_min)). It must be sufficiently small so that physical contrast does not actually decrease as perceptual contrast increases.*/ #define CONTRAST_SCALE (1.0/64) /*The main contrast-dependent CSF function.*/ static double csf(csf_ctx *_ctx,double _f){ double f; double s0; f=_f*_ctx->fmax; s0=_ctx->c0csf(_ctx->c0csf_ctx,f); return _ctx->scale/(_ctx->contrast+1/s0); } /*This is the Nyquist frequency for a 45 dpi screen viewed from 50 cm out. This is assumed to be the closest reasonable viewing distance.*/ #define FREQUENCY_MAX (7.558704385696589) /*Builds the CSF filter banks for each channel and contrast level.*/ static void build_csf_tables(csf_filter_bank _filters[5][64], double _qscale[64]){ csf_ctx csf_ctxs[5]={ { (csf_func)csfl, &Y_CTX, FREQUENCY_MAX, 3.74695975597140362, }, { (csf_func)csfc, &CR_CTX, FREQUENCY_MAX, 0, }, { (csf_func)csfc, &CR_CTX, 0.5*FREQUENCY_MAX, 0, }, { (csf_func)csfc, &CB_CTX, FREQUENCY_MAX, 0, }, { (csf_func)csfc, &CB_CTX, 0.5*FREQUENCY_MAX, 0, } }; int chi; int qi; for(chi=0;chi<5;chi++){ for(qi=0;qi<64;qi++){ csf_ctxs[chi].contrast=pow(_qscale[qi]*CONTRAST_SCALE,1.0/CONTRAST_EXP); /*Find the normalization constant for this contrast level.*/ csf_ctxs[chi].scale=csf_ctxs[chi].contrast+ 1/csf_ctxs[chi].c0csf(csf_ctxs[chi].c0csf_ctx,csf_ctxs[chi].peakf); find_csf_filters(_filters[chi][qi],(csf_func)csf,csf_ctxs+chi); } } } /*The resolution at which the base matrices are sampled over the range of qi values.*/ #define NQRANGES (3) int main(void){ static const int QSIZES[NQRANGES]={15,16,32}; static const int QOFFS[NQRANGES+1]={0,15,31,63}; /*The names of each pixel format.*/ static const char *FORMAT_NAMES[4]={ "4:4:4 subsampling", "4:2:2 subsampling", "subsampling in the Y direction", "4:2:0 subsampling" }; /*The names of each color plane.*/ static const char *CHANNEL_NAMES[3]={ "Y'", "Cb", "Cr" }; csf_filter_bank filters[5][64]; double fbase_matrices[9][64][64]; unsigned char base_matrices[9][64][64]; int ac_scale[64]; int dc_scale[64]; double qscales[64]; double bscale[NQRANGES+1][2]; double bmax[NQRANGES+1][2]; double bmin[NQRANGES+1][2]; int pci; int pfi; int qti; int pli; int chx; int chy; int qri; int qi; int ci; for(qi=0;qi<64;qi++){ qscales[qi]=qi>=63?0.5F:1.5*pow(2,0.0625*(64-qi)); ac_scale[qi]=65535; dc_scale[qi]=65535; } /*Compute all the CSF filters.*/ build_csf_tables(filters,qscales); /*Build the base matrices.*/ for(pci=0;pci<9;pci++){ if(pci==0)chx=chy=pli=pfi=0; else{ pli=(pci-1>>2)+1; pfi=pci-1&3; chx=(pli<<1)-1+(pfi&1); chy=(pli<<1)-1+(pfi>>1); } for(qi=0;qi<64;qi++)for(ci=0;ci<64;ci++){ fbase_matrices[pci][qi][ci]= 1/(OC_YCbCr_SCALE[pli]*filters[chx][qi][ci&7]*filters[chy][qi][ci>>3]); } } /*Scale the base matrices to be in the range of a single byte.*/ for(qri=0;qri<=NQRANGES;qri++){ qi=QOFFS[qri]; bmax[qri][0]=bmax[qri][1]=1; bmin[qri][0]=bmin[qri][1]=65536*256; for(pci=0;pci<9;pci++)for(ci=0;ci<64;ci++){ if(fbase_matrices[pci][qi][ci]>bmax[qri][ci!=0]){ bmax[qri][ci!=0]=fbase_matrices[pci][qi][ci]; } if(fbase_matrices[pci][qi][ci]255/bmax[qri][ci])scale=255/bmax[qri][ci]; bscale[qri][ci]=scale; } } /*Quantize the base matrices to integers at our sample points.*/ for(pci=0;pci<9;pci++){ for(qri=0;qri<=NQRANGES;qri++){ qi=QOFFS[qri]; for(ci=0;ci<64;ci++){ base_matrices[pci][qi][ci]=(unsigned char)( fbase_matrices[pci][qi][ci]*bscale[qri][ci!=0]+0.5); } } } /*Interpolate the remaining base matrices.*/ for(pci=0;pci<9;pci++){ for(qri=0;qri0;){ /*In general, we want to slightly underestimate the scale factor, so we take the minimum scale factor over all color channels and coefficients.*/ for(pci=0;pci<9;pci++){ int scale; scale=(int)(fbase_matrices[pci][qi][0]*qscales[qi]* (100/(DCT_SCALE*DCT_SCALE))/base_matrices[pci][qi][0]); if(scale<1)scale=1; if(scale=dc_scale[qi+1]){ base_matrices[pci][qi][0]++; } else dc_scale[qi]++; } for(ci=1;ci<64;ci++){ while(base_matrices[pci][qi][ci]=ac_scale[qi+1]){ base_matrices[pci][qi][ci]++; } else ac_scale[qi]++; } } } } } /*Print the quantization matrices and their integer, interpolated approximations.*/ #if 0 for(pci=0;pci<9;pci++)for(ci=0;ci<64;ci++){ fprintf(stderr,"[%i][%i]=",pci,ci); for(qi=0;qi<64;qi++){ if(!(qi&3))fprintf(stderr,"\n "); fprintf(stderr,"%3i-%5.2lf", (base_matrices[pci][qi][ci]*(ci==0?dc_scale[qi]:ac_scale[qi])/100)<<2, fbase_matrices[pci][qi][ci]*qscales[qi]); if(qi<63)fprintf(stderr,","); } fprintf(stderr,"\n\n"); } #endif /*Check to make sure that the coefficients always decrease as the qi rises. This is assumed by the current encoder.*/ #if 0 /*First check to make sure it is not a flaw in the computed matrices themselves.*/ for(pci=0;pci<9;pci++)for(qi=0;qi<63;qi++)for(ci=0;ci<64;ci++){ if(fbase_matrices[pci][qi][ci]*qscales[qi]< fbase_matrices[pci][qi+1][ci]*qscales[qi+1]){ fprintf(stderr,"Monotonicity failure: [%i][%i][%i]-[%i][%i][%i]: %lf\n", pci,qi,ci,pci,qi+1,ci, fbase_matrices[pci][qi+1][ci]*qscales[qi+1]- fbase_matrices[pci][qi][ci]*qscales[qi]); } } /*Then check the approximations.*/ for(pci=0;pci<9;pci++)for(qi=0;qi<63;qi++){ if(base_matrices[pci][qi][0]*dc_scale[qi]< base_matrices[pci][qi+1][0]*dc_scale[qi+1]){ fprintf(stderr,"Monotonicity failure: [%i][%i][0]-[%i][%i][0]\n", pci,qi,pci,qi+1); } for(ci=1;ci<64;ci++){ if(base_matrices[pci][qi][ci]*ac_scale[qi]< base_matrices[pci][qi+1][ci]*ac_scale[qi+1]){ fprintf(stderr,"Monotonicity failure: [%i][%i][%i]-[%i][%i][%i]\n", pci,qi,ci,pci,qi+1,ci); } } } #endif /*Dump the generated tables as C code.*/ printf("static const int OC_DEF_QRANGE_SIZES[%i]={",NQRANGES); for(qri=0;qri>2)+1; pfi=pci-1&3; chx=(pli<<1)-1+(pfi&1); chy=(pli<<1)-1+(pfi>>1); } printf("\n /*%s matrices, %s.*/",CHANNEL_NAMES[pli],FORMAT_NAMES[pfi]); printf("\n {"); for(qri=0;qri<=NQRANGES;qri++){ qi=QOFFS[qri]; printf("\n /*qi=%i.*/",qi); printf("\n {"); for(ci=0;ci<64;ci++){ if(!(ci&7))printf("\n "); printf("%3i",base_matrices[pci][qi][ci]); if(ci<63)printf(","); } printf("\n }"); if(qri