examples/sfexamples/oggvorbiscodec/src/tremor/asm_arm.h

00001 /********************************************************************
00002  *                                                                  *
00003  * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
00004  *                                                                  *
00005  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
00006  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
00007  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
00008  *                                                                  *
00009  * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
00010  * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
00011  *                                                                  *
00012  ********************************************************************
00013 
00014  function: arm7 and later wide math functions
00015 
00016  ********************************************************************/
00017 
00018 #ifdef _ARM_ASSEM_
00019 
00020 #if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
00021 #define _V_WIDE_MATH
00022 
/* MULT32: signed 32x32 -> 64-bit multiply that returns only the upper
 * 32 bits of the product.  SMULL writes the low word to `lo` and the
 * high word to `hi`; `lo` is computed but discarded. */
00023 static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
00024   int lo,hi;
00025   asm volatile("smull\t%0, %1, %2, %3"
00026                : "=&r"(lo),"=&r"(hi)   /* '&': results must not share a register with x or y */
00027                : "%r"(x),"r"(y)        /* '%': x and y may be swapped by the compiler */
00028                : "cc");
00029   return(hi);
00030 }
00031 
/* MULT31: upper word of the 64-bit product x*y (as produced by MULT32),
 * shifted left by one bit. */
00032 static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
00033   const ogg_int32_t high_word = MULT32(x, y);
        return high_word << 1;
00034 }
00035 
/* MULT31_SHIFT15: returns bits [46:15] of the signed 64-bit product x*y,
 * plus one if bit 14 of the product is set (the bit shifted out last is
 * added back in through the carry flag). */
00036 static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
00037   int lo,hi;
00038   asm volatile("smull   %0, %1, %2, %3\n\t"      /* hi:lo = x * y */
00039                "movs    %0, %0, lsr #15\n\t"     /* lo >>= 15 (logical); carry = old bit 14 */
00040                "adc     %1, %0, %1, lsl #17\n\t" /* hi = lo + (hi << 17) + carry */
00041                : "=&r"(lo),"=&r"(hi)
00042                : "%r"(x),"r"(y)
00043                : "cc");
00044   return(hi);
00045 }
00046 
/* MB: compiler-level memory barrier.  The asm body is empty, so no
 * instruction is emitted; the "memory" clobber only stops the compiler
 * from moving or caching memory accesses across this point. */
00047 #define MB() asm volatile ("" : : : "memory")
00048 
/* XPROD32: cross product on the upper 32 bits of 64-bit accumulations.
 *
 *   *x = high word of (a*t + b*v)
 *   *y = high word of (b*t - a*v)
 *
 * a, b, t, v  signed 32-bit inputs
 * x, y        destinations; *x is stored before *y (MB() keeps that order)
 *
 * Operand %3 is the scratch copy of `a`, tied to input %4 so both live in
 * the same register.  It is overwritten with -a by the RSB while %5..%7 are
 * still needed, so it must be early-clobber ('&'); otherwise the compiler
 * is free to place another input that holds the same value in that register
 * and the second multiply pair would read the negated value.
 */
00049 static inline void XPROD32(ogg_int32_t  a, ogg_int32_t  b,
00050                            ogg_int32_t  t, ogg_int32_t  v,
00051                            ogg_int32_t *x, ogg_int32_t *y)
00052 {
00053   int x1, y1, l;
00054   asm(  "smull  %0, %1, %4, %6\n\t"   /* x1:l  = a * t        */
00055         "smlal  %0, %1, %5, %7\n\t"   /* x1:l += b * v        */
00056         "rsb    %3, %4, #0\n\t"       /* a     = -a (in place) */
00057         "smull  %0, %2, %5, %6\n\t"   /* y1:l  = b * t        */
00058         "smlal  %0, %2, %3, %7"       /* y1:l += (-a) * v     */
00059         : "=&r" (l), "=&r" (x1), "=&r" (y1), "=&r" (a)
00060         : "3" (a), "r" (b), "r" (t), "r" (v)
00061         : "cc" );
00062   *x = x1;
00063   MB();
00064   *y = y1;
00065 }
00066 
/* XPROD31: same cross product as the 32-bit variant, with each result
 * shifted left by one bit before it is stored.
 *
 *   *x = (high word of (a*t + b*v)) << 1
 *   *y = (high word of (b*t - a*v)) << 1
 *
 * a, b, t, v  signed 32-bit inputs
 * x, y        destinations; *x is stored before *y (MB() keeps that order)
 *
 * Operand %3 is the scratch copy of `a`, tied to input %4 so both live in
 * the same register.  It is overwritten with -a by the RSB while %5..%7 are
 * still needed, so it must be early-clobber ('&'); otherwise the compiler
 * is free to place another input that holds the same value in that register
 * and the second multiply pair would read the negated value.
 */
00067 static inline void XPROD31(ogg_int32_t  a, ogg_int32_t  b,
00068                            ogg_int32_t  t, ogg_int32_t  v,
00069                            ogg_int32_t *x, ogg_int32_t *y)
00070 {
00071   int x1, y1, l;
00072   asm(  "smull  %0, %1, %4, %6\n\t"   /* x1:l  = a * t        */
00073         "smlal  %0, %1, %5, %7\n\t"   /* x1:l += b * v        */
00074         "rsb    %3, %4, #0\n\t"       /* a     = -a (in place) */
00075         "smull  %0, %2, %5, %6\n\t"   /* y1:l  = b * t        */
00076         "smlal  %0, %2, %3, %7"       /* y1:l += (-a) * v     */
00077         : "=&r" (l), "=&r" (x1), "=&r" (y1), "=&r" (a)
00078         : "3" (a), "r" (b), "r" (t), "r" (v)
00079         : "cc" );
00080   *x = x1 << 1;
00081   MB();
00082   *y = y1 << 1;
00083 }
00084 
/* XNPROD31: cross product with the sign of the b*v / a*v terms flipped
 * relative to XPROD31; each result is shifted left by one bit.
 *
 *   *x = (high word of (a*t - b*v)) << 1
 *   *y = (high word of (b*t + a*v)) << 1
 *
 * y1 (%2) doubles as the scratch register holding -b for the first
 * accumulation; it is then overwritten by the second multiply.  All three
 * outputs are early-clobber, so none of them can share a register with
 * an input.  *x is stored before *y (MB() keeps that order).
 */
00085 static inline void XNPROD31(ogg_int32_t  a, ogg_int32_t  b,
00086                             ogg_int32_t  t, ogg_int32_t  v,
00087                             ogg_int32_t *x, ogg_int32_t *y)
00088 {
00089   int x1, y1, l;
00090   asm(  "rsb    %2, %4, #0\n\t"       /* y1    = -b          */
00091         "smull  %0, %1, %3, %5\n\t"   /* x1:l  = a * t       */
00092         "smlal  %0, %1, %2, %6\n\t"   /* x1:l += (-b) * v    */
00093         "smull  %0, %2, %4, %5\n\t"   /* y1:l  = b * t       */
00094         "smlal  %0, %2, %3, %6"       /* y1:l += a * v       */
00095         : "=&r" (l), "=&r" (x1), "=&r" (y1)
00096         : "r" (a), "r" (b), "r" (t), "r" (v)
00097         : "cc" );
00098   *x = x1 << 1;
00099   MB();
00100   *y = y1 << 1;
00101 }
00102 
00103 #endif
00104 
00105 #ifndef _V_CLIP_MATH
00106 #define _V_CLIP_MATH
00107 
/* CLIP_TO_15: saturate x to the signed 16-bit range [-32768, 32767] and
 * return the result as a 32-bit value.
 *
 * Two details differ from a naive flag test:
 *  - the upper bound uses GE (N == V) rather than PL (N clear), so a very
 *    negative x, for which x - 32768 overflows to a positive value, is not
 *    mistaken for "too large";
 *  - the lower clamp produces -32768 (0xffff8000), not 0x00008000, so the
 *    return value is correct as a 32-bit integer and not only after
 *    truncation to 16 bits.  The low 16 bits are 0x8000 either way.
 *
 * `tmp` only receives the throw-away results of the flag-setting SUBS/ADDS.
 */
00108 static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
00109   int tmp;
00110   asm volatile("subs    %1, %0, #32768\n\t"   /* flags <- x - 32768          */
00111                "movge   %0, #0x7f00\n\t"      /* x >= 32768: x = 0x7f00 ...  */
00112                "orrge   %0, %0, #0xff\n\t"    /*             ... | 0xff      */
00113                "adds    %1, %0, #32768\n\t"   /* flags <- x + 32768          */
00114                "movmi   %0, #0x8000\n\t"      /* x < -32768: x = 0x8000 ...  */
                     "rsbmi   %0, %0, #0"           /*             ... negated     */
00115                : "+r"(x),"=r"(tmp)
00116                :
00117                : "cc");
00118   return(x);
00119 }
00120 
00121 #endif
00122 
00123 #ifndef _V_LSP_MATH_ASM
00124 #define _V_LSP_MATH_ASM
00125 
/* lsp_loop_asm: multiply two running 32-bit products by |ilsp[k] - wi| for
 * every coefficient in ilsp[0..m-1], rescaling as it goes, and keep a common
 * shift count.
 *
 *   qip, pip  in/out: running products; read on entry, written on exit
 *   qexpp     in/out: accumulated right-shift count applied to both products
 *   ilsp      array of m 32-bit coefficients (only read)
 *   wi        value subtracted from each coefficient before taking |.|
 *   m         number of coefficients
 *
 * Steps:
 *   1. Pair loop, walking downwards from ilsp[2*(m>>1)]: for each pair
 *      (ilsp[j], ilsp[j+1]) do qi *= |ilsp[j]-wi| and pi *= |ilsp[j+1]-wi|
 *      as 32x32->64 products.  Unless the sum of the two high words is
 *      zero, both 64-bit products are shifted right by 16 and qexp += 16.
 *   2. If m is odd: qi *= |ilsp[m-1]-wi| and pi *= 0x4000, followed by the
 *      same conditional 16-bit shift.
 *   3. Normalisation: both products are shifted right by a common amount
 *      (added to qexp) chosen so that (qi | pi) fits in 16 bits.
 *
 * Notes on the asm interface:
 *   - The pair loop is entered through its bound check ("b 0f"), so when
 *     m < 2 no pair is loaded and nothing below ilsp[0] is read.
 *   - qi/pi/qexp are written while ilsp, wi and m are still needed, hence
 *     the early-clobber '&' on the read/write operands.
 *   - ilsp[] is read through a pointer the compiler cannot see into, hence
 *     the "memory" clobber so pending stores to the array are not moved
 *     past (or dropped before) the asm.
 *   - r0-r3 are used as hard-coded scratch registers.
 */
00126 static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip,
00127                                 ogg_int32_t *qexpp,
00128                                 ogg_int32_t *ilsp,ogg_int32_t wi,
00129                                 ogg_int32_t m){
00130   
00131   ogg_uint32_t qi=*qip,pi=*pip;
00132   ogg_int32_t qexp=*qexpp;
00133 
00134   asm("mov     r0,%3;"
00135       "mov     r1,%5,asr#1;"
00136       "add     r0,r0,r1,lsl#3;"    // r0 = ilsp + 2*(m>>1) words
            "b       0f;"                // test the bound before the first pair
00137       "1:"
00138       
00139       "ldmdb   r0!,{r1,r3};"       // r1=ilsp[j], r3=ilsp[j+1]; r0 -= 8
00140       "subs    r1,r1,%4;"          // ilsp[j]-wi
00141       "rsbmi   r1,r1,#0;"          // labs(ilsp[j]-wi)
00142       "umull   %0,r2,r1,%0;"       // r2:qi = qi*labs(ilsp[j]-wi)
00143       
00144       "subs    r1,r3,%4;"          // ilsp[j+1]-wi
00145       "rsbmi   r1,r1,#0;"          // labs(ilsp[j+1]-wi)
00146       "umull   %1,r3,r1,%1;"       // r3:pi = pi*labs(ilsp[j+1]-wi)
00147       
00148       "cmn     r2,r3;"             // high words sum to zero? then no shift
00149       "beq     0f;"
00150       "add     %2,%2,#16;"         // qexp += 16
00151       "mov     %0,%0,lsr #16;"     // qi = (r2:qi) >> 16
00152       "orr     %0,%0,r2,lsl #16;"
00153       "mov     %1,%1,lsr #16;"     // pi = (r3:pi) >> 16
00154       "orr     %1,%1,r3,lsl #16;"
00155       "0:"
00156       "cmp     r0,%3;\n"           // more pairs above ilsp[0]?
00157       "bhi     1b;\n"
00158       
00159       // odd filter asymmetry
00160       "ands    r0,%5,#1;\n"
00161       "beq     2f;\n"
00162       "add     r0,%3,%5,lsl#2;\n"
00163       
00164       "ldr     r1,[r0,#-4];\n"       // r1 = ilsp[m-1]
00165       "mov     r0,#0x4000;\n"
00166       
00167       "subs    r1,r1,%4;\n"          // ilsp[m-1]-wi
00168       "rsbmi   r1,r1,#0;\n"          // labs(ilsp[m-1]-wi)
00169       "umull   %0,r2,r1,%0;\n"       // r2:qi = qi*labs(ilsp[m-1]-wi)
00170       "umull   %1,r3,r0,%1;\n"       // r3:pi = pi*0x4000
00171       
00172       "cmn     r2,r3;\n"             // high words sum to zero? then no shift
00173       "beq     2f;\n"
00174       "add     %2,%2,#16;\n"
00175       "mov     %0,%0,lsr #16;\n"
00176       "orr     %0,%0,r2,lsl #16;\n"
00177       "mov     %1,%1,lsr #16;\n"
00178       "orr     %1,%1,r3,lsl #16;\n"
00179       
00185          
00186       /* normalize to max 16 sig figs */
00187       "2:"
00188       "mov     r2,#0;"               // r2 = shift count
00189       "orr     r1,%0,%1;"            // r1 = qi|pi
00190       "tst     r1,#0xff000000;"
00191       "addne   r2,r2,#8;"
00192       "movne   r1,r1,lsr #8;"
00193       "tst     r1,#0x00f00000;"
00194       "addne   r2,r2,#4;"
00195       "movne   r1,r1,lsr #4;"
00196       "tst     r1,#0x000c0000;"
00197       "addne   r2,r2,#2;"
00198       "movne   r1,r1,lsr #2;"
00199       "tst     r1,#0x00020000;"
00200       "addne   r2,r2,#1;"
00201       "movne   r1,r1,lsr #1;"
00202       "tst     r1,#0x00010000;"
00203       "addne   r2,r2,#1;"
00204       "mov     %0,%0,lsr r2;"        // apply the common shift
00205       "mov     %1,%1,lsr r2;"
00206       "add     %2,%2,r2;"
00207       
00208       : "+&r"(qi),"+&r"(pi),"+&r"(qexp)
00209       : "r"(ilsp),"r"(wi),"r"(m)
00210       : "r0","r1","r2","r3","cc","memory");
00211   
00212   *qip=qi;
00213   *pip=pi;
00214   *qexpp=qexp;
00215 }
00216 
/* lsp_norm_asm: shift *qip left in steps of 8, 4, 2 and 1 bits, subtracting
 * each applied shift from *qexpp.
 *
 * Each step tests a group of bits at the top of the low half-word
 * (bits 15..8, 15..12, 15..14, then bit 15) and shifts only when that
 * group is entirely zero.  Bits above bit 15 are never tested.
 *
 *   qip    in/out: value to left-justify
 *   qexpp  in/out: exponent, decremented by the total shift applied
 *
 * MOVEQ/SUBEQ do not update the flags, so both act on the result of the
 * preceding TST.
 */
00217 static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){
00218 
00219   ogg_uint32_t qi=*qip;
00220   ogg_int32_t qexp=*qexpp;
00221 
00222   asm("tst     %0,#0x0000ff00;"   /* bits 15..8 all clear?  */
00223       "moveq   %0,%0,lsl #8;"
00224       "subeq   %1,%1,#8;"
00225       "tst     %0,#0x0000f000;"   /* bits 15..12 all clear? */
00226       "moveq   %0,%0,lsl #4;"
00227       "subeq   %1,%1,#4;"
00228       "tst     %0,#0x0000c000;"   /* bits 15..14 all clear? */
00229       "moveq   %0,%0,lsl #2;"
00230       "subeq   %1,%1,#2;"
00231       "tst     %0,#0x00008000;"   /* bit 15 clear?          */
00232       "moveq   %0,%0,lsl #1;"
00233       "subeq   %1,%1,#1;"
00234       : "+r"(qi),"+r"(qexp)
00235       :
00236       : "cc");
00237   *qip=qi;
00238   *qexpp=qexp;
00239 }
00240 
00241 #endif
00242 #endif
00243 

Generated by  doxygen 1.6.2