Hi there Daniel and other Debian developers. I'm not sure if you know about this,
but there is a patch for the mga driver that adds XVideo extension support for Matrox Millennium II cards (both PCI and AGP versions). The homepage for this project is at http://www.penguintown.net/~gorlik/millennium.html
I hope that the Debian X server can include this, ideally before sarge. I think it has a DFSG-compatible (Debian Free Software Guidelines) license. There is some information about it on Freshmeat.
The version stands at rc1, I think. The patch is meant for the 4.2.0 source tree.
 
Many regards,
Safir Secerovic
Linux Users Group of Bosnia and Herzegovina
www.linux.org.ba


Do you Yahoo!?
Yahoo! Small Business $15K Web Design Giveaway - Enter today
62a63,66
> static XF86VideoAdaptorPtr MGASetupImageVideoILOAD(ScreenPtr pScreen);
> static int MGAPutImageILOAD(ScrnInfoPtr pScrn, short src_x, short src_y, 
> short drw_x, short drw_y,
>   short src_w, short src_h, short drw_w, short drw_h, int id, unsigned char* 
> buf, short width, short height, 
>   Bool Sync, RegionPtr clipBoxes, pointer data);
78c82,83
<        ((pMga->Chipset == PCI_CHIP_MGAG200) ||
---
>        ((pMga->Chipset == PCI_CHIP_MGA2164) ||
>         (pMga->Chipset == PCI_CHIP_MGAG200) ||     
83c88,98
<       if((pMga->Overlay8Plus24 || pMga->TexturedVideo) &&
---
>       if( (pMga->Chipset == PCI_CHIP_MGA2164) ) {
>       xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Using MGA 2164W ILOAD video\n");
>       xf86DrvMsg(pScrn->scrnIndex, X_INFO, 
>       "This is an experimenteal driver and may not work on your machine.\n");
>       xf86DrvMsg(pScrn->scrnIndex, X_INFO, 
>       "YUV to RGB color space conversion and horizontal scaling will be 
> hardware accelerated.\n");
>       newAdaptor = MGASetupImageVideoILOAD(pScreen);
>       pMga->TexturedVideo = TRUE; 
>       /* ^^^ this is not really true but the ILOAD scaler shares 
>       much more code with the textured video than the overlay */
>       } else if((pMga->Overlay8Plus24 || pMga->TexturedVideo) &&
1279a1295,2006
> 
> /* Matrox MGA 2164W Xv extension support.
> *  The extension is implemented as a HOST->FB image load in YUV format. 
> *  I decided not to use the real hardware overlay since on the Millennium II
> *  it would limit the size of the frame buffer to 4 MB (even on a 16 MB
> *  card) due to a hardware limitation.
> *  Author: Gabriele Gorla ([EMAIL PROTECTED])
> *  Based on the MGA-Gxxx Xv extension by: Mark Vojkovich
>    */
> 
> /* This code is still in alpha stage. Only YUV->RGB conversion
>    and horizontal scaling are hardware accelerated.
>    All 4 FOURCC formats supported by X should be supported.
>    It has been tested only on my DEC DPW 500a at 1400x1050x32 under
>    linux 2.4.18 with XFree86 4.2.0
> 
>    Bug reports and success/failure stories are greatly appreciated.
> */
> 
> /* #define DEBUG_MGA2164 */
> #define CUSTOM_MEMCOPY
> #define MGA2164_SWFILTER
> 
> 
> /*
>  * Fill in the Xv adaptor record for the Millennium II ILOAD video path.
>  *
>  * The record comes from MGAAllocAdaptor().  Images are delivered through
>  * PutImage (MGAPutImageILOAD); the PutVideo/PutStill/GetVideo/GetStill
>  * hooks are left NULL and no port attributes are advertised.
>  *
>  * Returns the adaptor record.
>  */
> static XF86VideoAdaptorPtr
> MGASetupImageVideoILOAD(ScreenPtr pScreen)
> {
>     ScrnInfoPtr scrn = xf86Screens[pScreen->myNum];
>     MGAPtr mga = MGAPTR(scrn);
>     XF86VideoAdaptorPtr adaptor = MGAAllocAdaptor(scrn, FALSE);
> 
>     /* identification */
>     adaptor->name = "Matrox Millennium II ILOAD Video Engine";
>     adaptor->type = XvWindowMask | XvInputMask | XvImageMask;
>     adaptor->flags = 0;
> 
>     /* encodings, formats, ports */
>     adaptor->pEncodings = &DummyEncoding[1];
>     adaptor->nEncodings = 1;
>     adaptor->pFormats = Formats;
>     adaptor->nFormats = NUM_FORMATS;
>     adaptor->nPorts = MGA_MAX_PORTS;
> 
>     /* no port attributes */
>     adaptor->nAttributes = 0;
>     adaptor->pAttributes = NULL;
> 
>     /* number of supported color formats */
>     adaptor->nImages = 4;
>     adaptor->pImages = Images;
> 
>     /* live video / still capture are not provided */
>     adaptor->GetStill = NULL;
>     adaptor->GetVideo = NULL;
>     adaptor->PutStill = NULL;
>     adaptor->PutVideo = NULL;
> 
>     /* callbacks */
>     adaptor->PutImage = MGAPutImageILOAD;
>     adaptor->StopVideo = MGAStopVideo;
>     adaptor->QueryBestSize = MGAQueryBestSize;
>     adaptor->QueryImageAttributes = MGAQueryImageAttributes;
>     adaptor->GetPortAttribute = MGAGetPortAttributeTexture;
>     adaptor->SetPortAttribute = MGASetPortAttributeTexture;
> 
>     REGION_INIT(pScreen, &(mga->portPrivate->clip), NullBox, 0);
> 
>     return adaptor;
> }
> 
> /*
>  * Interleave one scanline of planar Y, U and V samples into packed 32-bit
>  * words and store them at fb_ptr.
>  *
>  * Each loop iteration reads two words of luma (8 samples) and one word each
>  * of U and V (4 samples), and writes four output words laid out as
>  *     bits  0-7  : even luma sample
>  *     bits  8-15 : U
>  *     bits 16-23 : odd luma sample
>  *     bits 24-31 : V
>  * src_w/8 iterations are performed.
>  *
>  * The word-at-a-time access pattern was chosen for Alpha (ev56); it may
>  * also suit other load/store RISC architectures but was never tested
>  * elsewhere.
>  */
> static void CopyMungedScanline_AXP(unsigned int *fb_ptr, short src_w,
>       unsigned int *tsp, unsigned int *tpu, unsigned int *tpv)
> {
>   unsigned int groups = src_w/8;
> 
>   while(groups--) {
>     unsigned int luma_lo = tsp[0];
>     unsigned int luma_hi = tsp[1];
>     unsigned int cb = *tpu++;
>     unsigned int cr = *tpv++;
> 
>     tsp += 2;
> 
>     fb_ptr[0] = (luma_lo & 0xff) | (((luma_lo >> 8) & 0xff) << 16)
>               | ((cb & 0xff) << 8) | ((cr & 0xff) << 24);
>     fb_ptr[1] = ((luma_lo >> 16) & 0xff) | (((luma_lo >> 24) & 0xff) << 16)
>               | (((cb >> 8) & 0xff) << 8) | (((cr >> 8) & 0xff) << 24);
>     fb_ptr[2] = (luma_hi & 0xff) | (((luma_hi >> 8) & 0xff) << 16)
>               | (((cb >> 16) & 0xff) << 8) | (((cr >> 16) & 0xff) << 24);
>     fb_ptr[3] = ((luma_hi >> 16) & 0xff) | (((luma_hi >> 24) & 0xff) << 16)
>               | (((cb >> 24) & 0xff) << 8) | (((cr >> 24) & 0xff) << 24);
> 
>     fb_ptr += 4;
>   }
> }
> 
> /*
>  * Convert one scanline of planar YUV into packed 32-bit words
>  * (Y_even | U<<8 | Y_odd<<16 | V<<24), blending the luma of two source
>  * rows and optionally averaging neighbouring samples horizontally.
>  *
>  *   fb_ptr      destination for the packed words
>  *   src_w       scanline width; xds*src_w/8 groups of 8 luma samples are
>  *               read, and each group yields 4/xds output words
>  *   tsp1, tsp2  luma of the two rows to blend
>  *   tpu1, tpv1  chroma of the first row (copied unfiltered)
>  *   tpu2, tpv2  accepted for symmetry, currently unused
>  *   beta        weight of the second row; the first row is weighted with
>  *               0xff - beta and the sum is divided by 256
>  *   xds         horizontal decimation factor: 1, 2 or 4.  With any other
>  *               value the input is consumed but nothing is written.
>  *
>  * The intermediate sample arrays are unsigned: a chroma value >= 0x80 is
>  * shifted into bits 24-31, and doing that on a signed int is undefined
>  * behaviour.
>  */
> static void CopyMungedScanlineFilter_AXP(unsigned int *fb_ptr, short src_w,
>       unsigned int *tsp1, unsigned int *tpu1, unsigned int *tpv1,
>       unsigned int *tsp2, unsigned int *tpu2, unsigned int *tpv2, int beta,
>       int xds )
> {
>   unsigned int k,y0_1,y1_1,y0_2,y1_2,u,v;
>   unsigned int yf[8], uf[4], vf[4];
>   unsigned int i;
>   int oneminbeta = 0xff - beta;
> 
>   (void)tpu2;
>   (void)tpv2;
> 
>   for(k=xds*src_w/8;k;k--) {
>     y0_1=*tsp1; y1_1=*(tsp1+1);
>     y0_2=*tsp2; y1_2=*(tsp2+1);
>     u=*tpu1;  v=*tpv1;
> 
>     tsp1+=2; tsp2+=2; tpu1++; tpv1++;
> 
>     /* vertical blend of the 8 luma samples, one byte lane at a time */
>     for(i=0;i<4;i++) {
>       unsigned int sh = 8*i;
>       yf[i]   = (((y0_1>>sh)&0xff)*oneminbeta + ((y0_2>>sh)&0xff)*beta )>>8;
>       yf[i+4] = (((y1_1>>sh)&0xff)*oneminbeta + ((y1_2>>sh)&0xff)*beta )>>8;
>       /* FIXME: there is still no filtering on u and v */
>       uf[i] = (u>>sh)&0xff;
>       vf[i] = (v>>sh)&0xff;
>     }
> 
>     switch(xds) {
>     case 1:   /* no horizontal decimation: 8 luma samples -> 4 words */
>       fb_ptr[0] = yf[0] | (yf[1]<<16) | (vf[0]<<24) | (uf[0]<<8);
>       fb_ptr[1] = yf[2] | (yf[3]<<16) | (vf[1]<<24) | (uf[1]<<8);
>       fb_ptr[2] = yf[4] | (yf[5]<<16) | (vf[2]<<24) | (uf[2]<<8);
>       fb_ptr[3] = yf[6] | (yf[7]<<16) | (vf[3]<<24) | (uf[3]<<8);
>       fb_ptr+=4;
>       break;
> 
>     case 2:   /* average pairs: 8 luma samples -> 2 words */
>       fb_ptr[0] = (yf[0]+yf[1])/2 | (((yf[2]+yf[3])/2)<<16) |
>                   (((vf[0]+vf[1])/2)<<24) | (((uf[0]+uf[1])/2)<<8);
>       fb_ptr[1] = (yf[4]+yf[5])/2 | (((yf[6]+yf[7])/2)<<16) |
>                   (((vf[2]+vf[3])/2)<<24) | (((uf[2]+uf[3])/2)<<8);
>       fb_ptr+=2;
>       break;
> 
>     case 4:   /* average quads: 8 luma samples -> 1 word */
>       fb_ptr[0] = (yf[0]+yf[1]+yf[2]+yf[3])/4 |
>                   (((yf[4]+yf[5]+yf[6]+yf[7])/4)<<16) |
>                   (((vf[0]+vf[1]+vf[2]+vf[3])/4)<<24) |
>                   (((uf[0]+uf[1]+uf[2]+uf[3])/4)<<8);
>       fb_ptr+=1;
>       break;
> 
>     default:  /* other factors (e.g. 8) are not implemented */
>       break;
>     }
>   }
> }
> 
> /*
>  * Convert one scanline of planar YUV into packed 32-bit words
>  * (Y_even | U<<8 | Y_odd<<16 | V<<24), using the average of two source
>  * rows for every luma sample.
>  *
>  *   fb_ptr      destination for the packed words
>  *   src_w       scanline width in luma samples; src_w/8 groups of 8
>  *               samples are processed, each producing 4 output words
>  *   tsp1, tsp2  luma of the two rows to average
>  *   tpu1, tpv1  chroma of the first row (copied unfiltered)
>  *   tpu2, tpv2, beta, xds
>  *               accepted so the signature matches
>  *               CopyMungedScanlineFilter_AXP; currently unused
>  *
>  * Fixes relative to the first version of this routine:
>  *   - the top byte of each luma word is extracted with 0xff000000 (the
>  *     old 0x000000ff mask followed by >>24 always produced 0);
>  *   - the sum of two 8-bit samples is halved (>>1); the old >>8 reduced
>  *     every sample to 0 or 1.
>  */
> static void CopyMungedScanlineFilterDown_AXP(unsigned int *fb_ptr, short
>       src_w,
>       unsigned int *tsp1, unsigned int *tpu1, unsigned int *tpv1,
>       unsigned int *tsp2, unsigned int *tpu2, unsigned int *tpv2, int beta ,
>       int xds)
> {
>   unsigned int k,y0_1,y1_1,y0_2,y1_2,u,v;
>   unsigned int yf[8];
>   unsigned int i;
> 
>   (void)tpu2;
>   (void)tpv2;
>   (void)beta;
>   (void)xds;
> 
>   for(k=src_w/8;k;k--) {
>     y0_1=*tsp1; y1_1=*(tsp1+1);
>     y0_2=*tsp2; y1_2=*(tsp2+1);
>     u=*tpu1;  v=*tpv1;
> 
>     tsp1+=2; tsp2+=2; tpu1++; tpv1++;
> 
>     /* average the two rows, one byte lane at a time */
>     for(i=0;i<4;i++) {
>       unsigned int sh = 8*i;
>       yf[i]   = (((y0_1>>sh)&0xff) + ((y0_2>>sh)&0xff))>>1;
>       yf[i+4] = (((y1_1>>sh)&0xff) + ((y1_2>>sh)&0xff))>>1;
>     }
> 
>     *(fb_ptr)=(yf[0]) | (yf[1]<<16) |
>       (v&0x000000ff)<<24 | (u&0x000000ff)<<8;
>     *(fb_ptr+1)=(yf[2]) | (yf[3]<<16) |
>       (v&0x0000ff00)<<16 | (u&0x0000ff00);
>     *(fb_ptr+2)=(yf[4]) | (yf[5]<<16) |
>       (v&0x00ff0000)<<8 | (u&0x00ff0000)>>8;
>     *(fb_ptr+3)=(yf[6]) | (yf[7]<<16) |
>       (v&0xff000000) | (u&0xff000000)>>16;
> 
>     fb_ptr+=4;
>   }
> }
> 
> static void MGACopyScaledILOAD(
>                              ScrnInfoPtr pScrn,
>                              int id, unsigned char *buf,
>                              BoxPtr pbox,
>                              int width, int height, int pitch,
>                              short src_x, short src_y,
>                              short src_w, short src_h,
>                              short drw_x, short drw_y,
>                              short drw_w, short drw_h
>                              )
> { /*
>    * Draw the part of an Xv image that lies inside one clip box (pbox),
>    * issuing one ILOAD operation per destination scanline.
>    *
>    * For every destination line the drawing registers are reprogrammed and
>    * the matching source line is converted to packed 32-bit words that are
>    * written through pMga->ILOADBase.  When the source is wider than the
>    * destination, 2 or 4 neighbouring samples are averaged in software
>    * (xds); AR2 receives a 16.16 fixed-point ratio of the decimated source
>    * width to the destination width.
>    *
>    *   id            FOURCC of the image: YV12, I420, UYVY or YUY2.  Any
>    *                 other value still programs the registers but sends
>    *                 no pixel data.
>    *   buf           start of the image data
>    *   width,height  dimensions of the full source image
>    *   pitch         unused
>    *   src_*         source rectangle inside the image
>    *   drw_*         destination rectangle on screen
>    *
>    * NOTE(review): (drw_w-1) and drw_h are used as divisors below, so
>    * drw_w == 1 or drw_h == 0 would divide by zero - confirm callers never
>    * pass such sizes.
>    * NOTE(review): when xds is 8 neither copy path writes any data for the
>    * line (both switch statements end in an empty default).
>    */
>   MGAPtr pMga = MGAPTR(pScrn);
>   unsigned int *fb_ptr;
>   unsigned char *ubuf, *vbuf, *tbuf;
>   unsigned int *pu, *pv;
>   int k,l, pl, dl, xds, yds;
>   short box_h;
>   short scr_pitch = ( pScrn->virtualX + 15) & ~15;  /* virtual width rounded up to a multiple of 16 */
>   
> #ifdef DEBUG_MGA2164
>   char sbuf[255];
>   
>   sprintf(sbuf,"---- PBOX: x1=%d y1=%d w=%d h=%d (x2=%d y2=%d)\n",
>         pbox->x1,pbox->y1,pbox->x2-pbox->x1,pbox->y2-pbox->y1,
>         pbox->x2,pbox->y2);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
>   
>   sprintf(sbuf,"in src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
>         src_x,src_y,src_w,src_h);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
>   sprintf(sbuf,"in drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
>         drw_x,drw_y,drw_w,drw_h);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
>   
>   /* scaling yuv->rgb */
>   
>   /* hack to force width and src image to be 8 pixel aligned */
>   src_x&=~0x7;
>   src_w&=~0x7; 
>   
>   box_h=pbox->y2-pbox->y1;  /* number of destination lines to draw */
>   
>   if(src_w>drw_w) {  /* shrinking: pick the software decimation factor 2, 4 or 8 */
>     if(src_w/2<drw_w) { 
>       xds=2;
>     } else if(src_w/4<drw_w) {
>       xds=4;
>     } else { xds=8; }
>   } else xds = 1;
> 
>   /* prevent crashing when dragging window outside left boundary of screen */
>   /* FIXME: need to implement per pixel left start to avoid undesired effects 
> when dragging 
>      window outside left screen boundary */
> 
>   if(drw_x<0) {
>     src_x=( -(drw_x*src_w)/drw_w + 0x7)&~0x7;  /* hidden part mapped to source pixels, rounded up to a multiple of 8 */
>     src_w-=src_x;
>     drw_w+=drw_x;
>     drw_x=0; 
>   }
> 
>   src_w/=xds;  /* from here on src_w is the width after software decimation */
> 
>   if(src_h>drw_h) {  /* same factor selection for the vertical direction */
>     if(src_h/2<drw_h) { 
>       yds=2;
>     } else if(src_h/4<drw_h) {
>       yds=4;
>     } else { yds=8; }
>   } else yds = 1;
> 
>   {
>   char sbuf[255];
>   
> #ifdef DEBUG_MGA2164
>   sprintf(sbuf,"---- xds = %d\n",
>         xds);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
> }
> 
> #ifdef DEBUG_MGA2164
>   sprintf(sbuf,"out src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
>         src_x,src_y,src_w,src_h);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
>   sprintf(sbuf,"out drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
>         drw_x,drw_y,drw_w,drw_h);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
>   
>   CHECK_DMA_QUIESCENT(pMga, pScrn);
>   
>   /* scaling ILOAD */   
>   
>   vbuf=buf+width*height;  /* first chroma plane follows the width*height luma plane */
>   ubuf=vbuf+width*height/4;  /* second chroma plane; NOTE(review): the same order is used for YV12 and I420 - confirm U/V are not swapped for one of them */
>   pu = (unsigned int *)(ubuf+(src_y/2)*(width/2));
>   pv = (unsigned int *)(vbuf+(src_y/2)*(width/2));  
>   
>   for(pl=-1,dl=0;dl<box_h;dl++) {  /* dl: line inside the clip box; pl: source line used for the previous dl */
>     int beta;
>     l=(dl+(pbox->y1-drw_y))*src_h/drw_h;  /* source line (relative to src_y) for this destination line */
>     /* FIXME: check the math */
>     beta = ((dl+(pbox->y1-drw_y))*src_h*0xff/drw_h) - 
> ((dl+(pbox->y1-drw_y))*src_h/drw_h*0xff);  /* roughly the fractional part of the source position, scaled by 0xff; used as blend weight */
>     
> #ifdef MGA2164_BLIT_DUP
>     if(l!=pl) 
> #else      
>       if(1)  /* without MGA2164_BLIT_DUP every line is loaded and the "dup lines" branch is never taken */
> #endif
>       {
> 
>         /*
> #ifdef DEBUG_MGA2164
>   sprintf(sbuf,"new line: scr_dst %d   img_src %d   prev %d\n",
>         dl,l,pl);
>   xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
>         */
> 
>     OUTREG(MGAREG_DWGCTL, MGADWG_ILOAD_HIQH | MGADWG_BUYUV | MGADWG_SHIFTZERO
>          | MGADWG_SGNZERO | 0xc0000);
>     
>     OUTREG(MGAREG_AR0, pbox->x1 + drw_w -1);    /* SRC LINE END   why -1 ? */
>     OUTREG(MGAREG_AR2, ( ( (src_w-1)<<16) / (drw_w-1)) + 1 ); /* ((SRC_X_DIM 
> -1)<<16) / (DST_X_DIM-1) +1 */
>     OUTREG(MGAREG_AR3, pbox->x1 );                            /* SRC LINE 
> START*/
>     OUTREG(MGAREG_AR5, scr_pitch);                                 /* 
> DST_Y_INCR = PITCH? */
>     OUTREG(MGAREG_AR6, ((src_w-drw_w)<<16) / (drw_w-1) );     /* */
>     OUTREG(MGAREG_FXBNDRY, drw_x|((drw_x+drw_w-1)<<16) );     /* why -1 ? */
>     OUTREG(MGAREG_CXBNDRY, pbox->x1 | ((pbox->x2-1)<<16 ) );     
>     OUTREG(MGAREG_YDST , pbox->y1+dl );                             /* 
> Y_START_POS */
>     OUTREG(MGAREG_LEN + MGAREG_EXEC , 1);                 /* # of LINES */
>     
>     /* xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Data finished\n"); */
>     
>     fb_ptr=(unsigned int *)pMga->ILOADBase;  /* pixel data for this line is written starting here */
>   
>     switch(id) {
>     case FOURCC_YV12:
>     case FOURCC_I420:
>       tbuf=buf+(l+src_y)*width;  /* start of the luma row for this line */
>       {
>       unsigned int *tpu=pu+src_x/8+l/2*width/8;  /* pointers are in 4-byte units, chroma is half width: src_x/8 words == src_x/2 bytes */
>       unsigned int *tpv=pv+src_x/8+l/2*width/8;
>       unsigned int *tsp=(unsigned int *)(tbuf+src_x), *tsp2;
>       
>       if((l+src_y)<(src_h-1)) tsp2=(unsigned int *)(tbuf+src_x+width);  /* second row for the blend: the next luma row ... */
>       else tsp2=(unsigned int *)(tbuf+src_x);  /* ... or the same row again; NOTE(review): the test compares l+src_y with src_h-1, not with the image height - confirm */
> 
>       /* it is not clear if waiting is actually good for performance */
>       /*       WAITFIFO(pMga->FifoSize);*/
>       /* should try to get MGACopyMunged data to work here */
>       /*              CopyMungedScanline_AXP(fb_ptr,src_w,tsp,tpu,tpv); */
> 
>       /* Filter does not work yet */
>       CopyMungedScanlineFilter_AXP(fb_ptr,src_w,tsp,tpu,tpv,tsp2,tpu,tpv, 
> beta, xds); 
>       /*      if(l&1) {
>         pu+=width/8;
>         pv+=width/8;
>         } */
>       }
>       break;
>     case FOURCC_UYVY:
>     case FOURCC_YUY2:
>       tbuf=buf+(l+src_y)*width*2;  /* packed formats: 2 bytes per pixel */
> 
> #ifndef MGA2164_SWFILTER
>       WAITFIFO(pMga->FifoSize/2);
>       memcpy(fb_ptr, tbuf+src_x*2, src_w*2);
>       fb_ptr+=src_w*2;   /* pointer in the pseudo dma window */
> #else
>       {
>         unsigned int *tsp=(unsigned int *)(tbuf+src_x*2), *tsp2;
>         
>         if((l+src_y)<(src_h-1)) tsp2=(unsigned int *)(tbuf+src_x*2+width*2);
>         else tsp2=(unsigned int *)(tbuf+src_x*2);
>         /*      {
>                 char sbuf [256];
>                 sprintf(sbuf,"dst line: %d   src_line: %d    beta: %x\n",
>                 dl, l, beta );
>                 xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
>                 }  */
>         
>         WAITFIFO(pMga->FifoSize/4);
>         for(k=xds*src_w/8;k;k--) {  /* each iteration consumes 4 source words (8 pixels) from each row */
>           int oneminbeta = 0xff-beta;
>           int y[8], u[4], v[4], ya[4], ua[2], va[2], p;
> 
>           switch(yds) {  /* unpack bytes 0/2 as luma, 1 as u, 3 as v; NOTE(review): the same byte order is assumed for UYVY and YUY2 - confirm */
>           case 1:
>             /* upscale y filter */
>           for(p=0;p<4;p++) { 
>             
> y[2*p]=(((*(tsp+p)&0x000000ff))*oneminbeta+((*(tsp2+p)&0x000000ff))*beta)>>8;
>             
> y[2*p+1]=(((*(tsp+p)&0x00ff0000)>>16)*oneminbeta+((*(tsp2+p)&0x00ff0000)>>16)*beta)>>8;
>             
> u[p]=(((*(tsp+p)&0x0000ff00)>>8)*oneminbeta+((*(tsp2+p)&0x0000ff00)>>8)*beta)>>8;
>             
> v[p]=(((*(tsp+p)&0xff000000)>>24)*oneminbeta+((*(tsp2+p)&0xff000000)>>24)*beta)>>8;
>           }
>           break;
>           /* downscale y filter */
>           case 2:
>           case 3:
>           case 4:
>           default:
>             for(p=0;p<4;p++) {  /* no vertical blend when shrinking: only the first row is used */
>               y[2*p]=(((*(tsp+p)&0x000000ff)));
>               y[2*p+1]=(((*(tsp+p)&0x00ff0000)>>16));
>               u[p]=(((*(tsp+p)&0x0000ff00)>>8));
>               v[p]=(((*(tsp+p)&0xff000000)>>24));
>             }
>             break;
>           }
>           
>           switch (xds) {
>           case 1: /* simple copy */
>             *(fb_ptr++)=y[0]|y[1]<<16|u[0]<<8|v[0]<<24;
>             *(fb_ptr++)=y[2]|y[3]<<16|u[1]<<8|v[1]<<24;
>             *(fb_ptr++)=y[4]|y[5]<<16|u[2]<<8|v[2]<<24;
>             *(fb_ptr++)=y[6]|y[7]<<16|u[3]<<8|v[3]<<24;
>             break;
>           case 2: /* dowscale by 2 */
>             ya[0]=(y[0]+y[1])>>1;
>             ya[1]=(y[2]+y[3])>>1;
>             ya[2]=(y[4]+y[5])>>1;
>             ya[3]=(y[6]+y[7])>>1;
>             ua[0]=(u[0]+u[1])>>1;
>             ua[1]=(u[2]+u[3])>>1;
>             va[0]=(v[0]+v[1])>>1;
>             va[1]=(v[2]+v[3])>>1;
>             *(fb_ptr++)=ya[0]|ya[1]<<16|ua[0]<<8|va[0]<<24;
>             *(fb_ptr++)=ya[2]|ya[3]<<16|ua[1]<<8|va[1]<<24;
>             break;
>           case 4: /* downscale by 4 */
>             ya[0]=(y[0]+y[1]+y[2]+y[3])>>2;
>             ya[1]=(y[4]+y[5]+y[6]+y[7])>>2;
>             ua[0]=(u[0]+u[1]+u[2]+u[3])>>2;
>             va[0]=(v[0]+v[1]+v[2]+v[3])>>2;         
>             *(fb_ptr++)=ya[0]|ya[1]<<16|ua[0]<<8|va[0]<<24;
>             break;
>           case 8:
>           default:
>             break;
>           }
> 
>           /* fb_ptr+=4; */ 
>           tsp+=4; tsp2+=4;
>         }
>         
>       }
> #endif /* MGA2164_SWFILTER */
>       break; 
>     default:
>       
>       break;     
>     }
>     pl=l;  /* remember which source line was just drawn */
>       } else {
>         /* dup lines */
>         
> #ifdef DEBUG_MGA2164
>         sprintf(sbuf,"dup line: scr_src %d   scr_dst %d\n",
>                 dl-1,dl);
>         xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
>         
>         OUTREG(MGAREG_DWGCTL, 0x040C6008);
>         OUTREG(MGAREG_FXBNDRY, pbox->x1|((pbox->x2-1)<<16) );     /* why -1 ? 
> */
>         OUTREG(MGAREG_AR3, (pbox->y1+dl-1)*scr_pitch+pbox->x1 );              
>               /* SRC LINE START*/
>         OUTREG(MGAREG_AR0, (pbox->y1+dl-1)*scr_pitch+pbox->x2 -1);    /* SRC 
> LINE END   why -1 ? */
>         OUTREG(MGAREG_AR5, scr_pitch);                                 /* 
> DST_Y_INCR = PITCH? */
>         OUTREG(MGAREG_YDST , pbox->y1+dl);                             /* 
> Y_START_POS */
>         OUTREG(MGAREG_LEN + MGAREG_EXEC , 1);                 /* # of LINES */
>       }
>   }
>   OUTREG(MGAREG_CXBNDRY, 0xFFFF0000);  /* same value MGACopyILOAD writes to "put clipping back to normal" */
> }
>  
> static void MGACopyILOAD(
>                        ScrnInfoPtr pScrn,
>                        int id, unsigned char *buf,
>                        BoxPtr pbox,
>                        int width, int height, int pitch,
>                        short src_x, short src_y,
>                        short src_w, short src_h,
>                        short drw_x, short drw_y,
>                        short drw_w, short drw_h
>                        )
> { /*
>    * Draw the part of an Xv image that lies inside one clip box (pbox)
>    * without scaling, using a single ILOAD for the whole box.
>    *
>    * The source and destination rectangles are first clipped against pbox
>    * in software (widths are kept multiples of 8), then the drawing
>    * registers are programmed once and all lines are written through
>    * pMga->ILOADBase.  Planar images (YV12/I420) are packed line by line
>    * with CopyMungedScanline_AXP; packed images (UYVY/YUY2) are copied as
>    * they are.
>    *
>    *   id            FOURCC of the image; any other value still programs
>    *                 the registers but sends no pixel data
>    *   buf           start of the image data
>    *   width,height  dimensions of the full source image
>    *   pitch         unused
>    *   src_*, drw_*  source and destination rectangles; the code assumes
>    *                 both have the same size ("this works only if no
>    *                 scaling")
>    *
>    * The only call of this function in this patch sits inside an
>    * "#if 0" section of MGAPutImageILOAD.
>    */
>   MGAPtr pMga = MGAPTR(pScrn);
>   unsigned int *fb_ptr;
>   unsigned char *ubuf, *vbuf;
>   unsigned int *pu, *pv;
>   int k,l;
>   short clip_x1, clip_x2, tmp_w;
> 
> #ifdef DEBUG_MGA2164
>   char sbuf[255];
> 
>   sprintf(sbuf,"---- PBOX: x1=%d y1=%d w=%d h=%d (x2=%d y2=%d)\n",
>     pbox->x1,pbox->y1,pbox->x2-pbox->x1,pbox->y2-pbox->y1,
>     pbox->x2,pbox->y2);
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> 
>   sprintf(sbuf,"in src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
>     src_x,src_y,src_w,src_h);
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
>   sprintf(sbuf,"in drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
>     drw_x,drw_y,drw_w,drw_h);
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
> 
>   /* non-scaling yuv->rgb */
>   
>   /* hack to force width and src image to be 8 pixel aligned */
>   src_x&=~0x7;
>   src_w&=~0x7; 
>   drw_w&=~0x7;
>   tmp_w=drw_w;
>   clip_x1=drw_x;  /* clip_x1/clip_x2 end up in CXBNDRY below */
>   clip_x2=drw_x+drw_w;
> 
>   /* hack for clipping in non scaling version */
>   /* this works only if no scaling */
>   if(pbox->x1 > drw_x) {              /* left side X clipping*/
>     src_x+=((pbox->x1-drw_x)&~0x7);
>     src_w-=((pbox->x1-drw_x)&~0x7); 
>     clip_x1=pbox->x1;
>     drw_x+=src_x;  /* NOTE(review): adds the whole source offset, not only the clipped amount - confirm this is intended when src_x was non-zero on entry */
>     drw_w=src_w;
>   }
> 
>   if( (pbox->x2) < (drw_x+drw_w) ) {     /* right side X clipping */
>     tmp_w=( (pbox->x2) - drw_x );
>     drw_w= tmp_w & (~0x7);
>     if(drw_w!=tmp_w) drw_w+=8;  /* round the transfer width up to a multiple of 8; clip_x2 keeps the exact edge */
>     clip_x2=drw_x+tmp_w-1; /* not sure why needs -1 */
>     src_w=drw_w;
>   }
> 
>   if(pbox->y1 > drw_y) {             /* top side Y clipping */
>     src_y+=(pbox->y1-drw_y);
>     src_h-=(pbox->y1-drw_y);
>     drw_y+=src_y;  /* NOTE(review): same pattern as the left clip - adds the whole source offset */
>     drw_h=src_h;
>   }
>   if((pbox->y2)<(drw_y+drw_h)) {     /* bottom side Y clipping */
>     drw_h=(pbox->y2)-drw_y;
>     src_h=drw_h;
>   }
> 
>   if(drw_x<0) drw_x=0;
> 
> #ifdef DEBUG_MGA2164
>   sprintf(sbuf,"out src: src_x=%d src_y=%d src_w=%d src_h=%d\n",
>     src_x,src_y,src_w,src_h);
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
>   sprintf(sbuf,"out drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n",
>     drw_x,drw_y,drw_w,drw_h);
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf);
> #endif
>   
>   /* ready to draw */
>   if(drw_w==0||drw_h==0) return;
> 
>   if(drw_w<0||drw_h<0) {
>       /* actually until scaling is working this might happen 
>       during normal operation */
> /*  sprintf(sbuf,"drw_w or drw_h are negative (this should never 
> happen)\n");
>     xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); */
>       return;
> }
> 
>   CHECK_DMA_QUIESCENT(pMga, pScrn);
>   
>   /* non scaling ILOAD */   
>   WAITFIFO(6);  /* NOTE(review): seven OUTREG writes follow - confirm 6 is the intended FIFO reservation */
>   OUTREG(MGAREG_AR5, 0);
>   OUTREG(MGAREG_DWGCTL, MGADWG_ILOAD | MGADWG_BUYUV | MGADWG_SHIFTZERO
>        | MGADWG_SGNZERO | 0xc0000);
>   OUTREG(MGAREG_AR0, (drw_w)-1 );
>   OUTREG(MGAREG_AR3, 0);
>   OUTREG(MGAREG_CXBNDRY, clip_x1|(clip_x2<<16)); 
>   OUTREG(MGAREG_FXBNDRY, drw_x|((drw_x+drw_w-1)<<16)); 
>   OUTREG(MGAREG_YDSTLEN + MGAREG_EXEC , (drw_y<<16)|drw_h); 
>   
>   fb_ptr=(unsigned int *)pMga->ILOADBase;  /* pixel data is written starting here */
>   vbuf=buf+width*height;  /* first chroma plane follows the width*height luma plane */
>   ubuf=vbuf+width*height/4;  /* second chroma plane; NOTE(review): same order used for YV12 and I420 - confirm */
>     
>   switch(id) {
>   case FOURCC_YV12:
>   case FOURCC_I420:
>     pu = (unsigned int *)(ubuf+(src_y/2)*(width/2));
>     pv = (unsigned int *)(vbuf+(src_y/2)*(width/2));
>     buf+=src_y*width;
>     
>     for(l=0;l<drw_h;l++) {
>       unsigned int *tpu=pu+src_x/8;  /* pointers are in 4-byte units, chroma is half width: src_x/8 words == src_x/2 bytes */
>       unsigned int *tpv=pv+src_x/8;
>       unsigned int *tsp=(unsigned int *)(buf+src_x);
>       
>      /* it is not clear if waiting is actually good for performance */
>       /*      WAITFIFO(pMga->FifoSize);*/
>       /* should try to get MGACopyMunged data to work here */
>       CopyMungedScanline_AXP(fb_ptr,src_w,tsp,tpu,tpv);  /* NOTE(review): fb_ptr is not advanced between lines in this path - presumably the ILOAD window accepts repeated writes; confirm */
>       buf+=width;
>       if(l&1) {  /* one chroma row serves two luma rows: advance after every odd line */
>       pu+=width/8;
>       pv+=width/8;
>       }
>     }
>     break;
>   case FOURCC_UYVY:
>   case FOURCC_YUY2:
>     buf+=src_y*width*2;  /* packed formats: 2 bytes per pixel */
>     for(l=0;l<drw_h;l++) {
> 
> #ifndef CUSTOM_MEMCOPY
>       WAITFIFO(pMga->FifoSize/2); /* not sure what's the value for best 
> performance */
>       memcpy(fb_ptr, buf+src_x*2, src_w*2);
>       fb_ptr+=src_w*2;  /* NOTE(review): fb_ptr is an unsigned int pointer, so this advances src_w*2 words while memcpy wrote src_w*2 bytes */
> #else
>       unsigned int *tsp=(unsigned int *)(buf+src_x*2);
>       WAITFIFO(pMga->FifoSize/4);
>       for(k=src_w/8;k;k--) {  /* copy 4 words (8 pixels) per iteration */
>       *(fb_ptr)=*(tsp);
>       *(fb_ptr+1)=*(tsp+1);
>       *(fb_ptr+2)=*(tsp+2);
>       *(fb_ptr+3)=*(tsp+3);
>       fb_ptr+=4; tsp+=4;
>             } 
> #endif /* CUSTOM_MEMCOPY */
>       buf+=width*2;
>     }
>     break; 
>   default:
>     break;     
>   }
>   OUTREG(MGAREG_CXBNDRY, 0xFFFF0000);    /* put clipping back to normal */ 
> }
>  
> 
> 
> /*
>  * PutImage hook of the ILOAD adaptor.
>  *
>  * Clips the request with MGAClipVideo(); if nothing remains, returns at
>  * once.  Otherwise every rectangle of clipBoxes is drawn with
>  * MGACopyScaledILOAD(), using the unclipped source and destination
>  * rectangles as passed in.  Afterwards the accelerator is marked as needing
>  * a sync and the port's free timer is armed.
>  *
>  * The Sync argument is not used.  Always returns Success.
>  */
> static int 
> MGAPutImageILOAD( 
>                ScrnInfoPtr pScrn, 
>                short src_x, short src_y, 
>                short drw_x, short drw_y,
>                short src_w, short src_h, 
>                short drw_w, short drw_h,
>                int id, unsigned char* buf, 
>                short width, short height, 
>                Bool Sync,
>                RegionPtr clipBoxes, pointer data
>                )
> {
>   MGAPtr pMga = MGAPTR(pScrn);
>   MGAPortPrivPtr pPriv = pMga->portPrivate;
>   long port = (long)data;
>   int dstPitch = 0;
>   int bpp;                      /* computed below, currently unused */
>   INT32 srcLeft = src_x;
>   INT32 srcRight = src_x + src_w;
>   INT32 srcTop = src_y;
>   INT32 srcBottom = src_y + src_h;
>   BoxRec dstBox;
>   BoxPtr boxes;
>   int nboxes;
>   int i;
> 
>   /* Clip */
>   dstBox.x1 = drw_x;
>   dstBox.y1 = drw_y;
>   dstBox.x2 = drw_x + drw_w;
>   dstBox.y2 = drw_y + drw_h;
> 
>   if(!MGAClipVideo(&dstBox, &srcLeft, &srcRight, &srcTop, &srcBottom,
>                    clipBoxes, width, height)) {
>     return Success;
>   }
> 
>   bpp = pScrn->bitsPerPixel >> 3;
> 
>   /* wait for pending acceleration when another port drew last */
>   if(pMga->AccelInfoRec->NeedToSync && (port != pPriv->lastPort)) {
>     MGAStormSync(pScrn);
>   }
>   pPriv->lastPort = port;
> 
>   nboxes = REGION_NUM_RECTS(clipBoxes);
>   boxes = REGION_RECTS(clipBoxes);
> 
>   for(i = 0; i < nboxes; i++) {
>     BoxPtr box = &boxes[i];
> #if 0
>     if ( (drw_w==src_w) && (drw_h==src_h) && (drw_x >= 0 ) ) {
>       /* special case 1: non scaling optimization */
>       MGACopyILOAD(pScrn,id,buf,box,
>                  width, height, dstPitch, src_x, src_y, src_w, src_h,
>                  drw_x, drw_y, drw_w, drw_h);
>     } else if ( (drw_w>src_w) && (drw_h>src_h) && (drw_x >= 0 ) ) {
>       /* special case 2: upscaling for full screen apps */
>       /* FIXME: to do */
>       MGACopyScaledILOAD(pScrn,id,buf,box,
>                        width, height, dstPitch, src_x, src_y, src_w, src_h,
>                        drw_x, drw_y, drw_w, drw_h);
> 
>     } else /* generic fallback case */
> #endif
>       MGACopyScaledILOAD(pScrn, id, buf, box,
>                        width, height, dstPitch,
>                        src_x, src_y, src_w, src_h,
>                        drw_x, drw_y, drw_w, drw_h);
>     /* FIXME: when the generic is perfect I will enable the optimizations */
>   }
> 
>   pMga->AccelInfoRec->NeedToSync = TRUE;
> 
>   /* arm the free timer */
>   pPriv->videoStatus = FREE_TIMER;
>   pPriv->freeTime = currentTime.milliseconds + FREE_DELAY;
>   pMga->VideoTimerCallback = MGAVideoTimerCallback;
> 
>   return Success;
> }
> 

Reply via email to