summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjkeil <jkeil@b3059339-0415-0410-9bf9-f77b7e298cf2>2001-07-12 15:23:26 +0000
committerjkeil <jkeil@b3059339-0415-0410-9bf9-f77b7e298cf2>2001-07-12 15:23:26 +0000
commit009d2b0dc7ce5d7089bdb4136d1b575b115981d0 (patch)
tree62cac7ae3879f54e0e2301e876aa3a18e61b1e4c
parent88b34d0d112d73462f36e003ea788d6adb2211a2 (diff)
downloadmpv-009d2b0dc7ce5d7089bdb4136d1b575b115981d0.tar.bz2
mpv-009d2b0dc7ce5d7089bdb4136d1b575b115981d0.tar.xz
yuv2rgb_mmx crashes with ffdivx codec, when we play back avi files that have
a frame width that is not an exact multiple of 8. Testcase: 405.avi (356x240). Playing on an MMX-capable x86 system using the x11 video-out driver results in a segfault. The MMX routines convert image data in quantities of 8 pixels in each loop, and the inner loop was not terminated in case there are only 1-7 pixels left, producing too much RGB output. For now, just ignore the last few pixels on each row, to avoid the segfaults. (Gives a black vertical border on the right, if you play a video with width%8 != 0) A possible future enhancement would be to add a second loop to convert the last width%8 pixels to RGB using a byte loop. git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@1307 b3059339-0415-0410-9bf9-f77b7e298cf2
-rw-r--r--libvo/yuv2rgb_mmx.c144
1 files changed, 59 insertions, 85 deletions
diff --git a/libvo/yuv2rgb_mmx.c b/libvo/yuv2rgb_mmx.c
index 658134f0e3..7c6ed1bcde 100644
--- a/libvo/yuv2rgb_mmx.c
+++ b/libvo/yuv2rgb_mmx.c
@@ -76,24 +76,29 @@ static void yuv420_rgb16_mmx (uint8_t * image, uint8_t * py,
int rgb_stride, int y_stride, int uv_stride)
{
int even = 1;
- int x = 0, y = 0;
+ int x, y;
- /* load data for first scan line */
- __asm__ __volatile__ (
- "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
- "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
+ __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
- "pxor %%mm4, %%mm4;" /* zero mm4 */
- "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+ for (y = v_size; --y >= 0; ) {
+ uint8_t *_image = image;
+ uint8_t *_py = py;
+ uint8_t *_pu = pu;
+ uint8_t *_pv = pv;
+
+ /* load data for start of next scan line */
+ __asm__ __volatile__ (
+ "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
+ "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
+ "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
- //"movl $0, (%3);" /* cache preload for image */
- : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+ : : "r" (_py), "r" (_pu), "r" (_pv));
- do {
- do {
+ for (x = h_size >> 3; --x >= 0; ) {
/* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
pixels in each iteration */
- __asm__ __volatile__ (".align 8;"
+
+ __asm__ __volatile__ (
/* Do the multiply part of the conversion for even and odd pixels,
register usage:
mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
@@ -199,40 +204,24 @@ static void yuv420_rgb16_mmx (uint8_t * image, uint8_t * py,
"movd 4 (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
MOVNTQ " %%mm5, 8 (%3);" /* store pixel 4-7 */
- : : "r" (py), "r" (pu), "r" (pv), "r" (image));
-
- py += 8;
- pu += 4;
- pv += 4;
- image += 16;
- x += 8;
- } while (x < h_size);
-
- if (even) {
- pu -= h_size/2;
- pv -= h_size/2;
- } else {
- pu += (uv_stride - h_size/2);
- pv += (uv_stride - h_size/2);
- }
+ : : "r" (_py), "r" (_pu), "r" (_pv), "r" (_image));
- py += (y_stride - h_size);
- image += (rgb_stride - 2*h_size);
-
- /* load data for start of next scan line */
- __asm__ __volatile__ (
- "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 00 u3 u2 u1 u0 */
- "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 00 v2 v1 v0 */
+ _py += 8;
+ _pu += 4;
+ _pv += 4;
+ _image += 16;
+ }
- //"movl $0, (%3);" /* cache preload for image */
- "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+ if (!even) {
+ pu += uv_stride;
+ pv += uv_stride;
+ }
- : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+ py += y_stride;
+ image += rgb_stride;
- x = 0;
- y += 1;
even = (!even);
- } while (y < v_size) ;
+ }
__asm__ __volatile__ (EMMS);
}
@@ -243,25 +232,29 @@ static void yuv420_argb32_mmx (uint8_t * image, uint8_t * py,
int rgb_stride, int y_stride, int uv_stride)
{
int even = 1;
- int x = 0, y = 0;
+ int x, y;
- __asm__ __volatile__ (
- ".align 8;"
- "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
- //"movl $0, (%3);" /* cache preload for image */
+ __asm__ __volatile__ ("pxor %mm4, %mm4;" /* zero mm4 */ );
- "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
- "pxor %%mm4, %%mm4;" /* zero mm4 */
+ for (y = v_size; --y >= 0; ) {
+ uint8_t *_image = image;
+ uint8_t *_py = py;
+ uint8_t *_pu = pu;
+ uint8_t *_pv = pv;
- "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
- : : "r" (py), "r" (pu), "r" (pv), "r" (image));
+ /* load data for start of next scan line */
+ __asm__ __volatile__
+ (
+ "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
+ "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
+ "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+ : : "r" (_py), "r" (_pu), "r" (_pv)
+ );
- do {
- do {
+ for (x = h_size >> 3; --x >= 0; ) {
/* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
pixels in each iteration */
__asm__ __volatile__ (
- ".align 8;"
/* Do the multiply part of the conversion for even and odd pixels,
register usage:
mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels,
@@ -379,43 +372,24 @@ static void yuv420_argb32_mmx (uint8_t * image, uint8_t * py,
"pxor %%mm4, %%mm4;" /* zero mm4 */
"movq 8 (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
- : : "r" (py), "r" (pu), "r" (pv), "r" (image));
-
- py += 8;
- pu += 4;
- pv += 4;
- image += 32;
- x += 8;
- } while (x < h_size);
-
- if (even) {
- pu -= h_size/2;
- pv -= h_size/2;
- } else {
- pu += (uv_stride - h_size/2);
- pv += (uv_stride - h_size/2);
- }
-
- py += (y_stride - h_size);
- image += (rgb_stride - 4*h_size);
+ : : "r" (_py), "r" (_pu), "r" (_pv), "r" (_image));
- /* load data for start of next scan line */
- __asm__ __volatile__
- (
- ".align 8;"
- "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
- "movd (%2), %%mm1;" /* Load 4 Cr 00 00 00 00 v3 v2 v1 v0 */
+ _py += 8;
+ _pu += 4;
+ _pv += 4;
+ _image += 32;
+ }
- //"movl $0, (%3);" /* cache preload for image */
- "movq (%0), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
- : : "r" (py), "r" (pu), "r" (pv), "r" (image)
- );
+ if (!even) {
+ pu += uv_stride;
+ pv += uv_stride;
+ }
+ py += y_stride;
+ image += rgb_stride;
- x = 0;
- y += 1;
even = (!even);
- } while ( y < v_size) ;
+ }
__asm__ __volatile__ (EMMS);
}