summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libswscale/internal_bfin.S38
-rw-r--r--libswscale/rgb2rgb.c32
-rw-r--r--libswscale/rgb2rgb.h44
-rw-r--r--libswscale/rgb2rgb_template.c58
-rw-r--r--libswscale/swscale_altivec_template.c16
-rw-r--r--libswscale/swscale_bfin.c2
-rw-r--r--libswscale/swscale_internal.h6
-rw-r--r--libswscale/swscale_template.c41
-rw-r--r--libswscale/yuv2rgb.c8
-rw-r--r--libswscale/yuv2rgb_altivec.c67
-rw-r--r--libswscale/yuv2rgb_bfin.c7
-rw-r--r--libswscale/yuv2rgb_mlib.c3
-rw-r--r--libswscale/yuv2rgb_template.c32
13 files changed, 178 insertions, 176 deletions
diff --git a/libswscale/internal_bfin.S b/libswscale/internal_bfin.S
index 7dcd019fae..fb7bda7e12 100644
--- a/libswscale/internal_bfin.S
+++ b/libswscale/internal_bfin.S
@@ -2,8 +2,8 @@
* Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
* April 20, 2007
*
- * Blackfin Video Color Space Converters Operations
- * convert I420 YV12 to RGB in various formats,
+ * Blackfin video color space converter operations
+ * convert I420 YV12 to RGB in various formats
*
* This file is part of FFmpeg.
*
@@ -24,8 +24,8 @@
/*
-YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock
-and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts
+YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock
+and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts.
The following calculation is used for the conversion:
@@ -34,36 +34,36 @@ The following calculation is used for the conversion:
g = clipz((y-oy)*cy + cgv*(v-128) + cgu*(u-128))
b = clipz((y-oy)*cy + cbu*(u-128))
-y,u,v are pre scaled by a factor of 4 i.e. left shifted to gain precision.
+y,u,v are prescaled by a factor of 4 i.e. left-shifted to gain precision.
New factorization to eliminate the truncation error which was
-occuring due to the byteop3p.
+occurring due to the byteop3p.
-1) use the bytop16m to subtract quad bytes we use this in U8 this
+1) Use the bytop16m to subtract quad bytes we use this in U8 this
then so the offsets need to be renormalized to 8bits.
-2) scale operands up by a factor of 4 not 8 because Blackfin
+2) Scale operands up by a factor of 4 not 8 because Blackfin
multiplies include a shift.
-3) compute into the accumulators cy*yx0, cy*yx1
+3) Compute into the accumulators cy*yx0, cy*yx1.
-4) compute each of the linear equations
+4) Compute each of the linear equations:
r = clipz((y - oy) * cy + crv * (v - 128))
g = clipz((y - oy) * cy + cgv * (v - 128) + cgu * (u - 128))
b = clipz((y - oy) * cy + cbu * (u - 128))
- reuse of the accumulators requires that we actually multiply
- twice once with addition and the second time with a subtaction.
+ Reuse of the accumulators requires that we actually multiply
+ twice once with addition and the second time with a subtraction.
- because of this we need to compute the equations in the order R B
+ Because of this we need to compute the equations in the order R B
then G saving the writes for B in the case of 24/32 bit color
formats.
- api: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out,
+ API: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out,
int dW, uint32_t *coeffs);
A B
@@ -77,13 +77,13 @@ uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv;
coeffs is a pointer to oy.
-the {rgb} masks are only utilized by the 565 packing algorithm. Note the data
-replication is used to simplify the internal algorithms for the dual mac architecture
-of BlackFin.
+The {rgb} masks are only utilized by the 565 packing algorithm. Note the data
+replication is used to simplify the internal algorithms for the dual Mac
+architecture of BlackFin.
-All routines are exported with _ff_bfin_ as a symbol prefix
+All routines are exported with _ff_bfin_ as a symbol prefix.
-rough performance gain compared against -O3:
+Rough performance gain compared against -O3:
2779809/1484290 187.28%
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index ce0e0ee31a..14c40700ce 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -1,10 +1,10 @@
/*
- * rgb2rgb.c, Software RGB to RGB convertor
- * pluralize by Software PAL8 to RGB convertor
- * Software YUV to YUV convertor
- * Software YUV to RGB convertor
- * Written by Nick Kurshev.
- * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
+ * software RGB to RGB converter
+ * pluralize by software PAL8 to RGB converter
+ * software YUV to YUV converter
+ * software YUV to RGB converter
+ * Written by Nick Kurshev.
+ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
*
* This file is part of FFmpeg.
*
@@ -22,8 +22,8 @@
* along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
- * the C code (not assembly, mmx, ...) of this file can be used
- * under the LGPL license too
+ * The C code (not assembly, MMX, ...) of this file can be used
+ * under the LGPL license.
*/
#include <inttypes.h>
#include "config.h"
@@ -33,7 +33,7 @@
#include "swscale.h"
#include "swscale_internal.h"
-#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
+#define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients
void (*rgb24to32)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
@@ -149,8 +149,8 @@ static uint64_t __attribute__((aligned(8))) dither8[2]={
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
-//Note: we have C, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one
-//Plain C versions
+//Note: We have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW + MMX2 one.
+//plain C versions
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
@@ -190,10 +190,10 @@ static uint64_t __attribute__((aligned(8))) dither8[2]={
#endif //ARCH_X86 || ARCH_X86_64
/*
- rgb15->rgb16 Original by Strepto/Astral
+ RGB15->RGB16 original by Strepto/Astral
ported to gcc & bugfixed : A'rpi
MMX2, 3DNOW optimization by Nick Kurshev
- 32bit c version, and and&add trick by Michael Niedermayer
+ 32-bit C version, and and&add trick by Michael Niedermayer
*/
void sws_rgb2rgb_init(int flags){
@@ -266,7 +266,7 @@ void palette8torgb24(const uint8_t *src, uint8_t *dst, long num_pixels, const ui
{
long i;
/*
- writes 1 byte o much and might cause alignment issues on some architectures?
+ Writes 1 byte too much and might cause alignment issues on some architectures?
for (i=0; i<num_pixels; i++)
((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[src[i]];
*/
@@ -284,7 +284,7 @@ void palette8tobgr24(const uint8_t *src, uint8_t *dst, long num_pixels, const ui
{
long i;
/*
- writes 1 byte o much and might cause alignment issues on some architectures?
+ Writes 1 byte too much and might cause alignment issues on some architectures?
for (i=0; i<num_pixels; i++)
((unsigned *)(&dst[i*3])) = ((unsigned *)palette)[src[i]];
*/
@@ -299,7 +299,7 @@ void palette8tobgr24(const uint8_t *src, uint8_t *dst, long num_pixels, const ui
}
/**
- * Palette is assumed to contain bgr16, see rgb32to16 to convert the palette
+ * Palette is assumed to contain BGR16, see rgb32to16 to convert the palette.
*/
void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette)
{
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 260732f731..f2697c65d6 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -1,8 +1,8 @@
/*
- * rgb2rgb.h, Software RGB to RGB convertor
- * pluralize by Software PAL8 to RGB convertor
- * Software YUV to YUV convertor
- * Software YUV to RGB convertor
+ * software RGB to RGB converter
+ * pluralize by Software PAL8 to RGB converter
+ * Software YUV to YUV converter
+ * Software YUV to RGB converter
* Written by Nick Kurshev.
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
*
@@ -28,7 +28,7 @@
#include <inttypes.h>
-/* A full collection of rgb to rgb(bgr) convertors */
+/* A full collection of RGB to RGB(BGR) converters */
extern void (*rgb24to32) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24to16) (const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb24to15) (const uint8_t *src, uint8_t *dst, long src_size);
@@ -71,53 +71,49 @@ extern void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, c
extern void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
/**
- *
- * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
- * problem for anyone then tell me, and ill fix it)
- * chrominance data is only taken from every secound line others are ignored FIXME write HQ version
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ * Chrominance data is only taken from every second line, others are ignored.
+ * FIXME: Write HQ version.
*/
//void uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
/**
- *
- * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
- * problem for anyone then tell me, and ill fix it)
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
*/
extern void (*yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
long lumStride, long chromStride, long dstStride);
/**
- *
- * width should be a multiple of 16
+ * Width should be a multiple of 16.
*/
extern void (*yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
long lumStride, long chromStride, long dstStride);
/**
- *
- * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
- * problem for anyone then tell me, and ill fix it)
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
*/
extern void (*yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height,
long lumStride, long chromStride, long srcStride);
/**
- *
- * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
- * problem for anyone then tell me, and ill fix it)
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
*/
extern void (*yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
long lumStride, long chromStride, long dstStride);
/**
- *
- * height should be a multiple of 2 and width should be a multiple of 2 (if this is a
- * problem for anyone then tell me, and ill fix it)
- * chrominance data is only taken from every secound line others are ignored FIXME write HQ version
+ * Height should be a multiple of 2 and width should be a multiple of 2.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ * Chrominance data is only taken from every second line, others are ignored.
+ * FIXME: Write HQ version.
*/
extern void (*rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height,
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 3fb04c9c91..ffbf2c734b 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -1,11 +1,11 @@
/*
- * rgb2rgb.c, Software RGB to RGB convertor
- * pluralize by Software PAL8 to RGB convertor
- * Software YUV to YUV convertor
- * Software YUV to RGB convertor
- * Written by Nick Kurshev.
- * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
- * lot of big-endian byteorder fixes by Alex Beregszaszi
+ * software RGB to RGB converter
+ * pluralize by software PAL8 to RGB converter
+ * software YUV to YUV converter
+ * software YUV to RGB converter
+ * Written by Nick Kurshev.
+ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
+ * lot of big-endian byte order fixes by Alex Beregszaszi
*
* This file is part of FFmpeg.
*
@@ -23,7 +23,7 @@
* along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
- * The C code (not assembly, mmx, ...) of this file can be used
+ * The C code (not assembly, MMX, ...) of this file can be used
* under the LGPL license.
*/
@@ -229,10 +229,10 @@ static inline void RENAME(rgb32to24)(const uint8_t *src, uint8_t *dst, long src_
}
/*
- Original by Strepto/Astral
- ported to gcc & bugfixed : A'rpi
+ original by Strepto/Astral
+ ported to gcc & bugfixed: A'rpi
MMX2, 3DNOW optimization by Nick Kurshev
- 32 bit C version, and and&add trick by Michael Niedermayer
+ 32-bit C version, and and&add trick by Michael Niedermayer
*/
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
{
@@ -926,9 +926,9 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s
----------------
1 1 0 1 1 1 1 0
|=======| |===|
- | Leftmost Bits Repeated to Fill Open Bits
+ | leftmost bits repeated to fill open bits
|
- Original Bits
+ original bits
*/
static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_size)
{
@@ -1006,7 +1006,7 @@ static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, long src_
:"=m"(*d)
:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
:"memory");
- /* Borrowed 32 to 24 */
+ /* borrowed 32 to 24 */
asm volatile(
"movq %%mm0, %%mm4 \n\t"
"movq %%mm3, %%mm5 \n\t"
@@ -1147,7 +1147,7 @@ static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, long src_
:"=m"(*d)
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
:"memory");
- /* Borrowed 32 to 24 */
+ /* borrowed 32 to 24 */
asm volatile(
"movq %%mm0, %%mm4 \n\t"
"movq %%mm3, %%mm5 \n\t"
@@ -1479,7 +1479,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
asm volatile(SFENCE:::"memory");
asm volatile(EMMS:::"memory");
- if (mmx_size==23) return; //finihsed, was multiple of 8
+ if (mmx_size==23) return; //finished, was multiple of 8
src+= src_size;
dst+= src_size;
@@ -1638,8 +1638,8 @@ asm( EMMS" \n\t"
}
/**
- * Height should be a multiple of 2 and width should be a multiple of 16 (if
- * this is a problem for anyone then tell me, and I will fix it).
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
*/
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
@@ -1720,7 +1720,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
(vc[0] << 8) + (yc[1] << 0);
#else
*idst++ = uc[0] + (yc[0] << 8) +
- (vc[0] << 16) + (yc[1] << 24);
+ (vc[0] << 16) + (yc[1] << 24);
#endif
yc += 2;
uc++;
@@ -1744,8 +1744,8 @@ asm( EMMS" \n\t"
}
/**
- * Height should be a multiple of 2 and width should be a multiple of 16 (if
- * this is a problem for anyone then tell me, and I will fix it).
+ * Height should be a multiple of 2 and width should be a multiple of 16
+ * (If this is a problem for anyone then tell me, and I will fix it.)
*/
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
long width, long height,
@@ -1766,8 +1766,8 @@ static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usr
}
/**
- * Height should be a multiple of 2 and width should be a multiple of 16 (if
- * this is a problem for anyone then tell me, and I will fix it).
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
*/
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
long width, long height,
@@ -2002,9 +2002,9 @@ asm volatile( EMMS" \n\t"
}
/**
- * Height should be a multiple of 2 and width should be a multiple of 16 (if
- * this is a problem for anyone then tell me, and I will fix it).
- * Chrominance data is only taken from every secound line, others are ignored.
+ * Height should be a multiple of 2 and width should be a multiple of 16.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ * Chrominance data is only taken from every second line, others are ignored.
* FIXME: Write HQ version.
*/
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
@@ -2128,9 +2128,9 @@ asm volatile( EMMS" \n\t"
}
/**
- * Height should be a multiple of 2 and width should be a multiple of 2 (if
- * this is a problem for anyone then tell me, and I will fix it).
- * Chrominance data is only taken from every secound line,
+ * Height should be a multiple of 2 and width should be a multiple of 2.
+ * (If this is a problem for anyone then tell me, and I will fix it.)
+ * Chrominance data is only taken from every second line,
* others are ignored in the C version.
* FIXME: Write HQ version.
*/
diff --git a/libswscale/swscale_altivec_template.c b/libswscale/swscale_altivec_template.c
index 47b7e0dc6e..2111cec410 100644
--- a/libswscale/swscale_altivec_template.c
+++ b/libswscale/swscale_altivec_template.c
@@ -245,12 +245,12 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW, uint8_t *src, int
src_v = vec_mergeh(src_v, (vector signed short)vzero);
filter_v = vec_ld(i << 3, filter);
- // the 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2)
+ // The 3 above is 2 (filterSize == 4) + 1 (sizeof(short) == 2).
- // the neat trick : we only care for half the elements,
+ // The neat trick: We only care for half the elements,
// high or low depending on (i<<3)%16 (it's 0 or 8 here),
- // and we're going to use vec_mule, so we chose
- // carefully how to "unpack" the elements into the even slots
+ // and we're going to use vec_mule, so we choose
+ // carefully how to "unpack" the elements into the even slots.
if ((i << 3) % 16)
filter_v = vec_mergel(filter_v, (vector signed short)vzero);
else
@@ -405,12 +405,12 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int
return srcSliceH;
}
- /* this code assume:
+ /* This code assumes:
1) dst is 16 bytes-aligned
2) dstStride is a multiple of 16
3) width is a multiple of 16
- 4) lum&chrom stride are multiple of 8
+ 4) lum & chrom stride are multiples of 8
*/
for (y=0; y<height; y++) {
@@ -482,12 +482,12 @@ static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int
return srcSliceH;
}
- /* this code assume:
+ /* This code assumes:
1) dst is 16 bytes-aligned
2) dstStride is a multiple of 16
3) width is a multiple of 16
- 4) lum&chrom stride are multiple of 8
+ 4) lum & chrom stride are multiples of 8
*/
for (y=0; y<height; y++) {
diff --git a/libswscale/swscale_bfin.c b/libswscale/swscale_bfin.c
index bbc304dfe3..3e63bbd638 100644
--- a/libswscale/swscale_bfin.c
+++ b/libswscale/swscale_bfin.c
@@ -1,7 +1,7 @@
/*
* Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
*
- * Blackfin Software Video SCALER Operations
+ * Blackfin software video scaler operations
*
* This file is part of FFmpeg.
*
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index d8dc974ac1..45cf738973 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -37,7 +37,7 @@
typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]);
-/* this struct should be aligned on at least 32-byte boundary */
+/* This struct should be aligned on at least a 32-byte boundary. */
typedef struct SwsContext{
/**
* info on struct for av_log
@@ -73,7 +73,7 @@ typedef struct SwsContext{
int16_t *vChrFilter;
int16_t *vChrFilterPos;
- uint8_t formatConvBuffer[VOF]; //FIXME dynamic alloc, but we have to change a lot of code for this to be useful
+ uint8_t formatConvBuffer[VOF]; //FIXME dynamic allocation, but we have to change a lot of code for this to be useful
int hLumFilterSize;
int hChrFilterSize;
@@ -122,7 +122,7 @@ typedef struct SwsContext{
#define V_OFFSET "10*8"
#define LUM_MMX_FILTER_OFFSET "11*8"
#define CHR_MMX_FILTER_OFFSET "11*8+4*4*256"
-#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the asm
+#define DSTW_OFFSET "11*8+4*4*256*2" //do not change, it is hardcoded in the ASM
#define ESP_OFFSET "11*8+4*4*256*2+8"
#define VROUNDER_OFFSET "11*8+4*4*256*2+16"
#define U_TEMP "11*8+4*4*256*2+24"
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index f76c5948c5..56c91c14da 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -17,8 +17,8 @@
* along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
- * the C code (not assembly, mmx, ...) of this file can be used
- * under the LGPL license too
+ * The C code (not assembly, MMX, ...) of this file can be used
+ * under the LGPL license.
*/
#undef REAL_MOVNTQ
@@ -30,7 +30,7 @@
#undef SFENCE
#ifdef HAVE_3DNOW
-/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
+/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
#define EMMS "femms"
#else
#define EMMS "emms"
@@ -1503,7 +1503,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *
const int yalpha1=0;
int i;
- uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
+ uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
const int yalpha= 4096; //FIXME ...
if (flags&SWS_FULL_CHR_H_INT)
@@ -1700,7 +1700,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *
}
}
-//FIXME yuy2* can read upto 7 samples to much
+//FIXME yuy2* can read up to 7 samples too much
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
{
@@ -2297,7 +2297,7 @@ static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1,
}
}
-// Bilinear / Bicubic scaling
+// bilinear / bicubic scaling
static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
int16_t *filter, int16_t *filterPos, long filterSize)
{
@@ -2544,7 +2544,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, i
}
#ifdef HAVE_MMX
- // use the new MMX scaler if the mmx2 can't be used (it is faster than the x86 ASM one)
+ // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
#else
if (!(flags&SWS_FAST_BILINEAR))
@@ -2552,7 +2552,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, i
{
RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
}
- else // Fast Bilinear upscale / crap downscale
+ else // fast bilinear upscale / crap downscale
{
#if defined(ARCH_X86)
#ifdef HAVE_MMX2
@@ -2761,7 +2761,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1,
}
#ifdef HAVE_MMX
- // use the new MMX scaler if the mmx2 can't be used (it is faster than the x86 ASM one)
+ // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
if (!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
#else
if (!(flags&SWS_FAST_BILINEAR))
@@ -2770,7 +2770,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1,
RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
RENAME(hScale)(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
}
- else // Fast Bilinear upscale / crap downscale
+ else // fast bilinear upscale / crap downscale
{
#if defined(ARCH_X86)
#ifdef HAVE_MMX2
@@ -2890,8 +2890,8 @@ FUNNY_UV_CODE
"cmp %2, %%"REG_a" \n\t"
" jb 1b \n\t"
-/* GCC-3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
- which is needed to support GCC-4.0 */
+/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
+ which is needed to support GCC 4.0. */
#if defined(ARCH_X86_64) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
:: "m" (src1), "m" (dst), "g" ((long)dstWidth), "m" (xInc_shr16), "m" (xInc_mask),
#else
@@ -2963,7 +2963,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
int lastDstY;
uint8_t *pal=NULL;
- /* vars whch will change and which we need to storw back in the context */
+ /* vars which will change and which we need to store back in the context */
int dstY= c->dstY;
int lumBufIndex= c->lumBufIndex;
int chrBufIndex= c->chrBufIndex;
@@ -3004,13 +3004,14 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
if (flags & SWS_PRINT_INFO && firstTime)
{
av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
- " ->cannot do aligned memory acesses anymore\n");
+ " ->cannot do aligned memory accesses anymore\n");
firstTime=0;
}
}
- /* Note the user might start scaling the picture in the middle so this will not get executed
- this is not really intended but works currently, so ppl might do it */
+ /* Note the user might start scaling the picture in the middle so this
+ will not get executed. This is not really intended but works
+ currently, so people might do it. */
if (srcSliceY ==0){
lumBufIndex=0;
chrBufIndex=0;
@@ -3182,7 +3183,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
{
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
- if (vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12
+ if (vLumFilterSize == 1 && vChrFilterSize == 1) // unscaled YV12
{
int16_t *lumBuf = lumPixBuf[0];
int16_t *chrBuf= chrPixBuf[0];
@@ -3200,13 +3201,13 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
{
ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
- if (vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB
+ if (vLumFilterSize == 1 && vChrFilterSize == 2) //unscaled RGB
{
int chrAlpha= vChrFilter[2*dstY+1];
RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
dest, dstW, chrAlpha, dstFormat, flags, dstY);
}
- else if (vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB
+ else if (vLumFilterSize == 2 && vChrFilterSize == 2) //bilinear upscale RGB
{
int lumAlpha= vLumFilter[2*dstY+1];
int chrAlpha= vChrFilter[2*dstY+1];
@@ -3217,7 +3218,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
dest, dstW, lumAlpha, chrAlpha, dstY);
}
- else //General RGB
+ else //general RGB
{
RENAME(yuv2packedX)(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index b90f78c1fb..83d65c5f2a 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -39,7 +39,7 @@
#include "swscale.h"
#include "swscale_internal.h"
-#define DITHER1XBPP // only for mmx
+#define DITHER1XBPP // only for MMX
const uint8_t __attribute__((aligned(8))) dither_2x2_4[2][8]={
{ 1, 3, 1, 3, 1, 3, 1, 3, },
@@ -155,8 +155,8 @@ DECLARE_ASM_CONST(8, uint64_t, mmx_00ffw) = 0x00ff00ff00ff00ffULL;
DECLARE_ASM_CONST(8, uint64_t, mmx_redmask) = 0xf8f8f8f8f8f8f8f8ULL;
DECLARE_ASM_CONST(8, uint64_t, mmx_grnmask) = 0xfcfcfcfcfcfcfcfcULL;
-// the volatile is required because gcc otherwise optimizes some writes away not knowing that these
-// are read in the asm block
+// The volatile is required because gcc otherwise optimizes some writes away
+// not knowing that these are read in the ASM block.
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
@@ -641,7 +641,7 @@ SwsFunc yuv2rgb_get_func_ptr (SwsContext *c)
}
#endif
- av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found\n");
+ av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found.\n");
switch(c->dstFormat){
case PIX_FMT_BGR32:
diff --git a/libswscale/yuv2rgb_altivec.c b/libswscale/yuv2rgb_altivec.c
index 3583e4bf65..d3b30e7ed5 100644
--- a/libswscale/yuv2rgb_altivec.c
+++ b/libswscale/yuv2rgb_altivec.c
@@ -21,63 +21,68 @@
*/
/*
-convert I420 YV12 to RGB in various formats,
- it rejects images that are not in 420 formats
- it rejects images that don't have widths of multiples of 16
- it rejects images that don't have heights of multiples of 2
-reject defers to C simulation codes.
+Convert I420 YV12 to RGB in various formats,
+ it rejects images that are not in 420 formats,
+ it rejects images that don't have widths of multiples of 16,
+ it rejects images that don't have heights of multiples of 2.
+Reject defers to C simulation code.
-lots of optimizations to be done here
+Lots of optimizations to be done here.
-1. need to fix saturation code, I just couldn't get it to fly with packs and adds.
- so we currently use max min to clip
+1. Need to fix saturation code. I just couldn't get it to fly with packs
+ and adds, so we currently use max/min to clip.
-2. the inefficient use of chroma loading needs a bit of brushing up
+2. The inefficient use of chroma loading needs a bit of brushing up.
-3. analysis of pipeline stalls needs to be done, use shark to identify pipeline stalls
+3. Analysis of pipeline stalls needs to be done. Use shark to identify
+ pipeline stalls.
MODIFIED to calculate coeffs from currently selected color space.
-MODIFIED core to be a macro which you spec the output format.
-ADDED UYVY conversion which is never called due to some thing in SWSCALE.
+MODIFIED core to be a macro where you specify the output format.
+ADDED UYVY conversion which is never called due to some thing in swscale.
CORRECTED algorithim selection to be strict on input formats.
-ADDED runtime detection of altivec.
+ADDED runtime detection of AltiVec.
ADDED altivec_yuv2packedX vertical scl + RGB converter
March 27,2004
PERFORMANCE ANALYSIS
-The C version use 25% of the processor or ~250Mips for D1 video rawvideo used as test
-The ALTIVEC version uses 10% of the processor or ~100Mips for D1 video same sequence
+The C version uses 25% of the processor or ~250Mips for D1 video rawvideo
+used as test.
+The AltiVec version uses 10% of the processor or ~100Mips for D1 video
+same sequence.
-720*480*30 ~10MPS
+720 * 480 * 30 ~10MPS
-so we have roughly 10clocks per pixel this is too high something has to be wrong.
+so we have roughly 10 clocks per pixel. This is too high, something has
+to be wrong.
-OPTIMIZED clip codes to utilize vec_max and vec_packs removing the need for vec_min.
+OPTIMIZED clip codes to utilize vec_max and vec_packs removing the
+need for vec_min.
-OPTIMIZED DST OUTPUT cache/dma controls. we are pretty much
-guaranteed to have the input video frame it was just decompressed so
-it probably resides in L1 caches. However we are creating the
-output video stream this needs to use the DSTST instruction to
-optimize for the cache. We couple this with the fact that we are
-not going to be visiting the input buffer again so we mark it Least
-Recently Used. This shaves 25% of the processor cycles off.
+OPTIMIZED DST OUTPUT cache/DMA controls. We are pretty much guaranteed to have
+the input video frame, it was just decompressed so it probably resides in L1
+caches. However, we are creating the output video stream. This needs to use the
+DSTST instruction to optimize for the cache. We couple this with the fact that
+we are not going to be visiting the input buffer again so we mark it Least
+Recently Used. This shaves 25% of the processor cycles off.
-Now MEMCPY is the largest mips consumer in the system, probably due
+Now memcpy is the largest mips consumer in the system, probably due
to the inefficient X11 stuff.
GL libraries seem to be very slow on this machine 1.33Ghz PB running
Jaguar, this is not the case for my 1Ghz PB. I thought it might be
-a versioning issues, however I have libGL.1.2.d