summary | refs | log | tree | commit | diff | stats
path: root/libmpcodecs
diff options
context:
space:
mode:
author alex <alex@b3059339-0415-0410-9bf9-f77b7e298cf2> 2003-12-08 22:57:47 +0000
committer alex <alex@b3059339-0415-0410-9bf9-f77b7e298cf2> 2003-12-08 22:57:47 +0000
commit a5c024156d6cb0c55e33ac1749b98cd7128ce660 (patch)
tree b91362c4d6095a100a850f6627b6ba798a4738c4 /libmpcodecs
parent d5188eb90a565fd7d990e61a7515ed592e8f28d1 (diff)
download mpv-a5c024156d6cb0c55e33ac1749b98cd7128ce660.tar.bz2
mpv-a5c024156d6cb0c55e33ac1749b98cd7128ce660.tar.xz
Yet another inverse telecine filter by Zoltan Hidvegi <mplayer@hzoli.2y.net>. Also heavily MMX centric.
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@11601 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libmpcodecs')
-rw-r--r-- libmpcodecs/Makefile 2
-rw-r--r-- libmpcodecs/cmmx.h 186
-rw-r--r-- libmpcodecs/vf.c 2
-rw-r--r-- libmpcodecs/vf_filmdint.c 1443
4 files changed, 1632 insertions, 1 deletions
diff --git a/libmpcodecs/Makefile b/libmpcodecs/Makefile
index 65bc0b9398..ddeccbbb8d 100644
--- a/libmpcodecs/Makefile
+++ b/libmpcodecs/Makefile
@@ -14,7 +14,7 @@ VIDEO_SRCS_NAT=vd_null.c vd_cinepak.c vd_qtrpza.c vd_raw.c vd_hmblck.c vd_msvidc
VIDEO_SRCS_OPT=vd_realvid.c vd_ffmpeg.c vd_dshow.c vd_dmo.c vd_vfw.c vd_vfwex.c vd_odivx.c vd_divx4.c vd_zrmjpeg.c vd_xanim.c vd_xvid.c vd_xvid4.c vd_libdv.c vd_qtvideo.c vd_theora.c
VIDEO_SRCS=dec_video.c vd.c $(VIDEO_SRCS_NAT) $(VIDEO_SRCS_LIB) $(VIDEO_SRCS_OPT)
-VFILTER_SRCS=vf.c vf_vo.c vf_crop.c vf_expand.c vf_scale.c vf_format.c vf_yuy2.c vf_flip.c vf_rgb2bgr.c vf_rotate.c vf_mirror.c vf_palette.c vf_lavc.c vf_dvbscale.c vf_cropdetect.c vf_test.c vf_noise.c vf_yvu9.c vf_rectangle.c vf_lavcdeint.c vf_eq.c vf_eq2.c vf_halfpack.c vf_dint.c vf_1bpp.c vf_bmovl.c vf_2xsai.c vf_unsharp.c vf_swapuv.c vf_il.c vf_boxblur.c vf_sab.c vf_smartblur.c vf_perspective.c vf_down3dright.c vf_field.c vf_denoise3d.c vf_hqdn3d.c vf_detc.c vf_telecine.c vf_tfields.c vf_ivtc.c vf_ilpack.c vf_dsize.c vf_decimate.c vf_softpulldown.c vf_tinterlace.c vf_pullup.c pullup.c vf_framestep.c vf_tile.c vf_delogo.c vf_fil.c vf_hue.c vf_spp.c vf_yuvcsp.c
+VFILTER_SRCS=vf.c vf_vo.c vf_crop.c vf_expand.c vf_scale.c vf_format.c vf_yuy2.c vf_flip.c vf_rgb2bgr.c vf_rotate.c vf_mirror.c vf_palette.c vf_lavc.c vf_dvbscale.c vf_cropdetect.c vf_test.c vf_noise.c vf_yvu9.c vf_rectangle.c vf_lavcdeint.c vf_eq.c vf_eq2.c vf_halfpack.c vf_dint.c vf_1bpp.c vf_bmovl.c vf_2xsai.c vf_unsharp.c vf_swapuv.c vf_il.c vf_boxblur.c vf_sab.c vf_smartblur.c vf_perspective.c vf_down3dright.c vf_field.c vf_denoise3d.c vf_hqdn3d.c vf_detc.c vf_telecine.c vf_tfields.c vf_ivtc.c vf_ilpack.c vf_dsize.c vf_decimate.c vf_softpulldown.c vf_tinterlace.c vf_pullup.c pullup.c vf_framestep.c vf_tile.c vf_delogo.c vf_fil.c vf_hue.c vf_spp.c vf_yuvcsp.c vf_filmdint.c
ifeq ($(HAVE_FFPOSTPROCESS),yes)
VFILTER_SRCS += vf_pp.c
endif
diff --git a/libmpcodecs/cmmx.h b/libmpcodecs/cmmx.h
new file mode 100644
index 0000000000..180248a11c
--- /dev/null
+++ b/libmpcodecs/cmmx.h
@@ -0,0 +1,186 @@
+/*
+ * x86 MMX and MMX2 packed byte operations in portable C.
+ * Extra instructions: pdiffub, pcmpzb, psumbw, pcmpgtub
+ * Author: Zoltan Hidvegi
+ */
+
+#ifndef __CMMX_H
+#define __CMMX_H
+
+typedef unsigned long cmmx_t;
+
+#define ONE_BYTES (~(cmmx_t)0 / 255)
+#define SIGN_BITS (ONE_BYTES << 7)
+#define LOWBW_MASK (~(cmmx_t)0 / 257)
+
+static inline cmmx_t
+paddb(cmmx_t a, cmmx_t b)
+{
+ return ((a & ~SIGN_BITS) + (b & ~SIGN_BITS)) ^ ((a^b) & SIGN_BITS);
+}
+
+static inline cmmx_t
+psubb(cmmx_t a, cmmx_t b)
+{
+ return ((a | SIGN_BITS) - (b & ~SIGN_BITS)) ^ (~(a^b) & SIGN_BITS);
+}
+
+static inline cmmx_t
+paddusb(cmmx_t a, cmmx_t b)
+{
+ cmmx_t s = (a & ~SIGN_BITS) + (b & ~SIGN_BITS);
+ cmmx_t abs = (a | b) & SIGN_BITS;
+ cmmx_t c = abs & (s | (a & b));
+ return s | abs | (abs - (c >> 7));
+}
+
+static inline cmmx_t
+paddusb_s(cmmx_t a, cmmx_t b)
+{
+ cmmx_t sum = a+b;
+ cmmx_t ov = sum & SIGN_BITS;
+ return sum + (sum ^ (ov - (ov>>7)));
+}
+
+static inline cmmx_t
+psubusb(cmmx_t a, cmmx_t b)
+{
+ cmmx_t s = (a | SIGN_BITS) - (b & ~SIGN_BITS);
+ cmmx_t anb = a & ~b;
+ cmmx_t c = (anb | (s & ~(a^b))) & SIGN_BITS;
+ return s & ((c & anb) | (c - (c >> 7)));
+}
+
+static inline cmmx_t
+psubusb_s(cmmx_t a, cmmx_t b)
+{
+ cmmx_t d = (a|SIGN_BITS) - b;
+ cmmx_t m = d & SIGN_BITS;
+ return d & (m - (m>>7));
+}
+
+static inline cmmx_t
+pcmpgtub(cmmx_t b, cmmx_t a)
+{
+ cmmx_t s = (a | SIGN_BITS) - (b & ~SIGN_BITS);
+ cmmx_t ret = ((~a & b) | (~s & ~(a ^ b))) & SIGN_BITS;
+ return ret | (ret - (ret >> 7));
+}
+
+static inline cmmx_t
+pdiffub(cmmx_t a, cmmx_t b)
+{
+ cmmx_t xs = (~a ^ b) & SIGN_BITS;
+ cmmx_t s = ((a | SIGN_BITS) - (b & ~SIGN_BITS)) ^ xs;
+ cmmx_t gt = ((~a & b) | (s & xs)) & SIGN_BITS;
+ cmmx_t gt7 = gt >> 7;
+ return (s ^ gt ^ (gt - gt7)) + gt7;
+}
+
+static inline cmmx_t
+pdiffub_s(cmmx_t a, cmmx_t b)
+{
+ cmmx_t d = (a|SIGN_BITS) - b;
+ cmmx_t g = (~d & SIGN_BITS) >> 7;
+ return (d ^ (SIGN_BITS-g)) + g;
+}
+
+static inline cmmx_t
+pmaxub(cmmx_t a, cmmx_t b)
+{
+ return psubusb(a,b) + b;
+}
+
+static inline cmmx_t
+pminub(cmmx_t a, cmmx_t b)
+{
+ return paddusb(a,~b) - ~b;
+}
+
+static inline cmmx_t
+pminub_s(cmmx_t a, cmmx_t b)
+{
+ cmmx_t d = (a|SIGN_BITS) - b;
+ cmmx_t m = ~SIGN_BITS + ((d&SIGN_BITS)>>7);
+ return ((d&m) + b) & ~SIGN_BITS;
+}
+
+static inline cmmx_t
+pavgb(cmmx_t a, cmmx_t b)
+{
+ cmmx_t ao = a & ONE_BYTES;
+ cmmx_t bo = b & ONE_BYTES;
+ return ((a^ao)>>1) + ((b^bo)>>1) + (ao|bo);
+}
+
+static inline cmmx_t
+pavgb_s(cmmx_t a, cmmx_t b)
+{
+ return ((a+b+ONE_BYTES)>>1) & ~SIGN_BITS;
+}
+
+static inline cmmx_t
+p31avgb(cmmx_t a, cmmx_t b)
+{
+ cmmx_t ao = a & (3*ONE_BYTES);
+ cmmx_t bo = b & (3*ONE_BYTES);
+ return 3*((a^ao)>>2) + ((b^bo)>>2) +
+ (((3*ao+bo+2*ONE_BYTES)>>2) & (3*ONE_BYTES));
+}
+
+static inline cmmx_t
+p31avgb_s(cmmx_t a, cmmx_t b)
+{
+ cmmx_t avg = ((a+b)>>1) & ~SIGN_BITS;
+ return pavgb_s(avg, a);
+}
+
+static inline unsigned long
+psumbw(cmmx_t a)
+{
+ cmmx_t t = (a & LOWBW_MASK) + ((a>>8) & LOWBW_MASK);
+ unsigned long ret =
+ (unsigned long)t + (unsigned long)(t >> (4*sizeof(cmmx_t)));
+ if (sizeof(cmmx_t) > 4)
+ ret += ret >> 16;
+ return ret & 0xffff;
+}
+
+static inline unsigned long
+psumbw_s(cmmx_t a)
+{
+ unsigned long ret =
+ (unsigned long)a + (unsigned long)(a >> (4*sizeof(cmmx_t)));
+ if (sizeof(cmmx_t) <= 4)
+ return (ret & 0xff) + ((ret>>8) & 0xff);
+ ret = (ret & 0xff00ff) + ((ret>>8) & 0xff00ff);
+ ret += ret >> 16;
+ return ret & 0xffff;
+}
+
+static inline unsigned long
+psadbw(cmmx_t a, cmmx_t b)
+{
+ return psumbw(pdiffub(a,b));
+}
+
+static inline unsigned long
+psadbw_s(cmmx_t a, cmmx_t b)
+{
+ return psumbw_s(pdiffub_s(a,b));
+}
+
+static inline cmmx_t
+pcmpzb(cmmx_t a)
+{
+ cmmx_t ret = (((a | SIGN_BITS) - ONE_BYTES) | a) & SIGN_BITS;
+ return ~(ret | (ret - (ret >> 7)));
+}
+
+static inline cmmx_t
+pcmpeqb(cmmx_t a, cmmx_t b)
+{
+ return pcmpzb(a ^ b);
+}
+
+#endif
diff --git a/libmpcodecs/vf.c b/libmpcodecs/vf.c
index ee3abd7b67..9627dee0be 100644
--- a/libmpcodecs/vf.c
+++ b/libmpcodecs/vf.c
@@ -75,6 +75,7 @@ extern vf_info_t vf_info_dsize;
extern vf_info_t vf_info_decimate;
extern vf_info_t vf_info_softpulldown;
extern vf_info_t vf_info_pullup;
+extern vf_info_t vf_info_filmdint;
extern vf_info_t vf_info_framestep;
extern vf_info_t vf_info_tile;
extern vf_info_t vf_info_delogo;
@@ -143,6 +144,7 @@ static vf_info_t* filter_list[]={
&vf_info_decimate,
&vf_info_softpulldown,
&vf_info_pullup,
+ &vf_info_filmdint,
&vf_info_framestep,
&vf_info_tile,
&vf_info_delogo,
diff --git a/libmpcodecs/vf_filmdint.c b/libmpcodecs/vf_filmdint.c
new file mode 100644
index 0000000000..d332200d53
--- /dev/null
+++ b/libmpcodecs/vf_filmdint.c
@@ -0,0 +1,1443 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/time.h>
+
+#include "../config.h"
+#include "../mp_msg.h"
+#include "../cpudetect.h"
+
+#include "img_format.h"
+#include "mp_image.h"
+#include "vf.h"
+#include "cmmx.h"
+
+#include "../libvo/fastmemcpy.h"
+
+#define NUM_STORED 4
+
+enum pu_field_type_t {
+ PU_1ST_OF_3,
+ PU_2ND_OF_3,
+ PU_3RD_OF_3,
+ PU_1ST_OF_2,
+ PU_2ND_OF_2,
+ PU_INTERLACED
+};
+
+struct metrics {
+ /* This struct maps to a packed word 64-bit MMX register */
+ unsigned short int even;
+ unsigned short int odd;
+ unsigned short int noise;
+ unsigned short int temp;
+} __attribute__ ((aligned (8)));
+
+struct frame_stats {
+ struct metrics tiny, low, high, bigger, twox, max;
+ struct { unsigned int even, odd, noise, temp; } sad;
+ unsigned short interlaced_high;
+ unsigned short interlaced_low;
+ unsigned short num_blocks;
+};
+
+struct vf_priv_s {
+ unsigned long inframes;
+ unsigned long outframes;
+ enum pu_field_type_t prev_type;
+ unsigned swapped, chroma_swapped;
+ unsigned luma_only;
+ unsigned verbose;
+ unsigned fast;
+ unsigned long w, h, cw, ch, stride, chroma_stride, nplanes;
+ unsigned long sad_thres;
+ unsigned long dint_thres;
+ unsigned char *memory_allocated;
+ unsigned char *planes[2*NUM_STORED][4];
+ unsigned char **old_planes;
+ unsigned long static_idx;
+ unsigned long temp_idx;
+ unsigned long crop_x, crop_y, crop_cx, crop_cy;
+ unsigned long export_count, merge_count;
+ unsigned long num_breaks;
+ long in_inc, out_dec, iosync;
+ long num_fields;
+ long prev_fields;
+ long notout;
+ long mmx2;
+ unsigned small_bytes[2];
+ unsigned mmx_temp[2];
+ struct frame_stats stats[2];
+ struct metrics thres;
+ char chflag;
+ double diff_time, merge_time, decode_time, vo_time, filter_time;
+};
+
+#define PPZ { 2000, 2000, 0, 2000 }
+#define PPR { 2000, 2000, 0, 2000 }
+static const struct frame_stats ppzs = {PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,0,0,9999};
+static const struct frame_stats pprs = {PPR,PPR,PPR,PPR,PPR,PPR,PPR,0,0,9999};
+
+extern int opt_screen_size_x;
+extern int opt_screen_size_y;
+
+#ifndef MIN
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#endif
+#ifndef MAX
+#define MAX(a,b) (((a)>(b))?(a):(b))
+#endif
+
+static inline void *my_memcpy_pic(void * dst, void * src, int bytesPerLine, int height, int dstStride, int srcStride)
+{
+ int i;
+ void *retval=dst;
+
+ for(i=0; i<height; i++)
+ {
+ memcpy(dst, src, bytesPerLine);
+ src+= srcStride;
+ dst+= dstStride;
+ }
+
+ return retval;
+}
+
+#define PDIFFUB(X,Y,T) "movq " #X "," #T "\n\t" \
+ "psubusb " #Y "," #T "\n\t" \
+ "psubusb " #X "," #Y "\n\t" \
+ "paddusb " #Y "," #T "\n\t"
+
+#define PDIFFUBT(X,Y,T) "movq " #X "," #T "\n\t" \
+ "psubusb " #Y "," #T "\n\t" \
+ "psubusb " #X "," #Y "\n\t" \
+ "paddusb " #T "," #Y "\n\t"
+
+#define PSUMBW(X,T,Z) "movq " #X "," #T "\n\t" \
+ "punpcklbw " #Z "," #X "\n\t" \
+ "punpckhbw " #Z "," #T "\n\t" \
+ "paddw " #T "," #X "\n\t" \
+ "movq " #X "," #T "\n\t" \
+ "psllq $32, " #T "\n\t" \
+ "paddw " #T "," #X "\n\t" \
+ "movq " #X "," #T "\n\t" \
+ "psllq $16, " #T "\n\t" \
+ "paddw " #T "," #X "\n\t" \
+ "psrlq $48, " #X "\n\t"
+
+#define PSADBW(X,Y,T,Z) PDIFFUBT(X,Y,T) PSUMBW(Y,T,Z)
+
+#define PMAXUB(X,Y) "psubusb " #X "," #Y "\n\tpaddusb " #X "," #Y "\n\t"
+#define PMAXUW(X,Y) "psubusw " #X "," #Y "\n\tpaddusw " #X "," #Y "\n\t"
+#define PMINUBT(X,Y,T) "movq " #Y "," #T "\n\t" \
+ "psubusb " #X "," #T "\n\t" \
+ "psubusb " #T "," #Y "\n\t"
+#define PAVGB(X,Y) "pavgusb " #X "," #Y "\n\t"
+
+static inline void
+get_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, int lines,
+ struct metrics *m)
+{
+ a -= as;
+ b -= bs;
+ do {
+ cmmx_t old_po = *(cmmx_t*)(a );
+ cmmx_t po = *(cmmx_t*)(b );
+ cmmx_t e = *(cmmx_t*)(b + bs);
+ cmmx_t old_o = *(cmmx_t*)(a + 2*as);
+ cmmx_t o = *(cmmx_t*)(b + 2*bs);
+ cmmx_t ne = *(cmmx_t*)(b + 3*bs);
+ cmmx_t old_no = *(cmmx_t*)(a + 4*as);
+ cmmx_t no = *(cmmx_t*)(b + 4*bs);
+
+ cmmx_t qup_old_odd = p31avgb(old_o, old_po);
+ cmmx_t qup_odd = p31avgb( o, po);
+ cmmx_t qdown_old_odd = p31avgb(old_o, old_no);
+ cmmx_t qdown_odd = p31avgb( o, no);
+
+ cmmx_t qup_even = p31avgb(ne, e);
+ cmmx_t qdown_even = p31avgb(e, ne);
+
+ cmmx_t temp_up_diff = pdiffub(qdown_even, qup_old_odd);
+ cmmx_t noise_up_diff = pdiffub(qdown_even, qup_odd);
+ cmmx_t temp_down_diff = pdiffub(qup_even, qdown_old_odd);
+ cmmx_t noise_down_diff = pdiffub(qup_even, qdown_odd);
+
+ cmmx_t odd_diff = pdiffub(o, old_o);
+ m->odd += psumbw(odd_diff);
+ m->even += psadbw(e, *(cmmx_t*)(a+as));
+
+ temp_up_diff = pminub(temp_up_diff, temp_down_diff);
+ temp_up_diff = pminub(temp_up_diff, odd_diff);
+ m->temp += psumbw(temp_up_diff);
+ noise_up_diff = pminub(noise_up_diff, odd_diff);
+ noise_up_diff = pminub(noise_up_diff, noise_down_diff);
+
+ m->noise += psumbw(noise_up_diff);
+ a += 2*as;
+ b += 2*bs;
+ } while (--lines);
+}
+
+static inline void
+get_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
+ int lines, struct metrics *m)
+{
+ a -= as;
+ b -= bs;
+ do {
+ cmmx_t old_po = (*(cmmx_t*)(a ) >> 1) & ~SIGN_BITS;
+ cmmx_t po = (*(cmmx_t*)(b ) >> 1) & ~SIGN_BITS;
+ cmmx_t old_e = (*(cmmx_t*)(a + as) >> 1) & ~SIGN_BITS;
+ cmmx_t e = (*(cmmx_t*)(b + bs) >> 1) & ~SIGN_BITS;
+ cmmx_t old_o = (*(cmmx_t*)(a + 2*as) >> 1) & ~SIGN_BITS;
+ cmmx_t o = (*(cmmx_t*)(b + 2*bs) >> 1) & ~SIGN_BITS;
+ cmmx_t ne = (*(cmmx_t*)(b + 3*bs) >> 1) & ~SIGN_BITS;
+ cmmx_t old_no = (*(cmmx_t*)(a + 4*as) >> 1) & ~SIGN_BITS;
+ cmmx_t no = (*(cmmx_t*)(b + 4*bs) >> 1) & ~SIGN_BITS;
+
+ cmmx_t qup_old_odd = p31avgb_s(old_o, old_po);
+ cmmx_t qup_odd = p31avgb_s( o, po);
+ cmmx_t qdown_old_odd = p31avgb_s(old_o, old_no);
+ cmmx_t qdown_odd = p31avgb_s( o, no);
+
+ cmmx_t qup_even = p31avgb_s(ne, e);
+ cmmx_t qdown_even = p31avgb_s(e, ne);
+
+ cmmx_t temp_up_diff = pdiffub_s(qdown_even, qup_old_odd);
+ cmmx_t noise_up_diff = pdiffub_s(qdown_even, qup_odd);
+ cmmx_t temp_down_diff = pdiffub_s(qup_even, qdown_old_odd);
+ cmmx_t noise_down_diff = pdiffub_s(qup_even, qdown_odd);
+
+ cmmx_t odd_diff = pdiffub_s(o, old_o);
+ m->odd += psumbw_s(odd_diff) << 1;
+ m->even += psadbw_s(e, old_e) << 1;
+
+ temp_up_diff = pminub_s(temp_up_diff, temp_down_diff);
+ temp_up_diff = pminub_s(temp_up_diff, odd_diff);
+ m->temp += psumbw_s(temp_up_diff) << 1;
+ noise_up_diff = pminub_s(noise_up_diff, odd_diff);
+ noise_up_diff = pminub_s(noise_up_diff, noise_down_diff);
+
+ m->noise += psumbw_s(noise_up_diff) << 1;
+ a += 2*as;
+ b += 2*bs;
+ } while (--lines);
+}
+
+static inline void
+get_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
+ int lines, struct metrics *m)
+{
+ a -= as;
+ b -= bs;
+ do {
+ cmmx_t old_po = (*(cmmx_t*)(a )>>1) & ~SIGN_BITS;
+ cmmx_t po = (*(cmmx_t*)(b )>>1) & ~SIGN_BITS;
+ cmmx_t old_e = (*(cmmx_t*)(a + as)>>1) & ~SIGN_BITS;
+ cmmx_t e = (*(cmmx_t*)(b + bs)>>1) & ~SIGN_BITS;
+ cmmx_t old_o = (*(cmmx_t*)(a + 2*as)>>1) & ~SIGN_BITS;
+ cmmx_t o = (*(cmmx_t*)(b + 2*bs)>>1) & ~SIGN_BITS;
+ cmmx_t ne = (*(cmmx_t*)(b + 3*bs)>>1) & ~SIGN_BITS;
+
+ cmmx_t down_even = p31avgb_s(e, ne);
+ cmmx_t up_odd = p31avgb_s(o, po);
+ cmmx_t up_old_odd = p31avgb_s(old_o, old_po);
+
+ cmmx_t odd_diff = pdiffub_s(o, old_o);
+ cmmx_t temp_diff = pdiffub_s(down_even, up_old_odd);
+ cmmx_t noise_diff = pdiffub_s(down_even, up_odd);
+
+ m->even += psadbw_s(e, old_e) << 1;
+ m->odd += psumbw_s(odd_diff) << 1;
+
+ temp_diff = pminub_s(temp_diff, odd_diff);
+ noise_diff = pminub_s(noise_diff, odd_diff);
+
+ m->noise += psumbw_s(noise_diff) << 1;
+ m->temp += psumbw_s(temp_diff) << 1;
+ a += 2*as;
+ b += 2*bs;
+ } while (--lines);
+
+}
+
+static inline void
+get_block_stats(struct metrics *m, struct vf_priv_s *p, struct frame_stats *s)
+{
+ unsigned two_e = m->even + MAX(m->even , p->thres.even );
+ unsigned two_o = m->odd + MAX(m->odd , p->thres.odd );
+ unsigned two_n = m->noise + MAX(m->noise, p->thres.noise);
+ unsigned two_t = m->temp + MAX(m->temp , p->thres.temp );
+
+ unsigned e_big = m->even >= (m->odd + two_o + 1)/2;
+ unsigned o_big = m->odd >= (m->even + two_e + 1)/2;
+ unsigned n_big = m->noise >= (m->temp + two_t + 1)/2;
+ unsigned t_big = m->temp >= (m->noise + two_n + 1)/2;
+
+ unsigned e2x = m->even >= two_o;
+ unsigned o2x = m->odd >= two_e;
+ unsigned n2x = m->noise >= two_t;
+ unsigned t2x = m->temp >= two_n;
+
+ unsigned ntiny_e = m->even > p->thres.even ;
+ unsigned ntiny_o = m->odd > p->thres.odd ;
+ unsigned ntiny_n = m->noise > p->thres.noise;
+ unsigned ntiny_t = m->temp > p->thres.temp ;
+
+ unsigned nlow_e = m->even > 2*p->thres.even ;
+ unsigned nlow_o = m->odd > 2*p->thres.odd ;
+ unsigned nlow_n = m->noise > 2*p->thres.noise;
+ unsigned nlow_t = m->temp > 2*p->thres.temp ;
+
+ unsigned high_e = m->even > 4*p->thres.even ;
+ unsigned high_o = m->odd > 4*p->thres.odd ;
+ unsigned high_n = m->noise > 4*p->thres.noise;
+ unsigned high_t = m->temp > 4*p->thres.temp ;
+
+ unsigned low_il = !n_big && !t_big && ntiny_n && ntiny_t;
+ unsigned high_il = !n_big && !t_big && nlow_n && nlow_t;
+
+ if (low_il | high_il) {
+ s->interlaced_low += low_il;
+ s->interlaced_high += high_il;
+ } else {
+ s->tiny.even += ntiny_e;
+ s->tiny.odd += ntiny_o;
+ s->tiny.noise += ntiny_n;
+ s->tiny.temp += ntiny_t;
+
+ s->low .even += nlow_e ;
+ s->low .odd += nlow_o ;
+ s->low .noise += nlow_n ;
+ s->low .temp += nlow_t ;
+
+ s->high.even += high_e ;
+ s->high.odd += high_o ;
+ s->high.noise += high_n ;
+ s->high.temp += high_t ;
+
+ if (m->even >= p->sad_thres) s->sad.even += m->even ;
+ if (m->odd >= p->sad_thres) s->sad.odd += m->odd ;
+ if (m->noise >= p->sad_thres) s->sad.noise += m->noise;
+ if (m->temp >= p->sad_thres) s->sad.temp += m->temp ;
+ }
+ s->num_blocks++;
+ s->max.even = MAX(s->max.even , m->even );
+ s->max.odd = MAX(s->max.odd , m->odd );
+ s->max.noise = MAX(s->max.noise, m->noise);
+ s->max.temp = MAX(s->max.temp , m->temp );
+
+ s->bigger.even += e_big ;
+ s->bigger.odd += o_big ;
+ s->bigger.noise += n_big ;
+ s->bigger.temp += t_big ;
+
+ s->twox.even += e2x ;
+ s->twox.odd += o2x ;
+ s->twox.noise += n2x ;
+ s->twox.temp += t2x ;
+
+}
+
+static inline struct metrics
+block_metrics_c(unsigned char *a, unsigned char *b, int as, int bs,
+ int lines, struct vf_priv_s *p, struct frame_stats *s)
+{
+ struct metrics tm;
+ tm.even = tm.odd = tm.noise = tm.temp = 0;
+ get_metrics_c(a, b, as, bs, lines, &tm);
+ if (sizeof(cmmx_t) < 8)
+ get_metrics_c(a+4, b+4, as, bs, lines, &tm);
+ get_block_stats(&tm, p, s);
+ return tm;
+}
+
+static inline struct metrics
+block_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
+ int lines, struct vf_priv_s *p, struct frame_stats *s)
+{
+ struct metrics tm;
+ tm.even = tm.odd = tm.noise = tm.temp = 0;
+ get_metrics_fast_c(a, b, as, bs, lines, &tm);
+ if (sizeof(cmmx_t) < 8)
+ get_metrics_fast_c(a+4, b+4, as, bs, lines, &tm);
+ get_block_stats(&tm, p, s);
+ return tm;
+}
+
+static inline struct metrics
+block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
+ int lines, struct vf_priv_s *p, struct frame_stats *s)
+{
+ struct metrics tm;
+ tm.even = tm.odd = tm.noise = tm.temp = 0;
+ get_metrics_faster_c(a, b, as, bs, lines, &tm);
+ if (sizeof(cmmx_t) < 8)
+ get_metrics_faster_c(a+4, b+4, as, bs, lines, &tm);
+ get_block_stats(&tm, p, s);
+ return tm;
+}
+
+#define MEQ(X,Y) ((X).even == (Y).even && (X).odd == (Y).odd && (X).temp == (Y).temp && (X).noise == (Y).noise)
+
+#define BLOCK_METRICS_TEMPLATE() \
+ asm volatile("pxor %mm7, %mm7\n\t" /* The result is colleted in mm7 */ \
+ "pxor %mm6, %mm6\n\t" /* Temp to stay at 0 */ \
+ ); \
+ a -= as; \
+ b -= bs; \
+ do { \
+ asm volatile( \
+ "movq (%0,%2), %%mm0\n\t" \
+ "movq (%1,%3), %%mm1\n\t" /* mm1 = even */ \
+ PSADBW(%%mm1, %%mm0, %%mm4, %%mm6) \
+ "paddusw %%mm0, %%mm7\n\t" /* even diff */ \
+ "movq (%0,%2,2), %%mm0\n\t" /* mm0 = old odd */ \
+ "movq (%1,%3,2), %%mm2\n\t" /* mm2 = odd */ \
+ "movq (%0), %%mm3\n\t" \
+ "psubusb %4, %%mm3\n\t" \
+ PAVGB(%%mm0, %%mm3) \
+ PAVGB(%%mm0, %%mm3) /* mm3 = qup old odd */ \
+ "movq %%mm0, %%mm5\n\t" \
+ PSADBW(%%mm2, %%mm0, %%mm4, %%mm6) \
+ "psllq $16, %%mm0\n\t" \
+ "paddusw %%mm0, %%mm7\n\t" \
+ "movq (%1), %%mm4\n\t" \
+ "leal (%0,%2,2), %0\n\t" \
+ "leal (%1,%3,2), %1\n\t" \
+ "psubusb %4, %%mm4\n\t" \
+ PAVGB(%%mm2, %%mm4) \
+ PAVGB(%%mm2, %%mm4) /* mm4 = qup odd */ \
+ PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 =abs(oldodd-odd) */ \
+ "movq (%1,%3), %%mm5\n\t" \
+ "psubusb %4, %%mm5\n\t" \
+ PAVGB(%%mm1, %%mm5) \
+ PAVGB(%%mm5, %%mm1) /* mm1 = qdown even */ \
+ PAVGB((%1,%3), %%mm5) /* mm5 = qup next even */ \
+ PDIFFUBT(%%mm1, %%mm3, %%mm0) /* mm3 = abs(qupoldo-qde) */ \
+ PDIFFUBT(%%mm1, %%mm4, %%mm0) /* mm4 = abs(qupodd-qde) */ \
+ PMINUBT(%%mm2, %%mm3, %%mm0) /* limit temp to odd diff */ \
+ PMINUBT(%%mm2, %%mm4, %%mm0) /* limit noise to odd diff */ \
+ "movq (%1,%3,2), %%mm2\n\t" \
+ "psubusb %4, %%mm2\n\t" \
+ PAVGB((%1), %%mm2) \
+ PAVGB((%1), %%mm2) /* mm2 = qdown odd */ \
+ "movq (%0,%2,2), %%mm1\n\t" \
+ "psubusb %4, %%mm1\n\t" \
+ PAVGB((%0), %%mm1) \
+ PAVGB((%0), %%mm1) /* mm1 = qdown old odd */ \
+ PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 = abs(qdo-qune) */ \
+ PDIFFUBT(%%mm5, %%mm1, %%mm0) /* mm1 = abs(qdoo-qune) */ \
+ PMINUBT(%%mm4, %%mm2, %%mm0) /* current */ \
+ PMINUBT(%%mm3, %%mm1, %%mm0) /* old */ \
+ PSUMBW(%%mm2, %%mm0, %%mm6) \
+ PSUMBW(%%mm1, %%mm0, %%mm6) \
+ "psllq $32, %%mm2\n\t" \
+ "psllq $48, %%mm1\n\t" \
+ "paddusw %%mm2, %%mm7\n\t" \
+ "paddusw %%mm1, %%mm7\n\t" \
+ : "=r" (a), "=r" (b) \
+ : "r"(as), "r"(bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \
+ ); \
+ } while (--lines);
+
+static inline struct metrics
+block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs,
+ int lines, struct vf_priv_s *p, struct frame_stats *s)
+{
+ struct metrics tm;
+#ifndef HAVE_3DNOW
+ mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_3dnow: internal error\n");
+#else
+ static const unsigned long long ones = 0x0101010101010101ull;
+ unsigned long interlaced;
+
+ BLOCK_METRICS_TEMPLATE();
+ asm volatile("movq %%mm7, %0\n\temms" : "=m" (tm));
+ get_block_stats(&tm, p, s);
+#endif
+ return tm;
+}
+
+#undef PSUMBW
+#undef PSADBW
+#undef PMAXUB
+#undef PMINUBT
+#undef PAVGB
+
+#define PSUMBW(X,T,Z) "psadbw " #Z "," #X "\n\t"
+#define PSADBW(X,Y,T,Z) "psadbw " #X "," #Y "\n\t"
+#define PMAXUB(X,Y) "pmaxub " #X "," #Y "\n\t"
+#define PMINUBT(X,Y,T) "pminub " #X "," #Y "\n\t"
+#define PAVGB(X,Y) "pavgb " #X "," #Y "\n\t"
+
+static inline struct metrics
+block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
+ int lines, struct vf_priv_s *p, struct frame_stats *s)
+{
+ struct metrics tm;
+#ifndef HAVE_MMX
+ mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_mmx2: internal error\n");
+#else
+ static const unsigned long long ones = 0x0101010101010101ull;
+ unsigned long interlaced;
+ unsigned long prefetch_line = (((long)a>>3) & 7) + 10;
+#ifdef DEBUG
+ struct frame_stats ts = *s;
+#endif
+ asm volatile("prefetcht0 (%0,%2)\n\t"
+ "prefetcht0 (%1,%3)\n\t" :
+ : "r" (a), "r" (b),
+ "r" (prefetch_line * as), "r" (prefetch_line * bs));
+
+ BLOCK_METRICS_TEMPLATE();
+
+ s->num_blocks++;
+ asm volatile(
+ "movq %3, %%mm0\n\t"
+ "movq %%mm7, %%mm1\n\t"
+ "psubusw %%mm0, %%mm1\n\t"
+ "movq %%mm1, %%mm2\n\t"
+ "paddusw %%mm0, %%mm2\n\t"
+ "paddusw %%mm7, %%mm2\n\t"
+ "pshufw $0xb1, %%mm2, %%mm3\n\t"
+ "pavgw %%mm7, %%mm2\n\t"
+ "pshufw $0xb1, %%mm2, %%mm2\n\t"
+ "psubusw %%mm7, %%mm2\n\t"
+ "pcmpeqw %%mm6, %%mm2\n\t" /* 1 if >= 1.5x */
+ "psubusw %%mm7, %%mm3\n\t"
+ "pcmpeqw %%mm6, %%mm3\n\t" /* 1 if >= 2x */
+ "movq %1, %%mm4\n\t"
+ "movq %2, %%mm5\n\t"
+ "psubw %%mm2, %%mm4\n\t"
+ "psubw %%mm3, %%mm5\n\t"
+ "movq %%mm4, %1\n\t"
+ "movq %%mm5, %2\n\t"
+ "pxor %%mm4, %%mm4\n\t"
+ "pcmpeqw %%mm1, %%mm4\n\t" /* 1 if <= t */
+ "psubusw %%mm0, %%mm1\n\t"
+ "pxor %%mm5, %%mm5\n\t"
+ "pcmpeqw %%mm1, %%mm5\n\t" /* 1 if <= 2t */
+ "psubusw %%mm0, %%mm1\n\t"
+ "psubusw %%mm0, %%mm1\n\t"
+ "pcmpeqw %%mm6, %%mm1\n\t" /* 1 if <= 4t */
+ "pshufw $0xb1, %%mm2, %%mm0\n\t"
+ "por %%mm2, %%mm0\n\t" /* 1 if not close */
+ "punpckhdq %%mm0, %%mm0\n\t"
+ "movq %%mm4, %%mm2\n\t" /* tttt */
+ "punpckhdq %%mm5, %%mm2\n\t" /* ttll */
+ "por %%mm2, %%mm0\n\t"
+ "pcmpeqd %%mm6, %%mm0\n\t" /* close && big */
+ "psrlq $16, %%mm0\n\t"
+ "psrlw $15, %%mm0\n\t"
+ "movd %%mm0, %0\n\t"
+ : "=r" (interlaced), "=m" (s->bigger), "=m" (s->twox)
+ : "m" (p->thres)
+ );
+
+ if (interlaced) {
+ s->interlaced_high += interlaced >> 16;
+ s->interlaced_low += interlaced;
+ } else {
+ asm volatile(
+ "pcmpeqw %%mm0, %%mm0\n\t" /* -1 */
+ "psubw %%mm0, %%mm4\n\t"
+ "psubw %%mm0, %%mm5\n\t"
+ "psubw %%mm0, %%mm1\n\t"
+ "paddw %0, %%mm4\n\t"
+ "paddw %1, %%mm5\n\t"
+ "paddw %2, %%mm1\n\t"
+ "movq %%mm4, %0\n\t"
+ "movq %%mm5, %1\n\t"
+ "movq %%mm1, %2\n\t"
+ : "=m" (s->tiny), "=m" (s->low), "=m" (s->high)
+ );
+
+ asm volatile(
+ "pshufw $0, %2, %%mm0\n\t"
+ "psubusw %%mm7, %%mm0\n\t"
+ "pcmpeqw %%mm6, %%mm0\n\t" /* 0 if below sad_thres */
+ "pand %%mm7, %%mm0\n\t"
+ "movq %%mm0, %%mm1\n\t"
+ "punpcklwd %%mm6, %%mm0\n\t" /* sad even, odd */
+ "punpckhwd %%mm6, %%mm1\n\t" /* sad noise, temp */
+ "paddd %0, %%mm0\n\t"
+ "paddd %1, %%mm1\n\t"
+ "movq %%mm0, %0\n\t"
+ "movq %%mm1, %1\n\t"
+ : "=m" (s->sad.even), "=m" (s->sad.noise)
+ : "m" (p->sad_thres)
+ );
+ }
+
+ asm volatile(
+ "movq %%mm7, (%1)\n\t"
+ PMAXUW((%0), %%mm7)
+ "movq %%mm7, (%0)\n\t"
+ "emms"
+ : : "r" (&s->max), "r" (&tm), "X" (s->max)
+ : "memory"
+ );
+#ifdef DEBUG
+ if (1) {
+ struct metrics cm;
+ a -= 7*as;
+ b -= 7*bs;
+ cm = block_metrics_c(a, b, as, bs, 4, p, &ts);
+ if (!MEQ(tm, cm))
+ mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad metrics\n");
+ if (s) {
+# define CHECK(X) if (!MEQ(s->X, ts.X)) \
+ mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad " #X "\n");
+ CHECK(tiny);
+ CHECK(low);
+ CHECK(high);
+ CHECK(sad);
+ CHECK(max);
+ }
+ }
+#endif
+#endif
+ return tm;
+}
+
+static inline int
+dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
+ long cos, int ds, int ss, int w, int t)
+{
+#ifndef HAVE_MMX
+ mp_msg(MSGT_VFILTER, MSGL_FATAL, "dint_copy_line_mmx2: internal error\n");
+ return 0;
+#else
+ unsigned long len = (w+7) >> 3;
+ int ret;
+ asm volatile (
+ "pxor %%mm6, %%mm6 \n\t" /* deinterlaced pixel counter */
+ "movd %0, %%mm7 \n\t"
+ "punpcklbw %%mm7, %%mm7 \n\t"
+ "punpcklwd %%mm7, %%mm7 \n\t"
+ "punpckldq %%mm7, %%mm7 \n\t" /* mm7 = threshold */
+ : /* no output */
+ : "rm" (t)
+ );
+ do {
+ asm volatile (
+ "movq (%0), %%mm0\n\t"
+ "movq (%0,%3,2), %%mm1\n\t"
+ "movq %%mm0, (%2)\n\t"
+ "pmaxub %%mm1, %%mm0\n\t"
+ "pavgb (%0), %%mm1\n\t"
+ "psubusb %%mm1, %%mm0\n\t"
+ "paddusb %%mm7, %%mm0\n\t" /* mm0 = max-avg+thr */
+ "movq (%0,%1), %%mm2\n\t"
+ "movq (%0,%5), %%mm3\n\t"
+ "movq %%mm2, %%mm4\n\t"
+ PDIFFUBT(%%mm1, %%mm2, %%mm5)
+ PDIFFUBT(%%mm1, %%mm3, %%mm5)
+ "pminub %%mm2, %%mm3\n\t"
+ "pcmpeqb %%mm3, %%mm2\n\t" /* b = min */
+ "pand %%mm2, %%mm4\n\t"
+ "pandn (%0,%5), %%mm2\n\t"
+ "por %%mm4, %%mm2\n\t"
+ "pminub %%mm0, %%mm3\n\t"
+ "pcmpeqb %%mm0, %%mm3\n\t" /* set to 1s if >= threshold */
+ "psubb %%mm3, %%mm6\n\t" /* count pixels above thr. */
+ "pand %%mm3, %%mm1 \n\t"
+ "pandn %%mm2, %%mm3 \n\t"
+ "por %%mm3, %%mm1 \n\t" /* avg if >= threshold */
+ "movq %%mm1, (%2,%4) \n\t"
+ : /* no output */
+ : "r" (a), "r" (bos), "r" (dst), "r" (ss), "r" (ds), "r" (cos)
+ );
+ a += 8;
+ dst += 8;
+ } while (--len);
+
+ asm volatile ("pxor %%mm7, %%mm7 \n\t"
+ "psadbw %%mm6, %%mm7 \n\t"
+ "movd %%mm7, %0 \n\t"
+ "emms \n\t"
+ : "=r" (ret)
+ );
+ return ret;
+#endif
+}
+
+static inline int
+dint_copy_line(unsigned char *dst, unsigned char *a, long bos,
+ long cos, int ds, int ss, int w, int t)
+{
+ unsigned long len = ((unsigned long)w+sizeof(cmmx_t)-1) / sizeof(cmmx_t);
+ cmmx_t dint_count = 0;
+ cmmx_t thr;
+ t |= t << 8;
+ thr = t | (t << 16);
+ if (sizeof(cmmx_t) > 4)
+ thr |= thr << (sizeof(cmmx_t)*4);
+ do {
+ cmmx_t e = *(cmmx_t*)a;
+ cmmx_t ne = *(cmmx_t*)(a+2*ss);
+ cmmx_t o = *(cmmx_t*)(a+bos);
+ cmmx_t oo = *(cmmx_t*)(a+cos);
+ cmmx_t maxe = pmaxub(e, ne);
+ cmmx_t avge = pavgb(e, ne);
+ cmmx_t max_diff = maxe - avge + thr; /* 0<=max-avg<128, thr<128 */
+ cmmx_t diffo = pdiffub(avge, o);
+ cmmx_t diffoo = pdiffub(avge, oo);
+ cmmx_t diffcmp = pcmpgtub(diffo, diffoo);
+ cmmx_t bo = ((oo ^ o) & diffcmp) ^ o;
+ cmmx_t diffbo = ((diffoo ^ diffo) & diffcmp) ^ diffo;
+ cmmx_t above_thr = ~pcmpgtub(max_diff, diffbo);
+ cmmx_t bo_or_avg = ((avge ^ bo) & above_thr) ^ bo;
+ dint_count += above_thr & ONE_BYTES;
+ *(cmmx_t*)(dst) = e;
+ *(cmmx_t*)(dst+ds) = bo_or_avg;
+ a += sizeof(cmmx_t);
+ dst += sizeof(cmmx_t);
+ } while (--len);
+ return psumbw(dint_count);
+}
+
+static int
+dint_copy_plane(unsigned char *d, unsigned char *a, unsigned char *b,
+ unsigned char *c, unsigned long w, unsigned long h,
+ unsigned long ds, unsigned long ss, unsigned long threshold,
+ long field, long mmx2)
+{
+ unsigned long ret = 0;
+ long bos = b - a;
+ long cos = c - a;
+ if (field) {
+ memcpy(d, b, w);
+ h--;
+ d += ds;
+ a += ss;
+ }
+ bos += ss;
+ cos += ss;
+ while (h > 2) {
+ if (threshold >= 128) {
+ memcpy(d, a, w);
+ memcpy(d+ds, a+bos, w);
+ } else if (mmx2 == 1) {
+ ret += dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold);
+ } else
+ ret += dint_copy_line(d, a, bos, cos, ds, ss, w, threshold);
+ h -= 2;
+ d += 2*ds;
+ a += 2*ss;
+ }
+ memcpy(d, a, w);
+ if (h == 2)
+ memcpy(d+ds, a+bos, w);
+ return ret;
+}
+
+static void
+copy_merge_fields(struct vf_priv_s *p, mp_image_t *dmpi,
+ unsigned char **old, unsigned char **new, unsigned long show)
+{
+ unsigned long threshold = 256;
+ unsigned long field = p->swapped;
+ unsigned long dint_pixels = 0;
+ unsigned char **other = old;
+ if (show >= 12 || !(show & 3))
+ show >>= 2, other = new, new = old;
+ if (show <= 2) { /* Single field: de-interlace */
+ threshold = p->dint_thres;
+ field ^= show & 1;
+ old = new;
+ } else if (show == 3)
+ old = new;
+ else
+ field ^= 1;
+ dint_pixels +=dint_copy_plane(dmpi->planes[0], old[0], new[0],
+ other[0], p->w, p->h, dmpi->stride[0],
+ p->stride, threshold, field, p->mmx2);
+ if (dmpi->flags & MP_IMGFLAG_PLANAR) {
+ if (p->luma_only)
+ old = new, other = new;
+ else
+ threshold = threshold/2 + 1;
+ field ^= p->chroma_swapped;
+ dint_copy_plane(dmpi->planes[1], old[1], new[1],
+ other[1], p->cw, p->ch, dmpi->stride[1],
+ p->chroma_stride, threshold, field, p->mmx2);
+ dint_copy_plane(dmpi->planes[2], old[2], new[2],
+ other[2], p->cw, p->ch, dmpi->stride[2],
+ p->chroma_stride, threshold, field, p->mmx2);
+ }
+ if (dint_pixels > 0 && p->verbose)
+ mp_msg(MSGT_VFILTER,MSGL_INFO,"Deinterlaced %lu pixels\n",dint_pixels);
+}