diff options
Diffstat (limited to 'libmpcodecs/vf_filmdint.c')
-rw-r--r-- | libmpcodecs/vf_filmdint.c | 1442 |
1 files changed, 0 insertions, 1442 deletions
diff --git a/libmpcodecs/vf_filmdint.c b/libmpcodecs/vf_filmdint.c deleted file mode 100644 index 28eb1e77af..0000000000 --- a/libmpcodecs/vf_filmdint.c +++ /dev/null @@ -1,1442 +0,0 @@ -/* - * This file is part of MPlayer. - * - * MPlayer is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * MPlayer is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with MPlayer; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/time.h> - -#include "config.h" -#include "mp_msg.h" -#include "cpudetect.h" -#include "options.h" - -#include "img_format.h" -#include "mp_image.h" -#include "vf.h" -#include "cmmx.h" - -#include "libvo/fastmemcpy.h" - -#define NUM_STORED 4 - -enum pu_field_type_t { - PU_1ST_OF_3, - PU_2ND_OF_3, - PU_3RD_OF_3, - PU_1ST_OF_2, - PU_2ND_OF_2, - PU_INTERLACED -}; - -struct metrics { - /* This struct maps to a packed word 64-bit MMX register */ - unsigned short int even; - unsigned short int odd; - unsigned short int noise; - unsigned short int temp; -} __attribute__ ((aligned (8))); - -struct frame_stats { - struct metrics tiny, low, high, bigger, twox, max; - struct { unsigned int even, odd, noise, temp; } sad; - unsigned short interlaced_high; - unsigned short interlaced_low; - unsigned short num_blocks; -}; - -struct vf_priv_s { - unsigned long inframes; - unsigned long outframes; - enum pu_field_type_t prev_type; - unsigned swapped, chroma_swapped; - unsigned luma_only; - unsigned verbose; - unsigned fast; - unsigned long w, h, cw, ch, stride, chroma_stride, nplanes; - unsigned long sad_thres; - unsigned long dint_thres; - unsigned char *memory_allocated; - unsigned char *planes[2*NUM_STORED][4]; - unsigned char **old_planes; - unsigned long static_idx; - unsigned long temp_idx; - unsigned long crop_x, crop_y, crop_cx, crop_cy; - unsigned long export_count, merge_count; - unsigned long num_breaks; - unsigned long num_copies; - long in_inc, out_dec, iosync; - long num_fields; - long prev_fields; - long notout; - long mmx2; - unsigned small_bytes[2]; - unsigned mmx_temp[2]; - struct frame_stats stats[2]; - struct metrics thres; - char chflag; - double diff_time, merge_time, decode_time, vo_time, filter_time; - struct vf_detc_pts_buf ptsbuf; -}; - -#define PPZ { 2000, 2000, 0, 2000 } -#define PPR { 2000, 2000, 0, 2000 } -static const struct frame_stats ppzs = {PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,0,0,9999}; -static const struct frame_stats pprs = {PPR,PPR,PPR,PPR,PPR,PPR,PPR,0,0,9999}; - -#ifndef MIN -#define MIN(a,b) (((a)<(b))?(a):(b)) -#endif -#ifndef MAX -#define MAX(a,b) (((a)>(b))?(a):(b)) -#endif - -#define PDIFFUB(X,Y,T) "movq " #X "," #T "\n\t" \ - "psubusb " #Y "," #T "\n\t" \ - "psubusb " #X "," #Y "\n\t" \ - "paddusb " #Y "," #T "\n\t" - -#define PDIFFUBT(X,Y,T) "movq " #X "," #T "\n\t" \ - "psubusb " #Y "," #T "\n\t" \ - "psubusb " #X "," #Y "\n\t" \ - "paddusb " #T "," #Y "\n\t" - -#define PSUMBW(X,T,Z) "movq " #X "," #T "\n\t" \ - "punpcklbw " #Z "," #X "\n\t" \ - "punpckhbw " #Z "," #T "\n\t" \ - "paddw " #T "," #X "\n\t" \ - "movq " #X "," #T "\n\t" \ - "psllq $32, " #T "\n\t" \ - "paddw " #T "," #X "\n\t" \ - "movq " #X "," #T "\n\t" \ - "psllq $16, " #T "\n\t" \ - "paddw " #T "," #X "\n\t" \ - "psrlq $48, " #X "\n\t" - -#define PSADBW(X,Y,T,Z) PDIFFUBT(X,Y,T) PSUMBW(Y,T,Z) - -#define PMAXUB(X,Y) "psubusb " #X "," #Y "\n\tpaddusb " #X "," #Y "\n\t" -#define PMAXUW(X,Y) "psubusw " #X "," #Y "\n\tpaddusw " #X "," #Y "\n\t" -#define PMINUBT(X,Y,T) "movq " #Y "," #T "\n\t" \ - "psubusb " #X "," #T "\n\t" \ - "psubusb " #T "," #Y "\n\t" -#define PAVGB(X,Y) "pavgusb " #X "," #Y "\n\t" - -static inline void -get_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, int lines, - struct metrics *m) -{ - a -= as; - b -= bs; - do { - cmmx_t old_po = *(cmmx_t*)(a ); - cmmx_t po = *(cmmx_t*)(b ); - cmmx_t e = *(cmmx_t*)(b + bs); - cmmx_t old_o = *(cmmx_t*)(a + 2*as); - cmmx_t o = *(cmmx_t*)(b + 2*bs); - cmmx_t ne = *(cmmx_t*)(b + 3*bs); - cmmx_t old_no = *(cmmx_t*)(a + 4*as); - cmmx_t no = *(cmmx_t*)(b + 4*bs); - - cmmx_t qup_old_odd = p31avgb(old_o, old_po); - cmmx_t qup_odd = p31avgb( o, po); - cmmx_t qdown_old_odd = p31avgb(old_o, old_no); - cmmx_t qdown_odd = p31avgb( o, no); - - cmmx_t qup_even = p31avgb(ne, e); - cmmx_t qdown_even = p31avgb(e, ne); - - cmmx_t temp_up_diff = pdiffub(qdown_even, qup_old_odd); - cmmx_t noise_up_diff = pdiffub(qdown_even, qup_odd); - cmmx_t temp_down_diff = pdiffub(qup_even, qdown_old_odd); - cmmx_t noise_down_diff = pdiffub(qup_even, qdown_odd); - - cmmx_t odd_diff = pdiffub(o, old_o); - m->odd += psumbw(odd_diff); - m->even += psadbw(e, *(cmmx_t*)(a+as)); - - temp_up_diff = pminub(temp_up_diff, temp_down_diff); - temp_up_diff = pminub(temp_up_diff, odd_diff); - m->temp += psumbw(temp_up_diff); - noise_up_diff = pminub(noise_up_diff, odd_diff); - noise_up_diff = pminub(noise_up_diff, noise_down_diff); - - m->noise += psumbw(noise_up_diff); - a += 2*as; - b += 2*bs; - } while (--lines); -} - -static inline void -get_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs, - int lines, struct metrics *m) -{ - a -= as; - b -= bs; - do { - cmmx_t old_po = (*(cmmx_t*)(a ) >> 1) & ~SIGN_BITS; - cmmx_t po = (*(cmmx_t*)(b ) >> 1) & ~SIGN_BITS; - cmmx_t old_e = (*(cmmx_t*)(a + as) >> 1) & ~SIGN_BITS; - cmmx_t e = (*(cmmx_t*)(b + bs) >> 1) & ~SIGN_BITS; - cmmx_t old_o = (*(cmmx_t*)(a + 2*as) >> 1) & ~SIGN_BITS; - cmmx_t o = (*(cmmx_t*)(b + 2*bs) >> 1) & ~SIGN_BITS; - cmmx_t ne = (*(cmmx_t*)(b + 3*bs) >> 1) & ~SIGN_BITS; - cmmx_t old_no = (*(cmmx_t*)(a + 4*as) >> 1) & ~SIGN_BITS; - cmmx_t no = (*(cmmx_t*)(b + 4*bs) >> 1) & ~SIGN_BITS; - - cmmx_t qup_old_odd = p31avgb_s(old_o, old_po); - cmmx_t qup_odd = p31avgb_s( o, po); - cmmx_t qdown_old_odd = p31avgb_s(old_o, old_no); - cmmx_t qdown_odd = p31avgb_s( o, no); - - cmmx_t qup_even = p31avgb_s(ne, e); - cmmx_t qdown_even = p31avgb_s(e, ne); - - cmmx_t temp_up_diff = pdiffub_s(qdown_even, qup_old_odd); - cmmx_t noise_up_diff = pdiffub_s(qdown_even, qup_odd); - cmmx_t temp_down_diff = pdiffub_s(qup_even, qdown_old_odd); - cmmx_t noise_down_diff = pdiffub_s(qup_even, qdown_odd); - - cmmx_t odd_diff = pdiffub_s(o, old_o); - m->odd += psumbw_s(odd_diff) << 1; - m->even += psadbw_s(e, old_e) << 1; - - temp_up_diff = pminub_s(temp_up_diff, temp_down_diff); - temp_up_diff = pminub_s(temp_up_diff, odd_diff); - m->temp += psumbw_s(temp_up_diff) << 1; - noise_up_diff = pminub_s(noise_up_diff, odd_diff); - noise_up_diff = pminub_s(noise_up_diff, noise_down_diff); - - m->noise += psumbw_s(noise_up_diff) << 1; - a += 2*as; - b += 2*bs; - } while (--lines); -} - -static inline void -get_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs, - int lines, struct metrics *m) -{ - a -= as; - b -= bs; - do { - cmmx_t old_po = (*(cmmx_t*)(a )>>1) & ~SIGN_BITS; - cmmx_t po = (*(cmmx_t*)(b )>>1) & ~SIGN_BITS; - cmmx_t old_e = (*(cmmx_t*)(a + as)>>1) & ~SIGN_BITS; - cmmx_t e = (*(cmmx_t*)(b + bs)>>1) & ~SIGN_BITS; - cmmx_t old_o = (*(cmmx_t*)(a + 2*as)>>1) & ~SIGN_BITS; - cmmx_t o = (*(cmmx_t*)(b + 2*bs)>>1) & ~SIGN_BITS; - cmmx_t ne = (*(cmmx_t*)(b + 3*bs)>>1) & ~SIGN_BITS; - - cmmx_t down_even = p31avgb_s(e, ne); - cmmx_t up_odd = p31avgb_s(o, po); - cmmx_t up_old_odd = p31avgb_s(old_o, old_po); - - cmmx_t odd_diff = pdiffub_s(o, old_o); - cmmx_t temp_diff = pdiffub_s(down_even, up_old_odd); - cmmx_t noise_diff = pdiffub_s(down_even, up_odd); - - m->even += psadbw_s(e, old_e) << 1; - m->odd += psumbw_s(odd_diff) << 1; - - temp_diff = pminub_s(temp_diff, odd_diff); - noise_diff = pminub_s(noise_diff, odd_diff); - - m->noise += psumbw_s(noise_diff) << 1; - m->temp += psumbw_s(temp_diff) << 1; - a += 2*as; - b += 2*bs; - } while (--lines); - -} - -static inline void -get_block_stats(struct metrics *m, struct vf_priv_s *p, struct frame_stats *s) -{ - unsigned two_e = m->even + MAX(m->even , p->thres.even ); - unsigned two_o = m->odd + MAX(m->odd , p->thres.odd ); - unsigned two_n = m->noise + MAX(m->noise, p->thres.noise); - unsigned two_t = m->temp + MAX(m->temp , p->thres.temp ); - - unsigned e_big = m->even >= (m->odd + two_o + 1)/2; - unsigned o_big = m->odd >= (m->even + two_e + 1)/2; - unsigned n_big = m->noise >= (m->temp + two_t + 1)/2; - unsigned t_big = m->temp >= (m->noise + two_n + 1)/2; - - unsigned e2x = m->even >= two_o; - unsigned o2x = m->odd >= two_e; - unsigned n2x = m->noise >= two_t; - unsigned t2x = m->temp >= two_n; - - unsigned ntiny_e = m->even > p->thres.even ; - unsigned ntiny_o = m->odd > p->thres.odd ; - unsigned ntiny_n = m->noise > p->thres.noise; - unsigned ntiny_t = m->temp > p->thres.temp ; - - unsigned nlow_e = m->even > 2*p->thres.even ; - unsigned nlow_o = m->odd > 2*p->thres.odd ; - unsigned nlow_n = m->noise > 2*p->thres.noise; - unsigned nlow_t = m->temp > 2*p->thres.temp ; - - unsigned high_e = m->even > 4*p->thres.even ; - unsigned high_o = m->odd > 4*p->thres.odd ; - unsigned high_n = m->noise > 4*p->thres.noise; - unsigned high_t = m->temp > 4*p->thres.temp ; - - unsigned low_il = !n_big && !t_big && ntiny_n && ntiny_t; - unsigned high_il = !n_big && !t_big && nlow_n && nlow_t; - - if (low_il | high_il) { - s->interlaced_low += low_il; - s->interlaced_high += high_il; - } else { - s->tiny.even += ntiny_e; - s->tiny.odd += ntiny_o; - s->tiny.noise += ntiny_n; - s->tiny.temp += ntiny_t; - - s->low .even += nlow_e ; - s->low .odd += nlow_o ; - s->low .noise += nlow_n ; - s->low .temp += nlow_t ; - - s->high.even += high_e ; - s->high.odd += high_o ; - s->high.noise += high_n ; - s->high.temp += high_t ; - - if (m->even >= p->sad_thres) s->sad.even += m->even ; - if (m->odd >= p->sad_thres) s->sad.odd += m->odd ; - if (m->noise >= p->sad_thres) s->sad.noise += m->noise; - if (m->temp >= p->sad_thres) s->sad.temp += m->temp ; - } - s->num_blocks++; - s->max.even = MAX(s->max.even , m->even ); - s->max.odd = MAX(s->max.odd , m->odd ); - s->max.noise = MAX(s->max.noise, m->noise); - s->max.temp = MAX(s->max.temp , m->temp ); - - s->bigger.even += e_big ; - s->bigger.odd += o_big ; - s->bigger.noise += n_big ; - s->bigger.temp += t_big ; - - s->twox.even += e2x ; - s->twox.odd += o2x ; - s->twox.noise += n2x ; - s->twox.temp += t2x ; - -} - -static inline struct metrics -block_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, - int lines, struct vf_priv_s *p, struct frame_stats *s) -{ - struct metrics tm; - tm.even = tm.odd = tm.noise = tm.temp = 0; - get_metrics_c(a, b, as, bs, lines, &tm); - if (sizeof(cmmx_t) < 8) - get_metrics_c(a+4, b+4, as, bs, lines, &tm); - get_block_stats(&tm, p, s); - return tm; -} - -static inline struct metrics -block_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs, - int lines, struct vf_priv_s *p, struct frame_stats *s) -{ - struct metrics tm; - tm.even = tm.odd = tm.noise = tm.temp = 0; - get_metrics_fast_c(a, b, as, bs, lines, &tm); - if (sizeof(cmmx_t) < 8) - get_metrics_fast_c(a+4, b+4, as, bs, lines, &tm); - get_block_stats(&tm, p, s); - return tm; -} - -static inline struct metrics -block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs, - int lines, struct vf_priv_s *p, struct frame_stats *s) -{ - struct metrics tm; - tm.even = tm.odd = tm.noise = tm.temp = 0; - get_metrics_faster_c(a, b, as, bs, lines, &tm); - if (sizeof(cmmx_t) < 8) - get_metrics_faster_c(a+4, b+4, as, bs, lines, &tm); - get_block_stats(&tm, p, s); - return tm; -} - -#define MEQ(X,Y) ((X).even == (Y).even && (X).odd == (Y).odd && (X).temp == (Y).temp && (X).noise == (Y).noise) - -#define BLOCK_METRICS_TEMPLATE() \ - __asm__ volatile("pxor %mm7, %mm7\n\t" /* The result is colleted in mm7 */ \ - "pxor %mm6, %mm6\n\t" /* Temp to stay at 0 */ \ - ); \ - a -= as; \ - b -= bs; \ - do { \ - __asm__ volatile( \ - "movq (%0,%2), %%mm0\n\t" \ - "movq (%1,%3), %%mm1\n\t" /* mm1 = even */ \ - PSADBW(%%mm1, %%mm0, %%mm4, %%mm6) \ - "paddusw %%mm0, %%mm7\n\t" /* even diff */ \ - "movq (%0,%2,2), %%mm0\n\t" /* mm0 = old odd */ \ - "movq (%1,%3,2), %%mm2\n\t" /* mm2 = odd */ \ - "movq (%0), %%mm3\n\t" \ - "psubusb %4, %%mm3\n\t" \ - PAVGB(%%mm0, %%mm3) \ - PAVGB(%%mm0, %%mm3) /* mm3 = qup old odd */ \ - "movq %%mm0, %%mm5\n\t" \ - PSADBW(%%mm2, %%mm0, %%mm4, %%mm6) \ - "psllq $16, %%mm0\n\t" \ - "paddusw %%mm0, %%mm7\n\t" \ - "movq (%1), %%mm4\n\t" \ - "lea (%0,%2,2), %0\n\t" \ - "lea (%1,%3,2), %1\n\t" \ - "psubusb %4, %%mm4\n\t" \ - PAVGB(%%mm2, %%mm4) \ - PAVGB(%%mm2, %%mm4) /* mm4 = qup odd */ \ - PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 =abs(oldodd-odd) */ \ - "movq (%1,%3), %%mm5\n\t" \ - "psubusb %4, %%mm5\n\t" \ - PAVGB(%%mm1, %%mm5) \ - PAVGB(%%mm5, %%mm1) /* mm1 = qdown even */ \ - PAVGB((%1,%3), %%mm5) /* mm5 = qup next even */ \ - PDIFFUBT(%%mm1, %%mm3, %%mm0) /* mm3 = abs(qupoldo-qde) */ \ - PDIFFUBT(%%mm1, %%mm4, %%mm0) /* mm4 = abs(qupodd-qde) */ \ - PMINUBT(%%mm2, %%mm3, %%mm0) /* limit temp to odd diff */ \ - PMINUBT(%%mm2, %%mm4, %%mm0) /* limit noise to odd diff */ \ - "movq (%1,%3,2), %%mm2\n\t" \ - "psubusb %4, %%mm2\n\t" \ - PAVGB((%1), %%mm2) \ - PAVGB((%1), %%mm2) /* mm2 = qdown odd */ \ - "movq (%0,%2,2), %%mm1\n\t" \ - "psubusb %4, %%mm1\n\t" \ - PAVGB((%0), %%mm1) \ - PAVGB((%0), %%mm1) /* mm1 = qdown old odd */ \ - PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 = abs(qdo-qune) */ \ - PDIFFUBT(%%mm5, %%mm1, %%mm0) /* mm1 = abs(qdoo-qune) */ \ - PMINUBT(%%mm4, %%mm2, %%mm0) /* current */ \ - PMINUBT(%%mm3, %%mm1, %%mm0) /* old */ \ - PSUMBW(%%mm2, %%mm0, %%mm6) \ - PSUMBW(%%mm1, %%mm0, %%mm6) \ - "psllq $32, %%mm2\n\t" \ - "psllq $48, %%mm1\n\t" \ - "paddusw %%mm2, %%mm7\n\t" \ - "paddusw %%mm1, %%mm7\n\t" \ - : "=r" (a), "=r" (b) \ - : "r"((x86_reg)as), "r"((x86_reg)bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \ - ); \ - } while (--lines); - -#undef PSUMBW -#undef PSADBW -#undef PMAXUB -#undef PMINUBT -#undef PAVGB - -#define PSUMBW(X,T,Z) "psadbw " #Z "," #X "\n\t" -#define PSADBW(X,Y,T,Z) "psadbw " #X "," #Y "\n\t" -#define PMAXUB(X,Y) "pmaxub " #X "," #Y "\n\t" -#define PMINUBT(X,Y,T) "pminub " #X "," #Y "\n\t" -#define PAVGB(X,Y) "pavgb " #X "," #Y "\n\t" - -static inline struct metrics -block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs, - int lines, struct vf_priv_s *p, struct frame_stats *s) -{ - struct metrics tm; -#if !HAVE_MMX - mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_mmx2: internal error\n"); -#else - static const unsigned long long ones = 0x0101010101010101ull; - x86_reg interlaced; - x86_reg prefetch_line = (((long)a>>3) & 7) + 10; -#ifdef DEBUG - struct frame_stats ts = *s; -#endif - __asm__ volatile("prefetcht0 (%0,%2)\n\t" - "prefetcht0 (%1,%3)\n\t" : - : "r" (a), "r" (b), - "r" (prefetch_line * as), "r" (prefetch_line * bs)); - - BLOCK_METRICS_TEMPLATE(); - - s->num_blocks++; - __asm__ volatile( - "movq %3, %%mm0\n\t" - "movq %%mm7, %%mm1\n\t" - "psubusw %%mm0, %%mm1\n\t" - "movq %%mm1, %%mm2\n\t" - "paddusw %%mm0, %%mm2\n\t" - "paddusw %%mm7, %%mm2\n\t" - "pshufw $0xb1, %%mm2, %%mm3\n\t" - "pavgw %%mm7, %%mm2\n\t" - "pshufw $0xb1, %%mm2, %%mm2\n\t" - "psubusw %%mm7, %%mm2\n\t" - "pcmpeqw %%mm6, %%mm2\n\t" /* 1 if >= 1.5x */ - "psubusw %%mm7, %%mm3\n\t" - "pcmpeqw %%mm6, %%mm3\n\t" /* 1 if >= 2x */ - "movq %1, %%mm4\n\t" - "movq %2, %%mm5\n\t" - "psubw %%mm2, %%mm4\n\t" - "psubw %%mm3, %%mm5\n\t" - "movq %%mm4, %1\n\t" - "movq %%mm5, %2\n\t" - "pxor %%mm4, %%mm4\n\t" - "pcmpeqw %%mm1, %%mm4\n\t" /* 1 if <= t */ - "psubusw %%mm0, %%mm1\n\t" - "pxor %%mm5, %%mm5\n\t" - "pcmpeqw %%mm1, %%mm5\n\t" /* 1 if <= 2t */ - "psubusw %%mm0, %%mm1\n\t" - "psubusw %%mm0, %%mm1\n\t" - "pcmpeqw %%mm6, %%mm1\n\t" /* 1 if <= 4t */ - "pshufw $0xb1, %%mm2, %%mm0\n\t" - "por %%mm2, %%mm0\n\t" /* 1 if not close */ - "punpckhdq %%mm0, %%mm0\n\t" - "movq %%mm4, %%mm2\n\t" /* tttt */ - "punpckhdq %%mm5, %%mm2\n\t" /* ttll */ - "por %%mm2, %%mm0\n\t" - "pcmpeqd %%mm6, %%mm0\n\t" /* close && big */ - "psrlq $16, %%mm0\n\t" - "psrlw $15, %%mm0\n\t" - "movd %%mm0, %0\n\t" - : "=r" (interlaced), "=m" (s->bigger), "=m" (s->twox) - : "m" (p->thres) - ); - - if (interlaced) { - s->interlaced_high += interlaced >> 16; - s->interlaced_low += interlaced; - } else { - __asm__ volatile( - "pcmpeqw %%mm0, %%mm0\n\t" /* -1 */ - "psubw %%mm0, %%mm4\n\t" - "psubw %%mm0, %%mm5\n\t" - "psubw %%mm0, %%mm1\n\t" - "paddw %0, %%mm4\n\t" - "paddw %1, %%mm5\n\t" - "paddw %2, %%mm1\n\t" - "movq %%mm4, %0\n\t" - "movq %%mm5, %1\n\t" - "movq %%mm1, %2\n\t" - : "=m" (s->tiny), "=m" (s->low), "=m" (s->high) - ); - - __asm__ volatile( - "pshufw $0, %2, %%mm0\n\t" - "psubusw %%mm7, %%mm0\n\t" - "pcmpeqw %%mm6, %%mm0\n\t" /* 0 if below sad_thres */ - "pand %%mm7, %%mm0\n\t" - "movq %%mm0, %%mm1\n\t" - "punpcklwd %%mm6, %%mm0\n\t" /* sad even, odd */ - "punpckhwd %%mm6, %%mm1\n\t" /* sad noise, temp */ - "paddd %0, %%mm0\n\t" - "paddd %1, %%mm1\n\t" - "movq %%mm0, %0\n\t" - "movq %%mm1, %1\n\t" - : "=m" (s->sad.even), "=m" (s->sad.noise) - : "m" (p->sad_thres) - ); - } - - __asm__ volatile( - "movq %%mm7, (%1)\n\t" - PMAXUW((%0), %%mm7) - "movq %%mm7, (%0)\n\t" - "emms" - : : "r" (&s->max), "r" (&tm), "X" (s->max) - : "memory" - ); -#ifdef DEBUG - if (1) { - struct metrics cm; - a -= 7*as; - b -= 7*bs; - cm = block_metrics_c(a, b, as, bs, 4, p, &ts); - if (!MEQ(tm, cm)) - mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad metrics\n"); - if (s) { -# define CHECK(X) if (!MEQ(s->X, ts.X)) \ - mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad " #X "\n"); - CHECK(tiny); - CHECK(low); - CHECK(high); - CHECK(sad); - CHECK(max); - } - } -#endif -#endif - return tm; -} - -static inline int -dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos, - long cos, int ds, int ss, int w, int t) -{ -#if !HAVE_MMX - mp_msg(MSGT_VFILTER, MSGL_FATAL, "dint_copy_line_mmx2: internal error\n"); - return 0; -#else - unsigned long len = (w+7) >> 3; - int ret; - __asm__ volatile ( - "pxor %%mm6, %%mm6 \n\t" /* deinterlaced pixel counter */ - "movd %0, %%mm7 \n\t" - "punpcklbw %%mm7, %%mm7 \n\t" - "punpcklwd %%mm7, %%mm7 \n\t" - "punpckldq %%mm7, %%mm7 \n\t" /* mm7 = threshold */ - : /* no output */ - : "rm" (t) - ); - do { - __asm__ volatile ( - "movq (%0), %%mm0\n\t" - "movq (%0,%3,2), %%mm1\n\t" - "movq %%mm0, (%2)\n\t" - "pmaxub %%mm1, %%mm0\n\t" - "pavgb (%0), %%mm1\n\t" - "psubusb %%mm1, %%mm0\n\t" - "paddusb %%mm7, %%mm0\n\t" /* mm0 = max-avg+thr */ - "movq (%0,%1), %%mm2\n\t" - "movq (%0,%5), %%mm3\n\t" - "movq %%mm2, %%mm4\n\t" - PDIFFUBT(%%mm1, %%mm2, %%mm5) - PDIFFUBT(%%mm1, %%mm3, %%mm5) - "pminub %%mm2, %%mm3\n\t" - "pcmpeqb %%mm3, %%mm2\n\t" /* b = min */ - "pand %%mm2, %%mm4\n\t" - "pandn (%0,%5), %%mm2\n\t" - "por %%mm4, %%mm2\n\t" - "pminub %%mm0, %%mm3\n\t" - "pcmpeqb %%mm0, %%mm3\n\t" /* set to 1s if >= threshold */ - "psubb %%mm3, %%mm6\n\t" /* count pixels above thr. */ - "pand %%mm3, %%mm1 \n\t" - "pandn %%mm2, %%mm3 \n\t" - "por %%mm3, %%mm1 \n\t" /* avg if >= threshold */ - "movq %%mm1, (%2,%4) \n\t" - : /* no output */ - : "r" (a), "r" ((x86_reg)bos), "r" ((x86_reg)dst), "r" ((x86_reg)ss), "r" ((x86_reg)ds), "r" ((x86_reg)cos) - ); - a += 8; - dst += 8; - } while (--len); - - __asm__ volatile ("pxor %%mm7, %%mm7 \n\t" - "psadbw %%mm6, %%mm7 \n\t" - "movd %%mm7, %0 \n\t" - "emms \n\t" - : "=r" (ret) - ); - return ret; -#endif -} - -static inline int -dint_copy_line(unsigned char *dst, unsigned char *a, long bos, - long cos, int ds, int ss, int w, int t) -{ - unsigned long len = ((unsigned long)w+sizeof(cmmx_t)-1) / sizeof(cmmx_t); - cmmx_t dint_count = 0; - cmmx_t thr; - t |= t << 8; - thr = t | (t << 16); - if (sizeof(cmmx_t) > 4) - thr |= thr << (sizeof(cmmx_t)*4); - do { - cmmx_t e = *(cmmx_t*)a; - cmmx_t ne = *(cmmx_t*)(a+2*ss); - cmmx_t o = *(cmmx_t*)(a+bos); - cmmx_t oo = *(cmmx_t*)(a+cos); - cmmx_t maxe = pmaxub(e, ne); - cmmx_t avge = pavgb(e, ne); - cmmx_t max_diff = maxe - avge + thr; /* 0<=max-avg<128, thr<128 */ - cmmx_t diffo = pdiffub(avge, o); - cmmx_t diffoo = pdiffub(avge, oo); - cmmx_t diffcmp = pcmpgtub(diffo, diffoo); - cmmx_t bo = ((oo ^ o) & diffcmp) ^ o; - cmmx_t diffbo = ((diffoo ^ diffo) & diffcmp) ^ diffo; - cmmx_t above_thr = ~pcmpgtub(max_diff, diffbo); - cmmx_t bo_or_avg = ((avge ^ bo) & above_thr) ^ bo; - dint_count += above_thr & ONE_BYTES; - *(cmmx_t*)(dst) = e; - *(cmmx_t*)(dst+ds) = bo_or_avg; - a += sizeof(cmmx_t); - dst += sizeof(cmmx_t); - } while (--len); - return psumbw(dint_count); -} - -static int -dint_copy_plane(unsigned char *d, unsigned char *a, unsigned char *b, - unsigned char *c, unsigned long w, unsigned long h, - unsigned long ds, unsigned long ss, unsigned long threshold, - long field, long mmx2) -{ - unsigned long ret = 0; - long bos = b - a; - long cos = c - a; - if (field) { - fast_memcpy(d, b, w); - h--; - d += ds; - a += ss; - } - bos += ss; - cos += ss; - while (h > 2) { - if (threshold >= 128) { - fast_memcpy(d, a, w); - fast_memcpy(d+ds, a+bos, w); - } else if (mmx2 == 1) { - ret += dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold); - } else - ret += dint_copy_line(d, a, bos, cos, ds, ss, w, threshold); - h -= 2; - d += 2*ds; - a += 2*ss; - } - fast_memcpy(d, a, w); - if (h == 2) - fast_memcpy(d+ds, a+bos, w); - return ret; -} - -static void -copy_merge_fields(struct vf_priv_s *p, mp_image_t *dmpi, - unsigned char **old, unsigned char **new, unsigned long show) -{ - unsigned long threshold = 256; - unsigned long field = p->swapped; - unsigned long dint_pixels = 0; - unsigned char **other = old; - if (show >= 12 || !(show & 3)) - show >>= 2, other = new, new = old; - if (show <= 2) { /* Single field: de-interlace */ - threshold = p->dint_thres; - field ^= show & 1; - old = new; - } else if (show == 3) - old = new; - else - field ^= 1; - dint_pixels +=dint_copy_plane(dmpi->planes[0], old[0], new[0], - other[0], p->w, p->h, dmpi->stride[0], - p->stride, threshold, field, p->mmx2); - if (dmpi->flags & MP_IMGFLAG_PLANAR) { - if (p->luma_only) - old = new, other = new; - else - threshold = threshold/2 + 1; - field ^= p->chroma_swapped; - dint_copy_plane(dmpi->planes[1], old[1], new[1], - other[1], p->cw, p->ch, dmpi->stride[1], - p->chroma_stride, threshold, field, p->mmx2); - dint_copy_plane(dmpi->planes[2], old[2], new[2], - other[2], p->cw, p->ch, dmpi->stride[2], - p->chroma_stride, threshold, field, p->mmx2); - } - if (dint_pixels > 0 && p->verbose) - mp_msg(MSGT_VFILTER,MSGL_INFO,"Deinterlaced %lu pixels\n",dint_pixels); -} - -static void diff_planes(struct vf_priv_s *p, struct frame_stats *s, - unsigned char *of, unsigned char *nf, - int w, int h, int os, int ns, int swapped) -{ - int i, y; - int align = -(long)nf & 7; - of += align; - nf += align; - w -= align; - if (swapped) - of -= os, nf -= ns; - i = (h*3 >> 7) & ~1; - of += i*os + 8; - nf += i*ns + 8; - h -= i; - w -= 16; - - memset(s, 0, sizeof(*s)); - - for (y = (h-8) >> 3; y; y--) { - if (p->mmx2 == 1) { - for (i = 0; i < w; i += 8) - block_metrics_mmx2(of+i, nf+i, os, ns, 4, p, s); - } else if (p->fast > 3) { - for (i = 0; i < w; i += 8) - block_metrics_faster_c(of+i, nf+i, os, ns, 4, p, s); - } else if (p->fast > 1) { - for (i = 0; i < w; i += 8) - block_metrics_fast_c(of+i, nf+i, os, ns, 4, p, s); - } else { - for (i = 0; i < w; i += 8) - block_metrics_c(of+i, nf+i, os, ns, 4, p, s); - } - of += 8*os; - nf += 8*ns; - } -} - -#define METRICS(X) (X).even, (X).odd, (X).noise, (X).temp - -static void diff_fields(struct vf_priv_s *p, struct frame_stats *s, - unsigned char **old, unsigned char **new) -{ - diff_planes(p, s, old[0], new[0], p->w, p->h, - p->stride, p->stride, p->swapped); - s->sad.even = (s->sad.even * 16ul) / s->num_blocks; - s->sad.odd = (s->sad.odd * 16ul) / s->num_blocks; - s->sad.noise = (s->sad.noise * 16ul) / s->num_blocks; - s->sad.temp = (s->sad.temp * 16ul) / s->num_blocks; - if (p->verbose) - mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu%c M:%d/%d/%d/%d - %d, " - "t:%d/%d/%d/%d, l:%d/%d/%d/%d, h:%d/%d/%d/%d, bg:%d/%d/%d/%d, " - "2x:%d/%d/%d/%d, sad:%d/%d/%d/%d, lil:%d, hil:%d, ios:%.1f\n", - p->inframes, p->chflag, METRICS(s->max), s->num_blocks, - METRICS(s->tiny), METRICS(s->low), METRICS(s->high), - METRICS(s->bigger), METRICS(s->twox), METRICS(s->sad), - s->interlaced_low, s->interlaced_high, - p->iosync / (double) p->in_inc); -} - -static const char *parse_args(struct vf_priv_s *p, const char *args) -{ - args--; - while (args && *++args && - (sscanf(args, "io=%lu:%lu", &p->out_dec, &p->in_inc) == 2 || - sscanf(args, "diff_thres=%hu", &p->thres.even ) == 1 || - sscanf(args, "comb_thres=%hu", &p->thres.noise) == 1 || - sscanf(args, "sad_thres=%lu", &p->sad_thres ) == 1 || - sscanf(args, "dint_thres=%lu", &p->dint_thres ) == 1 || - sscanf(args, "fast=%u", &p->fast ) == 1 || - sscanf(args, "mmx2=%lu", &p->mmx2 ) == 1 || - sscanf(args, "luma_only=%u", &p->luma_only ) == 1 || - sscanf(args, "verbose=%u", &p->verbose ) == 1 || - sscanf(args, "crop=%lu:%lu:%lu:%lu", &p->w, - &p->h, &p->crop_x, &p->crop_y) == 4)) - args = strchr(args, '/'); - return args; -} - -static unsigned long gcd(unsigned long x, unsigned long y) -{ - unsigned long t; - if (x > y) - t = x, x = y, y = t; - - while (x) { - t = y % x; - y = x; - x = t; - } - return y; -} - -static void init(struct vf_priv_s *p, mp_image_t *mpi) -{ - unsigned long i; - unsigned long plane_size, chroma_plane_size; - unsigned char *plane; - unsigned long cos, los; - p->crop_cx = p->crop_x >> mpi->chroma_x_shift; - p->crop_cy = p->crop_y >> mpi->chroma_y_shift; - if (mpi->flags & MP_IMGFLAG_ACCEPT_STRIDE) { - p->stride = (mpi->w + 15) & ~15; - p->chroma_stride = p->stride >> mpi->chroma_x_shift; - } else { - p->stride = mpi->width; - p->chroma_stride = mpi->chroma_width; - } - p->cw = p->w >> mpi->chroma_x_shift; - p->ch = p->h >> mpi->chroma_y_shift; - p->nplanes = 1; - p->static_idx = 0; - p->temp_idx = 0; - p->old_planes = p->planes[0]; - plane_size = mpi->h * p->stride; - chroma_plane_size = mpi->flags & MP_IMGFLAG_PLANAR ? - mpi->chroma_height * p->chroma_stride : 0; - p->memory_allocated = - malloc(NUM_STORED * (plane_size+2*chroma_plane_size) + - 8*p->chroma_stride + 4096); - /* align to page boundary */ - plane = p->memory_allocated + (-(long)p->memory_allocated & 4095); - memset(plane, 0, NUM_STORED * plane_size); - los = p->crop_x + p->crop_y * p->stride; - cos = p->crop_cx + p->crop_cy * p->chroma_stride; - for (i = 0; i != NUM_STORED; i++, plane += plane_size) { - p->planes[i][0] = plane; - p->planes[NUM_STORED + i][0] = plane + los; - } - if (mpi->flags & MP_IMGFLAG_PLANAR) { - p->nplanes = 3; - memset(plane, 0x80, NUM_STORED * 2 * chroma_plane_size); - for (i = 0; i != NUM_STORED; i++) { - p->planes[i][1] = plane; - p->planes[NUM_STORED + i][1] = plane + cos; - plane += chroma_plane_size; - p->planes[i][2] = plane; - p->planes[NUM_STORED + i][2] = plane + cos; - plane += chroma_plane_size; - } - } - p->out_dec <<= 2; - i = gcd(p->in_inc, p->out_dec); - p->in_inc /= i; - p->out_dec /= i; - p->iosync = 0; - p->num_fields = 3; -} - -static inline double get_time(void) -{ - struct timeval tv; - gettimeofday(&tv, 0); - return tv.tv_sec + tv.tv_usec * 1e-6; -} - -static void get_image(struct vf_instance *vf, mp_image_t *mpi) -{ - struct vf_priv_s *p = vf->priv; - static unsigned char **planes, planes_idx; - - if (mpi->type == MP_IMGTYPE_STATIC) return; - - if (!p->planes[0][0]) init(p, mpi); - - if (mpi->type == MP_IMGTYPE_TEMP || - (mpi->type == MP_IMGTYPE_IPB && !(mpi->flags & MP_IMGFLAG_READABLE))) - planes_idx = NUM_STORED/2 + (++p->temp_idx % (NUM_STORED/2)); - else - planes_idx = ++p->static_idx % (NUM_STORED/2); - planes = p->planes[planes_idx]; - mpi->priv = p->planes[NUM_STORED + planes_idx]; - if (mpi->priv == p->old_planes) { - unsigned char **old_planes = - p->planes[NUM_STORED + 2 + (++p->temp_idx & 1)]; - my_memcpy_pic(old_planes[0], p->old_planes[0], - p->w, p->h, p->stride, p->stride); - if (mpi->flags & MP_IMGFLAG_PLANAR) { - my_memcpy_pic(old_planes[1], p->old_planes[1], - p->cw, p->ch, p->chroma_stride, p->chroma_stride); - my_memcpy_pic(old_planes[2], p->old_planes[2], - p->cw, p->ch, p->chroma_stride, p->chroma_stride); - } - p->old_planes = old_planes; - p->num_copies++; - } - mpi->planes[0] = planes[0]; - mpi->stride[0] = p->stride; - if (mpi->flags & MP_IMGFLAG_PLANAR) { - mpi->planes[1] = planes[1]; - mpi->planes[2] = planes[2]; - mpi->stride[1] = mpi->stride[2] = p->chroma_stride; - } - mpi->width = p->stride; - - mpi->flags |= MP_IMGFLAG_DIRECT; - mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK; -} - -static inline long -cmpe(unsigned long x, unsigned long y, unsigned long err, unsigned long e) -{ - long diff = x-y; - long unit = ((x+y+err) >> e); - long ret = (diff > unit) - (diff < -unit); - unit >>= 1; - return ret + (diff > unit) - (diff < -unit); -} - -static unsigned long -find_breaks(struct vf_priv_s *p, struct frame_stats *s) -{ - struct frame_stats *ps = &p->stats[(p->inframes-1) & 1]; - long notfilm = 5*p->in_inc - p->out_dec; - unsigned long n = s->num_blocks >> 8; - unsigned long sad_comb_cmp = cmpe(s->sad.temp, s->sad.noise, 512, 1); - unsigned long ret = 8; - - if (cmpe(s->sad.temp, s->sad.even, 512, 1) > 0) - mp_msg(MSGT_VFILTER, MSGL_WARN, - "@@@@@@@@ Bottom-first field??? @@@@@@@@\n"); - if (s->sad.temp > 1000 && s->sad.noise > 1000) - return 3; - if (s->interlaced_high >= 2*n && s->sad.temp > 256 && s->sad.noise > 256) - return 3; - if (s->high.noise > s->num_blocks/4 && s->sad.noise > 10000 && - s->sad.noise > 2*s->sad.even && s->sad.noise > 2*ps->sad.odd) { - // Mid-frame scene change - if (s->tiny.temp + s->interlaced_low < n || - s->low.temp + s->interlaced_high < n/4 || - s->high.temp + s->interlaced_high < n/8 || - s->sad.temp < 160) - return 1; - return 3; - } - if (s->high.temp > s->num_blocks/4 && s->sad.temp > 10000 && - s->sad.temp > 2*ps->sad.odd && s->sad.temp > 2*ps->sad.even) { - // Start frame scene change - if (s->tiny.noise + s->interlaced_low < n || - s->low.noise + s->interlaced_high < n/4 || - s->high.noise + s->interlaced_high < n/8 || - s->sad.noise < 160) - return 2; - return 3; - } - if (sad_comb_cmp == 2) - return 2; - if (sad_comb_cmp == -2) - return 1; - - if (s->tiny.odd > 3*MAX(n,s->tiny.even) + s->interlaced_low) - return 1; - if (s->tiny.even > 3*MAX(n,s->tiny.odd)+s->interlaced_low && - (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4))) - return 4; - - if (s->sad.noise < 64 && s->sad.temp < 64 && - s->low.noise <= n/2 && s->high.noise < |