[MPlayer-dev-eng] [PATCH] convol (general convolution filter)

Mon Aug 3 00:56:02 CEST 2009

The attached patch adds a new filter, convol, which does general
convolution.  The scale filter already uses convolution for scaling
and image-format conversion, but provides very limited control over
the filter coefficients.  The convol filter does no scaling and no
image-format conversion but provides full control over the convolution
coefficients, with (optionally) separate filters for luma-x, luma-y,
chroma-x, and chroma-y.

The geq filter can do most of what convol can do (and more), but is
about 20x slower for similar operations and has a more cumbersome option
syntax.

I use convol for attenuating very noisy high-frequency "information" in
the luma channel of over-compressed MJPEG source material from a pocket
camera before recoding as H.264.

AMC
-------------- next part --------------
Index: libmpcodecs/vf_convol.c
===================================================================

--- libmpcodecs/vf_convol.c	(revision 0)
+++ libmpcodecs/vf_convol.c	(revision 0)
@@ -0,0 +1,889 @@
+/*
+
+Unlike vf_scale, which does scaling and image-format conversion using
+convolution filters but provides very limited control over the filter
+coefficients, vf_convol does no scaling and no image-format conversion
+but provides full control over the convolution coefficients.
+
+vf_geq can do most of what vf_convol does (and more), but is about 20x
+slower for similar operations.
+
+Note that convolution is a linear operation that really only makes
+sense when applied to linear samples, not gamma-encoded samples.  The
+correct way to apply convolution to luma/chroma samples would be to
+upsample the chroma, convert luma/chroma to RGB, gamma-decode the RGB
+samples, apply the convolution, gamma-encode the RGB samples, convert
+back to luma/chroma, and subsample the chroma.  But that doesn't seem
+very practical, and all the resampling would introduce errors, so the
+common practice is to apply the convolution directly to the luma and
+chroma channels and live with the resulting errors (like dark outlines
+appearing at sharp boundaries between complementary colors).
+
+*/
+
+#include <limits.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "config.h"
+#include "img_format.h"  /* must be included before mp_image.h */
+#include "m_struct.h"
+#include "mp_image.h"
+#include "mp_msg.h"
+#include "vf.h"
+
+
+#if UINT_MAX < 0xFFFFFFFF
+#error Sorry, this code depends on int being at least 32 bits.
+#endif
+
+
+struct sequence_filter {  /* One 1-D kernel, applied along a row or a column. */
+  int *taps;     /* Filter coefficients (array of num_taps ints). */
+  int num_taps;  /* Number of coefficients.  Must be odd. */
+  int sum_taps;  /* Sum of taps, must be > 0. */
+};
+
+
+/* A channel is all samples of the same kind (R, G, B, Y, U, or V)   */
+/* in an image, regardless of whether the image is planar or packed. */
+
+struct channel_filter {
+  struct sequence_filter horizontal;  /* Filter for rows. */
+  struct sequence_filter vertical;    /* Filter for columns. */
+  int **buf;            /* Array of horizontally-filtered row buffers, */
+                        /* of which vertical.num_taps are non-NULL.    */
+  int *tmp_row;         /* Staging for an output row (samples_per_row ints). */
+  int samples_per_row;  /* Length of each row buffer, >= horizontal.num_taps. */
+  int num_rows;         /* Length of buf array, >= vertical.num_taps. */
+  int max_raw_output;   /* 0xFF * horizontal.sum_taps * vertical.sum_taps. */
+                        /* Not-yet-normalized output is clamped to */
+                        /* [0..max_raw_output]. */
+  unsigned int normalize_factor;  /* 0xFFFFFFFF / max_raw_output; scales */
+                                  /* max_raw_output to the 32-bit range.  */
+  /* output = (unsigned_clamped_raw_output * normalize_factor) >> 24
+
+     The shift truncates, so we always round down rather than to the
+     nearest integer, because it's more important to preserve the
+     correspondence between 0 and black than the correspondence between
+     0xFF and white.  Black is a physical absolute, whereas white is an
+     arbitrary maximum allowed brightness.
+  */
+};
+
+struct vf_priv_s {  /* Per-instance state: one channel filter per sample kind. */
+  struct channel_filter non_chroma_filter;  /* For R, G, B, Y. */
+  struct channel_filter chroma_filter;      /* For U, V. */
+};
+
+
+/* Sum of samples[k * sample_stride] * taps[k] for k in [0, num_taps). */
+static inline int dot_product(
+  int num_taps,
+  const int *taps,
+  const uint8_t *samples,
+  int sample_stride)
+{
+  int acc = 0, remaining;
+
+  for (remaining = num_taps;  remaining > 0;  --remaining) {
+    acc += *samples * *taps++;
+    samples += sample_stride;
+  }
+  return acc;
+}
+
+
+static inline int min(int a, int b) {  /* Smaller of a and b. */
+  return a < b ? a : b;
+}
+
+static inline int max(int a, int b) {  /* Larger of a and b. */
+  return a < b ? b : a;
+}
+
+
+/* Limit sample to the closed interval [0, sample_max]. */
+static inline int clamp(int sample, int sample_max) {
+  return min(max(sample, 0), sample_max);
+}
+
+
+/* Horizontally filter one row of one channel.  Reads samples_per_row */
+/* bytes from in_row (sample_stride apart) and writes as many ints to */
+/* out_row, on the scale of horizontal.sum_taps (not yet normalized). */
+
+static void convolve_row(
+  const struct channel_filter *filter,
+  int sample_stride,
+  const uint8_t *in_row,
+  int *out_row)
+{
+  const struct sequence_filter *kernel = &filter->horizontal;
+  const int width = filter->samples_per_row;
+  const int margin = kernel->num_taps / 2;
+  const int interior_end = width - margin;
+  const uint8_t *src = in_row;
+  int x = 0;
+
+  /* Left margin: the kernel would reach past the row start, so   */
+  /* use the identity filter (sample times the kernel's tap sum). */
+  while (x < margin) {
+    out_row[x++] = *src * kernel->sum_taps;
+    src += sample_stride;
+  }
+
+  /* Interior: the kernel window for column x starts margin samples */
+  /* to its left, so rewind the source pointer to the row start.    */
+  src = in_row;
+  while (x < interior_end) {
+    out_row[x++] =
+        dot_product(kernel->num_taps, kernel->taps, src, sample_stride);
+    src += sample_stride;
+  }
+
+  /* Right margin: skip ahead from the window start to the current */
+  /* column, then apply the identity filter again.                 */
+  src += margin * sample_stride;
+  while (x < width) {
+    out_row[x++] = *src * kernel->sum_taps;
+    src += sample_stride;
+  }
+}
+
+
+/* Copy one channel of width x height one-byte samples.  Samples are */
+/* sample_stride bytes apart within a row; rows are in_row_stride /  */
+/* out_row_stride bytes apart in the source / destination.           */
+
+static void copy_channel(
+  int width,
+  int height,
+  int sample_stride,
+  int in_row_stride,
+  int out_row_stride,
+  const uint8_t *in_channel,
+  uint8_t *out_channel)
+{
+  int y;
+  int contiguous = sample_stride == 1 && in_row_stride == out_row_stride;
+  int padding = in_row_stride - width;
+
+  /* Fast path: identically laid out rows with little padding (less than */
+  /* 64 bytes and less than width) go in one memcpy, padding included.   */
+  if (contiguous && padding >= 0 && padding < width && padding < 64) {
+    memcpy(out_channel, in_channel, in_row_stride * height);
+    return;
+  }
+
+  /* General path: copy sample by sample, honoring both strides. */
+  for (y = 0;  y < height;  ++y) {
+    int x;
+    for (x = 0;  x < width;  ++x) {
+      out_channel[x * sample_stride] = in_channel[x * sample_stride];
+    }
+    in_channel += in_row_stride;
+    out_channel += out_row_stride;
+  }
+}
+
+
+/* Apply a convolution filter to one channel:  filter each row           */
+/* horizontally into int buffers, then combine those buffers vertically. */
+static void convolve_channel(
+  const struct channel_filter *filter,
+  int sample_stride,
+  int in_row_stride,
+  int out_row_stride,
+  const uint8_t *in_channel,
+  uint8_t *out_channel)
+{
+  const uint8_t *in_row = in_channel;
+  uint8_t *out_row = out_channel;
+  int row, col, vertical_margin;
+
+  /* Optimization:  If the filter is a no-op, just copy the channel. */
+  if (filter->horizontal.num_taps == 1 && filter->vertical.num_taps == 1) {
+    copy_channel(filter->samples_per_row,
+                 filter->num_rows,
+                 sample_stride,
+                 in_row_stride,
+                 out_row_stride,
+                 in_channel,
+                 out_channel);
+    return;
+  }
+
+  /* We do not vertically filter a few rows at the top and */
+  /* bottom, where the filter would extend out of bounds.  */
+
+  vertical_margin = filter->vertical.num_taps / 2;
+
+  /* Fill up the horizontally-filtered row buffers:            */
+  /* buf[0 .. vertical.num_taps-1] receive the first input rows. */
+  for (row = 0;  row < filter->vertical.num_taps;  ++row) {
+    convolve_row(filter, sample_stride, in_row, filter->buf[row]);
+    in_row += in_row_stride;
+  }
+
+  /* Output the top vertical_margin rows, using the vertical identity */
+  /* filter (multiply by vertical.sum_taps) so the scale matches.     */
+  for (row = 0;  row < vertical_margin;  ++row) {
+    uint8_t *out_sample = out_row;
+    const int *buf_row = filter->buf[row];
+
+    for (col = 0;  col < filter->samples_per_row;  ++col) {
+      unsigned int sample = clamp(buf_row[col] * filter->vertical.sum_taps,
+                                  filter->max_raw_output);
+      *out_sample = (sample * filter->normalize_factor) >> 24;
+      out_sample += sample_stride;
+    }
+
+    out_row += out_row_stride;
+  }
+
+  /* Main loop: Filter both horizontally and vertically.  Each pass */
+  /* emits one output row from buf[row - num_taps .. row - 1].      */
+  for (row = filter->vertical.num_taps;  ;  ++row) {
+    int buf_top, *buf_row, factor, r;
+    uint8_t *out_sample = out_row;
+
+    /* Vertically filter the row buffers to yield one output row. */
+    buf_top = row - filter->vertical.num_taps;
+    buf_row = filter->buf[buf_top];
+    factor = filter->vertical.taps[0];
+    for (col = 0;  col < filter->samples_per_row;  ++col) {
+      filter->tmp_row[col] = buf_row[col] * factor;
+    }
+    for (r = 1;  r < filter->vertical.num_taps;  ++r) {
+      buf_row = filter->buf[buf_top + r];
+      factor = filter->vertical.taps[r];
+      for (col = 0;  col < filter->samples_per_row;  ++col) {
+        filter->tmp_row[col] += buf_row[col] * factor;
+      }
+    }
+    for (col = 0;  col < filter->samples_per_row;  ++col) {
+      unsigned int sample = clamp(filter->tmp_row[col],
+                                  filter->max_raw_output);
+      *out_sample = (sample * filter->normalize_factor) >> 24;
+      out_sample += sample_stride;
+    }
+    out_row += out_row_stride;
+
+    if (row == filter->num_rows) break;  /* No input row left to read. */
+
+    /* Recycle the top row buffer for a new horizontally filtered row. */
+    filter->buf[row] = filter->buf[row - filter->vertical.num_taps];
+    filter->buf[row - filter->vertical.num_taps] = NULL;
+    convolve_row(filter, sample_stride, in_row, filter->buf[row]);
+    in_row += in_row_stride;
+  }
+
+  /* Output the bottom vertical_margin rows, again with the */
+  /* vertical identity filter, as for the top margin.       */
+  for (row = filter->num_rows - vertical_margin;
+       row < filter->num_rows;
+       ++row) {
+    uint8_t *out_sample = out_row;
+    int *buf_row = filter->buf[row];
+
+    for (col = 0;  col < filter->samples_per_row;  ++col) {
+      unsigned int sample = clamp(buf_row[col] * filter->vertical.sum_taps,
+                                  filter->max_raw_output);
+      *out_sample = (sample * filter->normalize_factor) >> 24;
+      out_sample += sample_stride;
+    }
+
+    out_row += out_row_stride;
+  }
+
+  /* Move the row buffers back to the top, so that the next call */
+  /* again finds them in buf[0 .. vertical.num_taps-1].          */
+  for (row = 0;  row < filter->vertical.num_taps;  ++row) {
+    int from_row = row + filter->num_rows - filter->vertical.num_taps;
+    int *buf_row = filter->buf[from_row];
+    filter->buf[from_row] = NULL;
+    filter->buf[row] = buf_row;
+  }
+}
+
+
+/* (Re)allocate a channel's work buffers for a width x height image. */
+static void init_filter_buffers(
+  struct channel_filter *filter, int width, int height)
+{
+  int row, buf_len;
+
+  /* config() may run more than once; release any earlier buffers. */
+  for (row = 0;  filter->buf && row < filter->vertical.num_taps;  ++row) {
+    free(filter->buf[row]);
+  }
+  free(filter->buf);
+  free(filter->tmp_row);
+  /* A smaller-than-filter image is all unfiltered margin: use one tap. */
+  if (width < filter->horizontal.num_taps) {
+    filter->horizontal.num_taps = 1;
+    filter->horizontal.taps[0] = filter->horizontal.sum_taps;
+  }
+  if (height < filter->vertical.num_taps) {
+    filter->vertical.num_taps = 1;
+    filter->vertical.taps[0] = filter->vertical.sum_taps;
+  }
+  /* buf[] needs vertical.num_taps slots even when height is 0 (RGB chroma). */
+  buf_len = max(height, filter->vertical.num_taps);
+  filter->samples_per_row = width;
+  filter->num_rows = height;
+  filter->tmp_row = malloc(width * sizeof filter->tmp_row[0]);
+  filter->buf = malloc(buf_len * sizeof filter->buf[0]);
+  for (row = 0;  row < buf_len;  ++row) {
+    filter->buf[row] = row < filter->vertical.num_taps
+        ? malloc(width * sizeof filter->buf[0][0]) : NULL;
+  }
+}
+
+
+/* Derive per-channel dimensions for outfmt and size the work buffers. */
+static int config(
+  struct vf_instance_s *vf,
+  int width,
+  int height,
+  int d_width,
+  int d_height,
+  unsigned int flags,
+  unsigned int outfmt)
+{
+  int chroma_width, chroma_height;
+  mp_image_t mpi;
+
+  /* Zero the struct so mp_image_setfmt() sees no indeterminate fields. */
+  memset(&mpi, 0, sizeof mpi);
+  mpi.width = width;
+  mpi.height = height;
+  mp_image_setfmt(&mpi, outfmt);
+
+  /* The following calculation of chroma_width and chroma_height */
+  /* needs to be kept consistent with put_image().               */
+  /* Defaults appropriate for planar YCC: */
+  chroma_width = mpi.chroma_width;
+  chroma_height = mpi.chroma_height;
+
+  /* Exceptions: */
+  if (IMGFMT_IS_RGB(outfmt) || IMGFMT_IS_BGR(outfmt)) {
+    /* no chroma channels */
+    chroma_width = chroma_height = 0;
+  }
+  else switch (outfmt) {
+    case IMGFMT_UYVY:
+    case IMGFMT_YUY2:
+      /* mpi.chroma_width,height are not set for packed formats. */
+      chroma_width = width / 2;
+      chroma_height = height;
+      break;
+    case IMGFMT_NV12:
+    case IMGFMT_NV21:
+      chroma_width /= 2;
+      break;
+  }
+
+  init_filter_buffers(&vf->priv->non_chroma_filter, width, height);
+  init_filter_buffers(&vf->priv->chroma_filter, chroma_width, chroma_height);
+  return vf_next_config(vf, width, height, d_width, d_height, flags, outfmt);
+}
+
+
+static void set_min(int *out, int upper_bound) {  /* Cap *out at upper_bound. */
+  *out = min(*out, upper_bound);
+}
+
+
+/* Filter one frame: map mpi's format onto up to three channels, convolve each. */
+static int put_image(struct vf_instance_s *vf, mp_image_t *mpi, double pts)
+{
+  int channel;
+  int imgfmt = mpi->imgfmt;
+  int width = mpi->width;
+  int height = mpi->height;
+  struct channel_filter *non_chroma_filter = &vf->priv->non_chroma_filter,
+                        *chroma_filter = &vf->priv->chroma_filter;
+  int non_chroma_buf_width = non_chroma_filter->samples_per_row;
+  int non_chroma_buf_height = non_chroma_filter->num_rows;
+  int chroma_buf_width = chroma_filter->samples_per_row;
+  int chroma_buf_height = chroma_filter->num_rows;
+  mp_image_t *dmpi = vf_get_image(vf->next,
+                                  imgfmt,
+                                  MP_IMGTYPE_TEMP,
+                                  MP_IMGFLAG_ACCEPT_STRIDE,
+                                  width,
+                                  height);
+
+  /* Only one-byte samples are supported, but we try to support as */
+  /* many arrangements of them as possible.  All we need to know   */
+  /* is whether the samples are one byte each, and which samples   */
+  /* are chroma samples, but there seems to be no reliable way of  */
+  /* determining that other than enumerating the image formats.    */
+
+  /* Defaults appropriate for planar YCC: */
+  const uint8_t *in_channel[3] =
+      { mpi->planes[0], mpi->planes[1], mpi->planes[2] };
+  uint8_t *out_channel[3] =
+      { dmpi->planes[0], dmpi->planes[1], dmpi->planes[2] };
+  int in_row_stride[3] = { mpi->stride[0], mpi->stride[1], mpi->stride[2] };
+  int out_row_stride[3] = { dmpi->stride[0], dmpi->stride[1], dmpi->stride[2] };
+  int sample_stride[3] = { 1, 1, 1 };
+  int is_chroma[3] = { 0, 1, 1 };
+  int chroma_width = mpi->chroma_width;
+  int chroma_height = mpi->chroma_height;
+
+  if (mpi->bpp == 0 || mpi->num_planes >= 4) {
+    mp_msg(MSGT_VFILTER,
+           MSGL_FATAL,
+           "convol: Unsupported image format: %s\n",
+           vo_format_name(imgfmt));
+    return 0;
+  }
+  else if (IMGFMT_IS_RGB(imgfmt) || IMGFMT_IS_BGR(imgfmt)) {
+    if (mpi->bpp != 24) {
+      mp_msg(MSGT_VFILTER,
+             MSGL_FATAL,
+             "convol: Requires one-byte samples, cannot handle %s\n",
+             vo_format_name(imgfmt));
+      return 0;
+    }
+    /* packed, three non-chroma channels */
+    in_channel[1] = in_channel[0] + 1;
+    in_channel[2] = in_channel[0] + 2;
+    out_channel[1] = out_channel[0] + 1;
+    out_channel[2] = out_channel[0] + 2;
+    in_row_stride[1] = in_row_stride[2] = in_row_stride[0];
+    out_row_stride[1] = out_row_stride[2] = out_row_stride[0];
+    sample_stride[0] = sample_stride[1] = sample_stride[2] = 3;
+    is_chroma[0] = is_chroma[1] = is_chroma[2] = 0;
+    chroma_width = chroma_height = 0;
+  }
+  else switch (imgfmt) {
+    /* planar YCC */
+    case IMGFMT_I420:
+    case IMGFMT_IYUV:
+    case IMGFMT_YV12:
+    case IMGFMT_YVU9:
+    case IMGFMT_444P:
+    case IMGFMT_422P:
+    case IMGFMT_411P:
+      break;
+
+    /* grayscale */
+    case IMGFMT_Y800:
+    case IMGFMT_Y8:
+      in_channel[1] = in_channel[2] = NULL;
+      out_channel[1] = out_channel[2] = NULL;
+      break;
+
+    /* packed CYCY */
+    case IMGFMT_UYVY:
+      ++in_channel[0];  /* Luma starts at byte 1, chroma at bytes 0 and 2. */
+      ++out_channel[0];
+      sample_stride[0] = 2;
+      in_channel[1] = in_channel[0] - 1;
+      in_channel[2] = in_channel[0] + 1;
+      out_channel[1] = out_channel[0] - 1;
+      out_channel[2] = out_channel[0] + 1;
+      in_row_stride[1] = in_row_stride[2] = in_row_stride[0];
+      out_row_stride[1] = out_row_stride[2] = out_row_stride[0];
+      sample_stride[1] = sample_stride[2] = 4;
+      chroma_width = width / 2;
+      chroma_height = height;
+      break;
+
+    /* packed YCYC */
+    case IMGFMT_YUY2:
+      sample_stride[0] = 2;  /* Luma at byte 0, chroma at bytes 1 and 3. */
+      in_channel[1] = in_channel[0] + 1;
+      in_channel[2] = in_channel[0] + 3;
+      out_channel[1] = out_channel[0] + 1;
+      out_channel[2] = out_channel[0] + 3;
+      in_row_stride[1] = in_row_stride[2] = in_row_stride[0];
+      out_row_stride[1] = out_row_stride[2] = out_row_stride[0];
+      sample_stride[1] = sample_stride[2] = 4;
+      chroma_width = width / 2;
+      chroma_height = height;
+      break;
+
+    /* hybrid: Y-plane, packed-CC-plane */
+    case IMGFMT_NV12:
+    case IMGFMT_NV21:
+      in_channel[2] = in_channel[1] + 1;  /* Both chroma channels share plane 1. */
+      out_channel[2] = out_channel[1] + 1;
+      in_row_stride[2] = in_row_stride[1];
+      out_row_stride[2] = out_row_stride[1];
+      sample_stride[1] = sample_stride[2] = 2;
+      chroma_width /= 2;
+      break;
+
+    default:
+      mp_msg(MSGT_VFILTER,
+             MSGL_FATAL,
+             "convol: Unsupported image format: %s\n",
+             vo_format_name(imgfmt));
+      return 0;
+  }
+
+  /* If the image dimensions are greater than the configured          */
+  /* dimensions, the excess is presumably padding containing garbage  */
+  /* that shouldn't infect the real image, so we should exclude it    */
+  /* from the filter (and we have to anyway, because the intermediate */
+  /* buffers are only big enough for the configured dimensions).  If  */
+  /* the image dimensions are less than the configured dimensions,    */
+  /* obviously we cannot filter parts of the image that aren't there. */
+  /* The upshot is we use the minimum of the configured dimensions    */
+  /* and the actual dimensions.  Note we already saved a copy of the  */
+  /* configured dimensions.                                           */
+
+  set_min(&non_chroma_filter->samples_per_row, width);
+  set_min(&non_chroma_filter->num_rows, height);
+  set_min(&chroma_filter->samples_per_row, chroma_width);
+  set_min(&chroma_filter->num_rows, chroma_height);
+
+  /* Optimization:  If the chroma channels are just being copied, */
+  /* and they are interleaved in the same plane, copy them in one */
+  /* pass rather than two.                                        */
+
+  if (is_chroma[1] && is_chroma[2] && sample_stride[1] == sample_stride[2] &&
+      in_channel[2] == in_channel[1] + sample_stride[1] / 2 &&
+      chroma_filter->horizontal.num_taps == 1 &&
+      chroma_filter->vertical.num_taps == 1) {
+    in_channel[2] = out_channel[2] = NULL;  /* Channel 2 rides along with 1. */
+    sample_stride[1] /= 2;
+    chroma_width *= 2;
+    chroma_filter->samples_per_row = chroma_width;
+  }
+
+  for (channel = 0;  channel < 3;  ++channel) {
+    if (out_channel[channel] == NULL) continue;
+    convolve_channel(is_chroma[channel] ? chroma_filter : non_chroma_filter,
+                     sample_stride[channel],
+                     in_row_stride[channel],
+                     out_row_stride[channel],
+                     in_channel[channel],
+                     out_channel[channel]);
+  }
+
+  /* Restore the configured dimensions. */
+  non_chroma_filter->samples_per_row = non_chroma_buf_width;
+  non_chroma_filter->num_rows = non_chroma_buf_height;
+  chroma_filter->samples_per_row = chroma_buf_width;
+  chroma_filter->num_rows = chroma_buf_height;
+
+  return vf_next_put_image(vf, dmpi, pts);
+}
+
+
+/* Accept only the one-byte-per-sample formats that put_image() handles. */
+static int query_format(struct vf_instance_s *vf, unsigned int fmt)
+{
+  int supported = 1;
+  mp_image_t mpi;
+
+  /* Zero the struct so mp_image_setfmt() sees no indeterminate fields. */
+  memset(&mpi, 0, sizeof mpi);
+  mp_image_setfmt(&mpi, fmt);
+
+  /* The following calculation of supported formats */
+  /* needs to be kept consistent with put_image().  */
+  if (mpi.bpp == 0 || mpi.num_planes >= 4) {
+    supported = 0;
+  }
+  else if (IMGFMT_IS_RGB(fmt) || IMGFMT_IS_BGR(fmt)) {
+    if (mpi.bpp != 24) supported = 0;
+  }
+  else switch (fmt) {
+    case IMGFMT_I420:
+    case IMGFMT_IYUV:
+    case IMGFMT_YV12:
+    case IMGFMT_YVU9:
+    case IMGFMT_444P:
+    case IMGFMT_422P:
+    case IMGFMT_411P:
+    case IMGFMT_Y800:
+    case IMGFMT_Y8:
+    case IMGFMT_UYVY:
+    case IMGFMT_YUY2:
+    case IMGFMT_NV12:
+    case IMGFMT_NV21:
+      break;
+    default:
+      supported = 0;
+  }
+
+  if (!supported) {
+    mp_msg(MSGT_VFILTER, MSGL_INFO,
+           "convol: Unsupported image format: %s\n", vo_format_name(fmt));
+    return 0;
+  }
+
+  return vf_next_query_format(vf, fmt);
+}
+
+
+/* Free all memory owned by filter; buf is NULL if config() never ran. */
+static void destroy_filter(struct channel_filter *filter)
+{
+  int row;
+  for (row = 0;  filter->buf && row < filter->vertical.num_taps;  ++row) {
+    free(filter->buf[row]);
+  }
+  free(filter->buf);
+  free(filter->tmp_row);
+  free(filter->horizontal.taps);
+  free(filter->vertical.taps);
+  filter->buf = NULL;
+  filter->tmp_row = NULL;
+  filter->horizontal.taps = filter->vertical.taps = NULL;
+}
+
+
+static void uninit(struct vf_instance_s* vf) {
+  struct vf_priv_s *priv = vf->priv;  /* Tear down both filters, then priv. */
+  destroy_filter(&priv->non_chroma_filter);
+  destroy_filter(&priv->chroma_filter);
+  free(priv);
+}
+
+
+/* Initialize a sequence filter from a text specification, which is  */
+/* a sequence of coefficients, each of which is a plus or minus sign */
+/* followed by one or more decimal digits.  Returns 1 on success,    */
+/* 0 on failure; a failed parse frees its taps and leaves them NULL. */
+static int init_sequence_filter(
+  struct sequence_filter *filter, const char *spec, const char *spec_end)
+{
+  int num_taps, i;
+  const char *p;
+
+  if (filter->taps != NULL) {
+    mp_msg(MSGT_VFILTER, MSGL_FATAL,
+           "convol: Cannot specify the same filter more than once.\n");
+    return 0;
+  }
+
+  for (num_taps = 0, p = spec;  p < spec_end;  ++p) {
+    if (*p == '+' || *p == '-') ++num_taps;
+  }
+
+  if (num_taps % 2 == 0) {
+    mp_msg(MSGT_VFILTER, MSGL_FATAL,
+           "convol: Each filter must have an odd number of coefficients.\n");
+    return 0;
+  }
+
+  filter->taps = malloc(num_taps * sizeof filter->taps[0]);
+  if (filter->taps == NULL) {
+    mp_msg(MSGT_VFILTER, MSGL_FATAL, "convol: Out of memory.\n");
+    return 0;
+  }
+  filter->num_taps = num_taps;
+  filter->sum_taps = 0;
+
+  for (p = spec, i = 0;  i < num_taps;  ++i) {
+    int num_fields, field_width;
+    if (p >= spec_end || (*p != '+' && *p != '-')) break;
+    num_fields = sscanf(p, "%d%n", filter->taps + i, &field_width);
+    if (num_fields != 1) break;
+    filter->sum_taps += filter->taps[i];
+    p += field_width;
+  }
+
+  if (p != spec_end || i != num_taps) {
+    /* "%.*s" takes an int precision; a pointer difference is ptrdiff_t. */
+    mp_msg(MSGT_VFILTER, MSGL_FATAL,
+           "convol: Parse error in filter spec: %.*s\n",
+           (int)(spec_end - spec), spec);
+    goto fail;
+  }
+  if (filter->sum_taps <= 0) {
+    mp_msg(MSGT_VFILTER, MSGL_FATAL,
+           "convol: Each filter's coefficients must have a positive sum.\n");
+    goto fail;
+  }
+  return 1;
+fail:
+  free(filter->taps);
+  filter->taps = NULL;
+  return 0;
+}
+
+
+/* Derive a channel filter's clamp/normalize constants from its two  */
+/* already-initialized sequence filters and warn about overly large  */
+/* coefficients.  Leaves the buffers unallocated (NULL), because the */
+/* channel dimensions are not yet known.                             */
+static void init_channel_filter(struct channel_filter *filter) {
+  int i, sum_pos = 0, sum_neg = 0, horizontal_max, vertical_max;
+
+  for (i = 0;  i < filter->horizontal.num_taps;  ++i) {
+    int c = filter->horizontal.taps[i];
+    if (c >= 0) sum_pos += c;
+    else sum_neg += c;
+  }
+
+  horizontal_max = max(sum_pos, -sum_neg);
+  sum_pos = sum_neg = 0;  /* Restart the tallies for the vertical taps. */
+  for (i = 0;  i < filter->vertical.num_taps;  ++i) {
+    int c = filter->vertical.taps[i];
+    if (c >= 0) sum_pos += c;
+    else sum_neg += c;
+  }
+
+  vertical_max = max(sum_pos, -sum_neg);
+  /* Warn unless 0xFF * horizontal_max * vertical_max fits in a signed int. */
+  if (0x7FFFFFFF / 0xFF / horizontal_max < vertical_max) {
+    mp_msg(MSGT_VFILTER,
+           MSGL_WARN,
+           "convol: Filter coefficients are too large, may cause overflow.\n");
+  }
+
+  filter->buf = NULL;
+  filter->tmp_row = NULL;
+  filter->samples_per_row = 0;
+  filter->num_rows = 0;
+  filter->max_raw_output =
+      0xFF * filter->horizontal.sum_taps * filter->vertical.sum_taps;
+  filter->normalize_factor = 0xFFFFFFFF / filter->max_raw_output;
+
+  if (filter->normalize_factor < 512) {
+    mp_msg(MSGT_VFILTER,
+           MSGL_WARN,
+           "convol: Filter coefficients are too large, "
+           "may cause significant roundoff error.\n");
+  }
+}
+
+
+/* Parse "[name=]coefficients[:[name=]coefficients...]" from args into */
+/* the four sequence filters, default any unspecified filter to "+1",  */
+/* and install the callbacks.  Returns 1 on success.  On failure it    */
+/* returns 0 after freeing everything it allocated, leaving vf as is.  */
+static int open(vf_instance_t *vf, char *args)
+{
+  const char *default_value = "+1";
+  const char *default_value_end = default_value + strlen(default_value);
+  const char *args_end, *arg;
+  struct vf_priv_s *priv;
+  struct sequence_filter *non_chroma_h, *non_chroma_v, *chroma_h, *chroma_v;
+  struct sequence_filter *filters[4];
+  int i, status = 1;
+
+  /* strlen(NULL) is undefined, so treat a missing argument string as    */
+  /* empty (presumably what a bare "-vf convol" passes -- TODO confirm). */
+  if (args == NULL) args = "";
+  args_end = args + strlen(args);
+
+  priv = malloc(sizeof *priv);
+  if (priv == NULL) {
+    mp_msg(MSGT_VFILTER, MSGL_FATAL, "convol: Out of memory.\n");
+    return 0;
+  }
+  filters[0] = non_chroma_h = &priv->non_chroma_filter.horizontal;
+  filters[1] = non_chroma_v = &priv->non_chroma_filter.vertical;
+  filters[2] = chroma_h = &priv->chroma_filter.horizontal;
+  filters[3] = chroma_v = &priv->chroma_filter.vertical;
+  for (i = 0;  i < 4;  ++i) filters[i]->taps = NULL;
+
+  for (arg = args;  status && arg < args_end; ) {
+    int name_len;
+    const char *value;
+    const char *arg_end = strchr(arg, ':');
+    if (arg_end == NULL) arg_end = args_end;
+    value = strchr(arg, '=');
+
+    if (value != NULL && value < arg_end) {
+      name_len = value - arg;
+      ++value;
+    }
+    else {
+      name_len = 0;
+      value = arg;
+    }
+
+    if (name_len == 0) {
+      status =
+          init_sequence_filter(non_chroma_h, value, arg_end) &&
+          init_sequence_filter(non_chroma_v, value, arg_end) &&
+          init_sequence_filter(chroma_h, value, arg_end) &&
+          init_sequence_filter(chroma_v, value, arg_end);
+    }
+    else if (name_len == 1 && strncmp(arg, "x", name_len) == 0) {
+      status =
+          init_sequence_filter(non_chroma_h, value, arg_end) &&
+          init_sequence_filter(chroma_h, value, arg_end);
+    }
+    else if (name_len == 1 && strncmp(arg, "y", name_len) == 0) {
+      status =
+          init_sequence_filter(non_chroma_v, value, arg_end) &&
+          init_sequence_filter(chroma_v, value, arg_end);
+    }
+    else if (name_len == 4 && strncmp(arg, "luma", name_len) == 0) {
+      status =
+          init_sequence_filter(non_chroma_h, value, arg_end) &&
+          init_sequence_filter(non_chroma_v, value, arg_end);
+    }
+    else if (name_len == 6 && strncmp(arg, "chroma", name_len) == 0) {
+      status =
+          init_sequence_filter(chroma_h, value, arg_end) &&
+          init_sequence_filter(chroma_v, value, arg_end);
+    }
+    else if (name_len == 6 && strncmp(arg, "luma-x", name_len) == 0) {
+      status = init_sequence_filter(non_chroma_h, value, arg_end);
+    }
+    else if (name_len == 6 && strncmp(arg, "luma-y", name_len) == 0) {
+      status = init_sequence_filter(non_chroma_v, value, arg_end);
+    }
+    else if (name_len == 8 && strncmp(arg, "chroma-x", name_len) == 0) {
+      status = init_sequence_filter(chroma_h, value, arg_end);
+    }
+    else if (name_len == 8 && strncmp(arg, "chroma-y", name_len) == 0) {
+      status = init_sequence_filter(chroma_v, value, arg_end);
+    }
+    else {
+      mp_msg(MSGT_VFILTER, MSGL_FATAL,
+             "convol: Unrecognized parameter: %.*s\n", name_len, arg);
+      status = 0;
+    }
+
+    arg = arg_end + 1;
+  }
+
+  /* Any filter left unspecified defaults to +1, the no-effect filter. */
+  for (i = 0;  status && i < 4;  ++i) {
+    if (filters[i]->taps == NULL) {
+      status = init_sequence_filter(filters[i],
+                                    default_value,
+                                    default_value_end);
+    }
+  }
+
+  if (!status) {
+    /* Undo every allocation; the callbacks and vf->priv are not set yet. */
+    for (i = 0;  i < 4;  ++i) free(filters[i]->taps);
+    free(priv);
+    return 0;
+  }
+
+  init_channel_filter(&priv->non_chroma_filter);
+  init_channel_filter(&priv->chroma_filter);
+  vf->query_format = query_format;
+  vf->config = config;
+  vf->put_image = put_image;
+  vf->uninit = uninit;
+  vf->priv = priv;
+  return 1;
+}
+
+
+const vf_info_t vf_info_convol = {  /* Entry added to filter_list in vf.c. */
+  "general convolution filter",
+  "convol",  /* Filter name, as used in "-vf convol=...". */
+  "Adam M. Costello http://www.nicemice.net/amc/",
+  "",
+  open,      /* The open() function defined above. */
+  NULL,      /* NOTE(review): field meanings come from vf_info_t (vf.h). */
+};
Index: libmpcodecs/vf.c
===================================================================
--- libmpcodecs/vf.c	(revision 29464)
+++ libmpcodecs/vf.c	(working copy)
@@ -100,6 +100,7 @@
 extern const vf_info_t vf_info_blackframe;
 extern const vf_info_t vf_info_geq;
 extern const vf_info_t vf_info_ow;
+extern const vf_info_t vf_info_convol;
 
 // list of available filters:
 static const vf_info_t* const filter_list[]={
@@ -193,6 +194,7 @@
     &vf_info_yadif,
     &vf_info_blackframe,
     &vf_info_ow,
+    &vf_info_convol,
     NULL
 };
 
Index: AUTHORS
===================================================================
--- AUTHORS	(revision 29464)
+++ AUTHORS	(working copy)
@@ -184,6 +184,9 @@
     * dvdnav hacks
     * rawdv demuxer fixes
 
+Costello, Adam <http://www.nicemice.net/amc/>
+    * convol filter
+
 Curry, Alan (pacman, tcsetattr) <pacman at TheWorld.com>
     * swscale AltiVec/big-endian fixes
     * misc vo_fbdev fixes
Index: Makefile
===================================================================
--- Makefile	(revision 29464)
+++ Makefile	(working copy)
@@ -405,6 +405,7 @@
               libmpcodecs/vf_2xsai.c \
               libmpcodecs/vf_blackframe.c \
               libmpcodecs/vf_boxblur.c \
+              libmpcodecs/vf_convol.c \
               libmpcodecs/vf_crop.c \
               libmpcodecs/vf_cropdetect.c \
               libmpcodecs/vf_decimate.c \
Index: DOCS/man/en/mplayer.1
===================================================================
--- DOCS/man/en/mplayer.1	(revision 29464)
+++ DOCS/man/en/mplayer.1	(working copy)
@@ -6224,6 +6224,43 @@
 .RE
 .
 .TP
+.B convol=filter[:filter...]
+General convolution filter.  There are four independent filters:
+horizontal chroma, vertical chroma, horizontal non-chroma, and vertical
+non-chroma.  The non-chroma filters apply to the Y or R,G,B channels,
+and the chroma filters apply to the U,V channels (if present).  Each
+row or column of each channel is convolved with the specified filter
+coefficients and divided by the sum of the coefficients (so that the
+average sample value is unchanged).
+.RSs
+.IPs <filter>
+An odd number of decimal integer filter coefficients whose sum
+is positive.  Each integer must have a sign, either - or +. The
+coefficients can optionally be preceded by <name>=.  If <name>= is
+omitted, the same coefficients are used for all four filters.  If <name>
+is luma-x, luma-y, chroma-x, or chroma-y, the coefficients are used for
+only one filter (here "luma" means non-chroma).  If <name> is luma,
+chroma, x, or y, the coefficients are used for two filters.  Multiple
+filter specifications can be combined if they don't overlap.  Any
+filters left unspecified default to +1 (the no-effect filter).
+.RE
+.sp 1
+.RS
+.I EXAMPLE:
+.RE
+.PD 0
+.RSs
+.IPs "-vf convol=luma=-1+4+26+4-1:chroma-y=-1+4+10+4-1"
+Mildly blur the luma channel, and strongly blur the chroma channels
+vertically (but not horizontally).  [Tip: -1+4+a+4-1 where a >= 10 is a
+good low-pass filter that attenuates the highest frequencies by a factor
+of (a-10)/(a+6).]
+.IPs "-vf convol=-1+6-1"
+Sharpen all channels.
+.RE
+.PD 1
+.
+.TP
 .B "test\ \ \ "
 Generate various test patterns.
 .