[MPlayer-dev-eng] [PATCH] Update libass to latest upstream version.

Reimar Döffinger Reimar.Doeffinger at gmx.de
Sat Feb 1 15:55:24 CET 2014


In particular, this contains fixes for insufficient clipping, which
caused performance issues with some scripts.
See also: https://github.com/rdoeffinger/libass/tree/mplayer
---
 libass/ass.c                | 154 ++++---
 libass/ass.h                |  16 +-
 libass/ass_bitmap.c         | 296 +++++++------
 libass/ass_bitmap.h         |  43 +-
 libass/ass_cache.c          |  42 +-
 libass/ass_cache.h          |   6 +-
 libass/ass_cache_template.h |  45 +-
 libass/ass_drawing.c        |  12 +-
 libass/ass_font.c           | 115 ++---
 libass/ass_font.h           |   1 +
 libass/ass_fontconfig.c     |  19 +-
 libass/ass_parse.c          | 341 +++++++--------
 libass/ass_render.c         | 990 +++++++++++++++++++++++++++++---------------
 libass/ass_render.h         |  85 +++-
 libass/ass_render_api.c     |   7 -
 libass/ass_shaper.c         | 244 ++++++-----
 libass/ass_shaper.h         |   2 +-
 libass/ass_strtod.c         |  15 +-
 libass/ass_types.h          |  80 +++-
 libass/ass_utils.c          | 112 ++++-
 libass/ass_utils.h          |  30 +-
 21 files changed, 1715 insertions(+), 940 deletions(-)

diff --git a/libass/ass.c b/libass/ass.c
index 576f04b..3817134 100644
--- a/libass/ass.c
+++ b/libass/ass.c
@@ -57,7 +57,11 @@ struct parser_priv {
 };
 
 #define ASS_STYLES_ALLOC 20
-#define ASS_EVENTS_ALLOC 200
+
+int ass_library_version(void)
+{
+    return LIBASS_VERSION;
+}
 
 void ass_free_track(ASS_Track *track)
 {
@@ -117,7 +121,7 @@ int ass_alloc_event(ASS_Track *track)
     assert(track->n_events <= track->max_events);
 
     if (track->n_events == track->max_events) {
-        track->max_events += ASS_EVENTS_ALLOC;
+        track->max_events = track->max_events * 2 + 1;
         track->events =
             (ASS_Event *) realloc(track->events,
                                   sizeof(ASS_Event) *
@@ -229,36 +233,51 @@ static int numpad2align(int val)
 	token = next_token(&str); \
 	if (!token) break;
 
+
+#define ALIAS(alias,name) \
+        if (strcasecmp(tname, #alias) == 0) {tname = #name;}
+
+/* One section started with PARSE_START and PARSE_END parses a single token
+ * (contained in the variable named token) for the header indicated by the
+ * variable tname. It does so by chaining a number of else-if statements, each
+ * of which checks if the tname variable indicates that this header should be
+ * parsed. The first parameter of the macro gives the name of the header.
+ *
+ * The string that is passed is in str. str is advanced to the next token if
+ * a header could be parsed. The parsed results are stored in the variable
+ * target, which has the type ASS_Style* or ASS_Event*.
+ */
+#define PARSE_START if (0) {
+#define PARSE_END   }
+
 #define ANYVAL(name,func) \
 	} else if (strcasecmp(tname, #name) == 0) { \
-		target->name = func(token); \
-		ass_msg(track->library, MSGL_DBG2, "%s = %s", #name, token);
+		target->name = func(token);
 
 #define STRVAL(name) \
 	} else if (strcasecmp(tname, #name) == 0) { \
 		if (target->name != NULL) free(target->name); \
-		target->name = strdup(token); \
-		ass_msg(track->library, MSGL_DBG2, "%s = %s", #name, token);
+		target->name = strdup(token);
+
+#define STARREDSTRVAL(name) \
+    } else if (strcasecmp(tname, #name) == 0) { \
+        if (target->name != NULL) free(target->name); \
+        while (*token == '*') ++token; \
+        target->name = strdup(token);
 
 #define COLORVAL(name) \
 	} else if (strcasecmp(tname, #name) == 0) { \
-		target->name = string2color(track->library, token); \
-		ass_msg(track->library, MSGL_DBG2, "%s = %s", #name, token);
+		target->name = string2color(track->library, token);
 
 #define INTVAL(name) ANYVAL(name,atoi)
 #define FPVAL(name) ANYVAL(name,ass_atof)
 #define TIMEVAL(name) \
 	} else if (strcasecmp(tname, #name) == 0) { \
-		target->name = string2timecode(track->library, token); \
-		ass_msg(track->library, MSGL_DBG2, "%s = %s", #name, token);
+		target->name = string2timecode(track->library, token);
 
 #define STYLEVAL(name) \
 	} else if (strcasecmp(tname, #name) == 0) { \
-		target->name = lookup_style(track, token); \
-		ass_msg(track->library, MSGL_DBG2, "%s = %s", #name, token);
-
-#define ALIAS(alias,name) \
-	if (strcasecmp(tname, #alias) == 0) {tname = #name;}
+		target->name = lookup_style(track, token);
 
 static char *next_token(char **str)
 {
@@ -329,7 +348,6 @@ static int process_event_tail(ASS_Track *track, ASS_Event *event,
                 if (last >= event->Text && *last == '\r')
                     *last = 0;
             }
-            ass_msg(track->library, MSGL_DBG2, "Text = %s", event->Text);
             event->Duration -= event->Start;
             free(format);
             return 0;           // "Text" is always the last
@@ -337,7 +355,7 @@ static int process_event_tail(ASS_Track *track, ASS_Event *event,
         NEXT(p, token);
 
         ALIAS(End, Duration)    // temporarily store end timecode in event->Duration
-        if (0) {            // cool ;)
+        PARSE_START
             INTVAL(Layer)
             STYLEVAL(Style)
             STRVAL(Name)
@@ -347,7 +365,7 @@ static int process_event_tail(ASS_Track *track, ASS_Event *event,
             INTVAL(MarginV)
             TIMEVAL(Start)
             TIMEVAL(Duration)
-        }
+        PARSE_END
     }
     free(format);
     return 1;
@@ -403,7 +421,7 @@ void ass_process_force_style(ASS_Track *track)
             if (style == NULL
                 || strcasecmp(track->styles[sid].Name, style) == 0) {
                 target = track->styles + sid;
-                if (0) {
+                PARSE_START
                     STRVAL(FontName)
                     COLORVAL(PrimaryColour)
                     COLORVAL(SecondaryColour)
@@ -415,7 +433,7 @@ void ass_process_force_style(ASS_Track *track)
                     INTVAL(Underline)
                     INTVAL(StrikeOut)
                     FPVAL(Spacing)
-                    INTVAL(Angle)
+                    FPVAL(Angle)
                     INTVAL(BorderStyle)
                     INTVAL(Alignment)
                     INTVAL(MarginL)
@@ -427,7 +445,7 @@ void ass_process_force_style(ASS_Track *track)
                     FPVAL(Outline)
                     FPVAL(Shadow)
                     FPVAL(Blur)
-                }
+                PARSE_END
             }
         }
         *eq = '=';
@@ -497,11 +515,10 @@ static int process_style(ASS_Track *track, char *str)
         NEXT(q, tname);
         NEXT(p, token);
 
-        if (0) {                // cool ;)
-            STRVAL(Name)
-            if ((strcmp(target->Name, "Default") == 0)
-                || (strcmp(target->Name, "*Default") == 0))
-            track->default_style = sid;
+        PARSE_START
+            STARREDSTRVAL(Name)
+            if (strcmp(target->Name, "Default") == 0)
+                track->default_style = sid;
             STRVAL(FontName)
             COLORVAL(PrimaryColour)
             COLORVAL(SecondaryColour)
@@ -522,6 +539,11 @@ static int process_style(ASS_Track *track, char *str)
             INTVAL(Alignment)
             if (track->track_type == TRACK_TYPE_ASS)
                 target->Alignment = numpad2align(target->Alignment);
+            // VSFilter compatibility
+            else if (target->Alignment == 8)
+                target->Alignment = 3;
+            else if (target->Alignment == 4)
+                target->Alignment = 11;
             INTVAL(MarginL)
             INTVAL(MarginR)
             INTVAL(MarginV)
@@ -530,13 +552,17 @@ static int process_style(ASS_Track *track, char *str)
             FPVAL(ScaleY)
             FPVAL(Outline)
             FPVAL(Shadow)
-        }
+        PARSE_END
     }
-    style->ScaleX /= 100.;
-    style->ScaleY /= 100.;
+    style->ScaleX = FFMAX(style->ScaleX, 0.) / 100.;
+    style->ScaleY = FFMAX(style->ScaleY, 0.) / 100.;
+    style->Spacing = FFMAX(style->Spacing, 0.);
+    style->Outline = FFMAX(style->Outline, 0.);
+    style->Shadow = FFMAX(style->Shadow, 0.);
     style->Bold = !!style->Bold;
     style->Italic = !!style->Italic;
     style->Underline = !!style->Underline;
+    style->StrikeOut = !!style->StrikeOut;
     if (!style->Name)
         style->Name = strdup("Default");
     if (!style->FontName)
@@ -962,6 +988,11 @@ static char *sub_recode(ASS_Library *library, char *data, size_t size,
             ass_msg(library, MSGL_V, "Opened iconv descriptor");
         } else
             ass_msg(library, MSGL_ERR, "Error opening iconv descriptor");
+#ifdef CONFIG_ENCA
+        if (cp_tmp != codepage) {
+            free((void*)cp_tmp);
+        }
+#endif
     }
 
     {
@@ -1111,7 +1142,7 @@ ASS_Track *ass_read_memory(ASS_Library *library, char *buf,
                            size_t bufsize, char *codepage)
 {
     ASS_Track *track;
-    int need_free = 0;
+    int copied = 0;
 
     if (!buf)
         return 0;
@@ -1122,12 +1153,19 @@ ASS_Track *ass_read_memory(ASS_Library *library, char *buf,
         if (!buf)
             return 0;
         else
-            need_free = 1;
+            copied = 1;
     }
 #endif
+    if (!copied) {
+        char *newbuf = malloc(bufsize + 1);
+        if (!newbuf)
+            return 0;
+        memcpy(newbuf, buf, bufsize);
+        newbuf[bufsize] = '\0';
+        buf = newbuf;
+    }
     track = parse_memory(library, buf);
-    if (need_free)
-        free(buf);
+    free(buf);
     if (!track)
         return 0;
 
@@ -1224,33 +1262,45 @@ int ass_read_styles(ASS_Track *track, char *fname, char *codepage)
 long long ass_step_sub(ASS_Track *track, long long now, int movement)
 {
     int i;
+    ASS_Event *best = NULL;
+    long long target = now;
+    int direction = movement > 0 ? 1 : -1;
 
     if (movement == 0)
         return 0;
     if (track->n_events == 0)
         return 0;
 
-    if (movement < 0)
-        for (i = 0;
-             (i < track->n_events)
-             &&
-             ((long long) (track->events[i].Start +
-                           track->events[i].Duration) <= now); ++i) {
-    } else
-        for (i = track->n_events - 1;
-             (i >= 0) && ((long long) (track->events[i].Start) > now);
-             --i) {
+    while (movement) {
+        ASS_Event *closest = NULL;
+        long long closest_time = now;
+        for (i = 0; i < track->n_events; i++) {
+            if (direction < 0) {
+                long long end =
+                    track->events[i].Start + track->events[i].Duration;
+                if (end < target) {
+                    if (!closest || end > closest_time) {
+                        closest = &track->events[i];
+                        closest_time = end;
+                    }
+                }
+            } else {
+                long long start = track->events[i].Start;
+                if (start > target) {
+                    if (!closest || start < closest_time) {
+                        closest = &track->events[i];
+                        closest_time = start;
+                    }
+                }
+            }
         }
+        target = closest_time + direction;
+        movement -= direction;
+        if (closest)
+            best = closest;
+    }
 
-    // -1 and n_events are ok
-    assert(i >= -1);
-    assert(i <= track->n_events);
-    i += movement;
-    if (i < 0)
-        i = 0;
-    if (i >= track->n_events)
-        i = track->n_events - 1;
-    return ((long long) track->events[i].Start) - now;
+    return best ? best->Start - now : 0;
 }
 
 ASS_Track *ass_new_track(ASS_Library *library)
diff --git a/libass/ass.h b/libass/ass.h
index 0778a5c..e7c2ade 100644
--- a/libass/ass.h
+++ b/libass/ass.h
@@ -23,7 +23,7 @@
 #include <stdarg.h>
 #include "ass_types.h"
 
-#define LIBASS_VERSION 0x01020000
+#define LIBASS_VERSION 0x01030000
 
 /*
  * A linked list of images produced by an ass renderer.
@@ -56,6 +56,13 @@ typedef struct ass_image {
 /*
  * Hinting type. (see ass_set_hinting below)
  *
+ * Setting hinting to anything but ASS_HINTING_NONE will put libass in a mode
+ * that reduces compatibility with vsfilter and many ASS scripts. The main
+ * problem is that hinting conflicts with smooth scaling, which precludes
+ * animations and precise positioning.
+ *
+ * In other words, enabling hinting might break some scripts severely.
+ *
  * FreeType's native hinter is still buggy sometimes and it is recommended
  * to use the light autohinter, ASS_HINTING_LIGHT, instead.  For best
  * compatibility with problematic fonts, disable hinting.
@@ -81,6 +88,13 @@ typedef enum {
 } ASS_ShapingLevel;
 
 /**
+ * \brief Return the version of library. This returns the value LIBASS_VERSION
+ * was set to when the library was compiled.
+ * \return library version
+ */
+int ass_library_version(void);
+
+/**
  * \brief Initialize the library.
  * \return library handle or NULL if failed
  */
diff --git a/libass/ass_bitmap.c b/libass/ass_bitmap.c
index 0ad1647..7689651 100644
--- a/libass/ass_bitmap.c
+++ b/libass/ass_bitmap.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2006 Evgeniy Stepanov <eugeni.stepanov at gmail.com>
  * Copyright (C) 2011 Grigori Goronzy <greg at chown.ath.cx>
+ * Copyright (c) 2011-2014, Yu Zhuohuang <yuzhuohuang at qq.com>
  *
  * This file is part of libass.
  *
@@ -27,24 +28,11 @@
 
 #include "ass_utils.h"
 #include "ass_bitmap.h"
+#include "ass_render.h"
 
-struct ass_synth_priv {
-    int tmp_w, tmp_h;
-    unsigned short *tmp;
-
-    int g_r;
-    int g_w;
-
-    unsigned *g;
-    unsigned *gt2;
-
-    double radius;
-};
-
-static const unsigned int maxcolor = 255;
 static const unsigned base = 256;
 
-static int generate_tables(ASS_SynthPriv *priv, double radius)
+int generate_tables(ASS_SynthPriv *priv, double radius)
 {
     double A = log(1.0 / base) / (radius * radius * 2);
     int mx, i;
@@ -60,6 +48,7 @@ static int generate_tables(ASS_SynthPriv *priv, double radius)
     priv->g_w = 2 * priv->g_r + 1;
 
     if (priv->g_r) {
+        priv->g0 = realloc(priv->g0, priv->g_w * sizeof(double));
         priv->g = realloc(priv->g, priv->g_w * sizeof(unsigned));
         priv->gt2 = realloc(priv->gt2, 256 * priv->g_w * sizeof(unsigned));
         if (priv->g == NULL || priv->gt2 == NULL) {
@@ -68,25 +57,26 @@ static int generate_tables(ASS_SynthPriv *priv, double radius)
     }
 
     if (priv->g_r) {
-        // gaussian curve with volume = 256
+        // exact gaussian curve
+        for (i = 0; i < priv->g_w; ++i) {
+            priv->g0[i] = exp(A * (i - priv->g_r) * (i - priv->g_r));
+        }
+
+        // integer gaussian curve with volume = 65536
         for (volume_diff = 10000000; volume_diff > 0.0000001;
              volume_diff *= 0.5) {
             volume_factor += volume_diff;
             volume = 0;
             for (i = 0; i < priv->g_w; ++i) {
-                priv->g[i] =
-                    (unsigned) (exp(A * (i - priv->g_r) * (i - priv->g_r)) *
-                                volume_factor + .5);
+                priv->g[i] = (unsigned) (priv->g0[i] * volume_factor + .5);
                 volume += priv->g[i];
             }
-            if (volume > 256)
+            if (volume > 65536)
                 volume_factor -= volume_diff;
         }
         volume = 0;
         for (i = 0; i < priv->g_w; ++i) {
-            priv->g[i] =
-                (unsigned) (exp(A * (i - priv->g_r) * (i - priv->g_r)) *
-                            volume_factor + .5);
+            priv->g[i] = (unsigned) (priv->g0[i] * volume_factor + .5);
             volume += priv->g[i];
         }
 
@@ -101,7 +91,7 @@ static int generate_tables(ASS_SynthPriv *priv, double radius)
     return 0;
 }
 
-static void resize_tmp(ASS_SynthPriv *priv, int w, int h)
+void resize_tmp(ASS_SynthPriv *priv, int w, int h)
 {
     if (priv->tmp_w >= w && priv->tmp_h >= h)
         return;
@@ -114,7 +104,7 @@ static void resize_tmp(ASS_SynthPriv *priv, int w, int h)
     while (priv->tmp_h < h)
         priv->tmp_h *= 2;
     free(priv->tmp);
-    priv->tmp = malloc((priv->tmp_w + 1) * priv->tmp_h * sizeof(short));
+    priv->tmp = malloc((priv->tmp_w + 1) * priv->tmp_h * sizeof(unsigned));
 }
 
 ASS_SynthPriv *ass_synth_init(double radius)
@@ -127,17 +117,23 @@ ASS_SynthPriv *ass_synth_init(double radius)
 void ass_synth_done(ASS_SynthPriv *priv)
 {
     free(priv->tmp);
+    free(priv->g0);
     free(priv->g);
     free(priv->gt2);
     free(priv);
 }
 
-static Bitmap *alloc_bitmap(int w, int h)
+Bitmap *alloc_bitmap(int w, int h)
 {
     Bitmap *bm;
-    unsigned s = w; // XXX: alignment
+
+    uintptr_t alignment_offset = (w > 31) ? 31 : ((w > 15) ? 15 : 0);
+    unsigned s = (w + alignment_offset) & ~alignment_offset;
     bm = malloc(sizeof(Bitmap));
-    bm->buffer = calloc(s, h);
+    bm->buffer_ptr = malloc(s * h + alignment_offset + 32);
+    bm->buffer = (unsigned char*)
+        (((uintptr_t)bm->buffer_ptr + alignment_offset) & ~alignment_offset);
+    memset(bm->buffer, 0, s * h + 32);
     bm->w = w;
     bm->h = h;
     bm->stride = s;
@@ -148,11 +144,11 @@ static Bitmap *alloc_bitmap(int w, int h)
 void ass_free_bitmap(Bitmap *bm)
 {
     if (bm)
-        free(bm->buffer);
+        free(bm->buffer_ptr);
     free(bm);
 }
 
-static Bitmap *copy_bitmap(const Bitmap *src)
+Bitmap *copy_bitmap(const Bitmap *src)
 {
     Bitmap *dst = alloc_bitmap(src->w, src->h);
     dst->left = src->left;
@@ -217,7 +213,7 @@ Bitmap *outline_to_bitmap(ASS_Library *library, FT_Library ftlib,
  * The glyph bitmap is subtracted from outline bitmap. This way looks much
  * better in some cases.
  */
-static void fix_outline(Bitmap *bm_g, Bitmap *bm_o)
+void fix_outline(Bitmap *bm_g, Bitmap *bm_o)
 {
     int x, y;
     const int l = bm_o->left > bm_g->left ? bm_o->left : bm_g->left;
@@ -250,7 +246,7 @@ static void fix_outline(Bitmap *bm_g, Bitmap *bm_o)
  * \brief Shift a bitmap by the fraction of a pixel in x and y direction
  * expressed in 26.6 fixed point
  */
-static void shift_bitmap(Bitmap *bm, int shift_x, int shift_y)
+void shift_bitmap(Bitmap *bm, int shift_x, int shift_y)
 {
     int x, y, b;
     int w = bm->w;
@@ -302,24 +298,25 @@ static void shift_bitmap(Bitmap *bm, int shift_x, int shift_y)
 /*
  * Gaussian blur.  An fast pure C implementation from MPlayer.
  */
-static void ass_gauss_blur(unsigned char *buffer, unsigned short *tmp2,
-                           int width, int height, int stride, int *m2,
-                           int r, int mwidth)
+void ass_gauss_blur(unsigned char *buffer, unsigned *tmp2,
+                    int width, int height, int stride,
+                    unsigned *m2, int r, int mwidth)
 {
 
     int x, y;
 
     unsigned char *s = buffer;
-    unsigned short *t = tmp2 + 1;
+    unsigned *t = tmp2 + 1;
     for (y = 0; y < height; y++) {
-        memset(t - 1, 0, (width + 1) * sizeof(short));
+        memset(t - 1, 0, (width + 1) * sizeof(unsigned));
+        t[-1] = 32768;
 
         for (x = 0; x < r; x++) {
             const int src = s[x];
             if (src) {
-                register unsigned short *dstp = t + x - r;
+                register unsigned *dstp = t + x - r;
                 int mx;
-                unsigned *m3 = (unsigned *) (m2 + src * mwidth);
+                unsigned *m3 = m2 + src * mwidth;
                 for (mx = r - x; mx < mwidth; mx++) {
                     dstp[mx] += m3[mx];
                 }
@@ -329,9 +326,9 @@ static void ass_gauss_blur(unsigned char *buffer, unsigned short *tmp2,
         for (; x < width - r; x++) {
             const int src = s[x];
             if (src) {
-                register unsigned short *dstp = t + x - r;
+                register unsigned *dstp = t + x - r;
                 int mx;
-                unsigned *m3 = (unsigned *) (m2 + src * mwidth);
+                unsigned *m3 = m2 + src * mwidth;
                 for (mx = 0; mx < mwidth; mx++) {
                     dstp[mx] += m3[mx];
                 }
@@ -341,10 +338,10 @@ static void ass_gauss_blur(unsigned char *buffer, unsigned short *tmp2,
         for (; x < width; x++) {
             const int src = s[x];
             if (src) {
-                register unsigned short *dstp = t + x - r;
+                register unsigned *dstp = t + x - r;
                 int mx;
                 const int x2 = r + width - x;
-                unsigned *m3 = (unsigned *) (m2 + src * mwidth);
+                unsigned *m3 = m2 + src * mwidth;
                 for (mx = 0; mx < x2; mx++) {
                     dstp[mx] += m3[mx];
                 }
@@ -358,31 +355,31 @@ static void ass_gauss_blur(unsigned char *buffer, unsigned short *tmp2,
     t = tmp2;
     for (x = 0; x < width; x++) {
         for (y = 0; y < r; y++) {
-            unsigned short *srcp = t + y * (width + 1) + 1;
+            unsigned *srcp = t + y * (width + 1) + 1;
             int src = *srcp;
             if (src) {
-                register unsigned short *dstp = srcp - 1 + width + 1;
-                const int src2 = (src + 128) >> 8;
-                unsigned *m3 = (unsigned *) (m2 + src2 * mwidth);
+                register unsigned *dstp = srcp - 1 - y * (width + 1);
+                const int src2 = (src + 32768) >> 16;
+                unsigned *m3 = m2 + src2 * mwidth;
 
                 int mx;
-                *srcp = 128;
-                for (mx = r - 1; mx < mwidth; mx++) {
+                *srcp = 32768;
+                for (mx = r - y; mx < mwidth; mx++) {
                     *dstp += m3[mx];
                     dstp += width + 1;
                 }
             }
         }
         for (; y < height - r; y++) {
-            unsigned short *srcp = t + y * (width + 1) + 1;
+            unsigned *srcp = t + y * (width + 1) + 1;
             int src = *srcp;
             if (src) {
-                register unsigned short *dstp = srcp - 1 - r * (width + 1);
-                const int src2 = (src + 128) >> 8;
-                unsigned *m3 = (unsigned *) (m2 + src2 * mwidth);
+                register unsigned *dstp = srcp - 1 - r * (width + 1);
+                const int src2 = (src + 32768) >> 16;
+                unsigned *m3 = m2 + src2 * mwidth;
 
                 int mx;
-                *srcp = 128;
+                *srcp = 32768;
                 for (mx = 0; mx < mwidth; mx++) {
                     *dstp += m3[mx];
                     dstp += width + 1;
@@ -390,16 +387,16 @@ static void ass_gauss_blur(unsigned char *buffer, unsigned short *tmp2,
             }
         }
         for (; y < height; y++) {
-            unsigned short *srcp = t + y * (width + 1) + 1;
+            unsigned *srcp = t + y * (width + 1) + 1;
             int src = *srcp;
             if (src) {
                 const int y2 = r + height - y;
-                register unsigned short *dstp = srcp - 1 - r * (width + 1);
-                const int src2 = (src + 128) >> 8;
-                unsigned *m3 = (unsigned *) (m2 + src2 * mwidth);
+                register unsigned *dstp = srcp - 1 - r * (width + 1);
+                const int src2 = (src + 32768) >> 16;
+                unsigned *m3 = m2 + src2 * mwidth;
 
                 int mx;
-                *srcp = 128;
+                *srcp = 32768;
                 for (mx = 0; mx < y2; mx++) {
                     *dstp += m3[mx];
                     dstp += width + 1;
@@ -413,7 +410,7 @@ static void ass_gauss_blur(unsigned char *buffer, unsigned short *tmp2,
     s = buffer;
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++) {
-            s[x] = t[x] >> 8;
+            s[x] = t[x] >> 16;
         }
         s += stride;
         t += width + 1;
@@ -423,31 +420,71 @@ static void ass_gauss_blur(unsigned char *buffer, unsigned short *tmp2,
 /**
  * \brief Blur with [[1,2,1]. [2,4,2], [1,2,1]] kernel
  * This blur is the same as the one employed by vsfilter.
+ * Pure C implementation.
  */
-static void be_blur(Bitmap *bm)
+void be_blur_c(uint8_t *buf, intptr_t w,
+               intptr_t h, intptr_t stride,
+               uint16_t *tmp)
 {
-    int w = bm->w;
-    int h = bm->h;
-    int s = bm->stride;
-    unsigned char *buf = bm->buffer;
-    unsigned int x, y;
-    unsigned int old_sum, new_sum;
-
-    for (y = 0; y < h; y++) {
-        old_sum = 2 * buf[y * s];
-        for (x = 0; x < w - 1; x++) {
-            new_sum = buf[y * s + x] + buf[y * s + x + 1];
-            buf[y * s + x] = (old_sum + new_sum) >> 2;
-            old_sum = new_sum;
+    unsigned short *col_pix_buf = tmp;  // horizontal [1,2,1] sums of the most recent row
+    unsigned short *col_sum_buf = tmp + w * sizeof(unsigned short);  // 2*w elements in: tmp must hold >= 3*w
+    unsigned x, y, old_pix, old_sum, temp1, temp2;
+    unsigned char *src, *dst;
+    memset(col_pix_buf, 0, w * sizeof(unsigned short));
+    memset(col_sum_buf, 0, w * sizeof(unsigned short));
+    {
+        y = 0;
+        src=buf+y*stride;
+        // Row 0 only seeds col_pix_buf; nothing is written back yet.
+        x = 2;
+        old_pix = src[x-1];
+        old_sum = old_pix + src[x-2];
+        for ( ; x < w; x++) {
+            temp1 = src[x];
+            temp2 = old_pix + temp1;
+            old_pix = temp1;
+            temp1 = old_sum + temp2;    // src[x-2] + 2*src[x-1] + src[x]
+            old_sum = temp2;
+            col_pix_buf[x] = temp1;
+        }
+    }
+    {
+        y = (h > 1) ? 1 : 0;  // second row seeds col_sum_buf; stay on row 0 if there is no row 1
+        src=buf+y*stride;
+        x = 2;
+        old_pix = src[x-1];
+        old_sum = old_pix + src[x-2];
+        for ( ; x < w; x++) {
+            temp1 = src[x];
+            temp2 = old_pix + temp1;
+            old_pix = temp1;
+            temp1 = old_sum + temp2;
+            old_sum = temp2;
+            // col_sum_buf = horizontal sums of the two most recent rows
+            temp2 = col_pix_buf[x] + temp1;
+            col_pix_buf[x] = temp1;
+            col_sum_buf[x] = temp2;
         }
     }
 
-    for (x = 0; x < w; x++) {
-        old_sum = 2 * buf[x];
-        for (y = 0; y < h - 1; y++) {
-            new_sum = buf[y * s + x] + buf[(y + 1) * s + x];
-            buf[y * s + x] = (old_sum + new_sum) >> 2;
-            old_sum = new_sum;
+    for (y = 2; y < h; y++) {
+        src=buf+y*stride;
+        dst=buf+(y-1)*stride;           // output lags one row behind the input
+        // Row y-1 was fully read in the previous step, so writing it in place is safe.
+        x = 2;
+        old_pix = src[x-1];
+        old_sum = old_pix + src[x-2];
+        for ( ; x < w; x++) {
+            temp1 = src[x];
+            temp2 = old_pix + temp1;
+            old_pix = temp1;
+            temp1 = old_sum + temp2;
+            old_sum = temp2;
+            // rows (y-2) + 2*(y-1) + y, kernel weight 16; only interior pixels are written
+            temp2 = col_pix_buf[x] + temp1;
+            col_pix_buf[x] = temp1;
+            dst[x-1] = (col_sum_buf[x] + temp2) >> 4;
+            col_sum_buf[x] = temp2;
         }
     }
 }
@@ -481,48 +518,69 @@ int outline_to_bitmap3(ASS_Library *library, ASS_SynthPriv *priv_blur,
         }
     }
 
-    // Apply box blur (multiple passes, if requested)
-    while (be--) {
-        if (*bm_o)
-            be_blur(*bm_o);
-        if (!*bm_o || border_style == 3)
-            be_blur(*bm_g);
-    }
+    return 0;
+}
 
-    // Apply gaussian blur
-    if (blur_radius > 0.0) {
-        if (*bm_o)
-            resize_tmp(priv_blur, (*bm_o)->w, (*bm_o)->h);
-        if (!*bm_o || border_style == 3)
-            resize_tmp(priv_blur, (*bm_g)->w, (*bm_g)->h);
-        generate_tables(priv_blur, blur_radius);
-        if (*bm_o)
-            ass_gauss_blur((*bm_o)->buffer, priv_blur->tmp,
-                           (*bm_o)->w, (*bm_o)->h, (*bm_o)->stride,
-                           (int *) priv_blur->gt2, priv_blur->g_r,
-                           priv_blur->g_w);
-        if (!*bm_o || border_style == 3)
-            ass_gauss_blur((*bm_g)->buffer, priv_blur->tmp,
-                           (*bm_g)->w, (*bm_g)->h, (*bm_g)->stride,
-                           (int *) priv_blur->gt2, priv_blur->g_r,
-                           priv_blur->g_w);
+/**
+ * \brief Add two bitmaps together at a given position
+ * Uses additive blending, clipped to [0,255]. Pure C implementation.
+ */
+void add_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+                   uint8_t *src, intptr_t src_stride,
+                   intptr_t height, intptr_t width)
+{
+    unsigned out;
+    uint8_t* end = dst + dst_stride * height;
+    while (dst < end) {
+        for (unsigned j = 0; j < width; ++j) {
+            out = dst[j] + src[j];
+            dst[j] = FFMIN(out, 255);
+        }
+        dst += dst_stride;
+        src += src_stride;
     }
+}
 
-    // Create shadow and fix outline as needed
-    if (*bm_o && border_style != 3) {
-        *bm_s = copy_bitmap(*bm_o);
-        fix_outline(*bm_g, *bm_o);
-    } else if (*bm_o && border_visible) {
-        *bm_s = copy_bitmap(*bm_o);
-    } else if (*bm_o) {
-        *bm_s = *bm_o;
-        *bm_o = 0;
-    } else
-        *bm_s = copy_bitmap(*bm_g);
-
-    assert(bm_s);
+void sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+                   uint8_t *src, intptr_t src_stride,
+                   intptr_t height, intptr_t width)
+{   /* dst[y][x] = max(dst[y][x] - src[y][x], 0) over a width x height area */
+    int out;  /* must be signed: an unsigned difference wraps, so the clamp below would never fire */
+    uint8_t* end = dst + dst_stride * height;
+    while (dst < end) {
+        for (unsigned j = 0; j < width; ++j) {
+            out = dst[j] - src[j];      /* negative when src is brighter than dst */
+            dst[j] = FFMAX(out, 0);     /* saturate at 0 instead of wrapping modulo 256 */
+        }
+        dst += dst_stride;              /* advance both bitmaps by one row */
+        src += src_stride;
+    }
+}
 
-    shift_bitmap(*bm_s, shadow_offset.x, shadow_offset.y);
+void restride_bitmap_c(uint8_t *dst, intptr_t dst_stride,
+                       uint8_t *src, intptr_t src_stride,
+                       intptr_t width, intptr_t height)
+{
+    uint8_t* end = dst + dst_stride * height;
+    while (dst < end) {
+        memcpy(dst, src, width);
+        dst += dst_stride;
+        src += src_stride;
+    }
+}
 
-    return 0;
+void mul_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+                   uint8_t *src1, intptr_t src1_stride,
+                   uint8_t *src2, intptr_t src2_stride,
+                   intptr_t w, intptr_t h)
+{
+    uint8_t* end = src1 + src1_stride * h;
+    while (src1 < end) {
+        for (unsigned x = 0; x < w; ++x) {
+            dst[x] = (src1[x] * src2[x] + 255) >> 8;
+        }
+        dst  += dst_stride;
+        src1 += src1_stride;
+        src2 += src2_stride;
+    }
 }
diff --git a/libass/ass_bitmap.h b/libass/ass_bitmap.h
index 53be7af..b51c1bf 100644
--- a/libass/ass_bitmap.h
+++ b/libass/ass_bitmap.h
@@ -24,7 +24,19 @@
 
 #include "ass.h"
 
-typedef struct ass_synth_priv ASS_SynthPriv;
+typedef struct ass_synth_priv {
+    int tmp_w, tmp_h;
+    unsigned *tmp;
+
+    int g_r;
+    int g_w;
+
+    double *g0;
+    unsigned *g;
+    unsigned *gt2;
+
+    double radius;
+} ASS_SynthPriv;
 
 ASS_SynthPriv *ass_synth_init(double);
 void ass_synth_done(ASS_SynthPriv *priv);
@@ -33,11 +45,14 @@ typedef struct {
     int left, top;
     int w, h;                   // width, height
     int stride;
-    unsigned char *buffer;      // w x h buffer
+    unsigned char *buffer;      // h * stride buffer
+    unsigned char *buffer_ptr;  // unaligned pointer (for free())
 } Bitmap;
 
 Bitmap *outline_to_bitmap(ASS_Library *library, FT_Library ftlib,
                           FT_Outline *outline, int bord);
+
+Bitmap *alloc_bitmap(int w, int h);
 /**
  * \brief perform glyph rendering
  * \param glyph original glyph
@@ -55,5 +70,29 @@ int outline_to_bitmap3(ASS_Library *library, ASS_SynthPriv *priv_blur,
                        int border_style, int border_visible);
 
 void ass_free_bitmap(Bitmap *bm);
+void ass_gauss_blur(unsigned char *buffer, unsigned *tmp2,
+                    int width, int height, int stride,
+                    unsigned *m2, int r, int mwidth);
+void be_blur_c(uint8_t *buf, intptr_t w,
+               intptr_t h, intptr_t stride,
+               uint16_t *tmp);
+void add_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+                   uint8_t *src, intptr_t src_stride,
+                   intptr_t height, intptr_t width);   /* NOTE: height precedes width, unlike restride_bitmap_c/mul_bitmaps_c */
+void sub_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+                   uint8_t *src, intptr_t src_stride,
+                   intptr_t height, intptr_t width);   /* NOTE: height precedes width, unlike restride_bitmap_c/mul_bitmaps_c */
+void restride_bitmap_c(uint8_t *dst, intptr_t dst_stride,
+                       uint8_t *src, intptr_t src_stride,
+                       intptr_t width, intptr_t height);
+void mul_bitmaps_c(uint8_t *dst, intptr_t dst_stride,
+                   uint8_t *src1, intptr_t src1_stride,
+                   uint8_t *src2, intptr_t src2_stride,
+                   intptr_t w, intptr_t h);
+void shift_bitmap(Bitmap *bm, int shift_x, int shift_y);
+void fix_outline(Bitmap *bm_g, Bitmap *bm_o);
+void resize_tmp(ASS_SynthPriv *priv, int w, int h);
+int generate_tables(ASS_SynthPriv *priv, double radius);
+Bitmap *copy_bitmap(const Bitmap *src);
 
 #endif                          /* LIBASS_BITMAP_H */
diff --git a/libass/ass_cache.c b/libass/ass_cache.c
index 91801a0..6baa924 100644
--- a/libass/ass_cache.c
+++ b/libass/ass_cache.c
@@ -125,12 +125,28 @@ static unsigned bitmap_compare (void *a, void *b, size_t key_size)
 static void composite_destruct(void *key, void *value)
 {
     CompositeHashValue *v = value;
-    free(v->a);
-    free(v->b);
+    CompositeHashKey *k = key;      // typed view of key, needed to reach k->str below
+    if (v->bm)                      // each of the three bitmaps is optional; only those present are released
+        ass_free_bitmap(v->bm);
+    if (v->bm_o)
+        ass_free_bitmap(v->bm_o);
+    if (v->bm_s)
+        ass_free_bitmap(v->bm_s);
+    free(k->str);                   // release the key's string before the key struct itself
     free(key);
     free(value);
 }
 
+static size_t composite_size(void *value, size_t value_size)
+{   // passed to ass_cache_create() by ass_composite_cache_create(); value_size is not used
+    CompositeHashValue *val = value;
+    if (val->bm_o)   // bm_o takes precedence over bm when both are set; bm_s is never consulted
+        return val->bm_o->w * val->bm_o->h * 3;   // x3: presumably stands for bm, bm_o and bm_s -- TODO confirm
+    else if (val->bm)
+        return val->bm->w * val->bm->h * 3;
+    return 0;        // neither bm_o nor bm is set
+}
+
 // outline cache
 
 static unsigned outline_hash(void *key, size_t key_size)
@@ -242,14 +258,16 @@ Cache *ass_cache_create(HashFunction hash_func, HashCompare compare_func,
 void *ass_cache_put(Cache *cache, void *key, void *value)
 {
     unsigned bucket = cache->hash_func(key, cache->key_size) % cache->buckets;
-    CacheItem **item = &cache->map[bucket];
-    while (*item)
-        item = &(*item)->next;
-    (*item) = calloc(1, sizeof(CacheItem));
-    (*item)->key = malloc(cache->key_size);
-    (*item)->value = malloc(cache->value_size);
-    memcpy((*item)->key, key, cache->key_size);
-    memcpy((*item)->value, value, cache->value_size);
+    CacheItem **bucketptr = &cache->map[bucket];
+
+    CacheItem *item = calloc(1, sizeof(CacheItem));
+    item->key = malloc(cache->key_size);
+    item->value = malloc(cache->value_size);
+    memcpy(item->key, key, cache->key_size);
+    memcpy(item->value, value, cache->value_size);
+
+    item->next = *bucketptr;
+    *bucketptr = item;
 
     cache->items++;
     if (cache->size_func)
@@ -257,7 +275,7 @@ void *ass_cache_put(Cache *cache, void *key, void *value)
     else
         cache->cache_size++;
 
-    return (*item)->value;
+    return item->value;
 }
 
 void *ass_cache_get(Cache *cache, void *key)
@@ -347,6 +365,6 @@ Cache *ass_bitmap_cache_create(void)
 Cache *ass_composite_cache_create(void)
 {
     return ass_cache_create(composite_hash, composite_compare,
-            composite_destruct, (ItemSize)NULL, sizeof(CompositeHashKey),
+            composite_destruct, composite_size, sizeof(CompositeHashKey),   // composite_size is passed as the item-size callback
             sizeof(CompositeHashValue));
 }
diff --git a/libass/ass_cache.h b/libass/ass_cache.h
index 7375f04..677b705 100644
--- a/libass/ass_cache.h
+++ b/libass/ass_cache.h
@@ -35,8 +35,10 @@ typedef struct {
 } BitmapHashValue;
 
 typedef struct {
-    unsigned char *a;
-    unsigned char *b;
+    Bitmap *bm;      // bm, bm_o, bm_s: each may be NULL; composite_destruct() frees the non-NULL ones
+    Bitmap *bm_o;
+    Bitmap *bm_s;
+    FT_Vector pos;
 } CompositeHashValue;
 
 typedef struct {
diff --git a/libass/ass_cache_template.h b/libass/ass_cache_template.h
index f9aab77..c91c897 100644
--- a/libass/ass_cache_template.h
+++ b/libass/ass_cache_template.h
@@ -96,6 +96,7 @@ START(glyph, glyph_hash_key)
     FTVECTOR(outline) // border width, 16.16
     GENERIC(unsigned, flags)    // glyph decoration flags
     GENERIC(unsigned, border_style)
+    GENERIC(int, hspacing) // 16.16
 END(GlyphHashKey)
 
 START(glyph_metrics, glyph_metrics_hash_key)
@@ -114,6 +115,7 @@ START(drawing, drawing_hash_key)
     GENERIC(int, pbo)
     FTVECTOR(outline)
     GENERIC(unsigned, border_style)
+    GENERIC(int, hspacing)
     GENERIC(int, scale)
     GENERIC(unsigned, hash)
     STRING(text)
@@ -121,21 +123,38 @@ END(DrawingHashKey)
 
 // Cache for composited bitmaps
 START(composite, composite_hash_key)
-    GENERIC(int, aw)
-    GENERIC(int, ah)
-    GENERIC(int, bw)
-    GENERIC(int, bh)
-    GENERIC(int, ax)
-    GENERIC(int, ay)
-    GENERIC(int, bx)
-    GENERIC(int, by)
-    GENERIC(int, as)
-    GENERIC(int, bs)
-    GENERIC(unsigned char *, a)
-    GENERIC(unsigned char *, b)
+    GENERIC(unsigned, w)
+    GENERIC(unsigned, h)
+    GENERIC(unsigned, o_w)
+    GENERIC(unsigned, o_h)
+    GENERIC(int, is_drawing)
+    GENERIC(unsigned, chars)
+    GENERIC(int, be)
+    GENERIC(double, blur)
+    GENERIC(int, border_style)
+    GENERIC(int, has_border)
+    GENERIC(double, border_x)
+    GENERIC(double, border_y)
+    GENERIC(double, shadow_x)
+    GENERIC(double, shadow_y)
+    GENERIC(double, frx)
+    GENERIC(double, fry)
+    GENERIC(double, frz)
+    GENERIC(double, fax)
+    GENERIC(double, fay)
+    GENERIC(double, scale_x)
+    GENERIC(double, scale_y)
+    GENERIC(double, hspacing)
+    GENERIC(unsigned, italic)
+    GENERIC(unsigned, bold)
+    GENERIC(int, flags)
+    GENERIC(unsigned, has_outline)
+    GENERIC(int, shift_x)
+    GENERIC(int, shift_y)
+    FTVECTOR(advance)
+    STRING(str)     // freed by composite_destruct() via free(k->str)
 END(CompositeHashKey)
 
-
 #undef START
 #undef GENERIC
 #undef STRING
diff --git a/libass/ass_drawing.c b/libass/ass_drawing.c
index 03eb568..32a3803 100644
--- a/libass/ass_drawing.c
+++ b/libass/ass_drawing.c
@@ -86,7 +86,7 @@ static void drawing_prepare(ASS_Drawing *drawing)
  */
 static void drawing_finish(ASS_Drawing *drawing, int raw_mode)
 {
-    int i, offset;
+    int i;
     double pbo;
     FT_BBox bbox = drawing->cbox;
     FT_Outline *ol = &drawing->outline;
@@ -104,15 +104,13 @@ static void drawing_finish(ASS_Drawing *drawing, int raw_mode)
 
     drawing->advance.x = bbox.xMax - bbox.xMin;
 
-    pbo = drawing->pbo / (64.0 / (1 << (drawing->scale - 1)));
-    drawing->desc = double_to_d6(-pbo * drawing->scale_y);
-    drawing->asc = bbox.yMax - bbox.yMin + drawing->desc;
+    pbo = drawing->pbo / (1 << (drawing->scale - 1));
+    drawing->desc = double_to_d6(pbo * drawing->scale_y);
+    drawing->asc = bbox.yMax - bbox.yMin - drawing->desc;
 
     // Place it onto the baseline
-    offset = (bbox.yMax - bbox.yMin) + double_to_d6(-pbo *
-                                                    drawing->scale_y);
     for (i = 0; i < ol->n_points; i++)
-        ol->points[i].y += offset;
+        ol->points[i].y += drawing->asc;
 }
 
 /*
diff --git a/libass/ass_font.c b/libass/ass_font.c
index 6840e2f..dd275c1 100644
--- a/libass/ass_font.c
+++ b/libass/ass_font.c
@@ -77,6 +77,21 @@ static void charmap_magic(ASS_Library *library, FT_Face face)
 }
 
 /**
+ * Map a character code to the code that is looked up in the face's active
+ * charmap (only MS Symbol is special-cased: 0xF000 is OR-ed into the code)
+ */
+
+uint32_t ass_font_index_magic(FT_Face face, uint32_t symbol)
+{
+    switch(face->charmap->encoding){   // NOTE: face->charmap is dereferenced without a NULL check
+    case FT_ENCODING_MS_SYMBOL:
+        return 0xF000 | symbol;
+    default:
+        return symbol;                 // every other encoding: the code is returned unchanged
+    }
+}
+
+/**
  * \brief find a memory font by name
  */
 static int find_font(ASS_Library *library, char *name)
@@ -223,7 +238,8 @@ void ass_face_set_size(FT_Face face, double size)
     // The idea was borrowed from asa (http://asa.diac24.net)
     if (hori && os2) {
         int hori_height = hori->Ascender - hori->Descender;
-        int os2_height = os2->usWinAscent + os2->usWinDescent;
+        /* sometimes used for signed values despite unsigned in spec */
+        int os2_height = (short)os2->usWinAscent + (short)os2->usWinDescent;
         if (hori_height && os2_height)
             mscale = (double) hori_height / os2_height;
     }
@@ -263,11 +279,11 @@ void ass_font_get_asc_desc(ASS_Font *font, uint32_t ch, int *asc,
     for (i = 0; i < font->n_faces; ++i) {
         FT_Face face = font->faces[i];
         TT_OS2 *os2 = FT_Get_Sfnt_Table(face, ft_sfnt_os2);
-        if (FT_Get_Char_Index(face, ch)) {
+        if (FT_Get_Char_Index(face, ass_font_index_magic(face, ch))) {
             int y_scale = face->size->metrics.y_scale;
             if (os2) {
-                *asc = FT_MulFix(os2->usWinAscent, y_scale);
-                *desc = FT_MulFix(os2->usWinDescent, y_scale);
+                *asc = FT_MulFix((short)os2->usWinAscent, y_scale);
+                *desc = FT_MulFix((short)os2->usWinDescent, y_scale);
             } else {
                 *asc = FT_MulFix(face->ascender, y_scale);
                 *desc = FT_MulFix(-face->descender, y_scale);
@@ -279,6 +295,31 @@ void ass_font_get_asc_desc(ASS_Font *font, uint32_t ch, int *asc,
     *asc = *desc = 0;
 }
 
+static void add_line(FT_Outline *ol, int bear, int advance, int dir, int pos, int size) {
+    FT_Vector points[4] = {   // rectangle appended to ol as one contour: x = bear..advance, y = pos +/- size
+        {.x = bear,      .y = pos + size},
+        {.x = advance,   .y = pos + size},
+        {.x = advance,   .y = pos - size},
+        {.x = bear,      .y = pos - size},
+    };
+
+    if (dir == FT_ORIENTATION_TRUETYPE) {   // dir selects the order in which the corners are appended
+        int i;
+        for (i = 0; i < 4; i++) {
+            ol->points[ol->n_points] = points[i];   // no capacity check: ol must already have room for 4 more points
+            ol->tags[ol->n_points++] = 1;
+        }
+    } else {
+        int i;
+        for (i = 3; i >= 0; i--) {                  // same corners, reverse order
+            ol->points[ol->n_points] = points[i];
+            ol->tags[ol->n_points++] = 1;
+        }
+    }
+
+    ol->contours[ol->n_contours++] = ol->n_points - 1;   // record the index of the new contour's last point
+}
+
 /*
  * Strike a glyph with a horizontal line; it's possible to underline it
  * and/or strike through it.  For the line's position and size, truetype
@@ -318,64 +359,24 @@ static int ass_strike_outline_glyph(FT_Face face, ASS_Font *font,
 
     // Add points to the outline
     if (under && ps) {
-        int pos, size;
-        pos = FT_MulFix(ps->underlinePosition, y_scale * font->scale_y);
-        size = FT_MulFix(ps->underlineThickness,
-                         y_scale * font->scale_y / 2);
+        int pos = FT_MulFix(ps->underlinePosition, y_scale * font->scale_y);
+        int size = FT_MulFix(ps->underlineThickness,
+                             y_scale * font->scale_y / 2);
 
         if (pos > 0 || size <= 0)
             return 1;
 
-        FT_Vector points[4] = {
-            {.x = bear,      .y = pos + size},
-            {.x = advance,   .y = pos + size},
-            {.x = advance,   .y = pos - size},
-            {.x = bear,      .y = pos - size},
-        };
-
-        if (dir == FT_ORIENTATION_TRUETYPE) {
-            for (i = 0; i < 4; i++) {
-                ol->points[ol->n_points] = points[i];
-                ol->tags[ol->n_points++] = 1;
-            }
-        } else {
-            for (i = 3; i >= 0; i--) {
-                ol->points[ol->n_points] = points[i];
-                ol->tags[ol->n_points++] = 1;
-            }
-        }
-
-        ol->contours[ol->n_contours++] = ol->n_points - 1;
+        add_line(ol, bear, advance, dir, pos, size);
     }
 
     if (through && os2) {
-        int pos, size;
-        pos = FT_MulFix(os2->yStrikeoutPosition, y_scale * font->scale_y);
-        size = FT_MulFix(os2->yStrikeoutSize, y_scale * font->scale_y / 2);
+        int pos = FT_MulFix(os2->yStrikeoutPosition, y_scale * font->scale_y);
+        int size = FT_MulFix(os2->yStrikeoutSize, y_scale * font->scale_y / 2);
 
         if (pos < 0 || size <= 0)
             return 1;
 
-        FT_Vector points[4] = {
-            {.x = bear,      .y = pos + size},
-            {.x = advance,   .y = pos + size},
-            {.x = advance,   .y = pos - size},
-            {.x = bear,      .y = pos - size},
-        };
-
-        if (dir == FT_ORIENTATION_TRUETYPE) {
-            for (i = 0; i < 4; i++) {
-                ol->points[ol->n_points] = points[i];
-                ol->tags[ol->n_points++] = 1;
-            }
-        } else {
-            for (i = 3; i >= 0; i--) {
-                ol->points[ol->n_points] = points[i];
-                ol->tags[ol->n_points++] = 1;
-            }
-        }
-
-        ol->contours[ol->n_contours++] = ol->n_points - 1;
+        add_line(ol, bear, advance, dir, pos, size);
     }
 
     return 0;
@@ -445,13 +446,13 @@ int ass_font_get_index(void *fcpriv, ASS_Font *font, uint32_t symbol,
     // try with the requested face
     if (*face_index < font->n_faces) {
         face = font->faces[*face_index];
-        index = FT_Get_Char_Index(face, symbol);
+        index = FT_Get_Char_Index(face, ass_font_index_magic(face, symbol));
     }
 
     // not found in requested face, try all others
     for (i = 0; i < font->n_faces && index == 0; ++i) {
         face = font->faces[i];
-        index = FT_Get_Char_Index(face, symbol);
+        index = FT_Get_Char_Index(face, ass_font_index_magic(face, symbol));
         if (index)
             *face_index = i;
     }
@@ -466,14 +467,14 @@ int ass_font_get_index(void *fcpriv, ASS_Font *font, uint32_t symbol,
         face_idx = *face_index = add_face(fcpriv, font, symbol);
         if (face_idx >= 0) {
             face = font->faces[face_idx];
-            index = FT_Get_Char_Index(face, symbol);
+            index = FT_Get_Char_Index(face, ass_font_index_magic(face, symbol));
             if (index == 0 && face->num_charmaps > 0) {
                 int i;
                 ass_msg(font->library, MSGL_WARN,
                     "Glyph 0x%X not found, broken font? Trying all charmaps", symbol);
                 for (i = 0; i < face->num_charmaps; i++) {
                     FT_Set_Charmap(face, face->charmaps[i]);
-                    if ((index = FT_Get_Char_Index(face, symbol)) != 0) break;
+                    if ((index = FT_Get_Char_Index(face, ass_font_index_magic(face, symbol))) != 0) break;
                 }
             }
             if (index == 0) {
@@ -587,8 +588,8 @@ FT_Vector ass_font_get_kerning(ASS_Font *font, uint32_t c1, uint32_t c2)
 
     for (i = 0; i < font->n_faces; ++i) {
         FT_Face face = font->faces[i];
-        int i1 = FT_Get_Char_Index(face, c1);
-        int i2 = FT_Get_Char_Index(face, c2);
+        int i1 = FT_Get_Char_Index(face, ass_font_index_magic(face, c1));
+        int i2 = FT_Get_Char_Index(face, ass_font_index_magic(face, c2));
         if (i1 && i2) {
             if (FT_HAS_KERNING(face))
                 FT_Get_Kerning(face, i1, i2, FT_KERNING_DEFAULT, &v);
diff --git a/libass/ass_font.h b/libass/ass_font.h
index 481a630..f80b887 100644
--- a/libass/ass_font.h
+++ b/libass/ass_font.h
@@ -68,6 +68,7 @@ void ass_font_get_asc_desc(ASS_Font *font, uint32_t ch, int *asc,
                            int *desc);
 int ass_font_get_index(void *fcpriv, ASS_Font *font, uint32_t symbol,
                        int *face_index, int *glyph_index);
+uint32_t ass_font_index_magic(FT_Face face, uint32_t symbol);
 FT_Glyph ass_font_get_glyph(void *fontconfig_priv, ASS_Font *font,
                             uint32_t ch, int face_index, int index,
                             ASS_Hinting hinting, int deco);
diff --git a/libass/ass_fontconfig.c b/libass/ass_fontconfig.c
index d3dddea..dcb0bcf 100644
--- a/libass/ass_fontconfig.c
+++ b/libass/ass_fontconfig.c
@@ -188,7 +188,7 @@ static char *select_font(ASS_Library *library, FCInstance *priv,
      */
     FcPatternDel(pat, "lang");
 
-    fsorted = FcFontSort(priv->config, pat, FcTrue, NULL, &result);
+    fsorted = FcFontSort(priv->config, pat, FcFalse, NULL, &result);
     ffullname = match_fullname(library, priv, family, bold, italic);
     if (!fsorted || !ffullname)
         goto error;
@@ -257,11 +257,18 @@ static char *select_font(ASS_Library *library, FCInstance *priv,
 
     if (!treat_family_as_pattern &&
         !(r_family && strcasecmp((const char *) r_family, family) == 0) &&
-        !(r_fullname && strcasecmp((const char *) r_fullname, family) == 0))
-        ass_msg(library, MSGL_WARN,
-               "fontconfig: Selected font is not the requested one: "
-               "'%s' != '%s'",
-               (const char *) (r_fullname ? r_fullname : r_family), family);
+        !(r_fullname && strcasecmp((const char *) r_fullname, family) == 0)) {
+        char *fallback = (char *) (r_fullname ? r_fullname : r_family);
+        if (code) {
+            ass_msg(library, MSGL_WARN,
+                    "fontconfig: cannot find glyph U+%04X in font '%s', falling back to '%s'",
+                    (unsigned int)code, family, fallback);
+        } else {
+            ass_msg(library, MSGL_WARN,
+                    "fontconfig: cannot find font '%s', falling back to '%s'",
+                    family, fallback);
+        }
+    }
 
     result = FcPatternGetString(rpat, FC_STYLE, 0, &r_style);
     if (result != FcResultMatch)
diff --git a/libass/ass_parse.c b/libass/ass_parse.c
index bb64971..fcf62c3 100644
--- a/libass/ass_parse.c
+++ b/libass/ass_parse.c
@@ -81,14 +81,14 @@ void update_font(ASS_Renderer *render_priv)
 
     val = render_priv->state.bold;
     // 0 = normal, 1 = bold, >1 = exact weight
-    if (val == 1 || val == -1)
+    if (val == 1)
         val = 200;              // bold
     else if (val <= 0)
         val = 80;               // normal
     desc.bold = val;
 
     val = render_priv->state.italic;
-    if (val == 1 || val == -1)
+    if (val == 1)
         val = 110;              // italic
     else if (val <= 0)
         val = 0;                // normal
@@ -105,27 +105,6 @@ void update_font(ASS_Renderer *render_priv)
 }
 
 /**
- * \brief Calculate valid border size. Makes sure the border sizes make sense.
- *
- * \param priv renderer state object
- * \param border_x requested x border size
- * \param border_y requested y border size
- */
-void calc_border(ASS_Renderer *priv, double border_x, double border_y)
-{
-    if (border_x < 0 && border_y < 0) {
-        if (priv->state.border_style == 1 ||
-            priv->state.border_style == 3)
-            border_x = border_y = priv->state.style->Outline;
-        else
-            border_x = border_y = 1.;
-    }
-
-    priv->state.border_x = border_x;
-    priv->state.border_y = border_y;
-}
-
-/**
  * \brief Change border width
  *
  * \param render_priv renderer state object
@@ -204,16 +183,16 @@ interpolate_alpha(long long now, long long t1, long long t2, long long t3,
 
     if (now < t1) {
         a = a1;
-    } else if (now >= t4) {
-        a = a3;
-    } else if (now < t2 && t2 > t1) {
+    } else if (now < t2) {
         cf = ((double) (now - t1)) / (t2 - t1);
         a = a1 * (1 - cf) + a2 * cf;
-    } else if (now >= t3 && t4 > t3) {
+    } else if (now < t3) {
+        a = a2;
+    } else if (now < t4) {
         cf = ((double) (now - t3)) / (t4 - t3);
         a = a2 * (1 - cf) + a3 * cf;
-    } else {                    // t2 <= now < t3
-        a = a2;
+    } else {                    // now >= t4
+        a = a3;
     }
 
     return a;
@@ -263,36 +242,34 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
     // New tags introduced in vsfilter 2.39
     if (mystrcmp(&p, "xbord")) {
         double val;
-        if (mystrtod(&p, &val))
+        if (mystrtod(&p, &val)) {
             val = render_priv->state.border_x * (1 - pwr) + val * pwr;
-        else
-            val = -1.;
-        calc_border(render_priv, val, render_priv->state.border_y);
-        render_priv->state.bm_run_id++;
+            val = (val < 0) ? 0 : val;
+        } else
+            val = render_priv->state.style->Outline;
+        render_priv->state.border_x = val;
     } else if (mystrcmp(&p, "ybord")) {
         double val;
-        if (mystrtod(&p, &val))
+        if (mystrtod(&p, &val)) {
             val = render_priv->state.border_y * (1 - pwr) + val * pwr;
-        else
-            val = -1.;
-        calc_border(render_priv, render_priv->state.border_x, val);
-        render_priv->state.bm_run_id++;
+            val = (val < 0) ? 0 : val;
+        } else
+            val = render_priv->state.style->Outline;
+        render_priv->state.border_y = val;
     } else if (mystrcmp(&p, "xshad")) {
         double val;
         if (mystrtod(&p, &val))
             val = render_priv->state.shadow_x * (1 - pwr) + val * pwr;
         else
-            val = 0.;
+            val = render_priv->state.style->Shadow;
         render_priv->state.shadow_x = val;
-        render_priv->state.bm_run_id++;
     } else if (mystrcmp(&p, "yshad")) {
         double val;
         if (mystrtod(&p, &val))
             val = render_priv->state.shadow_y * (1 - pwr) + val * pwr;
         else
-            val = 0.;
+            val = render_priv->state.style->Shadow;
         render_priv->state.shadow_y = val;
-        render_priv->state.bm_run_id++;
     } else if (mystrcmp(&p, "fax")) {
         double val;
         if (mystrtod(&p, &val))
@@ -333,8 +310,7 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
         } else if (!render_priv->state.clip_drawing) {
             p = parse_vector_clip(render_priv, start);
             render_priv->state.clip_drawing_mode = 1;
-        } else
-            render_priv->state.clip_mode = 0;
+        }
     } else if (mystrcmp(&p, "blur")) {
         double val;
         if (mystrtod(&p, &val)) {
@@ -344,7 +320,6 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
             render_priv->state.blur = val;
         } else
             render_priv->state.blur = 0.0;
-        render_priv->state.bm_run_id++;
         // ASS standard tags
     } else if (mystrcmp(&p, "fsc")) {
         char tp = *p++;
@@ -352,19 +327,23 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
         if (tp == 'x') {
             if (mystrtod(&p, &val)) {
                 val /= 100;
-                render_priv->state.scale_x =
-                    render_priv->state.scale_x * (1 - pwr) + val * pwr;
+                val = render_priv->state.scale_x * (1 - pwr) + val * pwr;
+                val = (val < 0) ? 0 : val;
             } else
-                render_priv->state.scale_x =
-                    render_priv->state.style->ScaleX;
+                val = render_priv->state.style->ScaleX;
+            render_priv->state.scale_x = val;
         } else if (tp == 'y') {
             if (mystrtod(&p, &val)) {
                 val /= 100;
-                render_priv->state.scale_y =
-                    render_priv->state.scale_y * (1 - pwr) + val * pwr;
+                val = render_priv->state.scale_y * (1 - pwr) + val * pwr;
+                val = (val < 0) ? 0 : val;
             } else
-                render_priv->state.scale_y =
-                    render_priv->state.style->ScaleY;
+                val = render_priv->state.style->ScaleY;
+            render_priv->state.scale_y = val;
+        } else {
+            --p;
+            render_priv->state.scale_x = render_priv->state.style->ScaleX;
+            render_priv->state.scale_y = render_priv->state.style->ScaleY;
         }
     } else if (mystrcmp(&p, "fsp")) {
         double val;
@@ -375,17 +354,17 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
             render_priv->state.hspacing = render_priv->state.style->Spacing;
     } else if (mystrcmp(&p, "fs+")) {
         double val;
-        if (mystrtod(&p, &val)) {
-            val = render_priv->state.font_size + pwr * val;
-        } else
+        mystrtod(&p, &val);
+        val = render_priv->state.font_size * (1 + pwr * val / 10);
+        if (val <= 0)
             val = render_priv->state.style->FontSize;
         if (render_priv->state.font)
             change_font_size(render_priv, val);
     } else if (mystrcmp(&p, "fs-")) {
         double val;
-        if (mystrtod(&p, &val))
-            val = render_priv->state.font_size - pwr * val;
-        else
+        mystrtod(&p, &val);
+        val = render_priv->state.font_size * (1 - pwr * val / 10);
+        if (val <= 0)
             val = render_priv->state.style->FontSize;
         if (render_priv->state.font)
             change_font_size(render_priv, val);
@@ -393,18 +372,21 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
         double val;
         if (mystrtod(&p, &val))
             val = render_priv->state.font_size * (1 - pwr) + val * pwr;
-        else
+        if (val <= 0)
             val = render_priv->state.style->FontSize;
         if (render_priv->state.font)
             change_font_size(render_priv, val);
     } else if (mystrcmp(&p, "bord")) {
-        double val;
+        double val, xval, yval;
         if (mystrtod(&p, &val)) {
-                val = render_priv->state.border_x * (1 - pwr) + val * pwr;
+            xval = render_priv->state.border_x * (1 - pwr) + val * pwr;
+            yval = render_priv->state.border_y * (1 - pwr) + val * pwr;
+            xval = (xval < 0) ? 0 : xval;
+            yval = (yval < 0) ? 0 : yval;
         } else
-            val = -1.;          // reset to default
-        calc_border(render_priv, val, val);
-        render_priv->state.bm_run_id++;
+            xval = yval = render_priv->state.style->Outline;
+        render_priv->state.border_x = xval;
+        render_priv->state.border_y = yval;
     } else if (mystrcmp(&p, "move")) {
         double x1, x2, y1, y2;
         long long t1, t2, delta_t, t;
@@ -424,10 +406,6 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
             mystrtoll(&p, &t1);
             skip(',');
             mystrtoll(&p, &t2);
-            ass_msg(render_priv->library, MSGL_DBG2,
-                   "movement6: (%f, %f) -> (%f, %f), (%" PRId64 " .. %"
-                   PRId64 ")\n", x1, y1, x2, y2, (int64_t) t1,
-                   (int64_t) t2);
             // VSFilter
             if (t1 > t2) {
                 double tmp = t2;
@@ -438,8 +416,6 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
         if (t1 <= 0 && t2 <= 0) {
             t1 = 0;
             t2 = render_priv->state.event->Duration;
-            ass_msg(render_priv->library, MSGL_DBG2,
-                   "movement: (%f, %f) -> (%f, %f)", x1, y1, x2, y2);
         }
         skipopt(')');
         delta_t = t2 - t1;
@@ -487,7 +463,7 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
         char *start = p;
         char *family;
         skip_to('\\');
-        if (p > start) {
+        if (p > start && strncmp(start, "0", p - start)) {
             family = malloc(p - start + 1);
             strncpy(family, start, p - start);
             family[p - start] = '\0';
@@ -506,44 +482,44 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
                 change_alpha(&render_priv->state.c[i], a, pwr);
         } else {
             change_alpha(&render_priv->state.c[0],
-                         render_priv->state.style->PrimaryColour, pwr);
+                         render_priv->state.style->PrimaryColour, 1);
             change_alpha(&render_priv->state.c[1],
-                         render_priv->state.style->SecondaryColour, pwr);
+                         render_priv->state.style->SecondaryColour, 1);
             change_alpha(&render_priv->state.c[2],
-                         render_priv->state.style->OutlineColour, pwr);
+                         render_priv->state.style->OutlineColour, 1);
             change_alpha(&render_priv->state.c[3],
-                         render_priv->state.style->BackColour, pwr);
+                         render_priv->state.style->BackColour, 1);
         }
-        render_priv->state.bm_run_id++;
         // FIXME: simplify
     } else if (mystrcmp(&p, "an")) {
         int val;
-        if (mystrtoi(&p, &val) && val) {
-            int v = (val - 1) / 3;      // 0, 1 or 2 for vertical alignment
-            ass_msg(render_priv->library, MSGL_DBG2, "an %d", val);
-            if (v != 0)
-                v = 3 - v;
-            val = ((val - 1) % 3) + 1;  // horizontal alignment
-            val += v * 4;
-            ass_msg(render_priv->library, MSGL_DBG2, "align %d", val);
-            if ((render_priv->state.parsed_tags & PARSED_A) == 0) {
+        mystrtoi(&p, &val);
+        if ((render_priv->state.parsed_tags & PARSED_A) == 0) {
+            if (val >= 1 && val <= 9) {
+                int v = (val - 1) / 3;      // 0, 1 or 2 for vertical alignment
+                if (v != 0)
+                    v = 3 - v;
+                val = ((val - 1) % 3) + 1;  // horizontal alignment
+                val += v * 4;
                 render_priv->state.alignment = val;
-                render_priv->state.parsed_tags |= PARSED_A;
-            }
-        } else
-            render_priv->state.alignment =
-                render_priv->state.style->Alignment;
+            } else
+                render_priv->state.alignment =
+                    render_priv->state.style->Alignment;
+            render_priv->state.parsed_tags |= PARSED_A;
+        }
     } else if (mystrcmp(&p, "a")) {
         int val;
-        if (mystrtoi(&p, &val) && val) {
-            if ((render_priv->state.parsed_tags & PARSED_A) == 0) {
-                // take care of a vsfilter quirk: handle illegal \a8 like \a5
-                render_priv->state.alignment = (val == 8) ? 5 : val;
-                render_priv->state.parsed_tags |= PARSED_A;
-            }
-        } else
-            render_priv->state.alignment =
-                render_priv->state.style->Alignment;
+        mystrtoi(&p, &val);
+        if ((render_priv->state.parsed_tags & PARSED_A) == 0) {
+            if (val >= 1 && val <= 11)
+                // take care of a vsfilter quirk:
+                // handle illegal \a8 and \a4 like \a5
+                render_priv->state.alignment = ((val & 3) == 0) ? 5 : val;
+            else
+                render_priv->state.alignment =
+                    render_priv->state.style->Alignment;
+            render_priv->state.parsed_tags |= PARSED_A;
+        }
     } else if (mystrcmp(&p, "pos")) {
         double v1, v2;
         skip('(');
@@ -551,7 +527,6 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
         skip(',');
         mystrtod(&p, &v2);
         skipopt(')');
-        ass_msg(render_priv->library, MSGL_DBG2, "pos(%f, %f)", v1, v2);
         if (render_priv->state.evt_type == EVENT_POSITIONED) {
             ass_msg(render_priv->library, MSGL_V, "Subtitle has a new \\pos "
                    "after \\move or \\pos, ignoring");
@@ -573,15 +548,15 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
         if (*p == ')') {
             // 2-argument version (\fad, according to specs)
             // a1 and a2 are fade-in and fade-out durations
-            t1 = 0;
-            t4 = render_priv->state.event->Duration;
+            t1 = -1;
             t2 = a1;
-            t3 = t4 - a2;
+            t3 = a2;
+            t4 = -1;
             a1 = 0xFF;
             a2 = 0;
             a3 = 0xFF;
         } else {
-            // 6-argument version (\fade)
+            // 7-argument version (\fade)
             // a1 and a2 (and a3) are opacity values
             skip(',');
             mystrtoi(&p, &a3);
@@ -595,6 +570,11 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
             mystrtoll(&p, &t4);
         }
         skipopt(')');
+        if (t1 == -1 && t4 == -1) {
+            t1 = 0;
+            t4 = render_priv->state.event->Duration;
+            t3 = t4 - t3;
+        }
         if ((render_priv->state.parsed_tags & PARSED_FADE) == 0) {
             render_priv->state.fade =
                 interpolate_alpha(render_priv->time -
@@ -603,13 +583,12 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
             render_priv->state.parsed_tags |= PARSED_FADE;
         }
     } else if (mystrcmp(&p, "org")) {
-        int v1, v2;
+        double v1, v2;
         skip('(');
-        mystrtoi(&p, &v1);
+        mystrtod(&p, &v1);
         skip(',');
-        mystrtoi(&p, &v2);
+        mystrtod(&p, &v2);
         skipopt(')');
-        ass_msg(render_priv->library, MSGL_DBG2, "org(%d, %d)", v1, v2);
         if (!render_priv->state.have_origin) {
             render_priv->state.org_x = v1;
             render_priv->state.org_y = v2;
@@ -618,8 +597,7 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
         }
     } else if (mystrcmp(&p, "t")) {
         double v[3];
-        int v1, v2;
-        double v3;
+        double accel;
         int cnt;
         long long t1, t2, t, delta_t;
         double k;
@@ -630,28 +608,26 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
             skip(',');
         }
         if (cnt == 3) {
-            v1 = v[0];
-            v2 = (v[1] < v1) ? render_priv->state.event->Duration : v[1];
-            v3 = v[2];
+            t1 = v[0];
+            t2 = v[1];
+            accel = v[2];
         } else if (cnt == 2) {
-            v1 = v[0];
-            v2 = (v[1] < v1) ? render_priv->state.event->Duration : v[1];
-            v3 = 1.;
+            t1 = v[0];
+            t2 = v[1];
+            accel = 1.;
         } else if (cnt == 1) {
-            v1 = 0;
-            v2 = render_priv->state.event->Duration;
-            v3 = v[0];
+            t1 = 0;
+            t2 = 0;
+            accel = v[0];
         } else {                // cnt == 0
-            v1 = 0;
-            v2 = render_priv->state.event->Duration;
-            v3 = 1.;
+            t1 = 0;
+            t2 = 0;
+            accel = 1.;
         }
         render_priv->state.detect_collisions = 0;
-        t1 = v1;
-        t2 = v2;
-        delta_t = v2 - v1;
-        if (v3 < 0.)
-            v3 = 0.;
+        if (t2 == 0)
+            t2 = render_priv->state.event->Duration;
+        delta_t = t2 - t1;
         t = render_priv->time - render_priv->state.event->Start;        // FIXME: move to render_context
         if (t <= t1)
             k = 0.;
@@ -659,7 +635,7 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
             k = 1.;
         else {
             assert(delta_t != 0.);
-            k = pow(((double) (t - t1)) / delta_t, v3);
+            k = pow(((double) (t - t1)) / delta_t, accel);
         }
         while (*p != ')' && *p != '}' && *p != '\0')
             p = parse_tag(render_priv, p, k);   // maybe k*pwr ? no, specs forbid nested \t's
@@ -691,20 +667,15 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
         } else if (!render_priv->state.clip_drawing) {
             p = parse_vector_clip(render_priv, start);
             render_priv->state.clip_drawing_mode = 0;
-        } else {
-            render_priv->state.clip_x0 = 0;
-            render_priv->state.clip_y0 = 0;
-            render_priv->state.clip_x1 = render_priv->track->PlayResX;
-            render_priv->state.clip_y1 = render_priv->track->PlayResY;
         }
     } else if (mystrcmp(&p, "c")) {
         uint32_t val;
         int hex = render_priv->track->track_type == TRACK_TYPE_ASS;
-        if (!strtocolor(render_priv->library, &p, &val, hex))
-            val = render_priv->state.style->PrimaryColour;
-        ass_msg(render_priv->library, MSGL_DBG2, "color: %X", val);
-        change_color(&render_priv->state.c[0], val, pwr);
-        render_priv->state.bm_run_id++;
+        if (strtocolor(render_priv->library, &p, &val, hex))
+            change_color(&render_priv->state.c[0], val, pwr);
+        else
+            change_color(&render_priv->state.c[0],
+                         render_priv->state.style->PrimaryColour, 1);
     } else if ((*p >= '1') && (*p <= '4') && (++p)
                && (mystrcmp(&p, "c") || mystrcmp(&p, "a"))) {
         char n = *(p - 2);
@@ -713,7 +684,7 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
         uint32_t val;
         int hex = render_priv->track->track_type == TRACK_TYPE_ASS;
         assert((n >= '1') && (n <= '4'));
-        if (!strtocolor(render_priv->library, &p, &val, hex))
+        if (!strtocolor(render_priv->library, &p, &val, hex)) {
             switch (n) {
             case '1':
                 val = render_priv->state.style->PrimaryColour;
@@ -731,22 +702,20 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
                 val = 0;
                 break;          // impossible due to assert; avoid compilation warning
             }
+            pwr = 1;
+        }
         switch (cmd) {
         case 'c':
             change_color(render_priv->state.c + cidx, val, pwr);
-            render_priv->state.bm_run_id++;
             break;
         case 'a':
             change_alpha(render_priv->state.c + cidx, val >> 24, pwr);
-            render_priv->state.bm_run_id++;
             break;
         default:
             ass_msg(render_priv->library, MSGL_WARN, "Bad command: %c%c",
                     n, cmd);
             break;
         }
-        ass_msg(render_priv->library, MSGL_DBG2, "single c/a at %f: %c%c = %X",
-               pwr, n, cmd, render_priv->state.c[cidx]);
     } else if (mystrcmp(&p, "r")) {
         char *start = p;
         char *style;
@@ -756,7 +725,7 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
             strncpy(style, start, p - start);
             style[p - start] = '\0';
             reset_render_context(render_priv,
-                    render_priv->track->styles + lookup_style(render_priv->track, style));
+                    lookup_style_strict(render_priv->track, style));
             free(style);
         } else
             reset_render_context(render_priv, NULL);
@@ -769,84 +738,85 @@ char *parse_tag(ASS_Renderer *render_priv, char *p, double pwr)
             render_priv->state.be = val;
         } else
             render_priv->state.be = 0;
-        render_priv->state.bm_run_id++;
     } else if (mystrcmp(&p, "b")) {
-        int b;
-        if (mystrtoi(&p, &b)) {
-            if (pwr >= .5)
-                render_priv->state.bold = b;
-        } else
-            render_priv->state.bold = render_priv->state.style->Bold;
+        int val;
+        if (!mystrtoi(&p, &val) || !(val == 0 || val == 1 || val >= 100))
+            val = render_priv->state.style->Bold;
+        render_priv->state.bold = val;
         update_font(render_priv);
     } else if (mystrcmp(&p, "i")) {
-        int i;
-        if (mystrtoi(&p, &i)) {
-            if (pwr >= .5)
-                render_priv->state.italic = i;
-        } else
-            render_priv->state.italic = render_priv->state.style->Italic;
+        int val;
+        if (!mystrtoi(&p, &val) || !(val == 0 || val == 1))
+            val = render_priv->state.style->Italic;
+        render_priv->state.italic = val;
         update_font(render_priv);
     } else if (mystrcmp(&p, "kf") || mystrcmp(&p, "K")) {
-        int val = 0;
-        mystrtoi(&p, &val);
+        double val;
+        if (!mystrtod(&p, &val))
+            val = 100;
         render_priv->state.effect_type = EF_KARAOKE_KF;
         if (render_priv->state.effect_timing)
             render_priv->state.effect_skip_timing +=
                 render_priv->state.effect_timing;
         render_priv->state.effect_timing = val * 10;
     } else if (mystrcmp(&p, "ko")) {
-        int val = 0;
-        mystrtoi(&p, &val);
+        double val;
+        if (!mystrtod(&p, &val))
+            val = 100;
         render_priv->state.effect_type = EF_KARAOKE_KO;
         if (render_priv->state.effect_timing)
             render_priv->state.effect_skip_timing +=
                 render_priv->state.effect_timing;
         render_priv->state.effect_timing = val * 10;
     } else if (mystrcmp(&p, "k")) {
-        int val = 0;
-        mystrtoi(&p, &val);
+        double val;
+        if (!mystrtod(&p, &val))
+            val = 100;
         render_priv->state.effect_type = EF_KARAOKE;
         if (render_priv->state.effect_timing)
             render_priv->state.effect_skip_timing +=
                 render_priv->state.effect_timing;
         render_priv->state.effect_timing = val * 10;
     } else if (mystrcmp(&p, "shad")) {
-        double val;
+        double val, xval, yval;
         if (mystrtod(&p, &val)) {
-            if (render_priv->state.shadow_x == render_priv->state.shadow_y)
-                val = render_priv->state.shadow_x * (1 - pwr) + val * pwr;
+            xval = render_priv->state.shadow_x * (1 - pwr) + val * pwr;
+            yval = render_priv->state.shadow_y * (1 - pwr) + val * pwr;
+            // VSFilter compat: clamp negatives to 0 for \shad but not \[xy]shad
+            xval = (xval < 0) ? 0 : xval;
+            yval = (yval < 0) ? 0 : yval;
         } else
-            val = 0.;
-        render_priv->state.shadow_x = render_priv->state.shadow_y = val;
-        render_priv->state.bm_run_id++;
+            xval = yval = render_priv->state.style->Shadow;
+        render_priv->state.shadow_x = xval;
+        render_priv->state.shadow_y = yval;
     } else if (mystrcmp(&p, "s")) {
         int val;
-        if (mystrtoi(&p, &val) && val)
+        if (!mystrtoi(&p, &val) || !(val == 0 || val == 1))
+            val = render_priv->state.style->StrikeOut;
+        if (val)
             render_priv->state.flags |= DECO_STRIKETHROUGH;
         else
             render_priv->state.flags &= ~DECO_STRIKETHROUGH;
-        render_priv->state.bm_run_id++;
     } else if (mystrcmp(&p, "u")) {
         int val;
-        if (mystrtoi(&p, &val) && val)
+        if (!mystrtoi(&p, &val) || !(val == 0 || val == 1))
+            val = render_priv->state.style->Underline;
+        if (val)
             render_priv->state.flags |= DECO_UNDERLINE;
         else
             render_priv->state.flags &= ~DECO_UNDERLINE;
-        render_priv->state.bm_run_id++;
     } else if (mystrcmp(&p, "pbo")) {
-        double val = 0;
-        if (mystrtod(&p, &val))
-            render_priv->state.drawing->pbo = val;
+        double val;
+        mystrtod(&p, &val);
+        render_priv->state.pbo = val;
     } else if (mystrcmp(&p, "p")) {
         int val;
-        if (!mystrtoi(&p, &val))
-            val = 0;
-        if (val)
-            render_priv->state.drawing->scale = val;
-        render_priv->state.drawing_mode = !!val;
+        mystrtoi(&p, &val);
+        val = (val < 0) ? 0 : val;
+        render_priv->state.drawing_scale = val;
     } else if (mystrcmp(&p, "q")) {
         int val;
-        if (!mystrtoi(&p, &val))
+        if (!mystrtoi(&p, &val) || !(val >= 0 && val <= 3))
             val = render_priv->track->WrapStyle;
         render_priv->state.wrap_style = val;
     } else if (mystrcmp(&p, "fe")) {
@@ -982,7 +952,7 @@ void process_karaoke_effects(ASS_Renderer *render_priv)
                 dt = (tm_current - tm_start);
                 if ((s1->effect_type == EF_KARAOKE)
                     || (s1->effect_type == EF_KARAOKE_KO)) {
-                    if (dt > 0)
+                    if (dt >= 0)
                         x = x_end + 1;
                     else
                         x = x_start;
@@ -999,6 +969,7 @@ void process_karaoke_effects(ASS_Renderer *render_priv)
                     cur2->effect_type = s1->effect_type;
                     cur2->effect_timing = x - d6_to_int(cur2->pos.x);
                 }
+                s1->effect = 1;
             }
         }
     }
diff --git a/libass/ass_render.c b/libass/ass_render.c
index 61ba666..16d983d 100644
--- a/libass/ass_render.c
+++ b/libass/ass_render.c
@@ -20,6 +20,7 @@
 
 #include <assert.h>
 #include <math.h>
+#include <string.h>
 
 #include "ass_render.h"
 #include "ass_parse.h"
@@ -27,9 +28,18 @@
 
 #define MAX_GLYPHS_INITIAL 1024
 #define MAX_LINES_INITIAL 64
+#define MAX_BITMAPS_INITIAL 16
+#define MAX_STR_LENGTH_INITIAL 64
 #define SUBPIXEL_MASK 63
 #define SUBPIXEL_ACCURACY 7
 
+#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
+
+#include "x86/blend_bitmaps.h"
+#include "x86/be_blur.h"
+
+#endif // ASM
+
 ASS_Renderer *ass_renderer_init(ASS_Library *library)
 {
     int error;
@@ -59,15 +69,44 @@ ASS_Renderer *ass_renderer_init(ASS_Library *library)
     priv->ftlibrary = ft;
     // images_root and related stuff is zero-filled in calloc
 
+    #if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
+        int sse2 = has_sse2();
+        int avx2 = has_avx2();
+        priv->add_bitmaps_func = avx2 ? ass_add_bitmaps_avx2 :
+            (sse2 ? ass_add_bitmaps_sse2 : ass_add_bitmaps_x86);
+        #ifdef __x86_64__
+            priv->be_blur_func = avx2 ? ass_be_blur_avx2 :
+                (sse2 ? ass_be_blur_sse2 : be_blur_c);
+            priv->mul_bitmaps_func = avx2 ? ass_mul_bitmaps_avx2 :
+                (sse2 ? ass_mul_bitmaps_sse2 : mul_bitmaps_c);
+            priv->sub_bitmaps_func = avx2 ? ass_sub_bitmaps_avx2 :
+                (sse2 ? ass_sub_bitmaps_sse2 : ass_sub_bitmaps_x86);
+        #else
+            priv->be_blur_func = be_blur_c;
+            priv->mul_bitmaps_func = mul_bitmaps_c;
+            priv->sub_bitmaps_func = ass_sub_bitmaps_x86;
+        #endif
+    #else
+        priv->add_bitmaps_func = add_bitmaps_c;
+        priv->sub_bitmaps_func = sub_bitmaps_c;
+        priv->mul_bitmaps_func = mul_bitmaps_c;
+        priv->be_blur_func = be_blur_c;
+    #endif
+    priv->restride_bitmap_func = restride_bitmap_c;
+
     priv->cache.font_cache = ass_font_cache_create();
     priv->cache.bitmap_cache = ass_bitmap_cache_create();
     priv->cache.composite_cache = ass_composite_cache_create();
     priv->cache.outline_cache = ass_outline_cache_create();
     priv->cache.glyph_max = GLYPH_CACHE_MAX;
     priv->cache.bitmap_max_size = BITMAP_CACHE_MAX_SIZE;
+    priv->cache.composite_max_size = COMPOSITE_CACHE_MAX_SIZE;
 
+    priv->text_info.max_bitmaps = MAX_BITMAPS_INITIAL;
     priv->text_info.max_glyphs = MAX_GLYPHS_INITIAL;
     priv->text_info.max_lines = MAX_LINES_INITIAL;
+    priv->text_info.n_bitmaps = 0;
+    priv->text_info.combined_bitmaps = calloc(MAX_BITMAPS_INITIAL, sizeof(CombinedBitmapInfo));
     priv->text_info.glyphs = calloc(MAX_GLYPHS_INITIAL, sizeof(GlyphInfo));
     priv->text_info.lines = calloc(MAX_LINES_INITIAL, sizeof(LineInfo));
 
@@ -129,6 +168,8 @@ void ass_renderer_done(ASS_Renderer *render_priv)
     free(render_priv->text_info.glyphs);
     free(render_priv->text_info.lines);
 
+    free(render_priv->text_info.combined_bitmaps);
+
     free(render_priv->settings.default_font);
     free(render_priv->settings.default_family);
 
@@ -372,22 +413,18 @@ render_glyph(ASS_Renderer *render_priv, Bitmap *bm, int dst_x, int dst_y,
 
     tmp = dst_x - clip_x0;
     if (tmp < 0) {
-        ass_msg(render_priv->library, MSGL_DBG2, "clip left");
         b_x0 = -tmp;
     }
     tmp = dst_y - clip_y0;
     if (tmp < 0) {
-        ass_msg(render_priv->library, MSGL_DBG2, "clip top");
         b_y0 = -tmp;
     }
     tmp = clip_x1 - dst_x - bm->w;
     if (tmp < 0) {
-        ass_msg(render_priv->library, MSGL_DBG2, "clip right");
         b_x1 = bm->w + tmp;
     }
     tmp = clip_y1 - dst_y - bm->h;
     if (tmp < 0) {
-        ass_msg(render_priv->library, MSGL_DBG2, "clip bottom");
         b_y1 = bm->h + tmp;
     }
 
@@ -419,109 +456,6 @@ render_glyph(ASS_Renderer *render_priv, Bitmap *bm, int dst_x, int dst_y,
     return tail;
 }
 
-/**
- * \brief Replace the bitmap buffer in ASS_Image with a copy
- * \param img ASS_Image to operate on
- * \return pointer to old bitmap buffer
- */
-static unsigned char *clone_bitmap_buffer(ASS_Image *img)
-{
-    unsigned char *old_bitmap = img->bitmap;
-    int size = img->stride * (img->h - 1) + img->w;
-    img->bitmap = malloc(size);
-    memcpy(img->bitmap, old_bitmap, size);
-    return old_bitmap;
-}
-
-/**
- * \brief Calculate overlapping area of two consecutive bitmaps and in case they
- * overlap, blend them together
- * Mainly useful for translucent glyphs and especially borders, to avoid the
- * luminance adding up where they overlap (which looks ugly)
- */
-static void
-render_overlap(ASS_Renderer *render_priv, ASS_Image **last_tail,
-               ASS_Image **tail)
-{
-    int left, top, bottom, right;
-    int old_left, old_top, w, h, cur_left, cur_top;
-    int x, y, opos, cpos;
-    char m;
-    CompositeHashKey hk;
-    CompositeHashValue *hv;
-    CompositeHashValue chv;
-    int ax = (*last_tail)->dst_x;
-    int ay = (*last_tail)->dst_y;
-    int aw = (*last_tail)->w;
-    int as = (*last_tail)->stride;
-    int ah = (*last_tail)->h;
-    int bx = (*tail)->dst_x;
-    int by = (*tail)->dst_y;
-    int bw = (*tail)->w;
-    int bs = (*tail)->stride;
-    int bh = (*tail)->h;
-    unsigned char *a;
-    unsigned char *b;
-
-    if ((*last_tail)->bitmap == (*tail)->bitmap)
-        return;
-
-    if ((*last_tail)->color != (*tail)->color)
-        return;
-
-    // Calculate overlap coordinates
-    left = (ax > bx) ? ax : bx;
-    top = (ay > by) ? ay : by;
-    right = ((ax + aw) < (bx + bw)) ? (ax + aw) : (bx + bw);
-    bottom = ((ay + ah) < (by + bh)) ? (ay + ah) : (by + bh);
-    if ((right <= left) || (bottom <= top))
-        return;
-    old_left = left - ax;
-    old_top = top - ay;
-    w = right - left;
-    h = bottom - top;
-    cur_left = left - bx;
-    cur_top = top - by;
-
-    // Query cache
-    hk.a = (*last_tail)->bitmap;
-    hk.b = (*tail)->bitmap;
-    hk.aw = aw;
-    hk.ah = ah;
-    hk.bw = bw;
-    hk.bh = bh;
-    hk.ax = ax;
-    hk.ay = ay;
-    hk.bx = bx;
-    hk.by = by;
-    hk.as = as;
-    hk.bs = bs;
-    hv = ass_cache_get(render_priv->cache.composite_cache, &hk);
-    if (hv) {
-        (*last_tail)->bitmap = hv->a;
-        (*tail)->bitmap = hv->b;
-        return;
-    }
-    // Allocate new bitmaps and copy over data
-    a = clone_bitmap_buffer(*last_tail);
-    b = clone_bitmap_buffer(*tail);
-
-    // Blend overlapping area
-    for (y = 0; y < h; y++)
-        for (x = 0; x < w; x++) {
-            opos = (old_top + y) * (as) + (old_left + x);
-            cpos = (cur_top + y) * (bs) + (cur_left + x);
-            m = FFMIN(a[opos] + b[cpos], 0xff);
-            (*last_tail)->bitmap[opos] = 0;
-            (*tail)->bitmap[cpos] = m;
-        }
-
-    // Insert bitmaps into the cache
-    chv.a = (*last_tail)->bitmap;
-    chv.b = (*tail)->bitmap;
-    ass_cache_put(render_priv->cache.composite_cache, &hk, &chv);
-}
-
 static void free_list_add(ASS_Renderer *render_priv, void *object)
 {
     if (!render_priv->free_head) {
@@ -570,7 +504,7 @@ static void blend_vector_clip(ASS_Renderer *render_priv,
         if (!outline) {
             ass_msg(render_priv->library, MSGL_WARN,
                     "Clip vector parsing failed. Skipping.");
-            goto blend_vector_error;
+            return;
         }
 
         // We need to translate the clip according to screen borders
@@ -583,10 +517,6 @@ static void blend_vector_clip(ASS_Renderer *render_priv,
             FT_Outline_Translate(outline, trans.x, trans.y);
         }
 
-        ass_msg(render_priv->library, MSGL_DBG2,
-                "Parsed vector clip: scales (%f, %f) string [%s]\n",
-                drawing->scale_x, drawing->scale_y, drawing->text);
-
         clip_bm = outline_to_bitmap(render_priv->library,
                 render_priv->ftlibrary, outline, 0);
 
@@ -596,13 +526,12 @@ static void blend_vector_clip(ASS_Renderer *render_priv,
         v.bm = clip_bm;
         ass_cache_put(render_priv->cache.bitmap_cache, &key, &v);
     }
-blend_vector_error:
 
-    if (!clip_bm) goto blend_vector_exit;
+    if (!clip_bm) return;
 
     // Iterate through bitmaps and blend/clip them
     for (cur = head; cur; cur = cur->next) {
-        int left, top, right, bottom, apos, bpos, y, x, w, h;
+        int left, top, right, bottom, w, h;
         int ax, ay, aw, ah, as;
         int bx, by, bw, bh, bs;
         int aleft, atop, bleft, btop;
@@ -636,58 +565,57 @@ blend_vector_error:
         if (render_priv->state.clip_drawing_mode) {
             // Inverse clip
             if (ax + aw < bx || ay + ah < by || ax > bx + bw ||
-                ay > by + bh) {
+                ay > by + bh || !h || !w) {
                 continue;
             }
 
             // Allocate new buffer and add to free list
-            nbuffer = malloc(as * ah);
-            if (!nbuffer) goto blend_vector_exit;
+            nbuffer = malloc(as * ah + 0x1F);
+            if (!nbuffer) return;
             free_list_add(render_priv, nbuffer);
+            nbuffer = (unsigned char*)(((uintptr_t)nbuffer + 0x1F) & ~0x1F);
 
             // Blend together
-            memcpy(nbuffer, abuffer, as * (ah - 1) + aw);
-            for (y = 0; y < h; y++)
-                for (x = 0; x < w; x++) {
-                    apos = (atop + y) * as + aleft + x;
-                    bpos = (btop + y) * bs + bleft + x;
-                    nbuffer[apos] = FFMAX(0, abuffer[apos] - bbuffer[bpos]);
-                }
+            memcpy(nbuffer, abuffer, ((ah - 1) * as) + aw);
+            render_priv->sub_bitmaps_func(nbuffer + atop * as + aleft, as,
+                                          bbuffer + btop * bs + bleft, bs,
+                                          h, w);
         } else {
             // Regular clip
             if (ax + aw < bx || ay + ah < by || ax > bx + bw ||
-                ay > by + bh) {
-                cur->w = cur->h = 0;
+                ay > by + bh || !h || !w) {
+                cur->w = cur->h = cur->stride = 0;
                 continue;
             }
 
             // Allocate new buffer and add to free list
-            nbuffer = calloc(as, h);
-            if (!nbuffer) goto blend_vector_exit;
+            uintptr_t alignment_offset = (w > 15) ? 15 : ((w > 7) ? 7 : 0);
+            unsigned ns = (w + alignment_offset) & ~alignment_offset;
+            nbuffer = malloc(ns * h + alignment_offset);
+            if (!nbuffer) return;
             free_list_add(render_priv, nbuffer);
+            nbuffer = (unsigned char*)
+                (((uintptr_t)nbuffer + alignment_offset) & ~alignment_offset);
 
             // Blend together
-            for (y = 0; y < h; y++)
-                for (x = 0; x < w; x++) {
-                    apos = (atop + y) * as + aleft + x;
-                    bpos = (btop + y) * bs + bleft + x;
-                    nbuffer[y * as + x] = (abuffer[apos] * bbuffer[bpos] + 255) >> 8;
-                }
-            cur->dst_x = left;
-            cur->dst_y = top;
+            render_priv->mul_bitmaps_func(nbuffer, ns,
+                                          abuffer + atop * as + aleft, as,
+                                          bbuffer + btop * bs + bleft, bs,
+                                          w, h);
+            cur->dst_x += aleft;
+            cur->dst_y += atop;
             cur->w = w;
             cur->h = h;
+            cur->stride = ns;
         }
         cur->bitmap = nbuffer;
     }
-
-blend_vector_exit:
-    ass_drawing_free(render_priv->state.clip_drawing);
-    render_priv->state.clip_drawing = 0;
 }
 
-#define SKIP_SYMBOL(x) ((x) == 0 || (x) == '\n' || (x) == '\r')
-
+static inline int is_skip_symbol(uint32_t x)
+{
+    return (x == 0 || x == '\n' || x == '\r');
+}
 /**
  * \brief Convert TextInfo struct to ASS_Image list
  * Splits glyphs in halves when needed (for \kf karaoke).
@@ -699,118 +627,102 @@ static ASS_Image *render_text(ASS_Renderer *render_priv, int dst_x, int dst_y)
     Bitmap *bm;
     ASS_Image *head;
     ASS_Image **tail = &head;
-    ASS_Image **last_tail = 0;
-    ASS_Image **here_tail = 0;
     TextInfo *text_info = &render_priv->text_info;
 
-    for (i = 0; i < text_info->length; ++i) {
-        GlyphInfo *info = text_info->glyphs + i;
-        if (SKIP_SYMBOL(info->symbol) || !info->bm_s
-            || (info->shadow_x == 0 && info->shadow_y == 0) || info->skip)
+    for (i = 0; i < text_info->n_bitmaps; ++i) {
+        CombinedBitmapInfo *info = &text_info->combined_bitmaps[i];
+        if (!info->bm_s || (info->shadow_x == 0 && info->shadow_y == 0))
             continue;
 
-        while (info) {
-            if (!info->bm_s) {
-                info = info->next;
-                continue;
-            }
-
-            pen_x =
-                dst_x + (info->pos.x >> 6) +
-                (int) (info->shadow_x * render_priv->border_scale);
-            pen_y =
-                dst_y + (info->pos.y >> 6) +
-                (int) (info->shadow_y * render_priv->border_scale);
-            bm = info->bm_s;
-
-            here_tail = tail;
-            tail =
-                render_glyph(render_priv, bm, pen_x, pen_y, info->c[3], 0,
-                        1000000, tail, IMAGE_TYPE_SHADOW);
-
-            if (last_tail && tail != here_tail && ((info->c[3] & 0xff) > 0))
-                render_overlap(render_priv, last_tail, here_tail);
-            last_tail = here_tail;
-
-            info = info->next;
-        }
+        pen_x =
+            dst_x + info->pos.x +
+            (int) (info->shadow_x * render_priv->border_scale);
+        pen_y =
+            dst_y + info->pos.y +
+            (int) (info->shadow_y * render_priv->border_scale);
+        bm = info->bm_s;
+
+        tail =
+            render_glyph(render_priv, bm, pen_x, pen_y, info->c[3], 0,
+                    1000000, tail, IMAGE_TYPE_SHADOW);
     }
 
-    last_tail = 0;
-    for (i = 0; i < text_info->length; ++i) {
-        GlyphInfo *info = text_info->glyphs + i;
-        if (SKIP_SYMBOL(info->symbol) || !info->bm_o
-            || info->skip)
+    for (i = 0; i < text_info->n_bitmaps; ++i) {
+        CombinedBitmapInfo *info = &text_info->combined_bitmaps[i];
+        if (!info->bm_o)
             continue;
 
-        while (info) {
-            if (!info->bm_o) {
-                info = info->next;
-                continue;
-            }
-
-            pen_x = dst_x + (info->pos.x >> 6);
-            pen_y = dst_y + (info->pos.y >> 6);
-            bm = info->bm_o;
+        pen_x = dst_x + info->pos.x;
+        pen_y = dst_y + info->pos.y;
+        bm = info->bm_o;
 
-            if ((info->effect_type == EF_KARAOKE_KO)
-                    && (info->effect_timing <= (info->bbox.xMax >> 6))) {
-                // do nothing
-            } else {
-                here_tail = tail;
-                tail =
-                    render_glyph(render_priv, bm, pen_x, pen_y, info->c[2],
-                            0, 1000000, tail, IMAGE_TYPE_OUTLINE);
-                if (last_tail && tail != here_tail && ((info->c[2] & 0xff) > 0))
-                    render_overlap(render_priv, last_tail, here_tail);
-
-                last_tail = here_tail;
-            }
-            info = info->next;
+        if ((info->effect_type == EF_KARAOKE_KO)
+                && (info->effect_timing <= info->first_pos_x)) {
+            // do nothing
+        } else {
+            tail =
+                render_glyph(render_priv, bm, pen_x, pen_y, info->c[2],
+                        0, 1000000, tail, IMAGE_TYPE_OUTLINE);
         }
     }
 
-    for (i = 0; i < text_info->length; ++i) {
-        GlyphInfo *info = text_info->glyphs + i;
-        if (SKIP_SYMBOL(info->symbol) || !info->bm
-            || info->skip)
+    for (i = 0; i < text_info->n_bitmaps; ++i) {
+        CombinedBitmapInfo *info = &text_info->combined_bitmaps[i];
+        if (!info->bm)
             continue;
 
-        while (info) {
-            if (!info->bm) {
-                info = info->next;
-                continue;
-            }
+        pen_x = dst_x + info->pos.x;
+        pen_y = dst_y + info->pos.y;
+        bm = info->bm;
 
-            pen_x = dst_x + (info->pos.x >> 6);
-            pen_y = dst_y + (info->pos.y >> 6);
-            bm = info->bm;
-
-            if ((info->effect_type == EF_KARAOKE)
-                    || (info->effect_type == EF_KARAOKE_KO)) {
-                if (info->effect_timing > (info->bbox.xMax >> 6))
-                    tail =
-                        render_glyph(render_priv, bm, pen_x, pen_y,
-                                info->c[0], 0, 1000000, tail, IMAGE_TYPE_CHARACTER);
-                else
-                    tail =
-                        render_glyph(render_priv, bm, pen_x, pen_y,
-                                info->c[1], 0, 1000000, tail, IMAGE_TYPE_CHARACTER);
-            } else if (info->effect_type == EF_KARAOKE_KF) {
+        if ((info->effect_type == EF_KARAOKE)
+                || (info->effect_type == EF_KARAOKE_KO)) {
+            if (info->effect_timing > info->first_pos_x)
                 tail =
-                    render_glyph(render_priv, bm, pen_x, pen_y, info->c[0],
-                            info->c[1], info->effect_timing, tail, IMAGE_TYPE_CHARACTER);
-            } else
+                    render_glyph(render_priv, bm, pen_x, pen_y,
+                            info->c[0], 0, 1000000, tail, IMAGE_TYPE_CHARACTER);
+            else
                 tail =
-                    render_glyph(render_priv, bm, pen_x, pen_y, info->c[0],
-                            0, 1000000, tail, IMAGE_TYPE_CHARACTER);
-            info = info->next;
-        }
+                    render_glyph(render_priv, bm, pen_x, pen_y,
+                            info->c[1], 0, 1000000, tail, IMAGE_TYPE_CHARACTER);
+        } else if (info->effect_type == EF_KARAOKE_KF) {
+            tail =
+                render_glyph(render_priv, bm, pen_x, pen_y, info->c[0],
+                        info->c[1], info->effect_timing, tail, IMAGE_TYPE_CHARACTER);
+        } else
+            tail =
+                render_glyph(render_priv, bm, pen_x, pen_y, info->c[0],
+                        0, 1000000, tail, IMAGE_TYPE_CHARACTER);
     }
 
     *tail = 0;
     blend_vector_clip(render_priv, head);
 
+    for (ASS_Image* cur = head; cur; cur = cur->next) {
+        unsigned w = cur->w,
+                 h = cur->h,
+                 s = cur->stride;
+        if(w + 31 < (unsigned)cur->stride){ // Larger value? Play with this.
+            // Allocate new buffer and add to free list
+            uintptr_t alignment_offset = (w > 31) ? 31 : ((w > 15) ? 15 : 0);
+            unsigned ns = (w + alignment_offset) & ~alignment_offset;
+            uint8_t* nbuffer = malloc(ns * cur->h + alignment_offset);
+            if (!nbuffer) continue;
+            free_list_add(render_priv, nbuffer);
+            nbuffer = (unsigned char*)
+                (((uintptr_t)nbuffer + alignment_offset) & ~alignment_offset);
+
+            // Copy
+            render_priv->restride_bitmap_func(nbuffer, ns,
+                                              cur->bitmap, s,
+                                              w, h);
+            cur->w = w;
+            cur->h = h;
+            cur->stride = ns;
+            cur->bitmap = nbuffer;
+        }
+    }
+
     return head;
 }
 
@@ -868,7 +780,8 @@ void reset_render_context(ASS_Renderer *render_priv, ASS_Style *style)
     update_font(render_priv);
 
     render_priv->state.border_style = style->BorderStyle;
-    calc_border(render_priv, style->Outline, style->Outline);
+    render_priv->state.border_x = style->Outline;
+    render_priv->state.border_y = style->Outline;
     change_border(render_priv, render_priv->state.border_x, render_priv->state.border_y);
     render_priv->state.scale_x = style->ScaleX;
     render_priv->state.scale_y = style->ScaleY;
@@ -910,7 +823,7 @@ init_render_context(ASS_Renderer *render_priv, ASS_Event *event)
     render_priv->state.clip_mode = 0;
     render_priv->state.detect_collisions = 1;
     render_priv->state.fade = 0;
-    render_priv->state.drawing_mode = 0;
+    render_priv->state.drawing_scale = 0;
     render_priv->state.effect_type = EF_NONE;
     render_priv->state.effect_timing = 0;
     render_priv->state.effect_skip_timing = 0;
@@ -926,9 +839,11 @@ static void free_render_context(ASS_Renderer *render_priv)
 {
     free(render_priv->state.family);
     ass_drawing_free(render_priv->state.drawing);
+    ass_drawing_free(render_priv->state.clip_drawing);
 
     render_priv->state.family = NULL;
     render_priv->state.drawing = NULL;
+    render_priv->state.clip_drawing = NULL;
 }
 
 /*
@@ -941,8 +856,8 @@ static void draw_opaque_box(ASS_Renderer *render_priv, GlyphInfo *info,
 {
     int i;
     int adv = advance.x;
-    double scale_y = info->scale_y;
-    double scale_x = info->scale_x;
+    double scale_y = info->orig_scale_y;
+    double scale_x = info->orig_scale_x;
 
     // to avoid gaps
     sx = FFMAX(64, sx);
@@ -1045,6 +960,10 @@ fill_glyph_hash(ASS_Renderer *priv, OutlineHashKey *outline_key,
         key->outline.x = double_to_d16(info->border_x);
         key->outline.y = double_to_d16(info->border_y);
         key->border_style = info->border_style;
+        // hspacing only matters for opaque box borders (see draw_opaque_box),
+        // so for normal borders, maximize cache utility by ignoring it
+        key->hspacing =
+            info->border_style == 3 ? double_to_d16(info->hspacing) : 0;
         key->hash = info->drawing->hash;
         key->text = info->drawing->text;
         key->pbo = info->drawing->pbo;
@@ -1064,10 +983,49 @@ fill_glyph_hash(ASS_Renderer *priv, OutlineHashKey *outline_key,
         key->outline.y = double_to_d16(info->border_y);
         key->flags = info->flags;
         key->border_style = info->border_style;
+        key->hspacing =
+            info->border_style == 3 ? double_to_d16(info->hspacing) : 0;
     }
 }
 
 /**
+ * \brief Prepare combined-bitmap hash key hk from info (key of composite_cache)
+ */
+static void fill_composite_hash(CompositeHashKey *hk, CombinedBitmapInfo *info)
+{
+    hk->w = info->w;        // extents accumulated from the glyph bitmaps of the run
+    hk->h = info->h;
+    hk->o_w = info->o_w;    // extents accumulated from the outline bitmaps of the run
+    hk->o_h = info->o_h;
+    hk->be = info->be;
+    hk->blur = info->blur;
+    hk->border_style = info->border_style;
+    hk->has_outline = info->has_outline;
+    hk->is_drawing = info->is_drawing;
+    hk->str = info->str;    // only the pointer is copied, the string is not duplicated
+    hk->chars = info->chars;
+    hk->shadow_x = info->shadow_x;
+    hk->shadow_y = info->shadow_y;
+    hk->flags = info->flags;
+    hk->bold = info->bold;
+    hk->italic = info->italic;
+    hk->hspacing = info->hspacing;
+    hk->scale_x = info->scale_x;
+    hk->scale_y = info->scale_y;
+    hk->has_border = info->has_border;
+    hk->border_x = info->border_x;
+    hk->border_y = info->border_y;
+    hk->frx = info->frx;
+    hk->fry = info->fry;
+    hk->frz = info->frz;
+    hk->fax = info->fax;
+    hk->fay = info->fay;
+    hk->shift_x = info->shift_x;
+    hk->shift_y = info->shift_y;
+    hk->advance = info->advance;
+}
+
+/**
  * \brief Get normal and outline (border) glyphs
  * \param info out: struct filled with extracted data
  * Tries to get both glyphs from cache.
@@ -1103,14 +1061,10 @@ get_outline_glyph(ASS_Renderer *priv, GlyphInfo *info)
             v.desc = drawing->desc;
             key.u.drawing.text = strdup(drawing->text);
         } else {
-            // arbitrary, not too small to prevent grid fitting rounding effects
-            // XXX: this is a rather crude hack
-            const double ft_size = 256.0;
-            ass_face_set_size(info->font->faces[info->face_index], ft_size);
-            ass_font_set_transform(info->font,
-                info->scale_x * info->font_size / ft_size,
-                info->scale_y * info->font_size / ft_size,
-                NULL);
+            ass_face_set_size(info->font->faces[info->face_index],
+                              info->font_size);
+            ass_font_set_transform(info->font, info->scale_x,
+                                   info->scale_y, NULL);
             FT_Glyph glyph =
                 ass_font_get_glyph(priv->fontconfig_priv, info->font,
                         info->symbol, info->face_index, info->glyph_index,
@@ -1125,8 +1079,8 @@ get_outline_glyph(ASS_Renderer *priv, GlyphInfo *info)
                 FT_Done_Glyph(glyph);
                 ass_font_get_asc_desc(info->font, info->symbol,
                         &v.asc, &v.desc);
-                v.asc  *= info->scale_y * info->font_size / ft_size;
-                v.desc *= info->scale_y * info->font_size / ft_size;
+                v.asc  *= info->scale_y;
+                v.desc *= info->scale_y;
             }
         }
 
@@ -1173,8 +1127,6 @@ get_outline_glyph(ASS_Renderer *priv, GlyphInfo *info)
     }
     info->asc = val->asc;
     info->desc = val->desc;
-
-    ass_drawing_free(info->drawing);
 }
 
 /**
@@ -1291,9 +1243,10 @@ get_bitmap_glyph(ASS_Renderer *render_priv, GlyphInfo *info)
         fay_scaled = info->fay / info->scale_x * info->scale_y;
 
         // apply rotation
+        // use blur_scale because, like blurs, VSFilter forgets to scale this
         transform_3d(shift, outline, border,
                 info->frx, info->fry, info->frz, fax_scaled,
-                fay_scaled, render_priv->font_scale, info->asc);
+                fay_scaled, render_priv->blur_scale, info->asc);
 
         // PAR correction scaling
         FT_Matrix m = { double_to_d16(scale_x), 0,
@@ -1334,12 +1287,6 @@ get_bitmap_glyph(ASS_Renderer *render_priv, GlyphInfo *info)
 
     info->bm = val->bm;
     info->bm_o = val->bm_o;
-    info->bm_s = val->bm_s;
-
-    // VSFilter compatibility: invisible fill and no border?
-    // In this case no shadow is supposed to be rendered.
-    if (!info->border && (info->c[0] & 0xFF) == 0xFF)
-        info->bm_s = 0;
 }
 
 /**
@@ -1361,7 +1308,7 @@ static void measure_text(ASS_Renderer *render_priv)
     text_info->height = 0.;
     for (i = 0; i < text_info->length + 1; ++i) {
         if ((i == text_info->length) || text_info->glyphs[i].linebreak) {
-            if (empty_line && cur_line > 0 && last && i < text_info->length) {
+            if (empty_line && cur_line > 0 && last) {
                 max_asc = d6_to_double(last->asc) / 2.0;
                 max_desc = d6_to_double(last->desc) / 2.0;
             }
@@ -1371,16 +1318,17 @@ static void measure_text(ASS_Renderer *render_priv)
             cur_line++;
             max_asc = max_desc = 0.;
             empty_line = 1;
-        } else
-            empty_line = 0;
+        }
         if (i < text_info->length) {
             GlyphInfo *cur = text_info->glyphs + i;
             if (d6_to_double(cur->asc) > max_asc)
                 max_asc = d6_to_double(cur->asc);
             if (d6_to_double(cur->desc) > max_desc)
                 max_desc = d6_to_double(cur->desc);
-            if (cur->symbol != '\n' && cur->symbol != 0)
+            if (cur->symbol != '\n' && cur->symbol != 0) {
+                empty_line = 0;
                 last = cur;
+            }
         }
     }
     text_info->height +=
@@ -1600,11 +1548,7 @@ wrap_lines_smart(ASS_Renderer *render_priv, double max_text_width)
             run_offset++;
             pen_shift_x = d6_to_double(-cur->pos.x);
             pen_shift_y += height + render_priv->settings.line_spacing;
-            ass_msg(render_priv->library, MSGL_DBG2,
-                   "shifting from %d to %d by (%f, %f)", i,
-                   text_info->length - 1, pen_shift_x, pen_shift_y);
         }
-        cur->bm_run_id += run_offset;
         cur->pos.x += double_to_d6(pen_shift_x);
         cur->pos.y += double_to_d6(pen_shift_y);
     }
@@ -1679,6 +1623,169 @@ fill_bitmap_hash(ASS_Renderer *priv, GlyphInfo *info,
 }
 
 /**
+ * \brief Adjust the glyph's font size and scale factors to ensure smooth
+ *  scaling and handle pathological font sizes. The main problem here is
+ *  freetype's grid fitting, which destroys animations by font size, or will
+ *  result in incorrect final text size if font sizes are very small and
+ *  scale factors very large. See Google Code issue #46.
+ * \param priv renderer; only its settings.hinting mode is read here
+ * \param glyph the glyph to be modified (scale_x, scale_y, font_size are rewritten)
+ */
+static void
+fix_glyph_scaling(ASS_Renderer *priv, GlyphInfo *glyph)
+{
+    double ft_size;
+    if (priv->settings.hinting == ASS_HINTING_NONE) {
+        // arbitrary, not too small to prevent grid fitting rounding effects
+        // XXX: this is a rather crude hack
+        ft_size = 256.0;
+    } else {
+        // If hinting is enabled, we want to pass the real font size
+        // to freetype. Normalize scale_y to 1.0.
+        ft_size = glyph->scale_y * glyph->font_size;
+    }
+    glyph->scale_x = glyph->scale_x * glyph->font_size / ft_size;  // NOTE(review): ft_size is 0 when hinting is on and scale_y * font_size == 0 -- confirm this cannot happen
+    glyph->scale_y = glyph->scale_y * glyph->font_size / ft_size;
+    glyph->font_size = ft_size;  // scale * font_size keeps its previous value, only the split changes
+}
+
+ /**
+  * \brief Checks whether a glyph should start a new bitmap run
+  * \param info Pointer to new GlyphInfo to check
+  * \param last Pointer to the previously processed GlyphInfo (may be NULL)
+  * \return 1 if a new run should be started, 0 otherwise
+  */
+static int is_new_bm_run(GlyphInfo *info, GlyphInfo *last)
+{
+    if (!last || info->linebreak || info->effect ||
+        info->drawing || last->drawing) {   // these conditions always force a new run
+        return 1;
+    }
+    // FIXME: Don't break on glyph substitutions
+    if (strcmp(last->font->desc.family, info->font->desc.family) ||
+        last->font->desc.vertical != info->font->desc.vertical ||
+        last->face_index != info->face_index ||
+        last->font_size != info->font_size ||
+        last->c[0] != info->c[0] ||
+        last->c[1] != info->c[1] ||
+        last->c[2] != info->c[2] ||
+        last->c[3] != info->c[3] ||
+        last->be != info->be ||
+        last->blur != info->blur ||
+        last->shadow_x != info->shadow_x ||
+        last->shadow_y != info->shadow_y ||
+        last->frx != info->frx ||
+        last->fry != info->fry ||
+        last->frz != info->frz ||
+        last->fax != info->fax ||
+        last->fay != info->fay ||
+        last->scale_x != info->scale_x ||
+        last->scale_y != info->scale_y ||
+        last->border_style != info->border_style ||
+        last->border_x != info->border_x ||
+        last->border_y != info->border_y ||
+        last->hspacing != info->hspacing ||
+        last->italic != info->italic ||
+        last->bold != info->bold ||
+        last->flags != info->flags){   // any difference in font or style between the two glyphs
+        return 1;
+    }
+    return 0;   // same font and style as the previous glyph: stay in the current run
+}
+
+static void apply_blur(CombinedBitmapInfo *info, ASS_Renderer *render_priv)
+{   // Runs the box blur (info->be passes) and then the gaussian blur on the run's outline and/or glyph bitmap
+    int be = info->be;
+    double blur_radius = info->blur * render_priv->blur_scale * 2;
+    ASS_SynthPriv *priv_blur = render_priv->synth_priv;
+    Bitmap *bm_g = info->bm;      // glyph bitmap of the run
+    Bitmap *bm_o = info->bm_o;    // outline bitmap of the run, may be NULL
+    int border_style = info->border_style;
+
+    if(blur_radius > 0.0 || be){  // size the shared scratch buffer before any blur runs
+        if (bm_o)
+            resize_tmp(priv_blur, bm_o->w, bm_o->h);
+        if (!bm_o || border_style == 3)  // glyph bitmap is handled only without an outline or with border_style 3 (opaque box)
+            resize_tmp(priv_blur, bm_g->w, bm_g->h);  // assumes bm_g is non-NULL whenever this branch is taken -- TODO confirm
+    }
+
+    // Apply box blur (multiple passes, if requested)
+    if (be) {
+        uint16_t* tmp = (uint16_t*)(((uintptr_t)priv_blur->tmp + 0x0F) & ~0x0F);  // scratch pointer rounded up to a 16-byte boundary
+        if (bm_o) {
+            unsigned passes = be;
+            unsigned w = bm_o->w;
+            unsigned h = bm_o->h;
+            unsigned stride = bm_o->stride;
+            unsigned char *buf = bm_o->buffer;
+            if(w && h){
+                while(passes--){
+                    memset(tmp, 0, stride * 2);
+                    if(w < 16){   // narrow bitmaps go through be_blur_c instead of the renderer's function pointer
+                        be_blur_c(buf, w, h, stride, tmp);
+                    }else{
+                        render_priv->be_blur_func(buf, w, h, stride, tmp);
+                    }
+                }
+            }
+        }
+        if (!bm_o || border_style == 3) {
+            unsigned passes = be;
+            unsigned w = bm_g->w;
+            unsigned h = bm_g->h;
+            unsigned stride = bm_g->stride;
+            unsigned char *buf = bm_g->buffer;
+            if(w && h){
+                while(passes--){
+                    memset(tmp, 0, stride * 2);
+                    render_priv->be_blur_func(buf, w, h, stride, tmp);  // NOTE(review): no w < 16 fallback to be_blur_c here, unlike the outline path -- confirm intended
+                }
+            }
+        }
+    }
+
+    // Apply gaussian blur
+    if (blur_radius > 0.0) {
+        generate_tables(priv_blur, blur_radius);
+        if (bm_o)
+            ass_gauss_blur(bm_o->buffer, priv_blur->tmp,
+                           bm_o->w, bm_o->h, bm_o->stride,
+                           priv_blur->gt2, priv_blur->g_r,
+                           priv_blur->g_w);
+        if (!bm_o || border_style == 3)
+            ass_gauss_blur(bm_g->buffer, priv_blur->tmp,
+                           bm_g->w, bm_g->h, bm_g->stride,
+                           priv_blur->gt2, priv_blur->g_r,
+                           priv_blur->g_w);
+    }
+}
+
+static void make_shadow_bitmap(CombinedBitmapInfo *info)
+{   // Derives info->bm_s from the run's outline or glyph bitmap and shifts it by (shadow_x, shadow_y)
+    // VSFilter compatibility: invisible fill and no border?
+    // In this case no shadow is supposed to be rendered.
+    if (!info->has_border && (info->c[0] & 0xFF) == 0xFF) {
+        return;   // info->bm_s is left untouched on this path
+    }
+
+    // Create shadow and fix outline as needed
+    if (info->bm_o && info->border_style != 3) {
+        info->bm_s = copy_bitmap(info->bm_o);
+        fix_outline(info->bm, info->bm_o);
+    } else if (info->bm_o && (info->border_x || info->border_y)) {   // border_style 3 with a non-zero border
+        info->bm_s = copy_bitmap(info->bm_o);
+    } else if (info->bm_o) {   // border_style 3 with zero border: the outline bitmap itself becomes the shadow
+        info->bm_s = info->bm_o;
+        info->bm_o = 0;
+    } else   // no outline bitmap at all: shadow is a copy of the glyph bitmap
+        info->bm_s = copy_bitmap(info->bm);
+
+    assert(info->bm_s);   // NOTE(review): if copy_bitmap can return NULL, this is the only guard and it vanishes under NDEBUG
+
+    shift_bitmap(info->bm_s, info->shadow_x, info->shadow_y);
+}
+
+/**
  * \brief Main ass rendering function, glues everything together
  * \param event event to render
  * \param event_images struct containing resulting images, will also be initialized
@@ -1689,8 +1796,6 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
                  EventImages *event_images)
 {
     char *p;
-    FT_UInt previous;
-    FT_UInt num_glyphs;
     FT_Vector pen;
     unsigned code;
     DBBox bbox;
@@ -1717,7 +1822,6 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
 
     drawing = render_priv->state.drawing;
     text_info->length = 0;
-    num_glyphs = 0;
     p = event->Text;
 
     int in_tag = 0;
@@ -1731,9 +1835,16 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
             if (!in_tag && *p == '{') {            // '\0' goes here
                 p++;
                 in_tag = 1;
+                if (render_priv->state.drawing_scale) {
+                    // A drawing definition has just ended.
+                    // Exit and create the drawing now lest we
+                    // accidentally let it consume later text
+                    // or be affected by later override tags.
+                    // See Google Code issues #47 and #101.
+                    break;
+                }
             }
             if (in_tag) {
-                int prev_drawing_mode = render_priv->state.drawing_mode;
                 p = parse_tag(render_priv, p, 1.);
                 if (*p == '}') {    // end of tag
                     p++;
@@ -1742,15 +1853,9 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
                     ass_msg(render_priv->library, MSGL_V,
                             "Unable to parse: '%.30s'", p);
                 }
-                if (prev_drawing_mode && !render_priv->state.drawing_mode) {
-                    // Drawing mode was just disabled. We must exit and draw it
-                    // immediately, instead of letting further tags affect it.
-                    // See bug #47.
-                    break;
-                }
             } else {
                 code = get_next_char(render_priv, &p);
-                if (code && render_priv->state.drawing_mode) {
+                if (code && render_priv->state.drawing_scale) {
                     ass_drawing_add_char(drawing, (char) code);
                     continue;   // skip everything in drawing mode
                 }
@@ -1766,8 +1871,10 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
                         sizeof(GlyphInfo) * text_info->max_glyphs);
         }
 
+        GlyphInfo *info = &glyphs[text_info->length];
+
         // Clear current GlyphInfo
-        memset(&glyphs[text_info->length], 0, sizeof(GlyphInfo));
+        memset(info, 0, sizeof(GlyphInfo));
 
         // Parse drawing
         if (drawing->i) {
@@ -1775,8 +1882,10 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
                                      render_priv->font_scale;
             drawing->scale_y = render_priv->state.scale_y *
                                      render_priv->font_scale;
+            drawing->scale = render_priv->state.drawing_scale;
+            drawing->pbo = render_priv->state.pbo;
             code = 0xfffc; // object replacement character
-            glyphs[text_info->length].drawing = drawing;
+            info->drawing = drawing;
         }
 
         // face could have been changed in get_next_char
@@ -1789,44 +1898,44 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
             break;
 
         // Fill glyph information
-        glyphs[text_info->length].symbol = code;
-        glyphs[text_info->length].font = render_priv->state.font;
+        info->symbol = code;
+        info->font = render_priv->state.font;
         for (i = 0; i < 4; ++i) {
             uint32_t clr = render_priv->state.c[i];
             change_alpha(&clr,
                          mult_alpha(_a(clr), render_priv->state.fade), 1.);
-            glyphs[text_info->length].c[i] = clr;
+            info->c[i] = clr;
         }
-        glyphs[text_info->length].effect_type = render_priv->state.effect_type;
-        glyphs[text_info->length].effect_timing =
-            render_priv->state.effect_timing;
-        glyphs[text_info->length].effect_skip_timing =
-            render_priv->state.effect_skip_timing;
-        glyphs[text_info->length].font_size =
-                    render_priv->state.font_size * render_priv->font_scale;
-        glyphs[text_info->length].be = render_priv->state.be;
-        glyphs[text_info->length].blur = render_priv->state.blur;
-        glyphs[text_info->length].shadow_x = render_priv->state.shadow_x;
-        glyphs[text_info->length].shadow_y = render_priv->state.shadow_y;
-        glyphs[text_info->length].scale_x= render_priv->state.scale_x;
-        glyphs[text_info->length].scale_y = render_priv->state.scale_y;
-        glyphs[text_info->length].border_style = render_priv->state.border_style;
-        glyphs[text_info->length].border_x= render_priv->state.border_x;
-        glyphs[text_info->length].border_y = render_priv->state.border_y;
-        glyphs[text_info->length].hspacing = render_priv->state.hspacing;
-        glyphs[text_info->length].bold = render_priv->state.bold;
-        glyphs[text_info->length].italic = render_priv->state.italic;
-        glyphs[text_info->length].flags = render_priv->state.flags;
-        glyphs[text_info->length].frx = render_priv->state.frx;
-        glyphs[text_info->length].fry = render_priv->state.fry;
-        glyphs[text_info->length].frz = render_priv->state.frz;
-        glyphs[text_info->length].fax = render_priv->state.fax;
-        glyphs[text_info->length].fay = render_priv->state.fay;
-        glyphs[text_info->length].bm_run_id = render_priv->state.bm_run_id;
-
-        if (glyphs[text_info->length].drawing) {
+
+        info->effect_type = render_priv->state.effect_type;
+        info->effect_timing = render_priv->state.effect_timing;
+        info->effect_skip_timing = render_priv->state.effect_skip_timing;
+        info->font_size =
+            render_priv->state.font_size * render_priv->font_scale;
+        info->be = render_priv->state.be;
+        info->blur = render_priv->state.blur;
+        info->shadow_x = render_priv->state.shadow_x;
+        info->shadow_y = render_priv->state.shadow_y;
+        info->scale_x = info->orig_scale_x = render_priv->state.scale_x;
+        info->scale_y = info->orig_scale_y = render_priv->state.scale_y;
+        info->border_style = render_priv->state.border_style;
+        info->border_x= render_priv->state.border_x;
+        info->border_y = render_priv->state.border_y;
+        info->hspacing = render_priv->state.hspacing;
+        info->bold = render_priv->state.bold;
+        info->italic = render_priv->state.italic;
+        info->flags = render_priv->state.flags;
+        info->frx = render_priv->state.frx;
+        info->fry = render_priv->state.fry;
+        info->frz = render_priv->state.frz;
+        info->fax = render_priv->state.fax;
+        info->fay = render_priv->state.fay;
+
+        if (info->drawing) {
             drawing = render_priv->state.drawing =
                 ass_drawing_new(render_priv->library, render_priv->ftlibrary);
+        } else {
+            fix_glyph_scaling(render_priv, info);
         }
 
         text_info->length++;
@@ -1872,7 +1981,7 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
 
         // add horizontal letter spacing
         info->cluster_advance.x += double_to_d6(info->hspacing *
-                render_priv->font_scale * info->scale_x);
+                render_priv->font_scale * info->orig_scale_x);
 
         // add displacement for vertical shearing
         info->cluster_advance.y += (info->fay / info->scale_x * info->scale_y) * info->cluster_advance.x;
@@ -1880,7 +1989,6 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
     }
 
     // Preliminary layout (for line wrapping)
-    previous = 0;
     pen.x = 0;
     pen.y = 0;
     for (i = 0; i < text_info->length; i++) {
@@ -1902,7 +2010,6 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
         info = glyphs + i;
         pen.x += info->cluster_advance.x;
         pen.y += info->cluster_advance.y;
-        previous = info->symbol;
     }
 
 
@@ -1945,16 +2052,23 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
     pen.x = 0;
     pen.y = 0;
     int lineno = 1;
+    double last_pen_x = 0;
+    double last_fay = 0;
     for (i = 0; i < text_info->length; i++) {
         GlyphInfo *info = glyphs + cmap[i];
         if (glyphs[i].linebreak) {
-            pen.y -= (info->fay / info->scale_x * info->scale_y) * pen.x;
-            pen.x = 0;
+            pen.y -= (last_fay / info->scale_x * info->scale_y) * (pen.x - last_pen_x);
+            last_pen_x = pen.x = 0;
             pen.y += double_to_d6(text_info->lines[lineno-1].desc);
             pen.y += double_to_d6(text_info->lines[lineno].asc);
             pen.y += double_to_d6(render_priv->settings.line_spacing);
             lineno++;
         }
+        else if (last_fay != info->fay) {
+            pen.y -= (last_fay / info->scale_x * info->scale_y) * (pen.x - last_pen_x);
+            last_pen_x = pen.x;
+        }
+        last_fay = info->fay;
         if (info->skip) continue;
         FT_Vector cluster_pen = pen;
         while (info) {
@@ -1975,10 +2089,6 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
         double width = 0;
         for (i = 0; i <= text_info->length; ++i) {   // (text_info->length + 1) is the end of the last line
             if ((i == text_info->length) || glyphs[i].linebreak) {
-                // remove letter spacing (which is included in cluster_advance)
-                if (i > 0)
-                    width -= render_priv->state.hspacing * render_priv->font_scale *
-                        glyphs[i-1].scale_x;
                 double shift = 0;
                 if (halign == HALIGN_LEFT) {    // left aligned, no action
                     shift = 0;
@@ -2076,8 +2186,6 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
     if (render_priv->state.evt_type == EVENT_POSITIONED) {
         double base_x = 0;
         double base_y = 0;
-        ass_msg(render_priv->library, MSGL_DBG2, "positioned event at %f, %f",
-               render_priv->state.pos_x, render_priv->state.pos_y);
         get_base_point(&bbox, alignment, &base_x, &base_y);
         device_x =
             x2scr_pos(render_priv, render_priv->state.pos_x) - base_x;
@@ -2154,6 +2262,10 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
     // convert glyphs to bitmaps
     int left = render_priv->settings.left_margin;
     device_x = (device_x - left) * render_priv->font_scale_x + left;
+    unsigned nb_bitmaps = 0;
+    CombinedBitmapInfo *combined_info = text_info->combined_bitmaps;
+    CombinedBitmapInfo *current_info = NULL;
+    GlyphInfo *last_info = NULL;
     for (i = 0; i < text_info->length; ++i) {
         GlyphInfo *info = glyphs + i;
         while (info) {
@@ -2166,10 +2278,234 @@ ass_render_event(ASS_Renderer *render_priv, ASS_Event *event,
                 double_to_d6(device_y - (int) device_y +
                         d6_to_double(info->pos.y & SUBPIXEL_MASK)) & ~SUBPIXEL_ACCURACY;
             get_bitmap_glyph(render_priv, info);
+
+            int bm_x = info->pos.x >> 6,
+                bm_y = info->pos.y >> 6,
+                bm_o_x = bm_x, bm_o_y = bm_y, min_bm_x = bm_x, min_bm_y = bm_y;
+
+            if(info->bm){
+                bm_x += info->bm->left;
+                bm_y += info->bm->top;
+                min_bm_x = bm_x;
+                min_bm_y = bm_y;
+            }
+
+            if(info->bm_o){
+                bm_o_x += info->bm_o->left;
+                bm_o_y += info->bm_o->top;
+                min_bm_x = FFMIN(min_bm_x, bm_o_x);
+                min_bm_y = FFMIN(min_bm_y, bm_o_y);
+            }
+
+            if(is_new_bm_run(info, last_info)){
+                ++nb_bitmaps;
+                if (nb_bitmaps >= text_info->max_bitmaps) {
+                    // Raise maximum number of bitmaps
+                    text_info->max_bitmaps *= 2;
+                    text_info->combined_bitmaps = combined_info =
+                        realloc(combined_info,
+                                sizeof(CombinedBitmapInfo) * text_info->max_bitmaps);
+                }
+
+                current_info = &combined_info[nb_bitmaps - 1];
+
+                current_info->pos.x = min_bm_x;
+                current_info->pos.y = min_bm_y;
+
+                current_info->first_pos_x = info->bbox.xMax >> 6;
+
+                memcpy(&current_info->c, &info->c, sizeof(info->c));
+                current_info->effect_type = info->effect_type;
+                current_info->effect_timing = info->effect_timing;
+                current_info->be = info->be;
+                current_info->blur = info->blur;
+                current_info->shadow_x = info->shadow_x;
+                current_info->shadow_y = info->shadow_y;
+                current_info->frx = info->frx;
+                current_info->fry = info->fry;
+                current_info->frz = info->frz;
+                current_info->fax = info->fax;
+                current_info->fay = info->fay;
+                current_info->scale_x = info->scale_x;
+                current_info->scale_y = info->scale_y;
+                current_info->border_style = info->border_style;
+                current_info->border_x = info->border_x;
+                current_info->border_y = info->border_y;
+                current_info->hspacing = info->hspacing;
+                current_info->italic = info->italic;
+                current_info->bold = info->bold;
+                current_info->flags = info->flags;
+
+                current_info->shift_x = key->shift_x;
+                current_info->shift_y = key->shift_y;
+                current_info->advance = key->advance;
+
+                current_info->has_border = !!info->border;
+
+                current_info->has_outline = 0;
+                current_info->cached = 0;
+                current_info->is_drawing = 0;
+
+                current_info->bm = current_info->bm_o = current_info->bm_s = NULL;
+
+                current_info->max_str_length = MAX_STR_LENGTH_INITIAL;
+                current_info->str_length = 0;
+                current_info->str = malloc(MAX_STR_LENGTH_INITIAL);
+                current_info->chars = 0;
+
+                current_info->w = current_info->h = current_info->o_w = current_info->o_h = 0;
+
+            }
+
+            if(info->drawing){
+                free(current_info->str);
+                current_info->str = strdup(info->drawing->text);
+                current_info->is_drawing = 1;
+                ass_drawing_free(info->drawing);
+            }else{
+                current_info->str_length +=
+                    ass_utf8_put_char(
+                        current_info->str + current_info->str_length,
+                        info->symbol);
+                current_info->chars++;
+                if(current_info->str_length > current_info->max_str_length - 5){
+                    current_info->max_str_length *= 2;
+                    current_info->str = realloc(current_info->str,
+                                                current_info->max_str_length);
+                }
+            }
+
+            current_info->has_outline = current_info->has_outline || !!info->bm_o;
+
+            if(min_bm_y < current_info->pos.y){
+                current_info->h += current_info->pos.y - min_bm_y;
+                current_info->o_h += current_info->pos.y - min_bm_y;
+                current_info->pos.y = min_bm_y;
+            }
+
+            if(min_bm_x < current_info->pos.x){
+                current_info->w += current_info->pos.x - min_bm_x;
+                current_info->o_w += current_info->pos.x - min_bm_x;
+                current_info->pos.x = min_bm_x;
+            }
+
+            if(info->bm){
+                current_info->w =
+                    FFMAX(current_info->w, info->bm->w + bm_x - current_info->pos.x);
+                current_info->h =
+                    FFMAX(current_info->h, info->bm->h + bm_y - current_info->pos.y);
+            }
+
+            if(info->bm_o){
+                current_info->o_w =
+                    FFMAX(current_info->o_w, info->bm_o->w + bm_o_x - current_info->pos.x);
+                current_info->o_h =
+                    FFMAX(current_info->o_h, info->bm_o->h + bm_o_y - current_info->pos.y);
+            }
+
+            info->bm_run_id = nb_bitmaps - 1;
+
+            last_info = info;
             info = info->next;
         }
     }
 
+    CompositeHashKey hk;
+    CompositeHashValue *hv;
+    for (i = 0; i < nb_bitmaps; ++i) {
+        CombinedBitmapInfo *info = &combined_info[i];
+
+        fill_composite_hash(&hk, info);
+
+        hv = ass_cache_get(render_priv->cache.composite_cache, &hk);
+
+        if(hv){
+            info->bm = hv->bm;
+            info->bm_o = hv->bm_o;
+            info->bm_s = hv->bm_s;
+            info->cached = 1;
+            free(info->str);
+        }else{
+            if(info->chars != 1 && !info->is_drawing){
+                info->bm = alloc_bitmap(info->w, info->h);
+                if(info->has_outline){
+                    info->bm_o = alloc_bitmap(info->o_w, info->o_h);
+                }
+            }
+        }
+    }
+
+    for (i = 0; i < text_info->length; ++i) {
+        GlyphInfo *info = glyphs + i;
+        while (info) {
+            current_info = &combined_info[info->bm_run_id];
+            if(!current_info->cached && !is_skip_symbol(info->symbol)){
+                if(current_info->chars == 1 || current_info->is_drawing){
+                    int offset_x = (info->pos.x >> 6) - current_info->pos.x;
+                    int offset_y = (info->pos.y >> 6) - current_info->pos.y;
+                    if(info->bm){
+                        current_info->bm = copy_bitmap(info->bm);
+                        current_info->bm->left += offset_x;
+                        current_info->bm->top += offset_y;
+                    }
+                    if(info->bm_o){
+                        current_info->bm_o = copy_bitmap(info->bm_o);
+                        current_info->bm_o->left += offset_x;
+                        current_info->bm_o->top += offset_y;
+                    }
+                }else{
+                    unsigned offset_x, offset_y;
+                    if(info->bm && info->bm->w && info->bm->h){
+                        offset_x = (info->pos.x >> 6) - current_info->pos.x + info->bm->left;
+                        offset_y = (info->pos.y >> 6) - current_info->pos.y + info->bm->top;
+                        render_priv->add_bitmaps_func(
+                            &current_info->bm->buffer[offset_y * current_info->bm->stride + offset_x],
+                            current_info->bm->stride,
+                            info->bm->buffer,
+                            info->bm->stride,
+                            info->bm->h,
+                            info->bm->w
+                        );
+                    }
+                    if(info->bm_o && info->bm_o->w && info->bm_o->h){
+                        offset_x = (info->pos.x >> 6) - current_info->pos.x + info->bm_o->left;
+                        offset_y = (info->pos.y >> 6) - current_info->pos.y + info->bm_o->top;
+                        render_priv->add_bitmaps_func(
+                            &current_info->bm_o->buffer[offset_y * current_info->bm_o->stride + offset_x],
+                            current_info->bm_o->stride,
+                            info->bm_o->buffer,
+                            info->bm_o->stride,
+                            info->bm_o->h,
+                            info->bm_o->w
+                        );
+                    }
+                }
+            }
+            info = info->next;
+        }
+    }
+
+    for (i = 0; i < nb_bitmaps; ++i) {
+        if(!combined_info[i].cached){
+            CompositeHashValue chv;
+            CombinedBitmapInfo *info = &combined_info[i];
+            if(info->bm || info->bm_o){
+                apply_blur(info, render_priv);
+                make_shadow_bitmap(info);
+            }
+
+            fill_composite_hash(&hk, info);
+
+            chv.bm = info->bm;
+            chv.bm_o = info->bm_o;
+            chv.bm_s = info->bm_s;
+
+            ass_cache_put(render_priv->cache.composite_cache, &hk, &chv);
+        }
+    }
+
+    text_info->n_bitmaps = nb_bitmaps;
+
     memset(event_images, 0, sizeof(*event_images));
     event_images->top = device_y - text_info->lines[0].asc;
     event_images->height = text_info->height;
@@ -2207,14 +2543,17 @@ void ass_free_images(ASS_Image *img)
 static void check_cache_limits(ASS_Renderer *priv, CacheStore *cache)
 {
     if (ass_cache_empty(cache->bitmap_cache, cache->bitmap_max_size)) {
-        ass_cache_empty(cache->composite_cache, 0);
         ass_free_images(priv->prev_images_root);
         priv->prev_images_root = 0;
         priv->cache_cleared = 1;
     }
     if (ass_cache_empty(cache->outline_cache, cache->glyph_max)) {
         ass_cache_empty(cache->bitmap_cache, 0);
-        ass_cache_empty(cache->composite_cache, 0);
+        ass_free_images(priv->prev_images_root);
+        priv->prev_images_root = 0;
+        priv->cache_cleared = 1;
+    }
+    if (ass_cache_empty(cache->composite_cache, cache->composite_max_size)) {
         ass_free_images(priv->prev_images_root);
         priv->prev_images_root = 0;
         priv->cache_cleared = 1;
@@ -2234,9 +2573,6 @@ ass_start_frame(ASS_Renderer *render_priv, ASS_Track *track,
         && !render_priv->settings.frame_height)
         return 1;               // library not initialized
 
-    if (render_priv->library != track->library)
-        return 1;
-
     if (!render_priv->fontconfig_priv)
         return 1;
 
@@ -2252,9 +2588,9 @@ ass_start_frame(ASS_Renderer *render_priv, ASS_Track *track,
 
     render_priv->font_scale = settings_priv->font_size_coeff *
         render_priv->orig_height / render_priv->track->PlayResY;
-    if (render_priv->storage_height)
+    if (settings_priv->storage_height)
         render_priv->blur_scale = ((double) render_priv->orig_height) /
-            render_priv->storage_height;
+            settings_priv->storage_height;
     else
         render_priv->blur_scale = 1.;
     if (render_priv->track->ScaledBorderAndShadow)
@@ -2263,6 +2599,8 @@ ass_start_frame(ASS_Renderer *render_priv, ASS_Track *track,
             render_priv->track->PlayResY;
     else
         render_priv->border_scale = render_priv->blur_scale;
+    if (!settings_priv->storage_height)
+        render_priv->blur_scale = render_priv->border_scale;
     render_priv->border_scale *= settings_priv->font_size_coeff;
 
     ass_shaper_set_kerning(render_priv->shaper, track->Kerning);
diff --git a/libass/ass_render.h b/libass/ass_render.h
index ecfca61..7b3b6d7 100644
--- a/libass/ass_render.h
+++ b/libass/ass_render.h
@@ -41,9 +41,11 @@ typedef struct ass_shaper ASS_Shaper;
 #include "ass_fontconfig.h"
 #include "ass_library.h"
 #include "ass_drawing.h"
+#include "ass_bitmap.h"
 
-#define GLYPH_CACHE_MAX 1000
-#define BITMAP_CACHE_MAX_SIZE 30 * 1048576
+#define GLYPH_CACHE_MAX 10000
+#define BITMAP_CACHE_MAX_SIZE (500 * 1048576)       // parenthesized so the macro expands as one value
+#define COMPOSITE_CACHE_MAX_SIZE (500 * 1048576)    // parenthesized so the macro expands as one value
 
 #define PARSED_FADE (1<<0)
 #define PARSED_A    (1<<1)
@@ -68,8 +70,8 @@ typedef struct free_list {
 typedef struct {
     int frame_width;
     int frame_height;
-    int storage_width;          // width of the source image
-    int storage_height;         // height of the source image
+    int storage_width;          // video width before any rescaling
+    int storage_height;         // video height before any rescaling
     double font_size_coeff;     // font size multiplier
     double line_spacing;        // additional line spacing (in frame pixels)
     double line_position;       // vertical position for subtitles, 0-100 (0 = no change)
@@ -103,6 +105,50 @@ typedef enum {
     EF_KARAOKE_KO
 } Effect;
 
+// describes a combined bitmap: fill/outline/shadow bitmaps plus the style state kept with them
+typedef struct {
+    Bitmap *bm;                 // glyphs bitmap
+    unsigned w;                 // NOTE(review): presumably width of bm -- confirm
+    unsigned h;                 // NOTE(review): presumably height of bm -- confirm
+    Bitmap *bm_o;               // outline bitmap
+    unsigned o_w;               // NOTE(review): presumably width of bm_o -- confirm
+    unsigned o_h;               // NOTE(review): presumably height of bm_o -- confirm
+    Bitmap *bm_s;               // shadow bitmap
+    FT_Vector pos;              // NOTE(review): looks like whole pixels, not 26.6 -- confirm
+    uint32_t c[4];              // colors
+    FT_Vector advance;          // 26.6
+    Effect effect_type;
+    int effect_timing;          // time duration of current karaoke word
+    // after process_karaoke_effects: distance in pixels from the glyph origin;
+    // the part of the glyph to the left of it is displayed in a different color.
+    int be;                     // blur edges
+    double blur;                // gaussian blur
+    double shadow_x;
+    double shadow_y;
+    double frx, fry, frz;       // rotation
+    double fax, fay;            // text shearing
+    double scale_x, scale_y;
+    int border_style;
+    int has_border;
+    double border_x, border_y;
+    double hspacing;
+    unsigned italic;
+    unsigned bold;
+    int flags;
+    int shift_x, shift_y;
+
+    unsigned has_outline;
+    unsigned is_drawing;
+
+    int max_str_length;
+    int str_length;
+    unsigned chars;
+    char *str;
+    int cached;                 // if 0, the renderer blurs/shadows the bitmaps and stores them in the composite cache
+    FT_Vector pos_orig;
+    int first_pos_x;
+} CombinedBitmapInfo;
+
 // describes a glyph
 // GlyphInfo and TextInfo are used for text centering and word-wrapping operations
 typedef struct glyph_info {
@@ -130,6 +176,7 @@ typedef struct glyph_info {
     uint32_t c[4];              // colors
     FT_Vector advance;          // 26.6
     FT_Vector cluster_advance;
+    char effect;                // the first (leading) glyph of some effect ?
     Effect effect_type;
     int effect_timing;          // time duration of current karaoke word
     // after process_karaoke_effects: distance in pixels from the glyph origin.
@@ -143,6 +190,7 @@ typedef struct glyph_info {
     double frx, fry, frz;       // rotation
     double fax, fay;            // text shearing
     double scale_x, scale_y;
+    double orig_scale_x, orig_scale_y; // scale_x,y before fix_glyph_scaling
     int border_style;
     double border_x, border_y;
     double hspacing;
@@ -169,9 +217,12 @@ typedef struct {
     int length;
     LineInfo *lines;
     int n_lines;
+    CombinedBitmapInfo *combined_bitmaps;
+    unsigned n_bitmaps;
     double height;
     int max_glyphs;
     int max_lines;
+    unsigned max_bitmaps;
 } TextInfo;
 
 // Renderer state.
@@ -213,7 +264,8 @@ typedef struct {
     double blur;                // gaussian blur
     double shadow_x;
     double shadow_y;
-    int drawing_mode;           // not implemented; when != 0 text is discarded, except for style override tags
+    int drawing_scale;          // currently reading: regular text if 0, drawing otherwise
+    double pbo;                 // drawing baseline offset
     ASS_Drawing *drawing;       // current drawing
     ASS_Drawing *clip_drawing;  // clip vector
     int clip_drawing_mode;      // 0 = regular clip, 1 = inverse clip
@@ -249,8 +301,23 @@ typedef struct {
     Cache *composite_cache;
     size_t glyph_max;
     size_t bitmap_max_size;
+    size_t composite_max_size;
 } CacheStore;
 
+typedef void (*BitmapBlendFunc)(uint8_t *dst, intptr_t dst_stride,   // type of add_bitmaps_func / sub_bitmaps_func
+                                uint8_t *src, intptr_t src_stride,
+                                intptr_t height, intptr_t width);    // NB: height comes before width here, unlike the typedefs below
+typedef void (*BitmapMulFunc)(uint8_t *dst, intptr_t dst_stride,     // type of mul_bitmaps_func; takes two source bitmaps
+                              uint8_t *src1, intptr_t src1_stride,
+                              uint8_t *src2, intptr_t src2_stride,
+                              intptr_t width, intptr_t height);
+typedef void (*BEBlurFunc)(uint8_t *buf, intptr_t w,                 // type of be_blur_func; works on buf in place
+                           intptr_t h, intptr_t stride,
+                           uint16_t *tmp);                           // NOTE(review): presumably caller-provided scratch space -- confirm required size
+typedef void (*RestrideBitmapFunc)(uint8_t *dst, intptr_t dst_stride, // type of restride_bitmap_func
+                                   uint8_t *src, intptr_t src_stride,
+                                   intptr_t width, intptr_t height);
+
 struct ass_renderer {
     ASS_Library *library;
     FT_Library ftlibrary;
@@ -273,8 +340,6 @@ struct ass_renderer {
     int orig_width;             // frame width ( = screen width - margins )
     int orig_height_nocrop;     // frame height ( = screen height - margins + cropheight)
     int orig_width_nocrop;      // frame width ( = screen width - margins + cropwidth)
-    int storage_height;         // video height before any rescaling
-    int storage_width;          // video width before any rescaling
     ASS_Track *track;
     long long time;             // frame's timestamp, ms
     double font_scale;
@@ -286,6 +351,12 @@ struct ass_renderer {
     TextInfo text_info;
     CacheStore cache;
 
+    BitmapBlendFunc add_bitmaps_func;
+    BitmapBlendFunc sub_bitmaps_func;
+    BitmapMulFunc mul_bitmaps_func;
+    BEBlurFunc be_blur_func;
+    RestrideBitmapFunc restride_bitmap_func;
+
     FreeList *free_head;
     FreeList *free_tail;
 };
diff --git a/libass/ass_render_api.c b/libass/ass_render_api.c
index 5777aad..b06fc80 100644
--- a/libass/ass_render_api.c
+++ b/libass/ass_render_api.c
@@ -43,13 +43,6 @@ static void ass_reconfigure(ASS_Renderer *priv)
     priv->orig_height_nocrop =
         settings->frame_height - FFMAX(settings->top_margin, 0) -
         FFMAX(settings->bottom_margin, 0);
-    if (settings->storage_height) {
-        priv->storage_width = settings->storage_width;
-        priv->storage_height = settings->storage_height;
-    } else {
-        priv->storage_width = priv->orig_width;
-        priv->storage_height = priv->orig_height;
-    }
 }
 
 void ass_set_frame_size(ASS_Renderer *priv, int w, int h)
diff --git a/libass/ass_shaper.c b/libass/ass_shaper.c
index e5757b0..49ba364 100644
--- a/libass/ass_shaper.c
+++ b/libass/ass_shaper.c
@@ -18,26 +18,22 @@
 
 #include "config.h"
 
-#ifdef CONFIG_FRIBIDI
-#include <fribidi/fribidi.h>
-#endif
-
 #include "ass_shaper.h"
 #include "ass_render.h"
 #include "ass_font.h"
 #include "ass_parse.h"
 #include "ass_cache.h"
 
-#define MAX_RUNS 50
-
 #ifdef CONFIG_HARFBUZZ
 #include <hb-ft.h>
 enum {
     VERT = 0,
     VKNA,
-    KERN
+    KERN,
+    LIGA,
+    CLIG
 };
-#define NUM_FEATURES 3
+#define NUM_FEATURES 5
 #endif
 
 struct ass_shaper {
@@ -152,11 +148,15 @@ static void init_features(ASS_Shaper *shaper)
 
     shaper->n_features = NUM_FEATURES;
     shaper->features[VERT].tag = HB_TAG('v', 'e', 'r', 't');
-    shaper->features[VERT].end = INT_MAX;
+    shaper->features[VERT].end = UINT_MAX;
     shaper->features[VKNA].tag = HB_TAG('v', 'k', 'n', 'a');
-    shaper->features[VKNA].end = INT_MAX;
+    shaper->features[VKNA].end = UINT_MAX;
     shaper->features[KERN].tag = HB_TAG('k', 'e', 'r', 'n');
-    shaper->features[KERN].end = INT_MAX;
+    shaper->features[KERN].end = UINT_MAX;
+    shaper->features[LIGA].tag = HB_TAG('l', 'i', 'g', 'a');
+    shaper->features[LIGA].end = UINT_MAX;
+    shaper->features[CLIG].tag = HB_TAG('c', 'l', 'i', 'g');
+    shaper->features[CLIG].end = UINT_MAX;
 }
 
 /**
@@ -164,11 +164,17 @@ static void init_features(ASS_Shaper *shaper)
  */
 static void set_run_features(ASS_Shaper *shaper, GlyphInfo *info)
 {
-        // enable vertical substitutions for @font runs
-        if (info->font->desc.vertical)
-            shaper->features[VERT].value = shaper->features[VKNA].value = 1;
-        else
-            shaper->features[VERT].value = shaper->features[VKNA].value = 0;
+    // enable vertical substitutions for @font runs
+    if (info->font->desc.vertical)
+        shaper->features[VERT].value = shaper->features[VKNA].value = 1;
+    else
+        shaper->features[VERT].value = shaper->features[VKNA].value = 0;
+
+    // disable ligatures if horizontal spacing is non-standard
+    if (info->hspacing)
+        shaper->features[LIGA].value = shaper->features[CLIG].value = 0;
+    else
+        shaper->features[LIGA].value = shaper->features[CLIG].value = 1;
 }
 
 /**
@@ -197,7 +203,7 @@ static void update_hb_size(hb_font_t *hb_font, FT_Face face)
 
 GlyphMetricsHashValue *
 get_cached_metrics(struct ass_shaper_metrics_data *metrics, FT_Face face,
-                   hb_codepoint_t glyph)
+                   hb_codepoint_t unicode, hb_codepoint_t glyph)
 {
     GlyphMetricsHashValue *val;
 
@@ -216,7 +222,7 @@ get_cached_metrics(struct ass_shaper_metrics_data *metrics, FT_Face face,
 
         // if @font rendering is enabled and the glyph should be rotated,
         // make cached_h_advance pick up the right advance later
-        if (metrics->vertical && glyph >= VERTICAL_LOWER_BOUND)
+        if (metrics->vertical && unicode >= VERTICAL_LOWER_BOUND)
             new_val.metrics.horiAdvance = new_val.metrics.vertAdvance;
 
         val = ass_cache_put(metrics->metrics_cache, &metrics->hash_key, &new_val);
@@ -230,11 +236,16 @@ get_glyph(hb_font_t *font, void *font_data, hb_codepoint_t unicode,
           hb_codepoint_t variation, hb_codepoint_t *glyph, void *user_data)
 {
     FT_Face face = font_data;
+    struct ass_shaper_metrics_data *metrics_priv = user_data;
 
     if (variation)
-        *glyph = FT_Face_GetCharVariantIndex(face, unicode, variation);
+        *glyph = FT_Face_GetCharVariantIndex(face, ass_font_index_magic(face, unicode), variation);
     else
-        *glyph = FT_Get_Char_Index(face, unicode);
+        *glyph = FT_Get_Char_Index(face, ass_font_index_magic(face, unicode));
+
+    // rotate glyph advances for @fonts while we still know the Unicode codepoints
+    if (*glyph != 0)
+        get_cached_metrics(metrics_priv, face, unicode, *glyph);
 
     return *glyph != 0;
 }
@@ -245,7 +256,7 @@ cached_h_advance(hb_font_t *font, void *font_data, hb_codepoint_t glyph,
 {
     FT_Face face = font_data;
     struct ass_shaper_metrics_data *metrics_priv = user_data;
-    GlyphMetricsHashValue *metrics = get_cached_metrics(metrics_priv, face, glyph);
+    GlyphMetricsHashValue *metrics = get_cached_metrics(metrics_priv, face, 0, glyph);
 
     if (!metrics)
         return 0;
@@ -259,7 +270,7 @@ cached_v_advance(hb_font_t *font, void *font_data, hb_codepoint_t glyph,
 {
     FT_Face face = font_data;
     struct ass_shaper_metrics_data *metrics_priv = user_data;
-    GlyphMetricsHashValue *metrics = get_cached_metrics(metrics_priv, face, glyph);
+    GlyphMetricsHashValue *metrics = get_cached_metrics(metrics_priv, face, 0, glyph);
 
     if (!metrics)
         return 0;
@@ -281,7 +292,7 @@ cached_v_origin(hb_font_t *font, void *font_data, hb_codepoint_t glyph,
 {
     FT_Face face = font_data;
     struct ass_shaper_metrics_data *metrics_priv = user_data;
-    GlyphMetricsHashValue *metrics = get_cached_metrics(metrics_priv, face, glyph);
+    GlyphMetricsHashValue *metrics = get_cached_metrics(metrics_priv, face, 0, glyph);
 
     if (!metrics)
         return 0;
@@ -318,7 +329,7 @@ cached_extents(hb_font_t *font, void *font_data, hb_codepoint_t glyph,
 {
     FT_Face face = font_data;
     struct ass_shaper_metrics_data *metrics_priv = user_data;
-    GlyphMetricsHashValue *metrics = get_cached_metrics(metrics_priv, face, glyph);
+    GlyphMetricsHashValue *metrics = get_cached_metrics(metrics_priv, face, 0, glyph);
 
     if (!metrics)
         return 0;
@@ -404,9 +415,7 @@ static hb_font_t *get_hb_font(ASS_Shaper *shaper, GlyphInfo *info)
                 font->faces[info->face_index], NULL);
     }
 
-    // XXX: this is a rather crude hack
-    const double ft_size = 256.0;
-    ass_face_set_size(font->faces[info->face_index], ft_size);
+    ass_face_set_size(font->faces[info->face_index], info->font_size);
     update_hb_size(hb_fonts[info->face_index], font->faces[info->face_index]);
 
     // update hash key for cached metrics
@@ -523,92 +532,92 @@ hb_shaper_get_run_language(ASS_Shaper *shaper, hb_script_t script)
 }
 
 /**
+ * \brief Feed a run of shaped characters into the GlyphInfo array.
+ * Extra glyphs of a cluster are chained via ->next (dropped if malloc fails).
+ * \param glyphs GlyphInfo array
+ * \param buf buffer of shaped run
+ * \param offset offset into GlyphInfo array
+ */
+static void
+shape_harfbuzz_process_run(GlyphInfo *glyphs, hb_buffer_t *buf, int offset)
+{
+    int j;
+    int num_glyphs = hb_buffer_get_length(buf);
+    hb_glyph_info_t *glyph_info = hb_buffer_get_glyph_infos(buf, NULL);
+    hb_glyph_position_t *pos    = hb_buffer_get_glyph_positions(buf, NULL);
+
+    for (j = 0; j < num_glyphs; j++) {
+        unsigned idx = glyph_info[j].cluster + offset;
+        GlyphInfo *info = glyphs + idx;
+        GlyphInfo *root = info;
+
+        // more than one glyph per cluster: append a copy to the root's chain
+        if (info->skip == 0) {
+            while (info->next)
+                info = info->next;
+            info->next = malloc(sizeof(GlyphInfo));
+            if (!info->next)
+                continue;       // out of memory: drop this extra glyph
+            memcpy(info->next, info, sizeof(GlyphInfo));
+            info = info->next;
+            info->next = NULL;
+        }
+
+        // set position and advance
+        info->skip = 0;
+        info->glyph_index = glyph_info[j].codepoint;
+        info->offset.x    = pos[j].x_offset * info->scale_x;
+        info->offset.y    = -pos[j].y_offset * info->scale_y;
+        info->advance.x   = pos[j].x_advance * info->scale_x;
+        info->advance.y   = -pos[j].y_advance * info->scale_y;
+        // accumulate advance in the root glyph
+        root->cluster_advance.x += info->advance.x;
+        root->cluster_advance.y += info->advance.y;
+    }
+}
+
+/**
  * \brief Shape event text with HarfBuzz. Full OpenType shaping.
  * \param glyphs glyph clusters
  * \param len number of clusters
  */
 static void shape_harfbuzz(ASS_Shaper *shaper, GlyphInfo *glyphs, size_t len)
 {
-    int i, j;
-    int run = 0;
-    struct {
-        int offset;
-        int end;
-        hb_buffer_t *buf;
-        hb_font_t *font;
-    } runs[MAX_RUNS];
-    const double ft_size = 256.0;
-
-    for (i = 0; i < len && run < MAX_RUNS; i++, run++) {
-        // get length and level of the current run
-        int k = i;
-        int level = glyphs[i].shape_run_id;
-        int direction = shaper->emblevels[k] % 2;
-        hb_script_t script = glyphs[i].script;
-        while (i < (len - 1) && level == glyphs[i+1].shape_run_id)
-            i++;
-        runs[run].offset = k;
-        runs[run].end    = i;
-        runs[run].buf    = hb_buffer_create();
-        runs[run].font   = get_hb_font(shaper, glyphs + k);
-        set_run_features(shaper, glyphs + k);
-        hb_buffer_pre_allocate(runs[run].buf, i - k + 1);
-        hb_buffer_set_direction(runs[run].buf, direction ? HB_DIRECTION_RTL :
-                HB_DIRECTION_LTR);
-        hb_buffer_set_language(runs[run].buf,
-                hb_shaper_get_run_language(shaper, script));
-        hb_buffer_set_script(runs[run].buf, script);
-        hb_buffer_add_utf32(runs[run].buf, shaper->event_text + k, i - k + 1,
-                0, i - k + 1);
-        hb_shape(runs[run].font, runs[run].buf, shaper->features,
-                shaper->n_features);
-    }
+    int i;
+    hb_buffer_t *buf = hb_buffer_create();
+    hb_segment_properties_t props = HB_SEGMENT_PROPERTIES_DEFAULT;
 
     // Initialize: skip all glyphs, this is undone later as needed
     for (i = 0; i < len; i++)
         glyphs[i].skip = 1;
 
-    // Update glyph indexes, positions and advances from the shaped runs
-    for (i = 0; i < run; i++) {
-        int num_glyphs = hb_buffer_get_length(runs[i].buf);
-        hb_glyph_info_t *glyph_info = hb_buffer_get_glyph_infos(runs[i].buf, NULL);
-        hb_glyph_position_t *pos    = hb_buffer_get_glyph_positions(runs[i].buf, NULL);
-
-        for (j = 0; j < num_glyphs; j++) {
-            int idx = glyph_info[j].cluster + runs[i].offset;
-            GlyphInfo *info = glyphs + idx;
-            GlyphInfo *root = info;
-
-            // if we have more than one glyph per cluster, allocate a new one
-            // and attach to the root glyph
-            if (info->skip == 0) {
-                while (info->next)
-                    info = info->next;
-                info->next = malloc(sizeof(GlyphInfo));
-                memcpy(info->next, info, sizeof(GlyphInfo));
-                info = info->next;
-                info->next = NULL;
-            }
-
-            // set position and advance
-            info->skip = 0;
-            info->glyph_index = glyph_info[j].codepoint;
-            info->offset.x    = pos[j].x_offset * info->scale_x * (info->font_size / ft_size);
-            info->offset.y    = -pos[j].y_offset * info->scale_y * (info->font_size / ft_size);
-            info->advance.x   = pos[j].x_advance * info->scale_x * (info->font_size / ft_size);
-            info->advance.y   = -pos[j].y_advance * info->scale_y * (info->font_size / ft_size);
-
-            // accumulate advance in the root glyph
-            root->cluster_advance.x += info->advance.x;
-            root->cluster_advance.y += info->advance.y;
-        }
-    }
+    for (i = 0; i < len; i++) {
+        int offset = i;
+        hb_font_t *font = get_hb_font(shaper, glyphs + offset);
+        int level = glyphs[offset].shape_run_id;
+        int direction = shaper->emblevels[offset] % 2;
 
-    // Free runs and associated data
-    for (i = 0; i < run; i++) {
-        hb_buffer_destroy(runs[i].buf);
+        // advance in text until end of run
+        while (i < (len - 1) && level == glyphs[i+1].shape_run_id)
+            i++;
+
+        hb_buffer_pre_allocate(buf, i - offset + 1);
+        hb_buffer_add_utf32(buf, shaper->event_text + offset, i - offset + 1,
+                0, i - offset + 1);
+
+        props.direction = direction ? HB_DIRECTION_RTL : HB_DIRECTION_LTR;
+        props.script = glyphs[offset].script;
+        props.language  = hb_shaper_get_run_language(shaper, props.script);
+        hb_buffer_set_segment_properties(buf, &props);
+
+        set_run_features(shaper, glyphs + offset);
+        hb_shape(font, buf, shaper->features, shaper->n_features);
+
+        shape_harfbuzz_process_run(glyphs, buf, offset);
+        hb_buffer_reset(buf);
     }
 
+    hb_buffer_destroy(buf);
 }
 
 /**
@@ -684,7 +693,7 @@ static void shape_fribidi(ASS_Shaper *shaper, GlyphInfo *glyphs, size_t len)
         GlyphInfo *info = glyphs + i;
         FT_Face face = info->font->faces[info->face_index];
         info->symbol = shaper->event_text[i];
-        info->glyph_index = FT_Get_Char_Index(face, shaper->event_text[i]);
+        info->glyph_index = FT_Get_Char_Index(face, ass_font_index_magic(face, shaper->event_text[i]));
     }
 
     free(joins);
@@ -726,11 +735,37 @@ void ass_shaper_find_runs(ASS_Shaper *shaper, ASS_Renderer *render_priv,
         // set size and get glyph index
         ass_font_get_index(render_priv->fontconfig_priv, info->font,
                 info->symbol, &info->face_index, &info->glyph_index);
-        // shape runs share the same font face and size
+        // shape runs break on: xbord, ybord, xshad, yshad,
+        // all four colors, all four alphas, be, blur, fn, fs,
+        // fscx, fscy, fsp, bold, italic, underline, strikeout,
+        // frx, fry, frz, fax, fay, karaoke start, karaoke type,
+        // and on every line break
         if (i > 0 && (last->font != info->font ||
-                    last->font_size != info->font_size ||
                     last->face_index != info->face_index ||
-                    last->script != info->script))
+                    last->script != info->script ||
+                    last->font_size != info->font_size ||
+                    last->c[0] != info->c[0] ||
+                    last->c[1] != info->c[1] ||
+                    last->c[2] != info->c[2] ||
+                    last->c[3] != info->c[3] ||
+                    last->be != info->be ||
+                    last->blur != info->blur ||
+                    last->shadow_x != info->shadow_x ||
+                    last->shadow_y != info->shadow_y ||
+                    last->frx != info->frx ||
+                    last->fry != info->fry ||
+                    last->frz != info->frz ||
+                    last->fax != info->fax ||
+                    last->fay != info->fay ||
+                    last->scale_x != info->scale_x ||
+                    last->scale_y != info->scale_y ||
+                    last->border_style != info->border_style ||
+                    last->border_x != info->border_x ||
+                    last->border_y != info->border_y ||
+                    last->hspacing != info->hspacing ||
+                    last->italic != info->italic ||
+                    last->bold != info->bold ||
+                    last->flags != info->flags))
             shape_run++;
         info->shape_run_id = shape_run;
     }
@@ -903,10 +938,9 @@ FriBidiStrIndex *ass_shaper_reorder(ASS_Shaper *shaper, TextInfo *text_info)
     // Create reorder map line-by-line
     for (i = 0; i < text_info->n_lines; i++) {
         LineInfo *line = text_info->lines + i;
-        int level;
         FriBidiParType dir = FRIBIDI_PAR_ON;
 
-        level = fribidi_reorder_line(0,
+        fribidi_reorder_line(0,
                 shaper->ctypes + line->offset, line->len, 0, dir,
                 shaper->emblevels + line->offset, NULL,
                 shaper->cmap + line->offset);
diff --git a/libass/ass_shaper.h b/libass/ass_shaper.h
index 98b6288..bcfff50 100644
--- a/libass/ass_shaper.h
+++ b/libass/ass_shaper.h
@@ -22,7 +22,7 @@
 #include "config.h"
 
 #ifdef CONFIG_FRIBIDI
-#include <fribidi/fribidi.h>
+#include <fribidi.h>
 #else
 typedef int FriBidiParType;
 typedef int FriBidiStrIndex;
diff --git a/libass/ass_strtod.c b/libass/ass_strtod.c
index f55b37a..4e96404 100644
--- a/libass/ass_strtod.c
+++ b/libass/ass_strtod.c
@@ -16,15 +16,15 @@
 #include <ctype.h>
 #include <errno.h>
 
-const
-static int maxExponent = 511;   /* Largest possible base 10 exponent.  Any
+static
+const int maxExponent = 511;    /* Largest possible base 10 exponent.  Any
                                  * exponent larger than this will already
                                  * produce underflow or overflow, so there's
                                  * no need to worry about additional digits.
                                  */
 
-const
-static double powersOf10[] = {  /* Table giving binary powers of 10.  Entry */
+static
+const double powersOf10[] = {   /* Table giving binary powers of 10.  Entry */
     10.,                        /* is 10^2^i.  Used to convert decimal */
     100.,                       /* exponents into floating-point numbers. */
     1.0e4,
@@ -58,8 +58,8 @@ static double powersOf10[] = {  /* Table giving binary powers of 10.  Entry */
  */
 
 double
-ass_strtod(string, endPtr)
-    const char *string;     /* A decimal ASCII floating-point number,
+ass_strtod(
+    const char *string,     /* A decimal ASCII floating-point number,
                              * optionally preceded by white space.
                              * Must have form "-I.FE-X", where I is the
                              * integer part of the mantissa, F is the
@@ -71,8 +71,9 @@ ass_strtod(string, endPtr)
                              * The "E" may actually be an "e".  E and X
                              * may both be omitted (but not just one).
                              */
-    char **endPtr;          /* If non-NULL, store terminating character's
+    char **endPtr           /* If non-NULL, store terminating character's
                              * address here. */
+    )
 {
     int sign, expSign = 0;
     double fraction, dblExp, *d;
diff --git a/libass/ass_types.h b/libass/ass_types.h
index bc003aa..ccb0a0e 100644
--- a/libass/ass_types.h
+++ b/libass/ass_types.h
@@ -84,6 +84,72 @@ typedef struct ass_event {
     ASS_RenderPriv *render_priv;
 } ASS_Event;
 
+/**
+ * Support for (xy-)vsfilter mangled colors
+ *
+ * Generally, xy-vsfilter emulates the classic vsfilter behavior of
+ * rendering directly into the (usually YCbCr) video. vsfilter is
+ * hardcoded to use BT.601(TV) as target colorspace when converting
+ * the subtitle RGB color to the video colorspace. This led to major
+ * breakage when HDTV video was introduced: HDTV typically uses
+ * BT.709(TV), but vsfilter still used BT.601(TV) for conversion.
+ *
+ * This means classic vsfilter will mangle colors as follows:
+ *
+ *    screen_rgb = bt_709tv_to_rgb(rgb_to_bt601tv(ass_rgb))
+ *
+ * Or in general:
+ *
+ *    screen_rgb = video_csp_to_rgb(rgb_to_bt601tv(ass_rgb))
+ *
+ * where video_csp is the colorspace of the video with which the
+ * subtitle was muxed.
+ *
+ * xy-vsfilter did not fix this, but instead introduced explicit
+ * rules how colors were mangled by adding a "YCbCr Matrix" header.
+ * If this header specifies a real colorspace (like BT.601(TV) etc.),
+ * xy-vsfilter behaves exactly like vsfilter, but using the specified
+ * colorspace for conversion of ASS input RGB to screen RGB:
+ *
+ *    screen_rgb = video_csp_to_rgb(rgb_to_ycbcr_header_csp(ass_rgb))
+ *
+ * Further, xy-vsfilter behaves like vsfilter with no changes if the header
+ * is missing.
+ *
+ * The special value "None" means untouched RGB values. Keep in mind that
+ * some versions of xy-vsfilter are buggy and don't interpret this correctly.
+ * It appears some people are advocating that this header value is
+ * intended for situations where exact colors do not matter.
+ *
+ * Note that newer Aegisub versions (the main application to produce ASS
+ * subtitle scripts) have an option that tries not to mangle the colors. It
+ * is said that if the header is not set to BT.601(TV), the colors are
+ * supposed not to be mangled, even if the "YCbCr Matrix" header is not
+ * set to "None". In other words, the video colorspace as detected by
+ * Aegisub is the same as identified in the file header.
+ *
+ * In general, misinterpreting this header or not using it will lead to
+ * slightly different subtitle colors, which can matter if the subtitle
+ * attempts to match solid colored areas in the video.
+ *
+ * Note that libass doesn't change colors based on this header. It
+ * absolutely can't do that, because the video colorspace is required
+ * in order to handle this as intended by xy-vsfilter.
+ */
+typedef enum ASS_YCbCrMatrix {
+    YCBCR_DEFAULT = 0,  // Header missing
+    YCBCR_UNKNOWN,      // Header could not be parsed correctly
+    YCBCR_NONE,         // "None" special value
+    YCBCR_BT601_TV,     // the remaining values name a real colorspace from the header;
+    YCBCR_BT601_PC,     // _TV/_PC presumably mean limited/full range -- confirm
+    YCBCR_BT709_TV,
+    YCBCR_BT709_PC,
+    YCBCR_SMPTE240M_TV,
+    YCBCR_SMPTE240M_PC,
+    YCBCR_FCC_TV,
+    YCBCR_FCC_PC
+} ASS_YCbCrMatrix;
+
 /*
  * ass track represent either an external script or a matroska subtitle stream
  * (no real difference between them); it can be used in rendering after the
@@ -114,19 +180,7 @@ typedef struct ass_track {
     int ScaledBorderAndShadow;
     int Kerning;
     char *Language;
-    enum {
-        YCBCR_DEFAULT = 0,  // TV.601 on YCbCr video, None on RGB video
-        YCBCR_UNKNOWN,
-        YCBCR_NONE,         // untouched RGB values
-        YCBCR_BT601_TV,
-        YCBCR_BT601_PC,
-        YCBCR_BT709_TV,
-        YCBCR_BT709_PC,
-        YCBCR_SMPTE240M_TV,
-        YCBCR_SMPTE240M_PC,
-        YCBCR_FCC_TV,
-        YCBCR_FCC_PC
-    } YCbCrMatrix;
+    ASS_YCbCrMatrix YCbCrMatrix;
 
     int default_style;      // index of default style
     char *name;             // file name in case of external subs, 0 for streams
diff --git a/libass/ass_utils.c b/libass/ass_utils.c
index df7c447..72993d7 100644
--- a/libass/ass_utils.c
+++ b/libass/ass_utils.c
@@ -21,14 +21,48 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <inttypes.h>
-#include <ft2build.h>
-#include FT_GLYPH_H
 #include <strings.h>
 
 #include "ass_library.h"
 #include "ass.h"
 #include "ass_utils.h"
 
+#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
+
+#include "x86/cpuid.h"
+
+int has_sse2(void)                          // returns 1 if the SSE2 feature bit is set, else 0
+{
+    uint32_t eax = 1, ebx, ecx, edx;        // eax = 1: feature-flag leaf (assuming ass_get_cpuid runs CPUID on *eax -- confirm)
+    ass_get_cpuid(&eax, &ebx, &ecx, &edx);
+    return (!!(edx & (1 << 26)));           // CPUID.1:EDX bit 26 = SSE2
+}
+
+int has_avx(void)                           // returns 1 if AVX is reported as usable, else 0
+{
+    uint32_t eax = 1, ebx, ecx, edx;
+    ass_get_cpuid(&eax, &ebx, &ecx, &edx);
+    if(!(ecx & (1 << 27))){                 // CPUID.1:ECX bit 27 = OSXSAVE
+        return 0;
+    }
+    uint32_t misc = ecx;                    // keep leaf-1 ECX; the next call overwrites ecx
+    eax = 0;
+    ass_get_cpuid(&eax, &ebx, &ecx, &edx);
+    if((ecx & (0x2 | 0x4)) != (0x2 | 0x4)){ // NOTE(review): looks like an XCR0 (XGETBV) SSE|AVX state check, but leaf-0 ECX holds vendor-string bytes -- confirm
+        return 0;
+    }
+    return (!!(misc & (1 << 28)));          // CPUID.1:ECX bit 28 = AVX
+}
+
+int has_avx2(void)                          // returns 1 if the AVX2 bit is set and has_avx() succeeds, else 0
+{
+    uint32_t eax = 7, ebx, ecx, edx;        // NOTE(review): leaf 7 takes a sub-leaf in ECX; ecx is uninitialised here -- confirm ass_get_cpuid zeroes it
+    ass_get_cpuid(&eax, &ebx, &ecx, &edx);
+    return (!!(ebx & (1 << 5))) && has_avx();   // CPUID.7:EBX bit 5 = AVX2
+}
+
+#endif // ASM
+
 int mystrtoi(char **p, int *res)
 {
     double temp_res;
@@ -81,7 +115,8 @@ int strtocolor(ASS_Library *library, char **q, uint32_t *res, int hex)
     int base = hex ? 16 : 10;
 
     if (*p == '&')
-        ++p;
+        while (*p == '&')
+            ++p;
     else
         ass_msg(library, MSGL_DBG2, "suspicious color format: \"%s\"\n", p);
 
@@ -92,6 +127,9 @@ int strtocolor(ASS_Library *library, char **q, uint32_t *res, int hex)
         result = mystrtou32(&p, base, &color);
     }
 
+    while (*p == '&' || *p == 'H')
+        ++p;
+
     {
         unsigned char *tmp = (unsigned char *) (&color);
         unsigned char b;
@@ -201,18 +239,56 @@ unsigned ass_utf8_get_char(char **str)
 }
 
 /**
+ * Original version from http://www.cprogramming.com/tutorial/utf8.c
+ * \brief Converts a single UTF-32 code point to UTF-8
+ * \param dest Buffer to write to. The output is NUL-terminated.
+ * \param ch 32-bit character code to convert
+ * \return number of bytes written, not counting the terminator (0 if ch >= 0x110000)
+ * Converts a single character and ASSUMES YOU HAVE ENOUGH SPACE (up to 5 bytes).
+ */
+unsigned ass_utf8_put_char(char *dest, uint32_t ch)
+{
+    // Sequence length and lead-byte marker; len stays 0 for ch >= 0x110000.
+    unsigned len = 0, lead = 0, k;
+    if (ch < 0x80) {
+        len = 1;
+    } else if (ch < 0x800) {
+        len = 2; lead = 0xC0;
+    } else if (ch < 0x10000) {
+        len = 3; lead = 0xE0;
+    } else if (ch < 0x110000) {
+        len = 4; lead = 0xF0;
+    }
+
+    if (len)  // the first byte carries the marker plus the topmost payload bits
+        dest[0] = (ch >> (6 * (len - 1))) | lead;
+    // every following byte is 10xxxxxx with the next six payload bits
+    for (k = 1; k < len; k++)
+        dest[k] = ((ch >> (6 * (len - 1 - k))) & 0x3F) | 0x80;
+
+    dest[len] = '\0';
+    return len;
+}
+
+/**
  * \brief find style by name
  * \param track track
  * \param name style name
  * \return index in track->styles
- * Returnes 0 if no styles found => expects at least 1 style.
- * Parsing code always adds "Default" style in the end.
+ * Returns 0 if no styles found => expects at least 1 style.
+ * Parsing code always adds "Default" style in the beginning.
  */
 int lookup_style(ASS_Track *track, char *name)
 {
     int i;
-    if (*name == '*')
-        ++name;                 // FIXME: what does '*' really mean ?
+    // '*' seem to mean literally nothing;
+    // VSFilter removes them as soon as it can
+    while (*name == '*')
+        ++name;
+    // VSFilter then normalizes the case of "Default"
+    // (only in contexts where this function is called)
+    if (strcasecmp(name, "Default") == 0)
+        name = "Default";
     for (i = track->n_styles - 1; i >= 0; --i) {
         if (strcmp(track->styles[i].Name, name) == 0)
             return i;
@@ -221,7 +297,27 @@ int lookup_style(ASS_Track *track, char *name)
     ass_msg(track->library, MSGL_WARN,
             "[%p]: Warning: no style named '%s' found, using '%s'",
             track, name, track->styles[i].Name);
-    return i;                   // use the first style
+    return i;
+}
+
+/**
+ * \brief strict style lookup by exact name, as in \r (no fallback style)
+ * \param track track whose styles array is searched, last entry first
+ * \param name style name, compared case-sensitively with strcmp
+ * \return pointer to the matching entry of track->styles
+ * Logs a warning and returns NULL when no style has the given name.
+ */
+ASS_Style *lookup_style_strict(ASS_Track *track, char *name)
+{
+    int idx = track->n_styles;
+    while (--idx >= 0) {
+        if (!strcmp(track->styles[idx].Name, name))
+            return &track->styles[idx];
+    }
+    ass_msg(track->library, MSGL_WARN,
+            "[%p]: Warning: no style named '%s' found",
+            track, name);
+    return NULL;
 }
 
 #ifdef CONFIG_ENCA
diff --git a/libass/ass_utils.h b/libass/ass_utils.h
index 2d0c6f9..4e2ba6c 100644
--- a/libass/ass_utils.h
+++ b/libass/ass_utils.h
@@ -43,6 +43,12 @@
 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
 #define FFMINMAX(c,a,b) FFMIN(FFMAX(c, a), b)
 
+#if (defined(__i386__) || defined(__x86_64__)) && CONFIG_ASM
+int has_sse2(void);
+int has_avx(void);
+int has_avx2(void);
+#endif
+
 int mystrtoi(char **p, int *res);
 int mystrtoll(char **p, long long *res);
 int mystrtou32(char **p, int base, uint32_t *res);
@@ -51,8 +57,10 @@ int strtocolor(ASS_Library *library, char **q, uint32_t *res, int hex);
 char parse_bool(char *str);
 int parse_ycbcr_matrix(char *str);
 unsigned ass_utf8_get_char(char **str);
+unsigned ass_utf8_put_char(char *dest, uint32_t ch);
 void ass_msg(ASS_Library *priv, int lvl, char *fmt, ...);
 int lookup_style(ASS_Track *track, char *name);
+ASS_Style *lookup_style_strict(ASS_Track *track, char *name);
 #ifdef CONFIG_ENCA
 void *ass_guess_buffer_cp(ASS_Library *library, unsigned char *buffer,
                           int buflen, char *preferred_language,
@@ -118,18 +126,22 @@ static inline int rot_key(double a)
     return double_to_d22(a) % m;
 }
 
-#define FNV1_32A_INIT (unsigned)0x811c9dc5
+#define FNV1_32A_INIT 0x811c9dc5U
+#define FNV1_32A_PRIME 16777619U
 
 static inline unsigned fnv_32a_buf(void *buf, size_t len, unsigned hval)
 {
     unsigned char *bp = buf;
-    unsigned char *be = bp + len;
-    while (bp < be) {
-        hval ^= (unsigned) *bp++;
-        hval +=
-            (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) +
-            (hval << 24);
+    size_t n = (len + 3) / 4;  // number of passes through the 4x unrolled loop below
+    if (!len) return hval;     // empty input: entering at case 0 with n == 0 would wrap --n and overrun buf
+    switch (len % 4) {         // jump into the loop body so the first pass handles the len % 4 remainder
+    case 0: do { hval ^= (unsigned) *bp++; hval *= FNV1_32A_PRIME;
+    case 3:      hval ^= (unsigned) *bp++; hval *= FNV1_32A_PRIME;
+    case 2:      hval ^= (unsigned) *bp++; hval *= FNV1_32A_PRIME;
+    case 1:      hval ^= (unsigned) *bp++; hval *= FNV1_32A_PRIME;
+               } while (--n > 0);
     }
+
     return hval;
 }
 static inline unsigned fnv_32a_str(char *str, unsigned hval)
@@ -137,9 +149,7 @@ static inline unsigned fnv_32a_str(char *str, unsigned hval)
     unsigned char *s = (unsigned char *) str;
     while (*s) {
         hval ^= (unsigned) *s++;
-        hval +=
-            (hval << 1) + (hval << 4) + (hval << 7) + (hval << 8) +
-            (hval << 24);
+        hval *= FNV1_32A_PRIME;
     }
     return hval;
 }
-- 
1.9.rc1



More information about the MPlayer-dev-eng mailing list