--- diffviewer/smdiff.py	(revision 0)
+++ diffviewer/smdiff.py	(revision 0)
@@ -0,0 +1,65 @@
+from difflib import SequenceMatcher
+
+
+class SMDiffer(SequenceMatcher):
+    """
+    SequenceMatcher subclass that post-processes 'replace' opcodes,
+    splitting them on blank lines into finer-grained chunks.
+    """
+    def __init__(self, a, b):
+        SequenceMatcher.__init__(self, None, a, b)
+
+    def get_opcodes(self):
+        for tag, i1, i2, j1, j2 in SequenceMatcher.get_opcodes(self):
+            if tag == 'replace':
+                oldlines = self.a[i1:i2]  # replaced lines on side A
+                newlines = self.b[j1:j2]  # replacement lines on side B
+
+                i = i_start = 0
+                j = j_start = 0
+
+                while i < len(oldlines) and j < len(newlines):  # reclassify line pairs
+                    new_tag = None
+                    new_i, new_j = i, j
+
+                    if oldlines[i] == "" and newlines[j] == "":
+                        new_tag = "equal"  # blank paired with blank: unchanged
+                        new_i += 1
+                        new_j += 1
+                    elif oldlines[i] == "":
+                        new_tag = "insert"  # A side is blank; emit B's line as an insert
+                        new_j += 1
+                    elif newlines[j] == "":
+                        new_tag = "delete"  # B side is blank; emit A's line as a delete
+                        new_i += 1
+                    else:
+                        new_tag = "replace"  # both non-blank: a true replace
+                        new_i += 1
+                        new_j += 1
+
+                    if new_tag != tag:  # tag changed: flush the accumulated chunk
+                        if i > i_start or j > j_start:
+                            yield tag, i1 + i_start, i1 + i, \
+                                       j1 + j_start, j1 + j
+
+                        tag = new_tag
+                        i_start, j_start = i, j
+
+                    i, j = new_i, new_j
+
+                yield tag, i1 + i_start, i1 + i, j1 + j_start, j1 + j  # flush the last chunk
+                i_start = i
+                j_start = j
+
+                if i2 > i1 + i_start or j2 > j1 + j_start:  # unmatched tail on one side
+                    tag = None
+
+                    if len(oldlines) > len(newlines):
+                        tag = "delete"
+                    elif len(oldlines) < len(newlines):
+                        tag = "insert"
+
+                    if tag != None:
+                        yield tag, i1 + i_start, i2, j1 + j_start, j2
+            else:
+                yield tag, i1, i2, j1, j2  # non-replace opcodes pass through unchanged
--- diffviewer/myersdiff.py	(revision 0)
+++ diffviewer/myersdiff.py	(revision 0)
@@ -0,0 +1,699 @@
+class MyersDiffer:
+    """
+    An implementation of Eugene Myers's O(ND) Diff algorithm, based partly
+    on the C# implementation available at http://www.mathertel.de/Diff/ and
+    GNU diff.
+    """
+    SNAKE_LIMIT = 20  # diagonal runs longer than this count as "big snakes"
+
+    class DiffData:  # per-side bookkeeping for one input sequence
+        def __init__(self, data):
+            self.data = data              # line contents (or int line codes)
+            self.length = len(data)
+            self.modified = {}            # line index -> True if the line changed
+            self.undiscarded = []         # lines kept by _discard_confusing_lines
+            self.undiscarded_lines = 0    # count of valid undiscarded entries
+            self.real_indexes = []        # undiscarded index -> original index
+
+        def is_modified(self, line):
+            return self.modified.has_key(line) and self.modified[line]  # Python 2 dict idiom
+
+
+    def __init__(self, a, b, ignore_space=False):
+        if type(a) != type(b):  # both sides must be the same sequence type
+            raise TypeError
+
+        self.a = a
+        self.b = b
+        self.code_table = {}  # line text -> unique int code (see _gen_diff_codes)
+        self.last_code = 0
+        self.a_data = self.b_data = None
+        self.ignore_space = ignore_space
+        self.minimal_diff = False
+
+        # SMS State
+        self.max_lines = 0  # sizing bound for the diagonal vectors
+        self.fdiag = None   # forward-search diagonal vector
+        self.bdiag = None   # backward-search diagonal vector
+
+    def ratio(self):
+        self._gen_diff_data()
+        a_equals = self.a_data.length - sum(self.a_data.modified.values())
+        b_equals = self.b_data.length - sum(self.b_data.modified.values())
+
+        return 1.0 * (a_equals + b_equals) / \
+                     (self.a_data.length + self.b_data.length)
+
+    def get_opcodes(self):
+        """
+        Generator yielding (tag, i1, i2, j1, j2) opcodes describing the
+        diff, where tag is "equal", "insert", "delete" or "replace" and
+        a[i1:i2] / b[j1:j2] are the affected line ranges. Adjacent groups
+        sharing a tag are coalesced before being yielded. The diff data
+        is generated lazily on the first call.
+        """
+        self._gen_diff_data()
+
+        a_line = b_line = 0
+        last_group = None
+
+        while a_line < self.a_data.length or b_line < self.b_data.length:
+            a_start = a_line
+            b_start = b_line
+
+            if a_line < self.a_data.length and \
+               not self.a_data.is_modified(a_line) and \
+               b_line < self.b_data.length and \
+               not self.b_data.is_modified(b_line):
+                # Equal
+                a_changed = b_changed = 1
+                tag = "equal"
+                a_line += 1
+                b_line += 1
+            else:
+                # Deleted, inserted or replaced
+                while a_line < self.a_data.length and \
+                      (b_line >= self.b_data.length or \
+                       self.a_data.is_modified(a_line)):
+                    a_line += 1
+
+                while b_line < self.b_data.length and \
+                      (a_line >= self.a_data.length or \
+                       self.b_data.is_modified(b_line)):
+                    b_line += 1
+
+                a_changed = a_line - a_start
+                b_changed = b_line - b_start
+
+                assert a_start < a_line or b_start < b_line
+                assert a_changed != 0 or b_changed != 0
+
+                if a_changed == 0 and b_changed > 0:
+                    tag = "insert"
+                elif a_changed > 0 and b_changed == 0:
+                    tag = "delete"
+                elif a_changed > 0 and b_changed > 0:
+                    tag = "replace"
+
+                    if a_changed != b_changed:  # uneven replace: pair off equal counts
+                        if a_changed > b_changed:
+                            a_line -= a_changed - b_changed
+                        elif a_changed < b_changed:
+                            b_line -= b_changed - a_changed
+
+                        a_changed = b_changed = min(a_changed, b_changed)
+
+            if last_group and last_group[0] == tag:  # coalesce with the previous group
+                last_group = (tag,
+                              last_group[1], last_group[2] + a_changed,
+                              last_group[3], last_group[4] + b_changed)
+            else:
+                if last_group:
+                    yield last_group
+
+                last_group = (tag, a_start, a_start + a_changed,
+                              b_start, b_start + b_changed)
+
+
+        if not last_group:  # both inputs empty: yield a single empty "equal" group
+            last_group = ("equal", 0, self.a_data.length, 0, self.b_data.length)
+
+        yield last_group
+
+    def _gen_diff_data(self):
+        if self.a_data and self.b_data:  # already computed; this is idempotent
+            return
+
+        if type(self.a) == list:  # lists of lines are interned to int codes
+            self.a_data = self.DiffData(self._gen_diff_codes(self.a))
+            self.b_data = self.DiffData(self._gen_diff_codes(self.b))
+        else:
+            self.a_data = self.DiffData(self.a)
+            self.b_data = self.DiffData(self.b)
+
+        self._discard_confusing_lines()
+
+        self.max_lines = self.a_data.undiscarded_lines + \
+                         self.b_data.undiscarded_lines + 3
+
+        vector_size = self.max_lines  # diagonals span k = -(b+1)..(a+1): max_lines slots
+        self.fdiag = [0] * vector_size
+        self.bdiag = [0] * vector_size
+
+        self._lcs(0, self.a_data.undiscarded_lines,
+                  0, self.b_data.undiscarded_lines,
+                  self.minimal_diff)
+        self._shift_chunks(self.a_data, self.b_data)
+        self._shift_chunks(self.b_data, self.a_data)
+
+
+    def _gen_diff_codes(self, lines):
+        """
+        Converts all unique lines of text into unique numbers. Comparing
+        lists of numbers is faster than comparing lists of strings.
+        """
+        codes = []
+
+        for line in lines:
+            # TODO: Handle ignoring/trimming spaces, ignoring casing, and
+            #       special hooks
+
+            if self.ignore_space:
+                temp = line.lstrip()  # only leading whitespace is ignored
+
+                # We still want to show lines that contain only whitespace.
+                if temp != "":
+                    line = temp
+
+            if self.code_table.has_key(line):
+                code = self.code_table[line]
+            else:
+                self.last_code += 1  # codes start at 1; 0 is never assigned
+                code = self.last_code
+                self.code_table[line] = code
+
+            codes.append(code)
+
+        return codes
+
+
+    def _findSMS(self, a_lower, a_upper, b_lower, b_upper, find_minimal):
+        """
+        Finds the Shortest Middle Snake; returns (x, y, lo_minimal, hi_minimal).
+        """
+        # The vector for the (0, 0) to (x, y) search
+        down_vector = self.fdiag  # shared scratch vector sized in _gen_diff_data
+
+        # The vector for the (u, v) to (N, M) search
+        up_vector   = self.bdiag  # shared scratch vector sized in _gen_diff_data
+
+        down_k = a_lower - b_lower # The k-line to start the forward search
+        up_k   = a_upper - b_upper # The k-line to start the reverse search
+        odd_delta = (down_k - up_k) % 2 != 0
+
+        down_vector[down_k] = a_lower
+        up_vector[up_k] = a_upper
+
+        dmin = a_lower - b_upper
+        dmax = a_upper - b_lower
+
+        down_min = down_max = down_k
+        up_min = up_max = up_k
+
+        cost = 0
+        max_cost = 1
+        temp = self.max_lines
+        while temp != 0:  # max_cost ~ sqrt(max_lines), for the give-up heuristic below
+            max_cost <<= 1
+            temp >>= 2
+
+        max_cost = max(256, max_cost)
+
+        while True:
+            cost += 1
+            big_snake = False
+
+            if down_min > dmin:
+                down_min -= 1
+                down_vector[down_min - 1] = -1
+            else:
+                down_min += 1
+
+            if down_max < dmax:
+                down_max += 1
+                down_vector[down_max + 1] = -1
+            else:
+                down_max -= 1
+
+            # Extend the forward path
+            for k in range(down_max, down_min - 1, -2):
+                tlo = down_vector[k - 1]
+                thi = down_vector[k + 1]
+
+                if tlo >= thi:
+                    x = tlo + 1
+                else:
+                    x = thi
+
+                y = x - k
+                old_x = x
+
+                # Find the end of the furthest reaching forward D-path in
+                # diagonal k
+                while x < a_upper and y < b_upper and \
+                      self.a_data.undiscarded[x] == self.b_data.undiscarded[y]:
+                    x += 1
+                    y += 1
+
+                if x - old_x > self.SNAKE_LIMIT:
+                    big_snake = True
+
+                down_vector[k] = x
+
+                if odd_delta and up_min <= k <= up_max and \
+                   up_vector[k] <= x:
+                    return x, y, True, True
+
+            # Extend the reverse path
+            if up_min > dmin:
+                up_min -= 1
+                up_vector[up_min - 1] = self.max_lines
+            else:
+                up_min += 1
+
+            if up_max < dmax:
+                up_max += 1
+                up_vector[up_max + 1] = self.max_lines
+            else:
+                up_max -= 1
+
+            for k in range(up_max, up_min - 1, -2):
+                tlo = up_vector[k - 1]
+                thi = up_vector[k + 1]
+
+                if tlo < thi:
+                    x = tlo
+                else:
+                    x = thi - 1
+
+                y = x - k
+                old_x = x
+
+                while x > a_lower and y > b_lower and \
+                      self.a_data.undiscarded[x - 1] == \
+                      self.b_data.undiscarded[y - 1]:
+                    x -= 1
+                    y -= 1
+
+                if old_x - x > self.SNAKE_LIMIT:
+                    big_snake = True
+
+                up_vector[k] = x
+
+                if not odd_delta and down_min <= k <= down_max and \
+                   x <= down_vector[k]:
+                    return x, y, True, True
+
+            if find_minimal:
+                continue
+
+            # Heuristics courtesy of GNU diff.
+            #
+            # We check occasionally for a diagonal that made lots of progress
+            # compared with the edit distance. If we have one, find the one
+            # that made the most progress and return it.
+            #
+            # This gives us better, more dense chunks, instead of lots of
+            # small ones often starting with replaces. It also makes the output
+            # closer to that of GNU diff, which more people would expect.
+
+            if cost > 200 and big_snake:
+                best = 0
+                ret_x = 0
+                ret_y = 0
+
+                for d in range(down_max, down_min - 1, -2):
+                    dd = d - down_k
+                    x = down_vector[d]
+                    y = x - d
+                    v = (x - a_lower) * 2 - dd
+
+                    if v > 12 * (cost + abs(dd)):
+                        if v > best and \
+                           a_lower + self.SNAKE_LIMIT <= x < a_upper and \
+                           b_lower + self.SNAKE_LIMIT <= y < b_upper:
+                            # We found a sufficient diagonal.
+                            k = 1
+
+                            while self.a_data.undiscarded[x - k] == \
+                                  self.b_data.undiscarded[y - k]:
+                                if k == self.SNAKE_LIMIT:
+                                    best = v
+                                    ret_x, ret_y = x, y
+                                    break
+
+                                k += 1
+
+                if best > 0:
+                    return ret_x, ret_y, True, False
+
+                best = 0
+
+                for d in range(up_max, up_min - 1, -2):
+                    dd = d - up_k
+                    x = up_vector[d]
+                    y = x - d
+                    v = (a_upper - x) * 2 + dd
+
+                    if v > 12 * (cost + abs(dd)):
+                        if v > best and \
+                           a_lower < x <= a_upper - self.SNAKE_LIMIT and \
+                           b_lower < y <= b_upper - self.SNAKE_LIMIT:
+                            # We found a sufficient diagonal.
+                            k = 0
+
+                            while self.a_data.undiscarded[x + k] == \
+                                  self.b_data.undiscarded[y + k]:
+                                if k == self.SNAKE_LIMIT - 1:
+                                    best = v
+                                    ret_x, ret_y = x, y
+                                    break
+
+                                k += 1
+
+                if best > 0:
+                    return ret_x, ret_y, False, True
+
+            # If we've reached or gone past the max cost, just give up now
+            # and report the halfway point between our best results.
+            if cost >= max_cost:
+                fx_best = bx_best = 0
+
+                # Find the forward diagonal that maximized x + y
+                fxy_best = -1
+                for d in range(down_max, down_min - 1, -2):
+                    x = min(down_vector[d], a_upper)
+                    y = x - d
+
+                    if b_upper < y:
+                        x = b_upper + d
+                        y = b_upper
+
+                    if fxy_best < x + y:
+                        fxy_best = x + y
+                        fx_best = x
+
+                # Find the backward diagonal that minimizes x + y
+                bxy_best = self.max_lines
+                for d in range(up_max, up_min - 1, -2):
+                    x = max(a_lower, up_vector[d])
+                    y = x - d
+
+                    if y < b_lower:
+                        x = b_lower + d
+                        y = b_lower
+
+                    if bxy_best == None or x + y < bxy_best:
+                        bxy_best = x + y
+                        bx_best = x  # fixed: was "bx_bet", a typo that left bx_best at 0
+
+                # Use the better of the two diagonals
+                if a_upper + b_upper - bxy_best < \
+                   fxy_best - (a_lower + b_lower):
+                    return fx_best, fxy_best - fx_best, True, False
+                else:
+                    return bx_best, bxy_best - bx_best, False, True
+
+
+        raise Exception("The function should not have reached here.")
+
+
+    def _lcs(self, a_lower, a_upper, b_lower, b_upper, find_minimal):
+        """
+        The divide-and-conquer implementation of the Longest Common
+        Subsequence (LCS) algorithm.
+        """
+        # Fast walkthrough equal lines at the start
+        while a_lower < a_upper and b_lower < b_upper and \
+              self.a_data.undiscarded[a_lower] == \
+              self.b_data.undiscarded[b_lower]:
+            a_lower += 1
+            b_lower += 1
+
+        while a_lower < a_upper and b_lower < b_upper and \
+              self.a_data.undiscarded[a_upper - 1] == \
+              self.b_data.undiscarded[b_upper - 1]:
+            a_upper -= 1
+            b_upper -= 1
+
+        if a_lower == a_upper:
+            # Inserted lines.
+            while b_lower < b_upper:
+                self.b_data.modified[self.b_data.real_indexes[b_lower]] = True
+                b_lower += 1
+        elif b_lower == b_upper:
+            # Deleted lines
+            while a_lower < a_upper:
+                self.a_data.modified[self.a_data.real_indexes[a_lower]] = True
+                a_lower += 1
+        else:
+            # Find the middle snake and length of an optimal path for A and B
+            x, y, low_minimal, high_minimal = \
+                self._findSMS(a_lower, a_upper, b_lower, b_upper, find_minimal)
+
+            self._lcs(a_lower, x, b_lower, y, low_minimal)
+            self._lcs(x, a_upper, y, b_upper, high_minimal)
+
+    def _shift_chunks(self, data, other_data):
+        """
+        Shifts the inserts/deletes of identical lines in order to join
+        the changes together a bit more. This has the effect of cleaning
+        up the diff.
+
+        Often a generated diff has an identical line (e.g. a blank line)
+        both before and after a changed chunk. The raw algorithm may put
+        that line inside the chunk on one side only, which does not
+        always produce the best looking diff. Since the copies are
+        identical, the chunk can be shifted so that the unchanged line
+        sits on either side of it, letting adjacent chunks merge.
+        """
+        i = j = 0
+        i_end = len(data.data)
+
+        while True:
+            # Scan forward in order to find the start of a run of changes.
+            while i < i_end and not data.is_modified(i):
+                i += 1
+
+                while other_data.is_modified(j):
+                    j += 1
+
+            if i == i_end:
+                return;  # NOTE(review): stray semicolon (harmless, but un-Pythonic)
+
+            start = i
+
+            # Find the end of these changes
+            i += 1
+            while data.is_modified(i):
+                i += 1
+
+            while other_data.is_modified(j):
+                j += 1
+
+            while True:
+                run_length = i - start
+
+                # Move the changed chunks back as long as the previous
+                # unchanged line matches the last changed line.
+                # This merges with the previous changed chunks.
+                while start != 0 and data.data[start - 1] == data.data[i - 1]:
+                    start -= 1
+                    i -= 1
+
+                    data.modified[start] = True
+                    data.modified[i] = False
+
+                    while data.is_modified(start - 1):
+                        start -= 1
+
+                    j -= 1
+                    while other_data.is_modified(j):
+                        j -= 1
+
+                # The end of the changed run at the last point where it
+                # corresponds to the changed run in the other data set.
+                # If it's equal to i_end, then we didn't find a corresponding
+                # point.
+                if other_data.is_modified(j - 1):
+                    corresponding = i
+                else:
+                    corresponding = i_end
+
+                # Move the changed region forward as long as the first
+                # changed line is the same as the following unchanged line.
+                while i != i_end and data.data[start] == data.data[i]:
+                    data.modified[start] = False
+                    data.modified[i] = True
+
+                    start += 1
+                    i += 1
+
+                    while data.is_modified(i):
+                        i += 1
+
+                    j += 1
+                    while other_data.is_modified(j):
+                        j += 1
+                        corresponding = i  # still aligned with the other side here
+
+                if run_length == i - start:
+                    break  # the run stopped moving; it is fully shifted
+
+            # Move the fully-merged run back to a corresponding run in the
+            # other data set, if we can.
+            while corresponding < i:
+                start -= 1
+                i -= 1
+
+                data.modified[start] = True
+                data.modified[i] = False
+
+                j -= 1
+                while other_data.is_modified(j):
+                    j -= 1
+
+    def _discard_confusing_lines(self):  # GNU diff heuristic: drop unlikely-to-match lines
+        def build_discard_list(data, discards, counts):
+            # Mark lines as 1 (no match on the other side: definitely
+            # discard) or 2 (matches too many lines: provisionally discard).
+            end = data.length
+            many = 5
+
+            tem = (end / 64) >> 2  # Python 2 integer division, as in GNU diff
+            while tem > 0:
+                many *= 2
+                tem >>= 2
+
+            for i in range(end):
+                if data.data[i] != 0:
+                    num_matches = counts[data.data[i]]
+
+                    if num_matches == 0:
+                        discards[i] = 1
+                    elif num_matches > many:
+                        discards[i] = 2
+
+        def check_discard_runs(data, discards):
+            # Keep provisional discards only inside runs of discardable
+            # lines; cancel isolated or excessive provisional discards.
+            end = data.length
+            i = 0
+
+            while i < end:  # a while loop so the run-skip below takes effect
+                # Cancel provisional discards not in the middle of a run
+                if discards[i] == 2:
+                    discards[i] = 0
+                elif discards[i] != 0:
+                    # We found a provisional discard
+                    provisional = 0
+
+                    # Find the end of this run of discardable lines and count
+                    # how many are provisionally discardable.
+                    j = i
+                    while j < end and discards[j] != 0:  # fixed: j now reaches the true run end
+                        if discards[j] == 2:
+                            provisional += 1
+                        j += 1
+
+                    # Cancel provisional discards at the end; shrink the run.
+                    while j > i and discards[j - 1] == 2:
+                        j -= 1
+                        discards[j] = 0
+                        provisional -= 1
+
+                    length = j - i
+
+                    # If 1/4 of the lines are provisional, cancel discarding
+                    # all the provisional lines in the run.
+                    if provisional * 4 > length:
+                        while j > i:
+                            j -= 1
+                            if discards[j] == 2:
+                                discards[j] = 0
+                    else:
+                        minimum = 1
+                        tem = length >> 4
+
+                        while tem > 0:
+                            tem >>= 2
+                            minimum <<= 1
+
+                        minimum += 1
+                        consec = 0
+
+                        for j in range(length):
+                            if discards[i + j] != 2:
+                                consec = 0
+                            else:
+                                consec += 1
+                                if minimum == consec:  # cancel the whole subrun at once
+                                    discards[i + j - consec + 1:i + j + 1] = [0] * consec
+                                elif minimum < consec:
+                                    discards[i + j] = 0  # fixed: was "dicards" (NameError)
+
+                        consec = 0
+
+                        for j in range(length):
+                            discard = discards[i + j]
+
+                            if j >= 8 and discard == 1:
+                                break
+
+                            if discard == 2:
+                                consec = 0
+                                discards[i + j] = 0
+                            elif discard == 0:
+                                consec = 0
+                            else:
+                                consec += 1
+
+                            if consec == 3:
+                                break
+
+                        i += length - 1  # jump to the run's last line (used below)
+                        consec = 0
+
+                        for j in range(length):
+                            discard = discards[i - j]
+
+                            if j >= 8 and discard == 1:
+                                break
+
+                            if discard == 2:
+                                consec = 0
+                                discards[i - j] = 0
+                            elif discard == 0:
+                                consec = 0
+                            else:
+                                consec += 1
+
+                            if consec == 3:
+                                break
+                i += 1
+
+        def discard_lines(data, discards):
+            # Compact the kept lines into data.undiscarded; discarded
+            # lines are simply marked as modified.
+            end = data.length
+            j = 0
+
+            for i in range(end):
+                if self.minimal_diff or discards[i] == 0:
+                    data.undiscarded[j] = data.data[i]
+                    data.real_indexes[j] = i
+                    j += 1
+                else:
+                    data.modified[i] = True
+
+            data.undiscarded_lines = j
+
+
+        self.a_data.undiscarded = [0] * self.a_data.length
+        self.b_data.undiscarded = [0] * self.b_data.length
+        self.a_data.real_indexes = [0] * self.a_data.length
+        self.b_data.real_indexes = [0] * self.b_data.length
+        a_discarded = [0] * self.a_data.length
+        b_discarded = [0] * self.b_data.length
+        a_code_counts = [0] * (1 + self.last_code)
+        b_code_counts = [0] * (1 + self.last_code)
+
+        for i in range(self.a_data.length):
+            a_code_counts[self.a_data.data[i]] += 1  # NOTE(review): assumes int codes; string inputs would break here — verify
+
+        for i in range(self.b_data.length):
+            b_code_counts[self.b_data.data[i]] += 1
+
+        build_discard_list(self.a_data, a_discarded, b_code_counts)
+        build_discard_list(self.b_data, b_discarded, a_code_counts)
+
+        check_discard_runs(self.a_data, a_discarded)
+        check_discard_runs(self.b_data, b_discarded)
+
+        discard_lines(self.a_data, a_discarded)
+        discard_lines(self.b_data, b_discarded)
--- diffviewer/diffutils.py	(revision 696)
+++ diffviewer/diffutils.py	(working copy)
@@ -1,75 +1,31 @@
-import difflib
 import math
 import os
 import subprocess
 import tempfile
 
-def diff(a, b):
-    """
-    Wrapper around SequenceMatcher that works around bugs in how it does
-    its matching.
-    """
-    matcher = difflib.SequenceMatcher(None, a, b).get_opcodes()
+from difflib import SequenceMatcher
+from reviewboard.diffviewer.myersdiff import MyersDiffer
+from reviewboard.diffviewer.smdiff import SMDiffer
 
-    for tag, i1, i2, j1, j2 in matcher:
-        if tag == 'replace':
-            oldlines = a[i1:i2]
-            newlines = b[j1:j2]
 
-            i = 0
-            j = 0
-            i_start = 0
-            j_start = 0
+DEFAULT_DIFF_COMPAT_VERSION = 1
 
-            while i < len(oldlines) and j < len(newlines):
-                new_tag = None
-                new_i = i
-                new_j = j
 
-                if oldlines[i] == "" and newlines[j] == "":
-                    new_tag = "equal"
-                    new_i += 1
-                    new_j += 1
-                elif oldlines[i] == "":
-                    new_tag = "insert"
-                    new_j += 1
-                elif newlines[j] == "":
-                    new_tag = "delete"
-                    new_i += 1
-                else:
-                    new_tag = "replace"
-                    new_i += 1
-                    new_j += 1
+def Differ(a, b, ignore_space=False,
+           compat_version=DEFAULT_DIFF_COMPAT_VERSION):
+    """
+    Factory wrapper for returning a differ class based on the compat version
+    and flags specified.
+    """
+    if compat_version == 0:
+        return SMDiffer(a, b)  # legacy differ; ignore_space is not supported here
+    elif compat_version == 1:
+        return MyersDiffer(a, b, ignore_space)
+    else:
+        raise Exception("Invalid diff compat version (%s) passed to Differ" %
+                        (compat_version))
 
-                if new_tag != tag:
-                    if i > i_start or j > j_start:
-                        yield tag, i1 + i_start, i1 + i, j1 + j_start, j1 + j
 
-                    tag = new_tag
-                    i_start = i
-                    j_start = j
-
-                i = new_i
-                j = new_j
-
-            yield tag, i1 + i_start, i1 + i, j1 + j_start, j1 + j
-            i_start = i
-            j_start = j
-
-            if i2 > i1 + i_start or j2 > j1 + j_start:
-                tag = None
-
-                if len(oldlines) > len(newlines):
-                    tag = "delete"
-                elif len(oldlines) < len(newlines):
-                    tag = "insert"
-
-                if tag != None:
-                    yield tag, i1 + i_start, i2, j1 + j_start, j2
-        else:
-            yield tag, i1, i2, j1, j2
-
-
 def patch(diff, file, filename):
     """Apply a diff to a file.  Delegates out to `patch` because noone
        except Larry Wall knows how to patch."""
@@ -121,22 +77,24 @@
     if oldline is None or newline is None:
         return (None, None)
 
-    s = difflib.SequenceMatcher(None, oldline, newline)
-    oldchanges = []
-    newchanges = []
-    back = (0, 0)
+    # Use the SequenceMatcher directly. It seems to give us better results
+    # for this. We should investigate steps to move to the new differ.
+    differ = SequenceMatcher(None, oldline, newline)
 
     # This thresholds our results -- we don't want to show inter-line diffs if
     # most of the line has changed, unless those lines are very short.
-    opcodes = s.get_opcodes()
 
     # FIXME: just a plain, linear threshold is pretty crummy here.  Short
     # changes in a short line get lost.  I haven't yet thought of a fancy
     # nonlinear test.
-    if s.ratio() < 0.6:
+    if differ.ratio() < 0.6:
         return (None, None)
 
-    for tag, i1, i2, j1, j2 in opcodes:
+    oldchanges = []
+    newchanges = []
+    back = (0, 0)
+
+    for tag, i1, i2, j1, j2 in differ.get_opcodes():
         if tag == "equal":
             if (i2 - i1 < 3) or (j2 - j1 < 3):
                 back = (j2 - j1, i2 - i1)
--- diffviewer/views.py	(revision 696)
+++ diffviewer/views.py	(working copy)
@@ -83,7 +83,10 @@
 
         chunks = []
         linenum = 1
-        for tag, i1, i2, j1, j2 in diffutils.diff(a, b):
+        differ = diffutils.Differ(a, b, ignore_space=True,
+                                  compat_version=diffset.diffcompat)
+
+        for tag, i1, i2, j1, j2 in differ.get_opcodes():
             oldlines = a[i1:i2]
             newlines = b[j1:j2]
             numlines = max(len(oldlines), len(newlines))
--- diffviewer/tests.py	(revision 696)
+++ diffviewer/tests.py	(working copy)
@@ -4,6 +4,35 @@
 import reviewboard.diffviewer.parser as diffparser
 import diffutils
 
+
+class MyersDifferTest(unittest.TestCase):
+    def diff(self):  # NOTE(review): not named "test_*", so unittest never runs this
+        self.__test_diff(["1", "2", "3"],
+                         ["1", "2", "3"],
+                         [("equal", 0, 0, 3, 3),])  # NOTE(review): get_opcodes yields (tag, i1, i2, j1, j2); this looks like (tag, i1, j1, i2, j2) — verify
+
+        self.__test_diff(["1", "2", "3"],
+                         [],
+                         [("delete", 0, 0, 3, 0),])
+
+        self.__test_diff("1\n2\n3\n",
+                         "0\n1\n2\n3\n",  # NOTE(review): strings take the non-list DiffData path — confirm that path is supported
+                         [("insert", 0, 0, 0, 1),
+                          ("equal",  0, 1, 3, 3)])
+
+        self.__test_diff("1\n2\n3\n7\n",
+                         "1\n2\n4\n5\n6\n7\n",
+                         [("equal",   0, 0, 2, 2),
+                          ("replace", 2, 2, 1, 1),
+                          ("insert",  3, 3, 0, 2),
+                          ("equal",   3, 5, 1, 1)])
+
+
+    def __test_diff(self, a, b, expected):
+        opcodes = list(diffutils.MyersDiffer(a, b).get_opcodes())
+        self.failUnless(opcodes == expected)  # NOTE(review): assertEqual would report the actual mismatch
+
+
 class DiffParserTest(unittest.TestCase):
     PREFIX = 'diffviewer/testdata'
 
--- diffviewer/models.py	(revision 696)
+++ diffviewer/models.py	(working copy)
@@ -49,6 +49,8 @@
     history = models.ForeignKey('DiffSetHistory', null=True, core=True,
                                 edit_inline=models.STACKED)
     repository = models.ForeignKey(Repository)
+    diffcompat = models.IntegerField('Differ compatibility version',
+                                     default=0)
 
     def save(self):
         if self.revision == 0 and self.history != None:
--- diffviewer/forms.py	(revision 696)
+++ diffviewer/forms.py	(working copy)
@@ -5,6 +5,7 @@
 from django.core import validators
 from django.core.validators import ValidationError
 
+from reviewboard.diffviewer.diffutils import DEFAULT_DIFF_COMPAT_VERSION
 from reviewboard.diffviewer.models import DiffSet, FileDiff
 from reviewboard.scmtools.models import Repository
 import reviewboard.diffviewer.parser as diffparser
@@ -52,7 +53,8 @@
             f.origInfo = revision
 
         diffset = DiffSet(name=file["filename"], revision=0,
-                          history=diffset_history)
+                          history=diffset_history,
+                          diffcompat=DEFAULT_DIFF_COMPAT_VERSION)
         diffset.repository = repository
         diffset.save()
 
