// Diff.java // --------- // part of YaCy // (C) by Michael Peter Christen; mc@yacy.net // first published on http://www.anomic.de // Frankfurt, Germany, 2007 // Created 03.02.2007 // // This file is contributed by Franz Brausze // // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA package net.yacy.data; import java.lang.reflect.Array; import java.util.ArrayList; import net.yacy.document.parser.html.CharacterCoding; /** * This class provides a diff-functionality. */ public class Diff { private final ArrayList parts = new ArrayList(); protected final Object[] original; protected final Object[] changed; /** * @param original the original String * @param changed the new String * @throws NullPointerException if one of the arguments is null */ public Diff(final String original, final String changed) { this(original, changed, 1); } /** * @param original the original String * @param changed the new String * @param minConsecutive the minimum number of consecutive equal characters in * both Strings. 
Smaller seperations will only be performed on the end of either * String if needed * @throws NullPointerException if original or changed is * null */ public Diff(final String original, final String changed, final int minConsecutive) { if (original == null || changed == null) throw new NullPointerException("input Strings must be null"); this.original = (Object[]) Array.newInstance(Comparable.class, original.length()); for (int i=0; i 0) ? minConsecutive : 1); } public Diff(final Object[] original, final Object[] changed, final int minConsecutive) { if (original == null || changed == null) throw new NullPointerException("input Objects must be null"); this.original = original; this.changed = changed; parse((minConsecutive > 0) ? minConsecutive : 1); } private void parse(final int minLength) { /* Matrix: find as long diagonals as possible, * delete the old horizontally and add the new vertically * * ~ OLD ~ * |T|H|E| |F|I|R|S|T| |S|E|N|T|E|N|C|E| * T|#| | | | | | | |#| | | | |#| | | | | * H| |#| | | | | | | | | | | | | | | | | * E| | |#| | | | | | | | |#| | |#| | |#| * | | | |#| | | | | |#| | | | | | | | | * N| | | | | | | | | | | | |#| | |#| | | * E| | |#| | | | | | | | |#| | |#| | |#| * ~ X| | | | | | | | | | | | | | | | | | | * N T|#| | | | | | | |#| | | | |#| | | | | * E | | | |#| | | | | |#| | | | | | | | | * W S| | | | | | | |#| | |#| | | | | | | | * ~ E| | |#| | | | | | | | |#| | |#| | |#| * N| | | | | | | | | | | | |#| | |#| | | * T|#| | | | | | | |#| | | | |#| | | | | * E| | |#| | | | | | | | |#| | |#| | |#| * N| | | | | | | | | | | | |#| | |#| | | * C| | | | | | | | | | | | | | | | |#| | * E| | |#| | | | | | | | |#| | |#| | |#| */ final boolean[][] matrix = new boolean[this.changed.length][this.original.length]; for (int y=0; yminLength line by line in a submatrix * { x, y, matrix[0].length, matrix.length} of the matrix:
* <pre>
*  {_1,__,__} -&gt; X axis
* ,{__,_1,__}
* ,{__,__,_1}
* </pre>
* <p>
* TODO: some optimisation ideas
* <ul>
* <li>search for a better algorithm on the inet!!! :)</li>
* <li>pass only the part of the matrix where the search takes place - not the whole matrix every time</li>
* <li>break the inner loop if the rest of the matrix is smaller than minLength (and no diagonal has been found yet)</li>
* <li>return the diagonal topologically closest to {0,0}</li>
* </ul>
* @param x the starting position of the search on the optical horizontal axis * @param y the starting position of the search on the optical vertical axis
* @param matrix the matrix to search through * @param minLength the minimal desired length of a diagonal to find * @return a vector in the form { diagStartX, diagStartY, diagLength } where diagLength >= minLength */ private static int[] findDiagonal(final int x, final int y, final boolean[][] matrix, final int minLength) { int rx, ry, yy, xx, i; for (yy=y; yy= minLength) return new int[] { rx, ry, i }; // swap back the x and y axes for better readability } return null; } /** * @return the original Object[] passed to this class on instantiation */ public Object[] getOriginal() { return this.original; } /** * @return the new Object[] passed to this class on instantiation */ public Object[] getNew() { return this.changed; } /** * A diff is composed of different parts. Each of these parts stands for an * operation, like "do nothing", "add" or "delete". * * @see Part * @return all parts this diff consists of in correct order */ public Part[] getParts() { return this.parts.toArray(new Part[this.parts.size()]); } @Override public String toString() { final StringBuilder sb = new StringBuilder(this.parts.size() * 20); for (final Part part :parts) sb.append(part.toString()).append("\n"); return sb.toString(); } /** * This class represents a part of the diff, meaning one operation * (or one line of a "normal" diff) */ public class Part { /** The string this diff-part cares about has not been changed */ public static final int UNCHANGED = 0; /** The string this diff-part cares about has been added in the new version */ public static final int ADDED = 1; /** The string this diff-part cares about has been removed in the new version */ public static final int DELETED = 2; private final int action; private final int posOld; private final int posNew; Part(final int action, final int posOld, final int posNew) { this.action = action; this.posOld = posOld; this.posNew = posNew; } /** * @return whether the string shan't be changed, shall be added or deleted */ public int getAction() { 
return this.action; } public int getPosOld() { return this.posOld; } public int getPosNew() { return this.posNew; } /** * @return the plain string this diff-part cares about */ public String getString() { final StringBuilder sb = new StringBuilder(this.posNew - this.posOld); if (this.action == ADDED) { for (int i = this.posOld; i < this.posNew; i++) sb.append(Diff.this.changed[i]); } else { for (int i = this.posOld; i < this.posNew; i++) sb.append(Diff.this.original[i]); } return sb.toString(); } /** * @return the string this diff-part cares about in typical diff-notation: *
* <dl>
* <dt>unchanged</dt><dd>"&nbsp;&nbsp;STRING"</dd>
* <dt>added</dt><dd>"+ STRING"</dd>
* <dt>deleted</dt><dd>"- STRING"</dd>
* </dl>
*/ @Override public String toString() { return ((this.action == UNCHANGED) ? " " : (this.action == ADDED) ? "+" : "-") + " " + getString(); } } public static String toHTML(final Diff[] diffs) { final StringBuilder sb = new StringBuilder(diffs.length * 60); Diff.Part[] ps; for (Diff d : diffs) { sb.append("

\n"); ps = d.getParts(); for (Diff.Part part :ps) { sb.append("").append(CharacterCoding.unicode2html(part.getString(), true).replaceAll("\n", "
")); sb.append(""); } sb.append("

"); } return sb.toString(); } }