| px | top | add code | search | signup | login | help |
<?php
#
# DIFF - September 2002 by Joris van Eil
#
# email/MSN: jovebogus@hotmail.com
# ICQ: 15419406
# AIM: JvanEil
# Yahoo: jovebogus
#
# INPUT: 2 strings between which you want to find the differences
# OUTPUT: array with 2 elements:
# - ["s"] element = source element. This is an array which has an element for each
# character in the source string that was matched to a character in the destination
# string. The element for a matched character is an array consisting of 2 elements:
# - the ["a"] element contains the index of the character in the destination string
# to which this character was matched.
# - the ["l"] element is the length of the matched character sequence of which
# these linked characters are a part.
# - ["d"] element = destination element. This array is similar to the source array,
# but works the other way around. Holds an ["a"]/["l"] element for each character in
# the destination string that was matched to a character in the source string.
#
# EXAMPLE: compare these two strings:
#
# $source = "i like programming very much";
# $destination = "i like swimming very much";
#
# $result = diff($source, $destination);
#
# Now let's look at the source array ($result["s"]). It contains elements [0] through [6] and [13]
# through [27]. The ["l"] element of [0] through [6] is 7, which is correct, because
# source characters 0 through 6 are linked to destination characters 0 through 6,
# which are both uninterrupted sequences of 7 characters: "i like "
# elements [13] through [27] are linked to characters 10 to 24 in the destination
# string. This is also an uninterrupted sequence of characters, this time 15
# characters in length (as seen in the ["l"] part of these elements):
# "mming very much".
# When we do a check which of the characters in the source string do not
# have a corresponding element in the source array ($result["s"]), we know which
# characters weren't matched, thus what text was left out.
# These are characters 7 through 12 ("progra"). We can do the same for the destination
# string to see what text was added. These are characters 7 through 9: "swi".
#
# You can use this function in your content management system for an "undo" function when
# editing parts of the textual content of your site. That way, you don't have to
# back up the entire document each time you change 1 digit, but only backup the array of changes.
#
# REMEMBER: the output of this function is raw and large. Depending on what you want to
# do with it, you can shape the output in the "if($sub==0)" clause of the function.
# If you want to do the undo thing i mentioned above, you could let it output an array like this
# [0] -> take character 0 through 6 of the source string
# [1] -> insert text "swi"
# [2] -> take character 13 through 27 of the source string
#
# See how little data you need to reconstruct the new string from the backup?
#
#
# HAVE FUN!!!!!!!!
#
#
# diff() - character-level comparison of two strings.
#
# Parameters:
#   $s1, $s2    source and destination string.
#   $s, $e      first/last source index of the range to examine. For a normal
#               call ($sub == 0) $e is always reset to the last index of $s1.
#   $sourcedep  link table for the source string (normally left empty).
#   $destdep    link table for the destination string (normally left empty).
#   $sub        historical "this is a recursive call" flag. When non-zero no
#               result is assembled and null is returned.
#
# Returns (when $sub == 0) an array with two elements:
#   ["s"]  source index      => array("a" => destination index, "l" => run length)
#   ["d"]  destination index => array("a" => source index,      "l" => run length)
# Both tables are sorted by index and only contain matched characters.
# Offsets are byte offsets (strlen/substr/strpos are byte-wise).
#
function diff($s1, $s2, $s = 0, $e = 0, $sourcedep = array(), $destdep = array(), $sub = 0)
{
    $s1 = (string) $s1;
    $s2 = (string) $s2;

    if ($sub == 0) {
        $e = strlen($s1) - 1;
    }

    # Keep the range inside the source string; an empty range (e.g. empty
    # $s1) simply produces no links instead of probing with empty needles.
    $s = max(0, (int) $s);
    $e = min((int) $e, strlen($s1) - 1);

    if ($s <= $e && $s2 !== '') {
        _diff_scan($s1, $s2, $s, $e, $sourcedep, $destdep);
    }

    if ($sub != 0) {
        return null;
    }

    ksort($sourcedep);
    ksort($destdep);

    # Links that were superseded by a longer match were set to null; drop them.
    foreach ($sourcedep as $key => $val) {
        if (!is_array($val)) {
            unset($sourcedep[$key]);
        }
    }
    foreach ($destdep as $key => $val) {
        if (!is_array($val)) {
            unset($destdep[$key]);
        }
    }

    $result = array();
    $result["s"] = $sourcedep;
    $result["d"] = $destdep;
    return $result;
}

# Recursive worker for diff(): examines source range $s..$e (inclusive) and
# records links in the two tables, which are passed by reference.
function _diff_scan($s1, $s2, $s, $e, &$sourcedep, &$destdep)
{
    $span = $e - $s + 1;

    if (strpos($s2, substr($s1, $s, $span)) === false) {
        # Range does not occur in the destination: split it in two halves
        # (left half gets the extra character) and try each half on its own.
        if ($span >= 2) {
            $mid = $s + (int) round($span / 2);
            _diff_scan($s1, $s2, $s, $mid - 1, $sourcedep, $destdep);
            _diff_scan($s1, $s2, $mid, $e, $sourcedep, $destdep);
        }
        return;
    }

    # Grow the match to the left for as long as it still occurs in $s2 ...
    $b = $s;
    while ($b > 0 && strpos($s2, substr($s1, $b - 1, $e - $b + 2)) !== false) {
        $b--;
    }

    # ... then to the right, up to and including the last source character.
    $last = strlen($s1) - 1;
    $c = $e;
    while ($c < $last && strpos($s2, substr($s1, $b, $c - $b + 2)) !== false) {
        $c++;
    }

    $len = $c - $b + 1;
    # Position of the (first) occurrence of the grown match in $s2; it is the
    # same for every character of the run, so compute it once.
    $pos = strpos($s2, substr($s1, $b, $len));

    for ($i = $b, $j = $pos; $i <= $c; $i++, $j++) {
        # A character is only re-linked when this run is longer than the runs
        # both ends currently belong to.
        if (_diff_len($destdep, $j) >= $len || _diff_len($sourcedep, $i) >= $len) {
            continue;
        }

        # Release the previous partner of either end so the two tables never
        # disagree about who is linked to whom.
        if (isset($destdep[$j]['a']) && $destdep[$j]['a'] != $i) {
            $sourcedep[$destdep[$j]['a']] = null;
        }
        if (isset($sourcedep[$i]['a']) && $sourcedep[$i]['a'] != $j) {
            $destdep[$sourcedep[$i]['a']] = null;
        }

        $sourcedep[$i] = array("a" => $j, "l" => $len);
        $destdep[$j] = array("a" => $i, "l" => $len);
    }
}

# Run length currently recorded for $index in a link table, 0 when unlinked.
function _diff_len($table, $index)
{
    return isset($table[$index]['l']) ? $table[$index]['l'] : 0;
}
?>
Comments or questions?
PX is running PHP 5.2.11
Thanks to Miranda Productions for hosting and bandwidth.
Use of any code from PX is at your own risk.