工作上的LCS算法
帮之前同事解决他公司的一个bug,顺带优化了一个feature。 这个优化的功能类似于git的diff,要找到一行文字有变化的部分。 其实就是LCS算法。
在复习下LCS算法,参考了geeksforgeeks的代码1。
- 定义
LCS Problem Statement: Given two sequences, find the length of longest subsequence present in both of them. A subsequence is a sequence that appears in the same relative order, but not necessarily contiguous. For example, “abc”, “abg”, “bdf”, “aeg”, ‘”acefg”, .. etc are subsequences of “abcdefg”.
- 分析
假设有两个字符串
X = ‘AGGTAB’ 、 Y = ‘GXTXAYB’
长度用 m,n 表示
用 L(X, Y) 表示 lcs 的长度。 有:
1)如果S1、S2最后一个字符相同,则:
L(X[0..m-1], Y[0..n-1]) = 1 + L(X[0..m-2], Y[0..n-2])
2)如果S1、S2最后一个字符不相同,则:
L(X[0..m-1], Y[0..n-1]) = MAX ( L(X[0..m-2], Y[0..n-1]), L(X[0..m-1], Y[0..n-2]) )
可以实现代码如下:
/* A Naive recursive implementation of LCS problem */
#include <bits/stdc++.h>
using namespace std;
int max(int a, int b);
/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
int lcs( char *X, char *Y, int m, int n )
{
if (m == 0 || n == 0)
return 0;
if (X[m-1] == Y[n-1]) {
return 1 + lcs(X, Y, m-1, n-1);
}
else
return max(lcs(X, Y, m, n-1), lcs(X, Y, m-1, n));
}
/* Utility function to get max of 2 integers */
int max(int a, int b)
{
return (a > b)? a : b;
}
/* Driver code */
int main()
{
char X[] = "AGGTAB";
char Y[] = "GXTXAYB";
int m = strlen(X);
int n = strlen(Y);
cout<<"Length of LCS is "<< lcs( X, Y, m, n ) ;
return 0;
}
时间复杂度: 2^n
- 优化
在上述实现中,有重复计算的情况,
lcs("AXYT", "AYZX")
/
lcs("AXY", "AYZX") lcs("AXYT", "AYZ")
/ /
lcs("AX", "AYZX") lcs("AXY", "AYZ") lcs("AXY", "AYZ") lcs("AXYT", "AY")
可以优化如下代码:
/* Dynamic Programming C++ implementation of LCS problem */
#include<bits/stdc++.h>
using namespace std;
int max(int a, int b);
/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
int lcs( char *X, char *Y, int m, int n )
{
int L[m + 1][n + 1];
int i, j;
/* Following steps build L[m+1][n+1] in
bottom up fashion. Note that L[i][j]
contains length of LCS of X[0..i-1]
and Y[0..j-1] */
for (i = 0; i <= m; i++)
{
for (j = 0; j <= n; j++)
{
if (i == 0 || j == 0)
L[i][j] = 0;
else if (X[i - 1] == Y[j - 1])
L[i][j] = L[i - 1][j - 1] + 1;
else
L[i][j] = max(L[i - 1][j], L[i][j - 1]);
}
}
/* L[m][n] contains length of LCS
for X[0..n-1] and Y[0..m-1] */
return L[m][n];
}
/* Utility function to get max of 2 integers */
int max(int a, int b)
{
return (a > b)? a : b;
}
// Driver Code
int main()
{
char X[] = "AGGTAB";
char Y[] = "GXTXAYB";
int m = strlen(X);
int n = strlen(Y);
cout << "Length of LCS is "
<< lcs( X, Y, m, n );
return 0;
}
// This code is contributed by rathbhupendra
时间复杂度为:O(mn)
下面打印出来 lcs、长度、及index
/* Dynamic Programming implementation of LCS problem */
#include<iostream>
#include<cstring>
#include<cstdlib>
using namespace std;
/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
void lcs( char *X, char *Y, int m, int n )
{
int L[m+1][n+1];
/* Following steps build L[m+1][n+1] in bottom up fashion. Note
that L[i][j] contains length of LCS of X[0..i-1] and Y[0..j-1] */
for (int i=0; i<=m; i++)
{
for (int j=0; j<=n; j++)
{
if (i == 0 || j == 0)
L[i][j] = 0;
else if (X[i-1] == Y[j-1])
L[i][j] = L[i-1][j-1] + 1;
else
L[i][j] = max(L[i-1][j], L[i][j-1]);
}
}
// Following code is used to print LCS
int index = L[m][n];
// Create a character array to store the lcs string
char lcs[index+1];
lcs[index] = '\0'; // Set the terminating character
// Start from the right-most-bottom-most corner and
// one by one store characters in lcs[]
int i = m, j = n;
while (i > 0 && j > 0)
{
// If current character in X[] and Y are same, then
// current character is part of LCS
if (X[i-1] == Y[j-1])
{
lcs[index-1] = X[i-1]; // Put current character in result
i--; j--; index--; // reduce values of i, j and index
// 分别打印 index
std::cout << "i: " << i << std::endl;
std::cout << "j: " << j << std::endl;
}
// If not same, then find the larger of two and
// go in the direction of larger value
else if (L[i-1][j] > L[i][j-1])
i--;
else
j--;
}
// Print the lcs
cout << "LCS of " << X << " and " << Y << " is " << lcs;
}
/* Driver program to test above function */
int main()
{
char X[] = "AGGTAB";
char Y[] = "GXTXAYB";
int m = strlen(X);
int n = strlen(Y);
lcs(X, Y, m, n);
return 0;
}
到这里,功能完成了。 更详细的可以看网上其他人的讲解2。