个性化阅读
专注于IT技术分析

算法设计:如何实现通配符模式匹配?

本文概述

给定文本和通配符模式, 请实现通配符模式匹配算法, 以查找通配符模式是否与文本匹配。匹配项应覆盖整个文本(而非部分文本)。

通配符模式可以包含字符”?”和” *”

‘?’–匹配任何单个字符

‘*’–匹配任何字符序列(包括空序列)

例如,

Text = "baaabab", Pattern = "*****ba*****ab", output : true
Pattern = "baaa?ab", output : true
Pattern = "ba*a?", output : true
Pattern = "a*ab", output : false
通配符模式匹配

通配符模式中每次出现的”?”字符都可以用任何其他字符替换, 而每次出现” *”时都可以使用一系列字符, 以使通配符模式在替换后变得与输入字符串相同。

让我们考虑一下模式中的任何字符。

情况1:字符为” *”

这里出现两种情况

  1. 我们可以忽略” *”字符, 然后移至图案中的下一个字符。
  2. ” *”字符与”文本”中的一个或多个字符匹配。在这里, 我们将移至字符串中的下一个字符。

情况2:字符是”?”

我们可以忽略文本中的当前字符, 而移至图案和文本中的下一个字符。

情况3:字符不是通配符

如果Text中的当前字符与Pattern中的当前字符匹配, 我们将移至Pattern和Text中的下一个字符。如果它们不匹配, 则通配符模式和文本不匹配。

我们可以使用动态编程来解决此问题–

T [i] [j]

如果给定字符串中的前i个字符与pattern的前j个字符匹配, 则为true。

推荐:请在”

实践

首先, 在继续解决方案之前。

DP初始化: 

// both text and pattern are null
T[0][0] = true; 

// pattern is null
T[i][0] = false; 

// text is null
T[0][j] = T[0][j - 1] if pattern[j – 1] is '*'

DP关系: 

// If current characters match, result is same as 
// result for lengths minus one. Characters match
// in two cases:
// a) If pattern character is '?' then it matches  
//    with any character of text. 
// b) If current characters in both match
if ( pattern[j – 1] == ‘?’) || 
     (pattern[j – 1] == text[i - 1])
    T[i][j] = T[i-1][j-1]   
 
// If we encounter ‘*’, two choices are possible-
// a) We ignore ‘*’ character and move to next 
//    character in the pattern, i.e., ‘*’ 
//    indicates an empty sequence.
// b) '*' character matches with ith character in
//     input 
else if (pattern[j – 1] == ‘*’)
    T[i][j] = T[i][j-1] || T[i-1][j]  

else // if (pattern[j – 1] != text[i - 1])
    T[i][j]  = false

下面是上述动态编程方法的实现。

C ++

// C++ program to implement wildcard
// pattern matching algorithm
#include <bits/stdc++.h>
using namespace std;
 
// Function that matches input str with
// given wildcard pattern
bool strmatch( char str[], char pattern[], int n, int m)
{
     // empty pattern can only match with
     // empty string
     if (m == 0)
         return (n == 0);
 
     // lookup table for storing results of
     // subproblems
     bool lookup[n + 1][m + 1];
 
     // initailze lookup table to false
     memset (lookup, false , sizeof (lookup));
 
     // empty pattern can match with empty string
     lookup[0][0] = true ;
 
     // Only '*' can match with empty string
     for ( int j = 1; j <= m; j++)
         if (pattern[j - 1] == '*' )
             lookup[0][j] = lookup[0][j - 1];
 
     // fill the table in bottom-up fashion
     for ( int i = 1; i <= n; i++) {
         for ( int j = 1; j <= m; j++) {
             // Two cases if we see a '*'
             // a) We ignore ‘*’ character and move
             //    to next  character in the pattern, //     i.e., ‘*’ indicates an empty sequence.
             // b) '*' character matches with ith
             //     character in input
             if (pattern[j - 1] == '*' )
                 lookup[i][j]
                     = lookup[i][j - 1] || lookup[i - 1][j];
 
             // Current characters are considered as
             // matching in two cases
             // (a) current character of pattern is '?'
             // (b) characters actually match
             else if (pattern[j - 1] == '?'
                      || str[i - 1] == pattern[j - 1])
                 lookup[i][j] = lookup[i - 1][j - 1];
 
             // If characters don't match
             else
                 lookup[i][j] = false ;
         }
     }
 
     return lookup[n][m];
}
 
int main()
{
     char str[] = "baaabab" ;
     char pattern[] = "*****ba*****ab" ;
     // char pattern[] = "ba*****ab";
     // char pattern[] = "ba*ab";
     // char pattern[] = "a*ab";
     // char pattern[] = "a*****ab";
     // char pattern[] = "*a*****ab";
     // char pattern[] = "ba*ab****";
     // char pattern[] = "****";
     // char pattern[] = "*";
     // char pattern[] = "aa?ab";
     // char pattern[] = "b*b";
     // char pattern[] = "a*a";
     // char pattern[] = "baaabab";
     // char pattern[] = "?baaabab";
     // char pattern[] = "*baaaba*";
 
     if (strmatch(str, pattern, strlen (str), strlen (pattern)))
         cout << "Yes" << endl;
     else
         cout << "No" << endl;
 
     return 0;
}

Java

// Java program to implement wildcard
// pattern matching algorithm
import java.util.Arrays;
public class GFG {
 
     // Function that matches input str with
     // given wildcard pattern
     static boolean strmatch(String str, String pattern, int n, int m)
     {
         // empty pattern can only match with
         // empty string
         if (m == 0 )
             return (n == 0 );
 
         // lookup table for storing results of
         // subproblems
         boolean [][] lookup = new boolean [n + 1 ][m + 1 ];
 
         // initailze lookup table to false
         for ( int i = 0 ; i < n + 1 ; i++)
             Arrays.fill(lookup[i], false );
 
         // empty pattern can match with empty string
         lookup[ 0 ][ 0 ] = true ;
 
         // Only '*' can match with empty string
         for ( int j = 1 ; j <= m; j++)
             if (pattern.charAt(j - 1 ) == '*' )
                 lookup[ 0 ][j] = lookup[ 0 ][j - 1 ];
 
         // fill the table in bottom-up fashion
         for ( int i = 1 ; i <= n; i++)
         {
             for ( int j = 1 ; j <= m; j++)
             {
                 // Two cases if we see a '*'
                 // a) We ignore '*'' character and move
                 //    to next  character in the pattern, //     i.e., '*' indicates an empty
                 //     sequence.
                 // b) '*' character matches with ith
                 //     character in input
                 if (pattern.charAt(j - 1 ) == '*' )
                     lookup[i][j] = lookup[i][j - 1 ]
                                    || lookup[i - 1 ][j];
 
                 // Current characters are considered as
                 // matching in two cases
                 // (a) current character of pattern is '?'
                 // (b) characters actually match
                 else if (pattern.charAt(j - 1 ) == '?'
                          || str.charAt(i - 1 )
                                 == pattern.charAt(j - 1 ))
                     lookup[i][j] = lookup[i - 1 ][j - 1 ];
 
                 // If characters don't match
                 else
                     lookup[i][j] = false ;
             }
         }
 
         return lookup[n][m];
     }
 
   
     // Driver code
     public static void main(String args[])
     {
         String str = "baaabab" ;
         String pattern = "*****ba*****ab" ;
         // String pattern = "ba*****ab";
         // String pattern = "ba*ab";
         // String pattern = "a*ab";
         // String pattern = "a*****ab";
         // String pattern = "*a*****ab";
         // String pattern = "ba*ab****";
         // String pattern = "****";
         // String pattern = "*";
         // String pattern = "aa?ab";
         // String pattern = "b*b";
         // String pattern = "a*a";
         // String pattern = "baaabab";
         // String pattern = "?baaabab";
         // String pattern = "*baaaba*";
 
         if (strmatch(str, pattern, str.length(), pattern.length()))
             System.out.println( "Yes" );
         else
             System.out.println( "No" );
     }
}
// This code is contributed by Sumit Ghosh

Python3

# Python program to implement wildcard
# pattern matching algorithm
 
# Function that matches input strr with
# given wildcard pattern
 
 
def strrmatch(strr, pattern, n, m):
 
     # empty pattern can only match with
     # empty strring
     if (m = = 0 ):
         return (n = = 0 )
 
     # lookup table for storing results of
     # subproblems
     lookup = [[ False for i in range (m + 1 )] for j in range (n + 1 )]
 
     # empty pattern can match with empty strring
     lookup[ 0 ][ 0 ] = True
 
     # Only '*' can match with empty strring
     for j in range ( 1 , m + 1 ):
         if (pattern[j - 1 ] = = '*' ):
             lookup[ 0 ][j] = lookup[ 0 ][j - 1 ]
 
     # fill the table in bottom-up fashion
     for i in range ( 1 , n + 1 ):
         for j in range ( 1 , m + 1 ):
 
             # Two cases if we see a '*'
             # a) We ignore ‘*’ character and move
             # to next character in the pattern, # i.e., ‘*’ indicates an empty sequence.
             # b) '*' character matches with ith
             # character in input
             if (pattern[j - 1 ] = = '*' ):
                 lookup[i][j] = lookup[i][j - 1 ] or lookup[i - 1 ][j]
 
             # Current characters are considered as
             # matching in two cases
             # (a) current character of pattern is '?'
             # (b) characters actually match
             elif (pattern[j - 1 ] = = '?' or strr[i - 1 ] = = pattern[j - 1 ]):
                 lookup[i][j] = lookup[i - 1 ][j - 1 ]
 
             # If characters don't match
             else :
                 lookup[i][j] = False
 
     return lookup[n][m]
 
# Driver code
 
 
strr = "baaabab"
pattern = "*****ba*****ab"
# char pattern[] = "ba*****ab"
# char pattern[] = "ba*ab"
# char pattern[] = "a*ab"
# char pattern[] = "a*****ab"
# char pattern[] = "*a*****ab"
# char pattern[] = "ba*ab****"
# char pattern[] = "****"
# char pattern[] = "*"
# char pattern[] = "aa?ab"
# char pattern[] = "b*b"
# char pattern[] = "a*a"
# char pattern[] = "baaabab"
# char pattern[] = "?baaabab"
# char pattern[] = "*baaaba*"
 
if (strrmatch(strr, pattern, len (strr), len (pattern))):
     print ( "Yes" )
else :
     print ( "No" )
 
# This code is contributed by shubhamsingh10

C#

// C# program to implement wildcard
// pattern matching algorithm
using System;
 
class GFG {
 
     // Function that matches input str with
     // given wildcard pattern
     static Boolean strmatch(String str, String pattern, int n, int m)
     {
         // empty pattern can only match with
         // empty string
         if (m == 0)
             return (n == 0);
 
         // lookup table for storing results of
         // subproblems
         Boolean[, ] lookup = new Boolean[n + 1, m + 1];
 
         // initailze lookup table to false
         for ( int i = 0; i < n + 1; i++)
             for ( int j = 0; j < m + 1; j++)
                 lookup[i, j] = false ;
 
         // empty pattern can match with
         // empty string
         lookup[0, 0] = true ;
 
         // Only '*' can match with empty string
         for ( int j = 1; j <= m; j++)
             if (pattern[j - 1] == '*' )
                 lookup[0, j] = lookup[0, j - 1];
 
         // fill the table in bottom-up fashion
         for ( int i = 1; i <= n; i++) {
             for ( int j = 1; j <= m; j++) {
                 // Two cases if we see a '*'
                 // a) We ignore '*'' character and move
                 // to next character in the pattern, //     i.e., '*' indicates an empty
                 //     sequence.
                 // b) '*' character matches with ith
                 //     character in input
                 if (pattern[j - 1] == '*' )
                     lookup[i, j] = lookup[i, j - 1]
                                    || lookup[i - 1, j];
 
                 // Current characters are considered as
                 // matching in two cases
                 // (a) current character of pattern is '?'
                 // (b) characters actually match
                 else if (pattern[j - 1] == '?'
                          || str[i - 1] == pattern[j - 1])
                     lookup[i, j] = lookup[i - 1, j - 1];
 
                 // If characters don't match
                 else
                     lookup[i, j] = false ;
             }
         }
         return lookup[n, m];
     }
 
     // Driver Code
     public static void Main(String[] args)
     {
         String str = "baaabab" ;
         String pattern = "*****ba*****ab" ;
         // String pattern = "ba*****ab";
         // String pattern = "ba*ab";
         // String pattern = "a*ab";
         // String pattern = "a*****ab";
         // String pattern = "*a*****ab";
         // String pattern = "ba*ab****";
         // String pattern = "****";
         // String pattern = "*";
         // String pattern = "aa?ab";
         // String pattern = "b*b";
         // String pattern = "a*a";
         // String pattern = "baaabab";
         // String pattern = "?baaabab";
         // String pattern = "*baaaba*";
 
         if (strmatch(str, pattern, str.Length, pattern.Length))
             Console.WriteLine( "Yes" );
         else
             Console.WriteLine( "No" );
     }
}
 
// This code is contributed by Rajput-Ji

输出如下

Yes

时间复杂度:

O(米×n)

辅助空间:

O(米×n)

DP备忘录解决方案:-

C ++

// C++ program to implement wildcard
// pattern matching algorithm
#include <bits/stdc++.h>
using namespace std;
 
// Function that matches input str with
// given wildcard pattern
vector<vector< int > > dp;
int finding(string& s, string& p, int n, int m)
{
     // return 1 if n and m are negative
     if (n < 0 && m < 0)
         return 1;
   
     // return 0 if m is negative
     if (m < 0)
         return 0;
   
     // return n if n is negative
     if (n < 0)
     {
         // while m is positve
         while (m >= 0)
         {
             if (p[m] != '*' )
                 return 0;
             m--;
         }
         return 1;
     }
    
     // if dp state is not visited
     if (dp[n][m] == -1)
     {
         if (p[m] == '*' )
         {
             return dp[n][m] = finding(s, p, n - 1, m)
                               || finding(s, p, n, m - 1);
         }
         else
         {
             if (p[m] != s[n] && p[m] != '?' )
                 return dp[n][m] = 0;
             else
                 return dp[n][m]
                        = finding(s, p, n - 1, m - 1);
         }
     }
   
     // return dp[n][m] if dp state is previsited
     return dp[n][m];
}
 
 
bool isMatch(string s, string p)
{
     dp.clear();
     
     // resize the dp array
     dp.resize(s.size() + 1, vector< int >(p.size() + 1, -1));
     return dp[s.size()][p.size()]
            = finding(s, p, s.size() - 1, p.size() - 1);
}
 
// Driver code
int main()
{
     string str = "baaabab" ;
     string pattern = "*****ba*****ab" ;
     // char pattern[] = "ba*****ab";
     // char pattern[] = "ba*ab";
     // char pattern[] = "a*ab";
     // char pattern[] = "a*****ab";
     // char pattern[] = "*a*****ab";
     // char pattern[] = "ba*ab****";
     // char pattern[] = "****";
     // char pattern[] = "*";
     // char pattern[] = "aa?ab";
     // char pattern[] = "b*b";
     // char pattern[] = "a*a";
     // char pattern[] = "baaabab";
     // char pattern[] = "?baaabab";
     // char pattern[] = "*baaaba*";
 
     if (isMatch(str, pattern))
         cout << "Yes" << endl;
     else
         cout << "No" << endl;
 
     return 0;
}

输出如下

Yes

时间复杂度:O(m x n)。

辅助空间:O(m×n)。

进一步改进:

通过仅使用最后一行的结果, 可以提高空间的复杂性。

另一个改进是, 你将模式中的连续” *”合并为单个” *”, 因为它们含义相同。例如, 对于模式” ***** ba ***** ab”, 如果我们合并连续的星星, 则结果字符串将为” * ba * ab”。因此, m的值从14减少到6。

如果发现任何不正确的地方, 或者想分享有关上述主题的更多信息, 请发表评论。

赞(0) 打赏
未经允许不得转载:srcmini » 算法设计:如何实现通配符模式匹配?
分享到: 更多 (0)

评论 抢沙发

评论前必须登录!

 

觉得文章有用就打赏一下文章作者

微信扫一扫打赏