Regex to match opening Tags
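While fiddling around in RegexSnippets tonight, I concocted this regex to match opening tags and capture their various elements (it is split across lines here for readability; join the lines, or compile it with RegexOptions.IgnorePatternWhitespace, before using it):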
(?'openTag'<)
\s*?
(?'tagName'\??\w+)
(?:
\s*?
(?'attribName'\w+)
(?:\s*(?'attribSign'=)\s*)
(?'attribValue'
(?:\'[^\']*\'|\"[^\"]*\"|\w+)
)
)*
\s*?
(?'closeTag'[\/\?]?>)
...and here is a script that I wrote to test it with:
using System;
using System.Text.RegularExpressions;

namespace RegexSnippets.Tests
{
    /// <summary>
    /// Console harness that runs the opening-tag regex against a small sample
    /// of markup and prints every named capture of each match.
    /// </summary>
    public class Foo
    {
        // Sample markup: tags with a quoted attribute, with no attributes, and
        // with a mix of unquoted and quoted attributes.
        // Inside a verbatim (@"...") literal a double quote is written as "".
        private const string Source = @"<A href=""foo""><TD><A> <TABLE id=1 foo=""bar"">";

        // Opening-tag pattern, split into its three logical parts:
        //   1. '<' plus the tag name (an optional leading '?' allows "<?xml").
        //   2. Zero or more name=value attributes. The quantifier is '*', not '+',
        //      so that attribute-less tags such as <TD> match as well. The value
        //      may be single-quoted, double-quoted, or a bare word.
        //   3. The closing '>' optionally preceded by '/' or '?'.
        // In a verbatim literal the sequence \"" yields the regex text \" (an
        // escaped, i.e. literal, double quote).
        private const string Pattern =
            @"(?'openTag'<)\s*?(?'tagName'\??\w+)" +
            @"(?:\s*?(?'attribute'(?'attribName'\w+)(?:\s*(?'attribSign'=)\s*)(?'attribValue'(?:\'[^\']*\'|\""[^\""]*\""|\w+))))*" +
            @"\s*?(?'closeTag'[\/\?]?>)";

        /// <summary>
        /// Matches <see cref="Pattern"/> against <see cref="Source"/>, writes the
        /// captured parts of every tag to the console, then waits for a line of
        /// input so the console window stays open.
        /// </summary>
        public static void Main()
        {
            Regex re = new Regex(
                Pattern,
                RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.Singleline);

            for (Match m = re.Match(Source); m.Success; m = m.NextMatch())
            {
                Console.WriteLine("Open tag = {0}", m.Groups["openTag"].Value);
                Console.WriteLine("Tagname = {0}", m.Groups["tagName"].Value);

                Group names = m.Groups["attribName"];
                if (names.Success)
                {
                    // Each iteration of the attribute group captures exactly one
                    // name, one sign and one value, so the three capture
                    // collections are parallel and share an index.
                    Group signs = m.Groups["attribSign"];
                    Group values = m.Groups["attribValue"];

                    for (int i = 0; i < names.Captures.Count; i++)
                    {
                        Console.WriteLine("Attrib Name = {0}", names.Captures[i].Value);
                        Console.WriteLine("Attrib Sign = {0}", signs.Captures[i].Value);
                        Console.WriteLine("Attrib Value = {0}", values.Captures[i].Value);
                    }
                }

                Console.WriteLine("Close tag = {0}", m.Groups["closeTag"].Value);
                Console.Write("{0}**********************************{0}", Environment.NewLine);
            }

            Console.ReadLine();
        }
    }
}