@@ -50,6 +50,9 @@ def handle_endtag(self, tag):
5050
5151 # all other markup
5252
53+ def handle_bogus_comment (self , data ):
54+ self .append (("bogus comment" , data ))
55+
5356 def handle_comment (self , data ):
5457 self .append (("comment" , data ))
5558
@@ -606,14 +609,14 @@ def test_starttag_junk_chars(self):
606609 self ._run_check ("< " , [('data' , '< ' )])
607610 self ._run_check ("</>" , [])
608611 self ._run_check ("<$>" , [('data' , '<$>' )])
609- self ._run_check ("</$>" , [('comment' , '$' )])
612+ self ._run_check ("</$>" , [('bogus comment' , '$' )])
610613 self ._run_check ("</" , [('data' , '</' )])
611614 self ._run_check ("</a" , [])
612- self ._run_check ("</ a>" , [('comment' , ' a' )])
613- self ._run_check ("</ a" , [('comment' , ' a' )])
615+ self ._run_check ("</ a>" , [('bogus comment' , ' a' )])
616+ self ._run_check ("</ a" , [('bogus comment' , ' a' )])
614617 self ._run_check ("<a<a>" , [('starttag' , 'a<a' , [])])
615618 self ._run_check ("</a<a>" , [('endtag' , 'a<a' )])
616- self ._run_check ("<!" , [('comment' , '' )])
619+ self ._run_check ("<!" , [('bogus comment' , '' )])
617620 self ._run_check ("<a" , [])
618621 self ._run_check ("<a foo='bar'" , [])
619622 self ._run_check ("<a foo='bar" , [])
@@ -666,7 +669,7 @@ def test_declaration_junk_chars(self):
666669
667670 def test_illegal_declarations (self ):
668671 self ._run_check ('<!spacer type="block" height="25">' ,
669- [('comment' , 'spacer type="block" height="25"' )])
672+ [('bogus comment' , 'spacer type="block" height="25"' )])
670673
671674 def test_invalid_end_tags (self ):
672675 # A collection of broken end tags. <br> is used as separator.
@@ -681,8 +684,8 @@ def test_invalid_end_tags(self):
681684 # text and attributes are discarded
682685 ('endtag' , 'div' ),
683686 ('starttag' , 'br' , []),
684- # comment because the first char after </ is not a-zA-Z
685- ('comment' , '<h4' ),
687+ # bogus comment because the first char after </ is not a-zA-Z
688+ ('bogus comment' , '<h4' ),
686689 ('starttag' , 'br' , []),
687690 # attributes are discarded
688691 ('endtag' , 'li' ),
@@ -771,9 +774,9 @@ def test_eof_in_comments(self):
771774
772775 def test_eof_in_declarations (self ):
773776 data = [
774- ('<!' , [('comment' , '' )]),
775- ('<!-' , [('comment' , '-' )]),
776- ('<![' , [('comment' , '[' )]),
777+ ('<!' , [('bogus comment' , '' )]),
778+ ('<!-' , [('bogus comment' , '-' )]),
779+ ('<![' , [('bogus comment' , '[' )]),
777780 ('<!DOCTYPE' , [('decl' , 'DOCTYPE' )]),
778781 ('<!DOCTYPE ' , [('decl' , 'DOCTYPE ' )]),
779782 ('<!DOCTYPE html' , [('decl' , 'DOCTYPE html' )]),
@@ -791,7 +794,7 @@ def test_eof_in_cdata(self, content):
791794 self ._run_check ('<![CDATA[' + content ,
792795 [('unknown decl' , 'CDATA[' + content )])
793796 self ._run_check ('<![CDATA[' + content ,
794- [('comment' , '![CDATA[' + content )],
797+ [('bogus comment' , '![CDATA[' + content )],
795798 collector = EventCollector (autocdata = True ))
796799 self ._run_check ('<svg><text y="100"><![CDATA[' + content ,
797800 [('starttag' , 'svg' , []),
@@ -814,19 +817,19 @@ def test_bogus_comments(self):
814817 '<![CDATA]]>' # required '[' after CDATA
815818 )
816819 expected = [
817- ('comment' , 'ELEMENT br EMPTY' ),
818- ('comment' , ' not really a comment ' ),
819- ('comment' , ' not a comment either --' ),
820- ('comment' , ' -- close enough --' ),
821- ('comment' , '' ),
822- ('comment' , '<-- this was an empty comment' ),
823- ('comment' , '!! another bogus comment !!!' ),
824- ('comment' , '[with square brackets]!' ),
825- ('comment' , '[\n multiline\n bogusness\n ]!' ),
826- ('comment' , '[more brackets]-[and a hyphen]!' ),
827- ('comment' , '[cdata[should be uppercase]]' ),
828- ('comment' , '[CDATA [whitespaces are not ignored]]' ),
829- ('comment' , '[CDATA]]' ),
820+ ('bogus comment' , 'ELEMENT br EMPTY' ),
821+ ('bogus comment' , ' not really a comment ' ),
822+ ('bogus comment' , ' not a comment either --' ),
823+ ('bogus comment' , ' -- close enough --' ),
824+ ('bogus comment' , '' ),
825+ ('bogus comment' , '<-- this was an empty comment' ),
826+ ('bogus comment' , '!! another bogus comment !!!' ),
827+ ('bogus comment' , '[with square brackets]!' ),
828+ ('bogus comment' , '[\n multiline\n bogusness\n ]!' ),
829+ ('bogus comment' , '[more brackets]-[and a hyphen]!' ),
830+ ('bogus comment' , '[cdata[should be uppercase]]' ),
831+ ('bogus comment' , '[CDATA [whitespaces are not ignored]]' ),
832+ ('bogus comment' , '[CDATA]]' ),
830833 ]
831834 self ._run_check (html , expected )
832835
@@ -840,23 +843,23 @@ def test_broken_condcoms(self):
840843 '<![if !ie 6]><b>foo</b><![endif]>'
841844 '<![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>' )
842845 expected = [
843- ('comment' , '[if !(IE)]' ),
846+ ('bogus comment' , '[if !(IE)]' ),
844847 ('data' , 'broken condcom' ),
845- ('comment' , '[endif]' ),
846- ('comment' , '[if ! IE]' ),
848+ ('bogus comment' , '[endif]' ),
849+ ('bogus comment' , '[if ! IE]' ),
847850 ('startendtag' , 'link' , [('href' , 'favicon.tiff' )]),
848- ('comment' , '[endif]' ),
849- ('comment' , '[if !IE 6]' ),
851+ ('bogus comment' , '[endif]' ),
852+ ('bogus comment' , '[if !IE 6]' ),
850853 ('startendtag' , 'img' , [('src' , 'firefox.png' )]),
851- ('comment' , '[endif]' ),
852- ('comment' , '[if !ie 6]' ),
854+ ('bogus comment' , '[endif]' ),
855+ ('bogus comment' , '[if !ie 6]' ),
853856 ('starttag' , 'b' , []),
854857 ('data' , 'foo' ),
855858 ('endtag' , 'b' ),
856- ('comment' , '[endif]' ),
857- ('comment' , '[if (!IE)|(lt IE 9)]' ),
859+ ('bogus comment' , '[endif]' ),
860+ ('bogus comment' , '[if (!IE)|(lt IE 9)]' ),
858861 ('startendtag' , 'img' , [('src' , 'mammoth.bmp' )]),
859- ('comment' , '[endif]' )
862+ ('bogus comment' , '[endif]' )
860863 ]
861864 self ._run_check (html , expected )
862865
@@ -896,14 +899,14 @@ def test_cdata_section(self):
896899 '<svg><text y="100"><![CDATA[foo<br>bar]]></text></svg>'
897900 '<![CDATA[foo<br>bar]]>' )
898901 expected = [
899- ('comment' , '[CDATA[foo<br' ),
902+ ('bogus comment' , '[CDATA[foo<br' ),
900903 ('data' , 'bar]]>' ),
901904 ('starttag' , 'svg' , []),
902905 ('starttag' , 'text' , [('y' , '100' )]),
903906 ('unknown decl' , 'CDATA[foo<br>bar' ),
904907 ('endtag' , 'text' ),
905908 ('endtag' , 'svg' ),
906- ('comment' , '[CDATA[foo<br' ),
909+ ('bogus comment' , '[CDATA[foo<br' ),
907910 ('data' , 'bar]]>' ),
908911 ]
909912 self ._run_check (html , expected , collector = EventCollector (autocdata = True ))
0 commit comments