Skip to content

Commit ec58206

Browse files
committed
Refactor HTML parser class
1 parent 6818c05 commit ec58206

1 file changed

Lines changed: 15 additions & 21 deletions

File tree

lib/webmention/parsers/html_parser.rb

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,8 @@ class HtmlParser < Parser
3030
# @return [Array<String>] An array of absolute URLs.
3131
def results
3232
@results ||=
33-
UrlExtractor.extract(*url_attributes)
34-
.map { |url| response_uri.join(url).to_s }
35-
.grep(Parser::URI_REGEXP)
33+
extract_urls_from(*url_attributes).map { |url| response_uri.join(url).to_s }
34+
.grep(Parser::URI_REGEXP)
3635
end
3736

3837
private
@@ -42,6 +41,19 @@ def doc
4241
Nokogiri.HTML5(response_body)
4342
end
4443

44+
# @param *attributes [Array<Nokogiri::XML::Attr>]
45+
#
46+
# @return [Array<String>]
47+
def extract_urls_from(*attributes)
48+
attributes.flat_map do |attribute|
49+
if attribute.name == 'srcset'
50+
attribute.value.split(',').map { |value| value.strip.match(/^\S+/).to_s }
51+
else
52+
attribute.value
53+
end
54+
end
55+
end
56+
4557
# @return [Nokogiri::XML::Element]
4658
def root_node
4759
doc.at_css(*ROOT_NODE_SELECTORS_ARRAY)
@@ -56,24 +68,6 @@ def url_attributes
5668
def url_nodes
5769
root_node.css(*CSS_SELECTORS_ARRAY)
5870
end
59-
60-
module UrlExtractor
61-
# @param *attributes [Array<Nokogiri::XML::Attr>]
62-
#
63-
# @return [Array<String>]
64-
def self.extract(*attributes)
65-
attributes.flat_map { |attribute| values_from(attribute) }
66-
end
67-
68-
# @param attribute [Nokogiri::XML::Attr]
69-
#
70-
# @return [String, Array<String>]
71-
def self.values_from(attribute)
72-
return attribute.value unless attribute.name == 'srcset'
73-
74-
attribute.value.split(',').map { |value| value.strip.match(/^\S+/).to_s }
75-
end
76-
end
7771
end
7872
end
7973
end

0 commit comments

Comments
 (0)