@@ -30,9 +30,8 @@ class HtmlParser < Parser
3030 # @return [Array<String>] An array of absolute URLs.
def results
  @results ||= begin
    # Resolve each extracted URL against the response URI so that relative
    # references are turned into absolute URL strings.
    absolute_urls = extract_urls_from(*url_attributes).map do |raw_url|
      response_uri.join(raw_url).to_s
    end

    # Keep only the strings that match Parser::URI_REGEXP.
    absolute_urls.grep(Parser::URI_REGEXP)
  end
end
3736
3837 private
@@ -42,6 +41,19 @@ def doc
4241 Nokogiri . HTML5 ( response_body )
4342 end
4443
# Collects the raw URL strings carried by the given attributes.
#
# Every attribute contributes its value unchanged, except `srcset`, whose
# value is a comma-separated list of image candidates ("<url> <descriptor>")
# and contributes one URL per non-blank candidate.
#
# @param *attributes [Array<Nokogiri::XML::Attr>]
#
# @return [Array<String>]
def extract_urls_from(*attributes)
  attributes.flat_map do |attribute|
    next attribute.value unless attribute.name == 'srcset'

    # The URL is the leading run of non-whitespace characters of a candidate;
    # whatever follows is the width/density descriptor and is discarded.
    candidate_urls = attribute.value.split(',').map { |candidate| candidate.strip[/\A\S+/] }

    # A trailing or doubled comma produces a blank candidate, for which the
    # pattern yields nil. Drop those instead of emitting "": an empty string
    # is a valid relative reference that resolves to the base URL itself.
    candidate_urls.compact
  end
end
56+
4557 # @return [Nokogiri::XML::Element]
4658 def root_node
4759 doc . at_css ( *ROOT_NODE_SELECTORS_ARRAY )
@@ -56,24 +68,6 @@ def url_attributes
def url_nodes
  # Search beneath the root node only, passing each entry of
  # CSS_SELECTORS_ARRAY as a separate argument to `css`.
  search_scope = root_node
  search_scope.css(*CSS_SELECTORS_ARRAY)
end
59-
60- module UrlExtractor
61- # @param *attributes [Array<Nokogiri::XML::Attr>]
62- #
63- # @return [Array<String>]
64- def self . extract ( *attributes )
65- attributes . flat_map { |attribute | values_from ( attribute ) }
66- end
67-
68- # @param attribute [Nokogiri::XML::Attr]
69- #
70- # @return [String, Array<String>]
71- def self . values_from ( attribute )
72- return attribute . value unless attribute . name == 'srcset'
73-
74- attribute . value . split ( ',' ) . map { |value | value . strip . match ( /^\S +/ ) . to_s }
75- end
76- end
7771 end
7872 end
7973end
0 commit comments