class Nanoc::Extra::LinkCollector

@api private

Constants

URI_ATTRS

Public Class Methods

new(filenames, mode = nil) click to toggle source
# File lib/nanoc/extra/link_collector.rb, line 17
# Builds a collector over the given files.
#
# @param filenames the files to scan; stored as-is in @filenames
# @param mode [Symbol, nil] nil keeps every href, :external keeps only
#   hrefs for which external_href? is truthy, :internal keeps the rest
# @raise [ArgumentError] if mode is anything other than the three above
def initialize(filenames, mode = nil)
  Nanoc::Extra::JRubyNokogiriWarner.check_and_warn

  @filenames = filenames

  # One predicate per supported mode; the lambdas call external_href?
  # lazily, when the filter is eventually applied.
  filters_by_mode = {
    nil => ->(_href) { true },
    :external => ->(href) { external_href?(href) },
    :internal => ->(href) { !external_href?(href) },
  }

  @filter = filters_by_mode.fetch(mode) do
    raise ArgumentError, 'Expected mode argument to be :internal, :external or nil'
  end
end

Public Instance Methods

external_href?(href) click to toggle source
# File lib/nanoc/extra/link_collector.rb, line 58
# Tells whether an href points outside the site, i.e. whether it is
# protocol-relative ("//host/...") or starts with a URI scheme ("http:",
# "mailto:", "svn+ssh:", ...).
#
# The scheme is matched case-insensitively and with the RFC 3986 grammar
# (a letter followed by letters, digits, "+", "-" or "."), and the pattern
# is anchored to the start of the whole string rather than to the start of
# any line, so a relative href that merely contains a newline followed by
# "//" is not mistaken for an external one.
#
# @param href [String, nil] the raw attribute value
# @return [Integer, nil] 0 (truthy) when the href is external, nil otherwise
def external_href?(href)
  href =~ %r{\A(//|[a-z][a-z0-9+.\-]*:)}i
end
filenames_per_href() click to toggle source
# File lib/nanoc/extra/link_collector.rb, line 34
# Indexes the collected hrefs by the files that contain them.
#
# @return [Hash] each href returned by hrefs_in_file mapped to the Set of
#   filenames (from @filenames) in which it was found
def filenames_per_href
  # Loaded lazily, only once links are actually collected.
  require 'nokogiri'

  @filenames.each_with_object({}) do |filename, index|
    hrefs_in_file(filename).each do |href|
      (index[href] ||= Set.new) << filename
    end
  end
end
filenames_per_resource_uri() click to toggle source
# File lib/nanoc/extra/link_collector.rb, line 46
# Indexes the collected resource URIs by the files that contain them.
#
# @return [Hash] each URI returned by resource_uris_in_file mapped to the
#   Set of filenames (from @filenames) in which it was found
def filenames_per_resource_uri
  # Loaded lazily, only once URIs are actually collected.
  require 'nokogiri'

  @filenames.each_with_object({}) do |filename, index|
    resource_uris_in_file(filename).each do |resource_uri|
      (index[resource_uri] ||= Set.new) << filename
    end
  end
end
hrefs_in_file(filename) click to toggle source
# File lib/nanoc/extra/link_collector.rb, line 62
# Collects the link URIs of one file by delegating to uris_in_file for the
# "a" and "img" elements.
#
# @param filename the file to scan
# @return whatever uris_in_file returns for those two tag names
def hrefs_in_file(filename)
  link_tags = %w[a img]
  uris_in_file(filename, link_tags)
end
resource_uris_in_file(filename) click to toggle source
# File lib/nanoc/extra/link_collector.rb, line 66
# Collects the resource URIs of one file by delegating to uris_in_file for
# the "audio", "form", "img", "iframe", "link", "script" and "video"
# elements.
#
# @param filename the file to scan
# @return whatever uris_in_file returns for those tag names
def resource_uris_in_file(filename)
  resource_tags = %w[audio form img iframe link script video]
  uris_in_file(filename, resource_tags)
end

Private Instance Methods

uris_in_file(filename, tag_names) click to toggle source
# File lib/nanoc/extra/link_collector.rb, line 72
# Extracts the URIs carried by the given tags in one HTML file.
#
# The file is parsed with Nokogiri; for every tag name the attribute to read
# is looked up in URI_ATTRS. Elements lacking that attribute are skipped,
# fragments ("#...") are removed, duplicates collapse, and the result is
# narrowed by the @filter predicate chosen at construction time.
#
# @param filename the HTML file to read
# @param tag_names the element names to inspect
# @return the de-duplicated, fragment-less URIs accepted by @filter
def uris_in_file(filename, tag_names)
  document = Nokogiri::HTML(::File.read(filename))

  raw_values = tag_names.flat_map do |tag_name|
    attr_name = URI_ATTRS[tag_name]
    document.css(tag_name).map { |element| element[attr_name] }
  end

  # Strip fragment
  stripped = Set.new(raw_values.compact) { |uri| uri.gsub(/#.*$/, '') }

  stripped.select(&@filter)
end