class Nanoc::Extra::Checking::Checks::ExternalLinks

A validator that verifies that all external links point to a location that exists.

@api private

Public Instance Methods

excluded?(href) click to toggle source
# File lib/nanoc/extra/checking/checks/external_links.rb, line 169
# Tells whether the given href matches one of the configured exclusion
# patterns, read from `checks -> external_links -> exclude` in @config.
# Each configured entry is compiled with Regexp.new before matching.
#
# @param href [String] the link target to test
# @return [Boolean] true when at least one pattern matches
def excluded?(href)
  checks_config = @config.fetch(:checks, {})
  link_config = checks_config.fetch(:external_links, {})
  patterns = link_config.fetch(:exclude, [])

  patterns.any? do |pattern|
    Regexp.new(pattern).match(href)
  end
end
excluded_file?(file) click to toggle source
# File lib/nanoc/extra/checking/checks/external_links.rb, line 174
# Tells whether the given output file matches one of the configured file
# exclusion patterns, read from `checks -> external_links -> exclude_files`
# in @config. Each configured entry is compiled with Regexp.new before
# matching.
#
# @param file [String] the output filename to test
# @return [Boolean] true when at least one pattern matches
def excluded_file?(file)
  checks_config = @config.fetch(:checks, {})
  link_config = checks_config.fetch(:external_links, {})
  patterns = link_config.fetch(:exclude_files, [])

  patterns.any? do |pattern|
    Regexp.new(pattern).match(file)
  end
end
path_for_url(url) click to toggle source
# File lib/nanoc/extra/checking/checks/external_links.rb, line 144
# Builds the request target (path plus optional query string) for a URL.
#
# The result is assembled into a new String rather than appended to
# `url.path`: appending would modify the URI's own path buffer, so a second
# call on the same URI (for example when a HEAD request is retried as GET)
# would produce a doubled query string.
#
# @param url [URI::Generic] the parsed URL
# @return [String] the path ('/' when the URL has none), followed by
#   '?' and the query when the URL has a query component
def path_for_url(url)
  path = url.path.nil? || url.path.empty? ? '/' : url.path

  url.query ? "#{path}?#{url.query}" : path
end
request_url_once(url, req_method = Net::HTTP::Head) click to toggle source
# File lib/nanoc/extra/checking/checks/external_links.rb, line 159
# Performs a single HTTP request against the given URL and returns the
# response. No retrying or redirect following happens here.
#
# @param url [URI::Generic] the URL to request
# @param req_method [Class] the Net::HTTP request class to instantiate
#   (defaults to Net::HTTP::Head)
# @return the value returned by Net::HTTP#request
def request_url_once(url, req_method = Net::HTTP::Head)
  request = req_method.new(path_for_url(url))

  connection = Net::HTTP.new(url.host, url.port).tap do |http|
    next unless url.instance_of?(URI::HTTPS)

    http.use_ssl = true
    # NOTE(review): certificate verification is switched off here, so TLS
    # errors are not reported as broken links — confirm this is intentional.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end

  connection.request(request)
end
run() click to toggle source
# File lib/nanoc/extra/checking/checks/external_links.rb, line 14
# Collects the external hrefs found in the non-excluded HTML output files,
# validates them, and records one issue per (broken href, file) pair.
def run
  # Find all broken external hrefs
  # TODO: de-duplicate this (duplicated in internal links check)
  html_files = output_filenames.select do |path|
    File.extname(path) == '.html' && !excluded_file?(path)
  end
  collector = ::Nanoc::Extra::LinkCollector.new(html_files, :external)
  files_by_href = collector.filenames_per_href

  # Report every file that contains each broken href
  select_invalid(files_by_href.keys).each do |result|
    files_by_href[result.href].each do |filename|
      add_issue(
        "broken reference to #{result.href}: #{result.explanation}",
        subject: filename)
    end
  end
end
select_invalid(hrefs) click to toggle source
# File lib/nanoc/extra/checking/checks/external_links.rb, line 57
# Validates the given hrefs using ten worker threads and gathers the
# failures.
#
# @param hrefs [Array<String>] the hrefs to check (processed in sorted order)
# @return [Set] the non-nil results returned by #validate
def select_invalid(hrefs)
  # presumably ArrayEnumerator#next is safe to call from several threads and
  # returns nil once exhausted — verify against its definition
  pending = ArrayEnumerator.new(hrefs.sort)
  lock = Mutex.new
  failures = Set.new

  workers = Array.new(10) do
    Thread.new do
      loop do
        href = pending.next
        break if href.nil?

        outcome = validate(href)
        # Only failed validations are recorded; the set is shared, so guard it
        lock.synchronize { failures << outcome } if outcome
      end
    end
  end
  workers.each(&:join)

  failures
end
validate(href) click to toggle source
# File lib/nanoc/extra/checking/checks/external_links.rb, line 83
# Checks whether a single external href resolves to a page that exists.
#
# The URL is requested up to five times, with a per-attempt timeout (in
# seconds) of 3, 5, 10, 30 and 60. Each attempt issues a HEAD request and
# falls back to GET when the server answers 405. A 3xx answer makes the next
# attempt target the redirect location; an error or timeout makes the next
# attempt retry the same URL.
#
# @param href [String] the link target as found in the output
# @return [Result, nil] nil when the href is excluded, is not HTTP(S), or
#   eventually answers 200; otherwise a Result carrying the href and an
#   explanation (status code, error message, or a description of the
#   redirect problem)
def validate(href)
  # Parse
  url = nil
  begin
    url = URI.parse(href)
  rescue URI::InvalidURIError
    return Result.new(href, 'invalid URI')
  end

  # Skip excluded URLs
  return nil if excluded?(href)

  # Skip non-HTTP URLs
  return nil if url.scheme !~ /^https?$/

  # Get status
  res = nil
  last_err = nil
  timeouts = [3, 5, 10, 30, 60]
  5.times do |i|
    begin
      Timeout.timeout(timeouts[i]) do
        res = request_url_once(url)
        if res.code == '405'
          res = request_url_once(url, Net::HTTP::Get)
        end
      end
    rescue => e
      # Remember the failure and use the next (longer) timeout
      last_err = e
      next # can not allow
    end

    if res.code =~ /^3..$/
      if i == 4
        return Result.new(href, 'too many redirects')
      end

      # Find proper location
      location = res['Location']

      # A redirect without a Location header cannot be followed; report it
      # instead of letting string concatenation with nil raise.
      if location.nil?
        return Result.new(href, 'redirection without a target location')
      end

      if location !~ /^https?:\/\//
        # Non-absolute target: prefix it with the scheme/host/port of the
        # URL that was just requested
        base_url = url.dup
        base_url.path = (location =~ /^\// ? '' : '/')
        base_url.query = nil
        base_url.fragment = nil
        location = base_url.to_s + location
      end

      # An unparseable redirect target is a broken link, not a crash
      begin
        url = URI.parse(location)
      rescue URI::InvalidURIError
        return Result.new(href, 'invalid redirect location')
      end
    elsif res.code == '200'
      return nil
    else
      return Result.new(href, res.code)
    end
  end

  # The loop only falls through when the final attempt raised
  if last_err
    return Result.new(href, last_err.message)
  else
    raise 'should not have gotten here'
  end
end