Parent

Sanitize

Constants

REGEX_PROTOCOL

Matches an attribute value that could be treated by a browser as a URL with a protocol prefix, such as “http:” or “javascript:”. Any string of zero or more characters followed by a colon is considered a match, even if the colon is encoded as an entity and even if it’s an incomplete entity (which IE6 and Opera will still parse).

VERSION

Attributes

config[R]

Public Class Methods

clean(html, config = {}) click to toggle source

Returns a sanitized copy of html, using the settings in config if specified.

# File lib/sanitize.rb, line 51
# Convenience entry point: builds a one-off Sanitize instance from +config+
# and returns whatever that instance's #clean returns for +html+.
def self.clean(html, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean(html)
end
clean!(html, config = {}) click to toggle source

Performs Sanitize#clean in place, returning html, or nil if no changes were made.

# File lib/sanitize.rb, line 57
# Convenience entry point: builds a one-off Sanitize instance from +config+
# and returns whatever that instance's #clean! returns for +html+.
def self.clean!(html, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean!(html)
end
clean_document(html, config = {}) click to toggle source

Performs a Sanitize#clean using a full-document HTML parser instead of the default fragment parser. This will add a DOCTYPE and an html tag unless they are already present.

# File lib/sanitize.rb, line 64
# Convenience entry point: builds a one-off Sanitize instance from +config+
# and returns whatever that instance's #clean_document returns for +html+.
def self.clean_document(html, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean_document(html)
end
clean_document!(html, config = {}) click to toggle source

Performs Sanitize#clean_document in place, returning html, or nil if no changes were made.

# File lib/sanitize.rb, line 70
# Convenience entry point: builds a one-off Sanitize instance from +config+
# and returns whatever that instance's #clean_document! returns for +html+.
def self.clean_document!(html, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean_document!(html)
end
clean_node!(node, config = {}) click to toggle source

Sanitizes the specified Nokogiri::XML::Node and all its children.

# File lib/sanitize.rb, line 75
# Convenience entry point: builds a one-off Sanitize instance from +config+
# and returns whatever that instance's #clean_node! returns for +node+.
def self.clean_node!(node, config = {})
  sanitizer = Sanitize.new(config)
  sanitizer.clean_node!(node)
end
new(config = {}) click to toggle source

Returns a new Sanitize object initialized with the settings in config.

# File lib/sanitize.rb, line 84
# Returns a new Sanitize object initialized with the settings in +config+,
# which are merged over Config::DEFAULT and stored in <code>@config</code>.
#
# Also builds the two transformer chains kept in <code>@transformers</code>:
# [:breadth] a private copy of <code>:transformers_breadth</code>
# [:depth]   <code>:transformers</code> followed by
#            <code>:transformers_depth</code>, then the built-in cleaners
#            appended below
def initialize(config = {})
  @config = Config::DEFAULT.merge(config)

  @transformers = {
    # Coerce first, then copy: a nil setting becomes [] instead of hitting
    # nil.dup (a TypeError before Ruby 2.4), and a lone transformer is wrapped
    # as-is rather than duplicated.
    :breadth => Array(@config[:transformers_breadth]).dup,
    # Array#+ already yields a fresh array, so the config arrays are not
    # mutated by the appends below.
    :depth   => Array(@config[:transformers]) + Array(@config[:transformers_depth])
  }

  # Default depth transformers. These always run at the end of the chain,
  # after any custom transformers.
  @transformers[:depth] << Transformers::CleanComment unless @config[:allow_comments]

  @transformers[:depth] <<
      Transformers::CleanCDATA <<
      Transformers::CleanElement.new(@config)
end

Public Instance Methods

clean(html) click to toggle source

Returns a sanitized copy of html.

# File lib/sanitize.rb, line 102
# Returns a sanitized copy of +html+, leaving the argument itself untouched.
# Returns nil when +html+ is nil or false.
def clean(html)
  return unless html

  copy = html.dup
  # clean! returns nil when it changed nothing; hand back the copy either way.
  clean!(copy) || copy
end
clean!(html, parser = Nokogiri::HTML::DocumentFragment) click to toggle source

Performs clean in place, returning html, or nil if no changes were made.

# File lib/sanitize.rb, line 111
# Sanitizes +html+ in place.
#
# +html+ is parsed with +parser+ (Nokogiri::HTML::DocumentFragment unless
# another parser is given), the parsed tree is run through #clean_node!, and
# the tree is then serialized according to <code>@config[:output]</code>
# (+:html+ or +:xhtml+) using <code>@config[:output_encoding]</code>.
#
# Returns nil when the serialized markup equals +html+; otherwise overwrites
# the contents of +html+ with the markup and returns it. Raises Error when
# <code>@config[:output]</code> is neither +:html+ nor +:xhtml+.
def clean!(html, parser = Nokogiri::HTML::DocumentFragment)
  tree = parser.parse(html)
  clean_node!(tree)

  save_options = {:encoding => @config[:output_encoding], :indent => 0}

  case @config[:output]
  when :xhtml
    serializer = tree.method(:to_xhtml)
    save_options[:save_with] = Nokogiri::XML::Node::SaveOptions::AS_XHTML
  when :html
    serializer = tree.method(:to_html)
  else
    raise Error, "unsupported output format: #{@config[:output]}"
  end

  cleaned = serializer.call(save_options)
  return nil if cleaned == html

  # Replace the caller's string contents wholesale; the value of this
  # assignment (the cleaned markup) is the method's return value.
  html[0, html.length] = cleaned
end
clean_document(html) click to toggle source
# File lib/sanitize.rb, line 131
# Returns a sanitized copy of +html+ produced by #clean_document!, leaving the
# argument itself untouched. Returns nil when +html+ is nil.
#
# The copy is returned even when #clean_document! reports no changes, so the
# caller never receives its own input object back (consistent with #clean).
def clean_document(html)
  return if html.nil?

  copy = html.dup
  # clean_document! returns nil when it changed nothing; hand back the copy
  # either way.
  clean_document!(copy) || copy
end
clean_document!(html) click to toggle source
# File lib/sanitize.rb, line 137
# Sanitizes +html+ in place by delegating to #clean! with
# Nokogiri::HTML::Document as the parser, and returns what #clean! returns.
#
# Raises a RuntimeError unless <code>@config[:elements]</code> includes
# 'html' or <code>@config[:remove_contents]</code> is set.
def clean_document!(html)
  html_allowed = @config[:elements].include?('html')

  if html_allowed || @config[:remove_contents]
    clean!(html, Nokogiri::HTML::Document)
  else
    # Refuse up front: otherwise Nokogiri will raise for having multiple root
    # nodes when it moves its children to the root document context.
    raise 'You must have the HTML element whitelisted to call #clean_document unless remove_contents is set to true'
  end
end
clean_node!(node) click to toggle source

Sanitizes the specified Nokogiri::XML::Node and all its children.

# File lib/sanitize.rb, line 148
# Sanitizes the specified Nokogiri::XML::Node and all its children, in place,
# and returns +node+.
#
# When any breadth transformers are configured, a breadth traversal applies
# them first; a depth traversal then always applies the depth transformers.
# Both passes share one node whitelist Set.
#
# Raises ArgumentError when +node+ is not a Nokogiri::XML::Node.
def clean_node!(node)
  unless node.is_a?(Nokogiri::XML::Node)
    # Name the offending class so the failure is diagnosable from the message.
    raise ArgumentError, "expected a Nokogiri::XML::Node, got #{node.class}"
  end

  node_whitelist = Set.new

  unless @transformers[:breadth].empty?
    traverse_breadth(node) { |n| transform_node!(n, node_whitelist, :breadth) }
  end

  traverse_depth(node) { |n| transform_node!(n, node_whitelist, :depth) }
  node
end

[Validate]

Generated with the Darkfish Rdoc Generator 2.