# # = RubyPants - SmartyPants ported to Ruby # # Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com> # Copyright (C) 2004 Christian Neukirchen # # Incooporates ideas, comments and documentation by Chad Miller # Copyright (C) 2004 Chad Miller # # Original SmartyPants by John Gruber # Copyright (C) 2003 John Gruber # # # = RubyPants - SmartyPants ported to Ruby # # == Synopsis # # RubyPants is a Ruby port of the smart-quotes library SmartyPants. # # The original "SmartyPants" is a free web publishing plug-in for # Movable Type, Blosxom, and BBEdit that easily translates plain ASCII # punctuation characters into "smart" typographic punctuation HTML # entities. # # # == Description # # RubyPants can perform the following transformations: # # * Straight quotes (<tt>"</tt> and <tt>'</tt>) into "curly" quote # HTML entities # * Backticks-style quotes (<tt>``like this''</tt>) into "curly" quote # HTML entities # * Dashes (<tt>--</tt> and <tt>---</tt>) into en- and em-dash # entities # * Three consecutive dots (<tt>...</tt> or <tt>. . .</tt>) into an # ellipsis entity # # This means you can write, edit, and save your posts using plain old # ASCII straight quotes, plain dashes, and plain dots, but your # published posts (and final HTML output) will appear with smart # quotes, em-dashes, and proper ellipses. # # RubyPants does not modify characters within <tt><pre></tt>, # <tt><code></tt>, <tt><kbd></tt>, <tt><math></tt> or # <tt><script></tt> tag blocks. Typically, these tags are used to # display text where smart quotes and other "smart punctuation" would # not be appropriate, such as source code or example markup. # # # == Backslash Escapes # # If you need to use literal straight quotes (or plain hyphens and # periods), RubyPants accepts the following backslash escape sequences # to force non-smart punctuation. It does so by transforming the # escape sequence into a decimal-encoded HTML entity: # # \\ \" \' \. \- \` # # This is useful, for example, when you want to use straight quotes as # foot and inch marks: 6'2" tall; a 17" iMac. (Use <tt>6\'2\"</tt> # resp. <tt>17\"</tt>.) # # # == Algorithmic Shortcomings # # One situation in which quotes will get curled the wrong way is when # apostrophes are used at the start of leading contractions. For # example: # # 'Twas the night before Christmas. # # In the case above, RubyPants will turn the apostrophe into an # opening single-quote, when in fact it should be a closing one. I # don't think this problem can be solved in the general case--every # word processor I've tried gets this wrong as well. In such cases, # it's best to use the proper HTML entity for closing single-quotes # (``’``) by hand. # # # == Bugs # # To file bug reports or feature requests (except see above) please # send email to: mailto:chneukirchen@gmail.com # # If the bug involves quotes being curled the wrong way, please send # example text to illustrate. # # # == Authors # # John Gruber did all of the hard work of writing this software in # Perl for Movable Type and almost all of this useful documentation. # Chad Miller ported it to Python to use with Pyblosxom. # # Christian Neukirchen provided the Ruby port, as a general-purpose # library that follows the *Cloth api. # # # == Copyright and License # # === SmartyPants license: # # Copyright (c) 2003 John Gruber # (http://daringfireball.net) # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # * Neither the name "SmartyPants" nor the names of its contributors # may be used to endorse or promote products derived from this # software without specific prior written permission. # # This software is provided by the copyright holders and contributors # "as is" and any express or implied warranties, including, but not # limited to, the implied warranties of merchantability and fitness # for a particular purpose are disclaimed. In no event shall the # copyright owner or contributors be liable for any direct, indirect, # incidental, special, exemplary, or consequential damages (including, # but not limited to, procurement of substitute goods or services; # loss of use, data, or profits; or business interruption) however # caused and on any theory of liability, whether in contract, strict # liability, or tort (including negligence or otherwise) arising in # any way out of the use of this software, even if advised of the # possibility of such damage. # # === RubyPants license # # RubyPants is a derivative work of SmartyPants and smartypants.py. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # This software is provided by the copyright holders and contributors # "as is" and any express or implied warranties, including, but not # limited to, the implied warranties of merchantability and fitness # for a particular purpose are disclaimed. In no event shall the # copyright owner or contributors be liable for any direct, indirect, # incidental, special, exemplary, or consequential damages (including, # but not limited to, procurement of substitute goods or services; # loss of use, data, or profits; or business interruption) however # caused and on any theory of liability, whether in contract, strict # liability, or tort (including negligence or otherwise) arising in # any way out of the use of this software, even if advised of the # possibility of such damage. # # # == Links # # John Gruber:: http://daringfireball.net # SmartyPants:: http://daringfireball.net/projects/smartypants # # Chad Miller:: http://web.chad.org # # Christian Neukirchen:: http://kronavita.de/chris # class RubyPants < String VERSION = "0.1" # Allowed elements in the options array: # # 0 :: do nothing # 1 :: set all # 2 :: set all, using old school en- and em- dash shortcuts # 3 :: set all, using inverted old school en and em- dash shortcuts # -1 :: stupefy (translate HTML entities to their ASCII-counterparts) # # <tt>:quotes</tt> :: quotes # <tt>:backticks</tt> :: backtick quotes (``double'' only) # <tt>:allbackticks</tt> :: backtick quotes (``double'' and `single') # <tt>:dashes</tt> :: dashes # <tt>:oldschool</tt> :: old school dashes # <tt>:inverted</tt> :: inverted old school dashes # <tt>:ellipses</tt> :: ellipses # <tt>:convertquotes</tt> :: convert <tt>"</tt> entities to # <tt>"</tt> for Dreamweaver users # <tt>:stupefy</tt> :: translate SmartyPants HTML entities # to their ASCII counterparts. # def initialize(string, options=[2]) super string @options = [*options] end # Apply SmartyPants transformations. def to_html do_quotes = do_backticks = do_dashes = do_ellipses = do_stupify = nil convert_quotes = false if @options.include? 0 # Do nothing. return self elsif @options.include? 1 # Do everything, turn all options on. do_quotes = do_backticks = do_ellipses = true do_dashes = :normal elsif @options.include? 2 # Do everything, turn all options on, use old school dash shorthand. do_quotes = do_backticks = do_ellipses = true do_dashes = :oldschool elsif @options.include? 3 # Do everything, turn all options on, use inverted old school # dash shorthand. do_quotes = do_backticks = do_ellipses = true do_dashes = :inverted elsif @options.include?(-1) do_stupefy = true else do_quotes = @options.include? :quotes do_backticks = @options.include? :backticks do_backticks = :both if @options.include? :allbackticks do_dashes = :normal if @options.include? :dashes do_dashes = :oldschool if @options.include? :oldschool do_dashes = :inverted if @options.include? :inverted do_ellipses = @options.include? :ellipses convert_quotes = @options.include? :convertquotes do_stupefy = @options.include? :stupefy end # Parse the HTML tokens = tokenize # Keep track of when we're inside <pre> or <code> tags. in_pre = false # Here is the result stored in. result = "" # This is a cheat, used to get some context for one-character # tokens that consist of just a quote char. What we do is remember # the last character of the previous text token, to use as context # to curl single- character quote tokens correctly. prev_token_last_char = "" tokens.each { |token| if token.first == :tag result << token[1] if token[1] =~ %r!<(/?)(?:pre|code|kbd|script|math)[\s>]! in_pre = ($1 != "/") # Opening or closing tag? end else t = token[1] # Remember last char of this token before processing. last_char = t[-1] unless in_pre t = process_escapes t t.gsub!(/"/, '"') if convert_quotes if do_dashes t = educate_dashes t if do_dashes == :normal t = educate_dashes_oldschool t if do_dashes == :oldschool t = educate_dashes_inverted t if do_dashes == :inverted end t = educate_ellipses t if do_ellipses # Note: backticks need to be processed before quotes. if do_backticks t = educate_backticks t t = educate_single_backticks t if do_backticks == :both end if do_quotes if t == "'" # Special case: single-character ' token if prev_token_last_char =~ /\S/ t = "’" else t = "‘" end elsif t == '"' # Special case: single-character " token if prev_token_last_char =~ /\S/ t = "”" else t = "“" end else # Normal case: t = educate_quotes t end end t = stupefy_entities t if do_stupefy end prev_token_last_char = last_char result << t end } # Done result end protected # Return the string, with after processing the following backslash # escape sequences. This is useful if you want to force a "dumb" quote # or other character to appear. # # Escaped are: # \\ \" \' \. \- \` # def process_escapes(str) str.gsub(/\\\\/, '\'). gsub(/\\"/, '"'). gsub(/\\'/, '''). gsub(/\\\./, '.'). gsub(/\\-/, '-'). gsub(/\\`/, '`') end # The string, with each instance of "<tt>--</tt>" translated to an # em-dash HTML entity. # def educate_dashes(str) str.gsub(/--/, '—') end # The string, with each instance of "<tt>--</tt>" translated to an # en-dash HTML entity, and each "<tt>---</tt>" translated to an # em-dash HTML entity. # def educate_dashes_oldschool(str) str.gsub(/---/, '—').gsub(/--/, '–') end # Return the string, with each instance of "<tt>--</tt>" translated # to an em-dash HTML entity, and each "<tt>---</tt>" translated to # an en-dash HTML entity. Two reasons why: First, unlike the en- and # em-dash syntax supported by +educate_dashes_oldschool+, it's # compatible with existing entries written before SmartyPants 1.1, # back when "<tt>--</tt>" was only used for em-dashes. Second, # em-dashes are more common than en-dashes, and so it sort of makes # sense that the shortcut should be shorter to type. (Thanks to # Aaron Swartz for the idea.) # def educate_dashes_inverted(str) str.gsub(/---/, '–').gsub(/--/, '—') end # Return the string, with each instance of "<tt>...</tt>" translated # to an ellipsis HTML entity. Also converts the case where there are # spaces between the dots. # def educate_ellipses(str) str.gsub('...', '…').gsub('. . .', '…') end # Return the string, with <tt>``backticks''</tt>-style single quotes # translated into HTML curly quote entities. # def educate_backticks(str) str.gsub("``", '“').gsub("''", '”') end # Return the string, with <tt>`backticks'</tt>-style single quotes # translated into HTML curly quote entities. # def educate_single_backticks(str) str.gsub("`", '‘').gsub("'", '’') end # Return the string, with "educated" curly quote HTML entities. # def educate_quotes(str) punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]' str = str.dup # Special case if the very first character is a quote followed by # punctuation at a non-word-break. Close the quotes by brute # force: str.gsub!(/^'(?=#{punct_class}\B)/, '’') str.gsub!(/^"(?=#{punct_class}\B)/, '”') # Special case for double sets of quotes, e.g.: # <p>He said, "'Quoted' words in a larger quote."</p> str.gsub!(/"'(?=\w)/, '“‘') str.gsub!(/'"(?=\w)/, '‘“') # Special case for decade abbreviations (the '80s): str.gsub!(/'(?=\d\ds)/, '’') close_class = %![^\ \t\r\n\\[\{\(\-]! dec_dashes = '–|—' # Get most opening single quotes: str.gsub!(/(\s| |--|&[mn]dash;|#{dec_dashes}|ȁ[34];)'(?=\w)/, '\1‘') # Single closing quotes: str.gsub!(/(#{close_class})'/, '\1’') str.gsub!(/'(\s|s\b|$)/, '’\1') # Any remaining single quotes should be opening ones: str.gsub!(/'/, '‘') # Get most opening double quotes: str.gsub!(/(\s| |--|&[mn]dash;|#{dec_dashes}|ȁ[34];)"(?=\w)/, '\1“') # Double closing quotes: str.gsub!(/(#{close_class})"/, '\1”') str.gsub!(/"(\s|s\b|$)/, '”\1') # Any remaining quotes should be opening ones: str.gsub!(/"/, '“') str end # Return the string, with each SmartyPants HTML entity translated to # its ASCII counterpart. # def stupefy_entities(str) str. gsub(/–/, '-'). # en-dash gsub(/—/, '--'). # em-dash gsub(/‘/, "'"). # open single quote gsub(/’/, "'"). # close single quote gsub(/“/, '"'). # open double quote gsub(/”/, '"'). # close double quote gsub(/…/, '...') # ellipsis end # Return an array of the tokens comprising the string. Each token is # either a tag (possibly with nested, tags contained therein, such # as <tt><a href="<MTFoo>"></tt>, or a run of text between # tags. Each element of the array is a two-element array; the first # is either :tag or :text; the second is the actual value. # # Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's # MTRegex plugin. <http://www.bradchoate.com/past/mtregex.php> # # This is actually the easier variant using tag_soup, as used by # Chad Miller in the Python port of SmartyPants. # def tokenize tag_soup = /([^<]*)(<[^>]*>)/ tokens = [] prev_end = 0 scan(tag_soup) { tokens << [:text, $1] if $1 != "" tokens << [:tag, $2] prev_end = $~.end(0) } if prev_end < size tokens << [:text, self[prev_end..-1]] end tokens end end