diff options
Diffstat (limited to 'lib/puppet/external/pson/pure')
-rw-r--r-- | lib/puppet/external/pson/pure/generator.rb | 836 | ||||
-rw-r--r-- | lib/puppet/external/pson/pure/parser.rb | 486 |
2 files changed, 661 insertions, 661 deletions
diff --git a/lib/puppet/external/pson/pure/generator.rb b/lib/puppet/external/pson/pure/generator.rb index 42981b9dc..ef8b36d31 100644 --- a/lib/puppet/external/pson/pure/generator.rb +++ b/lib/puppet/external/pson/pure/generator.rb @@ -1,429 +1,429 @@ module PSON - MAP = { - "\x0" => '\u0000', - "\x1" => '\u0001', - "\x2" => '\u0002', - "\x3" => '\u0003', - "\x4" => '\u0004', - "\x5" => '\u0005', - "\x6" => '\u0006', - "\x7" => '\u0007', - "\b" => '\b', - "\t" => '\t', - "\n" => '\n', - "\xb" => '\u000b', - "\f" => '\f', - "\r" => '\r', - "\xe" => '\u000e', - "\xf" => '\u000f', - "\x10" => '\u0010', - "\x11" => '\u0011', - "\x12" => '\u0012', - "\x13" => '\u0013', - "\x14" => '\u0014', - "\x15" => '\u0015', - "\x16" => '\u0016', - "\x17" => '\u0017', - "\x18" => '\u0018', - "\x19" => '\u0019', - "\x1a" => '\u001a', - "\x1b" => '\u001b', - "\x1c" => '\u001c', - "\x1d" => '\u001d', - "\x1e" => '\u001e', - "\x1f" => '\u001f', - '"' => '\"', - '\\' => '\\\\', - } # :nodoc: - - # Convert a UTF8 encoded Ruby string _string_ to a PSON string, encoded with - # UTF16 big endian characters as \u????, and return it. - if String.method_defined?(:force_encoding) - def utf8_to_pson(string) # :nodoc: - string = string.dup - string << '' # XXX workaround: avoid buffer sharing - string.force_encoding(Encoding::ASCII_8BIT) - string.gsub!(/["\\\x0-\x1f]/) { MAP[$MATCH] } - string.gsub!(/( - (?: - [\xc2-\xdf][\x80-\xbf] | - [\xe0-\xef][\x80-\xbf]{2} | - [\xf0-\xf4][\x80-\xbf]{3} - )+ | - [\x80-\xc1\xf5-\xff] # invalid - )/nx) { |c| - c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'" - s = PSON::UTF8toUTF16.iconv(c).unpack('H*')[0] - s.gsub!(/.{4}/n, '\\\\u\&') - } - string.force_encoding(Encoding::UTF_8) - string - rescue Iconv::Failure => e - raise GeneratorError, "Caught #{e.class}: #{e}" - end - else - def utf8_to_pson(string) # :nodoc: - string = string.gsub(/["\\\x0-\x1f]/) { MAP[$MATCH] } - string.gsub!(/( - (?: - [\xc2-\xdf][\x80-\xbf] | - [\xe0-\xef][\x80-\xbf]{2} | - [\xf0-\xf4][\x80-\xbf]{3} - )+ | - [\x80-\xc1\xf5-\xff] # invalid - )/nx) { |c| + MAP = { + "\x0" => '\u0000', + "\x1" => '\u0001', + "\x2" => '\u0002', + "\x3" => '\u0003', + "\x4" => '\u0004', + "\x5" => '\u0005', + "\x6" => '\u0006', + "\x7" => '\u0007', + "\b" => '\b', + "\t" => '\t', + "\n" => '\n', + "\xb" => '\u000b', + "\f" => '\f', + "\r" => '\r', + "\xe" => '\u000e', + "\xf" => '\u000f', + "\x10" => '\u0010', + "\x11" => '\u0011', + "\x12" => '\u0012', + "\x13" => '\u0013', + "\x14" => '\u0014', + "\x15" => '\u0015', + "\x16" => '\u0016', + "\x17" => '\u0017', + "\x18" => '\u0018', + "\x19" => '\u0019', + "\x1a" => '\u001a', + "\x1b" => '\u001b', + "\x1c" => '\u001c', + "\x1d" => '\u001d', + "\x1e" => '\u001e', + "\x1f" => '\u001f', + '"' => '\"', + '\\' => '\\\\', + } # :nodoc: + + # Convert a UTF8 encoded Ruby string _string_ to a PSON string, encoded with + # UTF16 big endian characters as \u????, and return it. + if String.method_defined?(:force_encoding) + def utf8_to_pson(string) # :nodoc: + string = string.dup + string << '' # XXX workaround: avoid buffer sharing + string.force_encoding(Encoding::ASCII_8BIT) + string.gsub!(/["\\\x0-\x1f]/) { MAP[$MATCH] } + string.gsub!(/( + (?: + [\xc2-\xdf][\x80-\xbf] | + [\xe0-\xef][\x80-\xbf]{2} | + [\xf0-\xf4][\x80-\xbf]{3} + )+ | + [\x80-\xc1\xf5-\xff] # invalid + )/nx) { |c| c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'" s = PSON::UTF8toUTF16.iconv(c).unpack('H*')[0] s.gsub!(/.{4}/n, '\\\\u\&') - } - string - rescue Iconv::Failure => e - raise GeneratorError, "Caught #{e.class}: #{e}" - end + } + string.force_encoding(Encoding::UTF_8) + string + rescue Iconv::Failure => e + raise GeneratorError, "Caught #{e.class}: #{e}" + end + else + def utf8_to_pson(string) # :nodoc: + string = string.gsub(/["\\\x0-\x1f]/) { MAP[$MATCH] } + string.gsub!(/( + (?: + [\xc2-\xdf][\x80-\xbf] | + [\xe0-\xef][\x80-\xbf]{2} | + [\xf0-\xf4][\x80-\xbf]{3} + )+ | + [\x80-\xc1\xf5-\xff] # invalid + )/nx) { |c| + c.size == 1 and raise GeneratorError, "invalid utf8 byte: '#{c}'" + s = PSON::UTF8toUTF16.iconv(c).unpack('H*')[0] + s.gsub!(/.{4}/n, '\\\\u\&') + } + string + rescue Iconv::Failure => e + raise GeneratorError, "Caught #{e.class}: #{e}" end - module_function :utf8_to_pson - - module Pure - module Generator - # This class is used to create State instances, that are use to hold data - # while generating a PSON text from a a Ruby data structure. - class State - # Creates a State object from _opts_, which ought to be Hash to create - # a new State instance configured by _opts_, something else to create - # an unconfigured instance. If _opts_ is a State object, it is just - # returned. - def self.from_state(opts) - case opts - when self - opts - when Hash - new(opts) - else - new - end - end - - # Instantiates a new State object, configured by _opts_. - # - # _opts_ can have the following keys: - # - # * *indent*: a string used to indent levels (default: ''), - # * *space*: a string that is put after, a : or , delimiter (default: ''), - # * *space_before*: a string that is put before a : pair delimiter (default: ''), - # * *object_nl*: a string that is put at the end of a PSON object (default: ''), - # * *array_nl*: a string that is put at the end of a PSON array (default: ''), - # * *check_circular*: true if checking for circular data structures - # should be done (the default), false otherwise. - # * *check_circular*: true if checking for circular data structures - # should be done, false (the default) otherwise. - # * *allow_nan*: true if NaN, Infinity, and -Infinity should be - # generated, otherwise an exception is thrown, if these values are - # encountered. This options defaults to false. - def initialize(opts = {}) - @seen = {} - @indent = '' - @space = '' - @space_before = '' - @object_nl = '' - @array_nl = '' - @check_circular = true - @allow_nan = false - configure opts - end - - # This string is used to indent levels in the PSON text. - attr_accessor :indent - - # This string is used to insert a space between the tokens in a PSON - # string. - attr_accessor :space - - # This string is used to insert a space before the ':' in PSON objects. - attr_accessor :space_before - - # This string is put at the end of a line that holds a PSON object (or - # Hash). - attr_accessor :object_nl - - # This string is put at the end of a line that holds a PSON array. - attr_accessor :array_nl - - # This integer returns the maximum level of data structure nesting in - # the generated PSON, max_nesting = 0 if no maximum is checked. - attr_accessor :max_nesting - - def check_max_nesting(depth) # :nodoc: - return if @max_nesting.zero? - current_nesting = depth + 1 - current_nesting > @max_nesting and - raise NestingError, "nesting of #{current_nesting} is too deep" - end - - # Returns true, if circular data structures should be checked, - # otherwise returns false. - def check_circular? - @check_circular - end - - # Returns true if NaN, Infinity, and -Infinity should be considered as - # valid PSON and output. - def allow_nan? - @allow_nan - end - - # Returns _true_, if _object_ was already seen during this generating - # run. - def seen?(object) - @seen.key?(object.__id__) - end - - # Remember _object_, to find out if it was already encountered (if a - # cyclic data structure is if a cyclic data structure is rendered). - def remember(object) - @seen[object.__id__] = true - end - - # Forget _object_ for this generating run. - def forget(object) - @seen.delete object.__id__ - end - - # Configure this State instance with the Hash _opts_, and return - # itself. - def configure(opts) - @indent = opts[:indent] if opts.key?(:indent) - @space = opts[:space] if opts.key?(:space) - @space_before = opts[:space_before] if opts.key?(:space_before) - @object_nl = opts[:object_nl] if opts.key?(:object_nl) - @array_nl = opts[:array_nl] if opts.key?(:array_nl) - @check_circular = !!opts[:check_circular] if opts.key?(:check_circular) - @allow_nan = !!opts[:allow_nan] if opts.key?(:allow_nan) - if !opts.key?(:max_nesting) # defaults to 19 - @max_nesting = 19 - elsif opts[:max_nesting] - @max_nesting = opts[:max_nesting] - else - @max_nesting = 0 - end - self - end - - # Returns the configuration instance variables as a hash, that can be - # passed to the configure method. - def to_h - result = {} - for iv in %w{indent space space_before object_nl array_nl check_circular allow_nan max_nesting} - result[iv.intern] = instance_variable_get("@#{iv}") - end - result - end + end + module_function :utf8_to_pson + + module Pure + module Generator + # This class is used to create State instances, that are use to hold data + # while generating a PSON text from a a Ruby data structure. + class State + # Creates a State object from _opts_, which ought to be Hash to create + # a new State instance configured by _opts_, something else to create + # an unconfigured instance. If _opts_ is a State object, it is just + # returned. + def self.from_state(opts) + case opts + when self + opts + when Hash + new(opts) + else + new + end + end + + # Instantiates a new State object, configured by _opts_. + # + # _opts_ can have the following keys: + # + # * *indent*: a string used to indent levels (default: ''), + # * *space*: a string that is put after, a : or , delimiter (default: ''), + # * *space_before*: a string that is put before a : pair delimiter (default: ''), + # * *object_nl*: a string that is put at the end of a PSON object (default: ''), + # * *array_nl*: a string that is put at the end of a PSON array (default: ''), + # * *check_circular*: true if checking for circular data structures + # should be done (the default), false otherwise. + # * *check_circular*: true if checking for circular data structures + # should be done, false (the default) otherwise. + # * *allow_nan*: true if NaN, Infinity, and -Infinity should be + # generated, otherwise an exception is thrown, if these values are + # encountered. This options defaults to false. + def initialize(opts = {}) + @seen = {} + @indent = '' + @space = '' + @space_before = '' + @object_nl = '' + @array_nl = '' + @check_circular = true + @allow_nan = false + configure opts + end + + # This string is used to indent levels in the PSON text. + attr_accessor :indent + + # This string is used to insert a space between the tokens in a PSON + # string. + attr_accessor :space + + # This string is used to insert a space before the ':' in PSON objects. + attr_accessor :space_before + + # This string is put at the end of a line that holds a PSON object (or + # Hash). + attr_accessor :object_nl + + # This string is put at the end of a line that holds a PSON array. + attr_accessor :array_nl + + # This integer returns the maximum level of data structure nesting in + # the generated PSON, max_nesting = 0 if no maximum is checked. + attr_accessor :max_nesting + + def check_max_nesting(depth) # :nodoc: + return if @max_nesting.zero? + current_nesting = depth + 1 + current_nesting > @max_nesting and + raise NestingError, "nesting of #{current_nesting} is too deep" + end + + # Returns true, if circular data structures should be checked, + # otherwise returns false. + def check_circular? + @check_circular + end + + # Returns true if NaN, Infinity, and -Infinity should be considered as + # valid PSON and output. + def allow_nan? + @allow_nan + end + + # Returns _true_, if _object_ was already seen during this generating + # run. + def seen?(object) + @seen.key?(object.__id__) + end + + # Remember _object_, to find out if it was already encountered (if a + # cyclic data structure is if a cyclic data structure is rendered). + def remember(object) + @seen[object.__id__] = true + end + + # Forget _object_ for this generating run. + def forget(object) + @seen.delete object.__id__ + end + + # Configure this State instance with the Hash _opts_, and return + # itself. + def configure(opts) + @indent = opts[:indent] if opts.key?(:indent) + @space = opts[:space] if opts.key?(:space) + @space_before = opts[:space_before] if opts.key?(:space_before) + @object_nl = opts[:object_nl] if opts.key?(:object_nl) + @array_nl = opts[:array_nl] if opts.key?(:array_nl) + @check_circular = !!opts[:check_circular] if opts.key?(:check_circular) + @allow_nan = !!opts[:allow_nan] if opts.key?(:allow_nan) + if !opts.key?(:max_nesting) # defaults to 19 + @max_nesting = 19 + elsif opts[:max_nesting] + @max_nesting = opts[:max_nesting] + else + @max_nesting = 0 + end + self + end + + # Returns the configuration instance variables as a hash, that can be + # passed to the configure method. + def to_h + result = {} + for iv in %w{indent space space_before object_nl array_nl check_circular allow_nan max_nesting} + result[iv.intern] = instance_variable_get("@#{iv}") + end + result + end + end + + module GeneratorMethods + module Object + # Converts this object to a string (calling #to_s), converts + # it to a PSON string, and returns the result. This is a fallback, if no + # special method #to_pson was defined for some object. + def to_pson(*) to_s.to_pson end + end + + module Hash + # Returns a PSON string containing a PSON object, that is unparsed from + # this Hash instance. + # _state_ is a PSON::State object, that can also be used to configure the + # produced PSON string output further. + # _depth_ is used to find out nesting depth, to indent accordingly. + def to_pson(state = nil, depth = 0, *) + if state + state = PSON.state.from_state(state) + state.check_max_nesting(depth) + pson_check_circular(state) { pson_transform(state, depth) } + else + pson_transform(state, depth) + end + end + + private + + def pson_check_circular(state) + if state and state.check_circular? + state.seen?(self) and raise PSON::CircularDatastructure, + "circular data structures not supported!" + state.remember self + end + yield + ensure + state and state.forget self + end + + def pson_shift(state, depth) + state and not state.object_nl.empty? or return '' + state.indent * depth + end + + def pson_transform(state, depth) + delim = ',' + if state + delim << state.object_nl + result = '{' + result << state.object_nl + result << map { |key,value| + s = pson_shift(state, depth + 1) + s << key.to_s.to_pson(state, depth + 1) + s << state.space_before + s << ':' + s << state.space + s << value.to_pson(state, depth + 1) + }.join(delim) + result << state.object_nl + result << pson_shift(state, depth) + result << '}' + else + result = '{' + result << map { |key,value| + key.to_s.to_pson << ':' << value.to_pson + }.join(delim) + result << '}' + end + result + end + end + + module Array + # Returns a PSON string containing a PSON array, that is unparsed from + # this Array instance. + # _state_ is a PSON::State object, that can also be used to configure the + # produced PSON string output further. + # _depth_ is used to find out nesting depth, to indent accordingly. + def to_pson(state = nil, depth = 0, *) + if state + state = PSON.state.from_state(state) + state.check_max_nesting(depth) + pson_check_circular(state) { pson_transform(state, depth) } + else + pson_transform(state, depth) + end + end + + private + + def pson_check_circular(state) + if state and state.check_circular? + state.seen?(self) and raise PSON::CircularDatastructure, + "circular data structures not supported!" + state.remember self + end + yield + ensure + state and state.forget self + end + + def pson_shift(state, depth) + state and not state.array_nl.empty? or return '' + state.indent * depth + end + + def pson_transform(state, depth) + delim = ',' + if state + delim << state.array_nl + result = '[' + result << state.array_nl + result << map { |value| + pson_shift(state, depth + 1) << value.to_pson(state, depth + 1) + }.join(delim) + result << state.array_nl + result << pson_shift(state, depth) + result << ']' + else + '[' << map { |value| value.to_pson }.join(delim) << ']' end + end + end + + module Integer + # Returns a PSON string representation for this Integer number. + def to_pson(*) to_s end + end - module GeneratorMethods - module Object - # Converts this object to a string (calling #to_s), converts - # it to a PSON string, and returns the result. This is a fallback, if no - # special method #to_pson was defined for some object. - def to_pson(*) to_s.to_pson end - end - - module Hash - # Returns a PSON string containing a PSON object, that is unparsed from - # this Hash instance. - # _state_ is a PSON::State object, that can also be used to configure the - # produced PSON string output further. - # _depth_ is used to find out nesting depth, to indent accordingly. - def to_pson(state = nil, depth = 0, *) - if state - state = PSON.state.from_state(state) - state.check_max_nesting(depth) - pson_check_circular(state) { pson_transform(state, depth) } - else - pson_transform(state, depth) - end - end - - private - - def pson_check_circular(state) - if state and state.check_circular? - state.seen?(self) and raise PSON::CircularDatastructure, - "circular data structures not supported!" - state.remember self - end - yield - ensure - state and state.forget self - end - - def pson_shift(state, depth) - state and not state.object_nl.empty? or return '' - state.indent * depth - end - - def pson_transform(state, depth) - delim = ',' - if state - delim << state.object_nl - result = '{' - result << state.object_nl - result << map { |key,value| - s = pson_shift(state, depth + 1) - s << key.to_s.to_pson(state, depth + 1) - s << state.space_before - s << ':' - s << state.space - s << value.to_pson(state, depth + 1) - }.join(delim) - result << state.object_nl - result << pson_shift(state, depth) - result << '}' - else - result = '{' - result << map { |key,value| - key.to_s.to_pson << ':' << value.to_pson - }.join(delim) - result << '}' - end - result - end - end - - module Array - # Returns a PSON string containing a PSON array, that is unparsed from - # this Array instance. - # _state_ is a PSON::State object, that can also be used to configure the - # produced PSON string output further. - # _depth_ is used to find out nesting depth, to indent accordingly. - def to_pson(state = nil, depth = 0, *) - if state - state = PSON.state.from_state(state) - state.check_max_nesting(depth) - pson_check_circular(state) { pson_transform(state, depth) } - else - pson_transform(state, depth) - end - end - - private - - def pson_check_circular(state) - if state and state.check_circular? - state.seen?(self) and raise PSON::CircularDatastructure, - "circular data structures not supported!" - state.remember self - end - yield - ensure - state and state.forget self - end - - def pson_shift(state, depth) - state and not state.array_nl.empty? or return '' - state.indent * depth - end - - def pson_transform(state, depth) - delim = ',' - if state - delim << state.array_nl - result = '[' - result << state.array_nl - result << map { |value| - pson_shift(state, depth + 1) << value.to_pson(state, depth + 1) - }.join(delim) - result << state.array_nl - result << pson_shift(state, depth) - result << ']' - else - '[' << map { |value| value.to_pson }.join(delim) << ']' - end - end - end - - module Integer - # Returns a PSON string representation for this Integer number. - def to_pson(*) to_s end - end - - module Float - # Returns a PSON string representation for this Float number. - def to_pson(state = nil, *) - case - when infinite? - if !state || state.allow_nan? - to_s - else - raise GeneratorError, "#{self} not allowed in PSON" - end - when nan? - if !state || state.allow_nan? - to_s - else - raise GeneratorError, "#{self} not allowed in PSON" - end - else - to_s - end - end - end - - module String - # This string should be encoded with UTF-8 A call to this method - # returns a PSON string encoded with UTF16 big endian characters as - # \u????. - def to_pson(*) - '"' << PSON.utf8_to_pson(self) << '"' - end - - # Module that holds the extinding methods if, the String module is - # included. - module Extend - # Raw Strings are PSON Objects (the raw bytes are stored in an array for the - # key "raw"). The Ruby String can be created by this module method. - def pson_create(o) - o['raw'].pack('C*') - end - end - - # Extends _modul_ with the String::Extend module. - def self.included(modul) - modul.extend Extend - end - - # This method creates a raw object hash, that can be nested into - # other data structures and will be unparsed as a raw string. This - # method should be used, if you want to convert raw strings to PSON - # instead of UTF-8 strings, e. g. binary data. - def to_pson_raw_object - { - PSON.create_id => self.class.name, - 'raw' => self.unpack('C*'), - } - end - - # This method creates a PSON text from the result of - # a call to to_pson_raw_object of this String. - def to_pson_raw(*args) - to_pson_raw_object.to_pson(*args) - end - end - - module TrueClass - # Returns a PSON string for true: 'true'. - def to_pson(*) 'true' end - end - - module FalseClass - # Returns a PSON string for false: 'false'. - def to_pson(*) 'false' end - end - - module NilClass - # Returns a PSON string for nil: 'null'. - def to_pson(*) 'null' end - end + module Float + # Returns a PSON string representation for this Float number. + def to_pson(state = nil, *) + case + when infinite? + if !state || state.allow_nan? + to_s + else + raise GeneratorError, "#{self} not allowed in PSON" + end + when nan? + if !state || state.allow_nan? + to_s + else + raise GeneratorError, "#{self} not allowed in PSON" + end + else + to_s end + end + end + + module String + # This string should be encoded with UTF-8 A call to this method + # returns a PSON string encoded with UTF16 big endian characters as + # \u????. + def to_pson(*) + '"' << PSON.utf8_to_pson(self) << '"' + end + + # Module that holds the extinding methods if, the String module is + # included. + module Extend + # Raw Strings are PSON Objects (the raw bytes are stored in an array for the + # key "raw"). The Ruby String can be created by this module method. + def pson_create(o) + o['raw'].pack('C*') + end + end + + # Extends _modul_ with the String::Extend module. + def self.included(modul) + modul.extend Extend + end + + # This method creates a raw object hash, that can be nested into + # other data structures and will be unparsed as a raw string. This + # method should be used, if you want to convert raw strings to PSON + # instead of UTF-8 strings, e. g. binary data. + def to_pson_raw_object + { + PSON.create_id => self.class.name, + 'raw' => self.unpack('C*'), + } + end + + # This method creates a PSON text from the result of + # a call to to_pson_raw_object of this String. + def to_pson_raw(*args) + to_pson_raw_object.to_pson(*args) + end + end + + module TrueClass + # Returns a PSON string for true: 'true'. + def to_pson(*) 'true' end + end + + module FalseClass + # Returns a PSON string for false: 'false'. + def to_pson(*) 'false' end + end + + module NilClass + # Returns a PSON string for nil: 'null'. + def to_pson(*) 'null' end end + end end + end end diff --git a/lib/puppet/external/pson/pure/parser.rb b/lib/puppet/external/pson/pure/parser.rb index 2d0a03066..6048f67e0 100644 --- a/lib/puppet/external/pson/pure/parser.rb +++ b/lib/puppet/external/pson/pure/parser.rb @@ -1,264 +1,264 @@ require 'strscan' module PSON - module Pure - # This class implements the PSON parser that is used to parse a PSON string - # into a Ruby data structure. - class Parser < StringScanner - STRING = /" ((?:[^\x0-\x1f"\\] | - # escaped special characters: - \\["\\\/bfnrt] | - \\u[0-9a-fA-F]{4} | - # match all but escaped special characters: - \\[\x20-\x21\x23-\x2e\x30-\x5b\x5d-\x61\x63-\x65\x67-\x6d\x6f-\x71\x73\x75-\xff])*) - "/nx - INTEGER = /(-?0|-?[1-9]\d*)/ - FLOAT = /(-? - (?:0|[1-9]\d*) - (?: - \.\d+(?i:e[+-]?\d+) | - \.\d+ | - (?i:e[+-]?\d+) - ) - )/x - NAN = /NaN/ - INFINITY = /Infinity/ - MINUS_INFINITY = /-Infinity/ - OBJECT_OPEN = /\{/ - OBJECT_CLOSE = /\}/ - ARRAY_OPEN = /\[/ - ARRAY_CLOSE = /\]/ - PAIR_DELIMITER = /:/ - COLLECTION_DELIMITER = /,/ - TRUE = /true/ - FALSE = /false/ - NULL = /null/ - IGNORE = %r( - (?: - //[^\n\r]*[\n\r]| # line comments - /\* # c-style comments - (?: - [^*/]| # normal chars - /[^*]| # slashes that do not start a nested comment - \*[^/]| # asterisks that do not end this comment - /(?=\*/) # single slash before this comment's end - )* - \*/ # the End of this comment - |[ \t\r\n]+ # whitespaces: space, horicontal tab, lf, cr - )+ - )mx + module Pure + # This class implements the PSON parser that is used to parse a PSON string + # into a Ruby data structure. + class Parser < StringScanner + STRING = /" ((?:[^\x0-\x1f"\\] | + # escaped special characters: + \\["\\\/bfnrt] | + \\u[0-9a-fA-F]{4} | + # match all but escaped special characters: + \\[\x20-\x21\x23-\x2e\x30-\x5b\x5d-\x61\x63-\x65\x67-\x6d\x6f-\x71\x73\x75-\xff])*) + "/nx + INTEGER = /(-?0|-?[1-9]\d*)/ + FLOAT = /(-? + (?:0|[1-9]\d*) + (?: + \.\d+(?i:e[+-]?\d+) | + \.\d+ | + (?i:e[+-]?\d+) + ) + )/x + NAN = /NaN/ + INFINITY = /Infinity/ + MINUS_INFINITY = /-Infinity/ + OBJECT_OPEN = /\{/ + OBJECT_CLOSE = /\}/ + ARRAY_OPEN = /\[/ + ARRAY_CLOSE = /\]/ + PAIR_DELIMITER = /:/ + COLLECTION_DELIMITER = /,/ + TRUE = /true/ + FALSE = /false/ + NULL = /null/ + IGNORE = %r( + (?: + //[^\n\r]*[\n\r]| # line comments + /\* # c-style comments + (?: + [^*/]| # normal chars + /[^*]| # slashes that do not start a nested comment + \*[^/]| # asterisks that do not end this comment + /(?=\*/) # single slash before this comment's end + )* + \*/ # the End of this comment + |[ \t\r\n]+ # whitespaces: space, horicontal tab, lf, cr + )+ + )mx - UNPARSED = Object.new + UNPARSED = Object.new - # Creates a new PSON::Pure::Parser instance for the string _source_. - # - # It will be configured by the _opts_ hash. _opts_ can have the following - # keys: - # * *max_nesting*: The maximum depth of nesting allowed in the parsed data - # structures. Disable depth checking with :max_nesting => false|nil|0, - # it defaults to 19. - # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in - # defiance of RFC 4627 to be parsed by the Parser. This option defaults - # to false. - # * *create_additions*: If set to false, the Parser doesn't create - # additions even if a matchin class and create_id was found. This option - # defaults to true. - # * *object_class*: Defaults to Hash - # * *array_class*: Defaults to Array - def initialize(source, opts = {}) - super - if !opts.key?(:max_nesting) # defaults to 19 - @max_nesting = 19 - elsif opts[:max_nesting] - @max_nesting = opts[:max_nesting] - else - @max_nesting = 0 - end - @allow_nan = !!opts[:allow_nan] - ca = true - ca = opts[:create_additions] if opts.key?(:create_additions) - @create_id = ca ? PSON.create_id : nil - @object_class = opts[:object_class] || Hash - @array_class = opts[:array_class] || Array - end + # Creates a new PSON::Pure::Parser instance for the string _source_. + # + # It will be configured by the _opts_ hash. _opts_ can have the following + # keys: + # * *max_nesting*: The maximum depth of nesting allowed in the parsed data + # structures. Disable depth checking with :max_nesting => false|nil|0, + # it defaults to 19. + # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in + # defiance of RFC 4627 to be parsed by the Parser. This option defaults + # to false. + # * *create_additions*: If set to false, the Parser doesn't create + # additions even if a matchin class and create_id was found. This option + # defaults to true. + # * *object_class*: Defaults to Hash + # * *array_class*: Defaults to Array + def initialize(source, opts = {}) + super + if !opts.key?(:max_nesting) # defaults to 19 + @max_nesting = 19 + elsif opts[:max_nesting] + @max_nesting = opts[:max_nesting] + else + @max_nesting = 0 + end + @allow_nan = !!opts[:allow_nan] + ca = true + ca = opts[:create_additions] if opts.key?(:create_additions) + @create_id = ca ? PSON.create_id : nil + @object_class = opts[:object_class] || Hash + @array_class = opts[:array_class] || Array + end - alias source string + alias source string - # Parses the current PSON string _source_ and returns the complete data - # structure as a result. - def parse - reset - obj = nil - until eos? - case - when scan(OBJECT_OPEN) - obj and raise ParserError, "source '#{peek(20)}' not in PSON!" - @current_nesting = 1 - obj = parse_object - when scan(ARRAY_OPEN) - obj and raise ParserError, "source '#{peek(20)}' not in PSON!" - @current_nesting = 1 - obj = parse_array - when skip(IGNORE) - ; - else - raise ParserError, "source '#{peek(20)}' not in PSON!" - end - end - obj or raise ParserError, "source did not contain any PSON!" - obj - end + # Parses the current PSON string _source_ and returns the complete data + # structure as a result. + def parse + reset + obj = nil + until eos? + case + when scan(OBJECT_OPEN) + obj and raise ParserError, "source '#{peek(20)}' not in PSON!" + @current_nesting = 1 + obj = parse_object + when scan(ARRAY_OPEN) + obj and raise ParserError, "source '#{peek(20)}' not in PSON!" + @current_nesting = 1 + obj = parse_array + when skip(IGNORE) + ; + else + raise ParserError, "source '#{peek(20)}' not in PSON!" + end + end + obj or raise ParserError, "source did not contain any PSON!" + obj + end - private + private - # Unescape characters in strings. - UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr } + # Unescape characters in strings. + UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr } - UNESCAPE_MAP.update( - { - ?" => '"', - ?\\ => '\\', - ?/ => '/', - ?b => "\b", - ?f => "\f", - ?n => "\n", - ?r => "\r", - ?t => "\t", - ?u => nil, + UNESCAPE_MAP.update( + { + ?" => '"', + ?\\ => '\\', + ?/ => '/', + ?b => "\b", + ?f => "\f", + ?n => "\n", + ?r => "\r", + ?t => "\t", + ?u => nil, - }) + }) - def parse_string - if scan(STRING) - return '' if self[1].empty? - string = self[1].gsub(%r{(?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff])}n) do |c| - if u = UNESCAPE_MAP[$MATCH[1]] - u - else # \uXXXX - bytes = '' - i = 0 - while c[6 * i] == ?\\ && c[6 * i + 1] == ?u - bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16) - i += 1 - end - PSON::UTF16toUTF8.iconv(bytes) - end - end - string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding) - string - else - UNPARSED - end - rescue Iconv::Failure => e - raise GeneratorError, "Caught #{e.class}: #{e}" + def parse_string + if scan(STRING) + return '' if self[1].empty? + string = self[1].gsub(%r{(?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff])}n) do |c| + if u = UNESCAPE_MAP[$MATCH[1]] + u + else # \uXXXX + bytes = '' + i = 0 + while c[6 * i] == ?\\ && c[6 * i + 1] == ?u + bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16) + i += 1 + end + PSON::UTF16toUTF8.iconv(bytes) end + end + string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding) + string + else + UNPARSED + end + rescue Iconv::Failure => e + raise GeneratorError, "Caught #{e.class}: #{e}" + end - def parse_value - case - when scan(FLOAT) - Float(self[1]) - when scan(INTEGER) - Integer(self[1]) - when scan(TRUE) - true - when scan(FALSE) - false - when scan(NULL) - nil - when (string = parse_string) != UNPARSED - string - when scan(ARRAY_OPEN) - @current_nesting += 1 - ary = parse_array - @current_nesting -= 1 - ary - when scan(OBJECT_OPEN) - @current_nesting += 1 - obj = parse_object - @current_nesting -= 1 - obj - when @allow_nan && scan(NAN) - NaN - when @allow_nan && scan(INFINITY) - Infinity - when @allow_nan && scan(MINUS_INFINITY) - MinusInfinity - else - UNPARSED - end - end + def parse_value + case + when scan(FLOAT) + Float(self[1]) + when scan(INTEGER) + Integer(self[1]) + when scan(TRUE) + true + when scan(FALSE) + false + when scan(NULL) + nil + when (string = parse_string) != UNPARSED + string + when scan(ARRAY_OPEN) + @current_nesting += 1 + ary = parse_array + @current_nesting -= 1 + ary + when scan(OBJECT_OPEN) + @current_nesting += 1 + obj = parse_object + @current_nesting -= 1 + obj + when @allow_nan && scan(NAN) + NaN + when @allow_nan && scan(INFINITY) + Infinity + when @allow_nan && scan(MINUS_INFINITY) + MinusInfinity + else + UNPARSED + end + end - def parse_array - raise NestingError, "nesting of #@current_nesting is too deep" if - @max_nesting.nonzero? && @current_nesting > @max_nesting - result = @array_class.new - delim = false - until eos? - case - when (value = parse_value) != UNPARSED - delim = false - result << value - skip(IGNORE) - if scan(COLLECTION_DELIMITER) - delim = true - elsif match?(ARRAY_CLOSE) - ; - else - raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!" - end - when scan(ARRAY_CLOSE) - raise ParserError, "expected next element in array at '#{peek(20)}'!" if delim - break - when skip(IGNORE) - ; - else - raise ParserError, "unexpected token in array at '#{peek(20)}'!" - end - end - result + def parse_array + raise NestingError, "nesting of #@current_nesting is too deep" if + @max_nesting.nonzero? && @current_nesting > @max_nesting + result = @array_class.new + delim = false + until eos? + case + when (value = parse_value) != UNPARSED + delim = false + result << value + skip(IGNORE) + if scan(COLLECTION_DELIMITER) + delim = true + elsif match?(ARRAY_CLOSE) + ; + else + raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!" end + when scan(ARRAY_CLOSE) + raise ParserError, "expected next element in array at '#{peek(20)}'!" if delim + break + when skip(IGNORE) + ; + else + raise ParserError, "unexpected token in array at '#{peek(20)}'!" + end + end + result + end - def parse_object - raise NestingError, "nesting of #@current_nesting is too deep" if - @max_nesting.nonzero? && @current_nesting > @max_nesting - result = @object_class.new - delim = false - until eos? - case - when (string = parse_string) != UNPARSED - skip(IGNORE) - raise ParserError, "expected ':' in object at '#{peek(20)}'!" unless scan(PAIR_DELIMITER) - skip(IGNORE) - unless (value = parse_value).equal? UNPARSED - result[string] = value - delim = false - skip(IGNORE) - if scan(COLLECTION_DELIMITER) - delim = true - elsif match?(OBJECT_CLOSE) - ; - else - raise ParserError, "expected ',' or '}' in object at '#{peek(20)}'!" - end - else - raise ParserError, "expected value in object at '#{peek(20)}'!" - end - when scan(OBJECT_CLOSE) - raise ParserError, "expected next name, value pair in object at '#{peek(20)}'!" if delim - if @create_id and klassname = result[@create_id] - klass = PSON.deep_const_get klassname - break unless klass and klass.pson_creatable? - result = klass.pson_create(result) - end - break - when skip(IGNORE) - ; - else - raise ParserError, "unexpected token in object at '#{peek(20)}'!" - end - end - result + def parse_object + raise NestingError, "nesting of #@current_nesting is too deep" if + @max_nesting.nonzero? && @current_nesting > @max_nesting + result = @object_class.new + delim = false + until eos? + case + when (string = parse_string) != UNPARSED + skip(IGNORE) + raise ParserError, "expected ':' in object at '#{peek(20)}'!" unless scan(PAIR_DELIMITER) + skip(IGNORE) + unless (value = parse_value).equal? UNPARSED + result[string] = value + delim = false + skip(IGNORE) + if scan(COLLECTION_DELIMITER) + delim = true + elsif match?(OBJECT_CLOSE) + ; + else + raise ParserError, "expected ',' or '}' in object at '#{peek(20)}'!" + end + else + raise ParserError, "expected value in object at '#{peek(20)}'!" + end + when scan(OBJECT_CLOSE) + raise ParserError, "expected next name, value pair in object at '#{peek(20)}'!" if delim + if @create_id and klassname = result[@create_id] + klass = PSON.deep_const_get klassname + break unless klass and klass.pson_creatable? + result = klass.pson_create(result) end + break + when skip(IGNORE) + ; + else + raise ParserError, "unexpected token in object at '#{peek(20)}'!" + end end + result + end end + end end |