From 90dea6d415bfc5734bc87c2797b26cca311246bc Mon Sep 17 00:00:00 2001 From: scott Chacon Date: Tue, 20 Nov 2007 13:24:44 -0800 Subject: have the pure ruby bindings working to some degree --- lib/git.rb | 3 + lib/git/raw/git.rb | 63 ------ lib/git/raw/internal/loose.rb | 138 ++++++------ lib/git/raw/internal/mmap.rb | 70 ++++--- lib/git/raw/internal/object.rb | 38 ++-- lib/git/raw/internal/pack.rb | 430 +++++++++++++++++++------------------- lib/git/raw/object.rb | 2 +- tests/units/test_raw_internals.rb | 22 +- 8 files changed, 373 insertions(+), 393 deletions(-) delete mode 100644 lib/git/raw/git.rb diff --git a/lib/git.rb b/lib/git.rb index 02a3b8a..49a61ea 100644 --- a/lib/git.rb +++ b/lib/git.rb @@ -23,6 +23,9 @@ require 'git/diff' require 'git/status' require 'git/author' +require 'git/raw/repository' + + # Git/Ruby Library # # This provides bindings for working with git in complex diff --git a/lib/git/raw/git.rb b/lib/git/raw/git.rb deleted file mode 100644 index 004e795..0000000 --- a/lib/git/raw/git.rb +++ /dev/null @@ -1,63 +0,0 @@ -require 'git/internal/object' -require 'git/internal/pack' -require 'git/internal/loose' -require 'git/object' - -module Git - class Repository - def initialize(git_dir) - @git_dir = git_dir - @loose = Internal::LooseStorage.new(git_path("objects")) - @packs = [] - initpacks - end - - def git_path(path) - return "#@git_dir/#{path}" - end - - def get_object_by_sha1(sha1) - r = get_raw_object_by_sha1(sha1) - return nil if !r - Object.from_raw(r, self) - end - - def get_raw_object_by_sha1(sha1) - sha1 = [sha1].pack("H*") - - # try packs - @packs.each do |pack| - o = pack[sha1] - return o if o - end - - # try loose storage - o = @loose[sha1] - return o if o - - # try packs again, maybe the object got packed in the meantime - initpacks - @packs.each do |pack| - o = pack[sha1] - return o if o - end - - nil - end - - def initpacks - @packs.each do |pack| - pack.close - end - @packs = [] - Dir.open(git_path("objects/pack/")) do |dir| - dir.each do |entry| - if entry =~ /\.pack$/i - @packs << Git::Internal::PackStorage.new(git_path("objects/pack/" \ - + entry)) - end - end - end - end - end -end diff --git a/lib/git/raw/internal/loose.rb b/lib/git/raw/internal/loose.rb index 0e4020c..d8ec6fb 100644 --- a/lib/git/raw/internal/loose.rb +++ b/lib/git/raw/internal/loose.rb @@ -3,85 +3,89 @@ require 'digest/sha1' require 'git/raw/internal/object' -module Git module Raw module Internal - class LooseObjectError < StandardError - end +module Git + module Raw + module Internal + class LooseObjectError < StandardError + end - class LooseStorage - def initialize(directory) - @directory = directory - end + class LooseStorage + def initialize(directory) + @directory = directory + end - def [](sha1) - sha1 = sha1.unpack("H*")[0] + def [](sha1) + sha1 = sha1.unpack("H*")[0] - path = @directory+'/'+sha1[0...2]+'/'+sha1[2..40] - begin - get_raw_object(File.read(path)) - rescue Errno::ENOENT - nil - end - end + path = @directory+'/'+sha1[0...2]+'/'+sha1[2..40] + begin + get_raw_object(File.read(path)) + rescue Errno::ENOENT + nil + end + end - def get_raw_object(buf) - if buf.length < 2 - raise LooseObjectError, "object file too small" - end + def get_raw_object(buf) + if buf.length < 2 + raise LooseObjectError, "object file too small" + end - if legacy_loose_object?(buf) - content = Zlib::Inflate.inflate(buf) - header, content = content.split(/\0/, 2) - if !header || !content - raise LooseObjectError, "invalid object header" - end - type, size = header.split(/ /, 2) - if !%w(blob tree commit tag).include?(type) || size !~ /^\d+$/ - raise LooseObjectError, "invalid object header" + if legacy_loose_object?(buf) + content = Zlib::Inflate.inflate(buf) + header, content = content.split(/\0/, 2) + if !header || !content + raise LooseObjectError, "invalid object header" + end + type, size = header.split(/ /, 2) + if !%w(blob tree commit tag).include?(type) || size !~ /^\d+$/ + raise LooseObjectError, "invalid object header" + end + type = type.to_sym + size = size.to_i + else + type, size, used = unpack_object_header_gently(buf) + content = Zlib::Inflate.inflate(buf[used..-1]) + end + raise LooseObjectError, "size mismatch" if content.length != size + return RawObject.new(type, content) end - type = type.to_sym - size = size.to_i - else - type, size, used = unpack_object_header_gently(buf) - content = Zlib::Inflate.inflate(buf[used..-1]) - end - raise LooseObjectError, "size mismatch" if content.length != size - return RawObject.new(type, content) - end - # private - def unpack_object_header_gently(buf) - used = 0 - c = buf[used] - used += 1 + # private + def unpack_object_header_gently(buf) + used = 0 + c = buf[used] + used += 1 + + type = (c >> 4) & 7; + size = c & 15; + shift = 4; + while c & 0x80 != 0 + if buf.length <= used + raise LooseObjectError, "object file too short" + end + c = buf[used] + used += 1 - type = (c >> 4) & 7; - size = c & 15; - shift = 4; - while c & 0x80 != 0 - if buf.length <= used - raise LooseObjectError, "object file too short" + size += (c & 0x7f) << shift + shift += 7 + end + type = OBJ_TYPES[type] + if ![:blob, :tree, :commit, :tag].include?(type) + raise LooseObjectError, "invalid loose object type" + end + return [type, size, used] end - c = buf[used] - used += 1 + private :unpack_object_header_gently - size += (c & 0x7f) << shift - shift += 7 - end - type = OBJ_TYPES[type] - if ![:blob, :tree, :commit, :tag].include?(type) - raise LooseObjectError, "invalid loose object type" + def legacy_loose_object?(buf) + word = (buf[0] << 8) + buf[1] + buf[0] == 0x78 && word % 31 == 0 + end + private :legacy_loose_object? end - return [type, size, used] - end - private :unpack_object_header_gently - - def legacy_loose_object?(buf) - word = (buf[0] << 8) + buf[1] - buf[0] == 0x78 && word % 31 == 0 - end - private :legacy_loose_object? + end end -end end +end if $0 == __FILE__ require 'find' diff --git a/lib/git/raw/internal/mmap.rb b/lib/git/raw/internal/mmap.rb index d7390b1..15b5628 100644 --- a/lib/git/raw/internal/mmap.rb +++ b/lib/git/raw/internal/mmap.rb @@ -2,43 +2,47 @@ begin require 'mmap' rescue LoadError -module Git module Raw module Internal - class Mmap - def initialize(file) - @file = file - @offset = nil - end +module Git + module Raw + module Internal + class Mmap + def initialize(file) + @file = file + @offset = nil + end - def unmap - @file = nil - end + def unmap + @file = nil + end - def [](*idx) - idx = idx[0] if idx.length == 1 - case idx - when Range - offset = idx.first - len = idx.last - idx.first + idx.exclude_end? ? 0 : 1 - when Fixnum - offset = idx - len = nil - when Array - offset, len = idx - else - raise RuntimeError, "invalid index param: #{idx.class}" - end - if @offset != offset - @file.seek(offset) - end - @offset = offset + len ? len : 1 - if not len - @file.read(1)[0] - else - @file.read(len) + def [](*idx) + idx = idx[0] if idx.length == 1 + case idx + when Range + offset = idx.first + len = idx.last - idx.first + idx.exclude_end? ? 0 : 1 + when Fixnum + offset = idx + len = nil + when Array + offset, len = idx + else + raise RuntimeError, "invalid index param: #{idx.class}" + end + if @offset != offset + @file.seek(offset) + end + @offset = offset + len ? len : 1 + if not len + @file.read(1)[0] + else + @file.read(len) + end + end end end - end -end end + end +end end # rescue LoadError diff --git a/lib/git/raw/internal/object.rb b/lib/git/raw/internal/object.rb index b81df2b..7f95685 100644 --- a/lib/git/raw/internal/object.rb +++ b/lib/git/raw/internal/object.rb @@ -1,23 +1,27 @@ require 'digest/sha1' -module Git module Raw module Internal - OBJ_NONE = 0 - OBJ_COMMIT = 1 - OBJ_TREE = 2 - OBJ_BLOB = 3 - OBJ_TAG = 4 +module Git + module Raw + module Internal + OBJ_NONE = 0 + OBJ_COMMIT = 1 + OBJ_TREE = 2 + OBJ_BLOB = 3 + OBJ_TAG = 4 - OBJ_TYPES = [nil, :commit, :tree, :blob, :tag].freeze + OBJ_TYPES = [nil, :commit, :tree, :blob, :tag].freeze - class RawObject - attr_accessor :type, :content - def initialize(type, content) - @type = type - @content = content - end + class RawObject + attr_accessor :type, :content + def initialize(type, content) + @type = type + @content = content + end - def sha1 - Digest::SHA1.digest("%s %d\0" % [@type, @content.length] + @content) - end + def sha1 + Digest::SHA1.digest("%s %d\0" % [@type, @content.length] + @content) + end + end + end end -end end +end diff --git a/lib/git/raw/internal/pack.rb b/lib/git/raw/internal/pack.rb index edfeada..6980a98 100644 --- a/lib/git/raw/internal/pack.rb +++ b/lib/git/raw/internal/pack.rb @@ -2,232 +2,240 @@ require 'zlib' require 'git/raw/internal/object' require 'git/raw/internal/mmap' -module Git module Raw module Internal - class PackFormatError < StandardError - end - - class PackStorage - OBJ_OFS_DELTA = 6 - OBJ_REF_DELTA = 7 - - FanOutCount = 256 - SHA1Size = 20 - IdxOffsetSize = 4 - OffsetSize = 4 - OffsetStart = FanOutCount * IdxOffsetSize - SHA1Start = OffsetStart + OffsetSize - EntrySize = OffsetSize + SHA1Size - - def initialize(file) - if file =~ /\.idx$/ - file = file[0...-3] + 'pack' +module Git + module Raw + module Internal + class PackFormatError < StandardError end - @name = file - @packfile = File.open(file) - @idxfile = File.open(file[0...-4]+'idx') - @idx = Mmap.new(@idxfile) - - @offsets = [0] - FanOutCount.times do |i| - pos = @idx[i * IdxOffsetSize,IdxOffsetSize].unpack('N')[0] - if pos < @offsets[i] - raise PackFormatError, "pack #@name has discontinuous index #{i}" + class PackStorage + OBJ_OFS_DELTA = 6 + OBJ_REF_DELTA = 7 + + FanOutCount = 256 + SHA1Size = 20 + IdxOffsetSize = 4 + OffsetSize = 4 + OffsetStart = FanOutCount * IdxOffsetSize + SHA1Start = OffsetStart + OffsetSize + EntrySize = OffsetSize + SHA1Size + + def initialize(file) + if file =~ /\.idx$/ + file = file[0...-3] + 'pack' + end + + @name = file + @packfile = File.open(file) + @idxfile = File.open(file[0...-4]+'idx') + @idx = Mmap.new(@idxfile) + + @offsets = [0] + FanOutCount.times do |i| + pos = @idx[i * IdxOffsetSize,IdxOffsetSize].unpack('N')[0] + if pos < @offsets[i] + raise PackFormatError, "pack #@name has discontinuous index #{i}" + end + @offsets << pos + end + + @size = @offsets[-1] end - @offsets << pos - end - - @size = @offsets[-1] - end - def close - @packfile.close - @idx.unmap - @idxfile.close - end - - def [](sha1) - offset = find_object(sha1) - return nil if !offset - return parse_object(offset) - end - - def each_entry - pos = OffsetStart - @size.times do - offset = @idx[pos,OffsetSize].unpack('N')[0] - sha1 = @idx[pos+OffsetSize,SHA1Size] - pos += EntrySize - yield sha1, offset - end - end - - def each_sha1 - # unpacking the offset is quite expensive, so - # we avoid using #each - pos = SHA1Start - @size.times do - sha1 = @idx[pos,SHA1Size] - pos += EntrySize - yield sha1 - end - end - - def find_object(sha1) - slot = sha1[0] - first, last = @offsets[slot,2] - while first < last - mid = (first + last) / 2 - midsha1 = @idx[SHA1Start + mid * EntrySize,SHA1Size] - cmp = midsha1 <=> sha1 - - if cmp < 0 - first = mid + 1 - elsif cmp > 0 - last = mid - else - pos = OffsetStart + mid * EntrySize - offset = @idx[pos,OffsetSize].unpack('N')[0] - return offset + def name + @name + end + + def close + @packfile.close + @idx.unmap + @idxfile.close end - end - - nil - end - private :find_object - def parse_object(offset) - data, type = unpack_object(offset) - RawObject.new(OBJ_TYPES[type], data) - end - protected :parse_object - - def unpack_object(offset) - obj_offset = offset - @packfile.seek(offset) - - c = @packfile.read(1)[0] - size = c & 0xf - type = (c >> 4) & 7 - shift = 4 - offset += 1 - while c & 0x80 != 0 - c = @packfile.read(1)[0] - size |= ((c & 0x7f) << shift) - shift += 7 - offset += 1 - end + def [](sha1) + offset = find_object(sha1) + return nil if !offset + return parse_object(offset) + end - case type - when OBJ_OFS_DELTA, OBJ_REF_DELTA - data, type = unpack_deltified(type, offset, obj_offset, size) - when OBJ_COMMIT, OBJ_TREE, OBJ_BLOB, OBJ_TAG - data = unpack_compressed(offset, size) - else - raise PackFormatError, "invalid type #{type}" - end - [data, type] - end - private :unpack_object - - def unpack_deltified(type, offset, obj_offset, size) - @packfile.seek(offset) - data = @packfile.read(SHA1Size) - - if type == OBJ_OFS_DELTA - i = 0 - c = data[i] - base_offset = c & 0x7f - while c & 0x80 != 0 - c = data[i += 1] - base_offset += 1 - base_offset <<= 7 - base_offset |= c & 0x7f + def each_entry + pos = OffsetStart + @size.times do + offset = @idx[pos,OffsetSize].unpack('N')[0] + sha1 = @idx[pos+OffsetSize,SHA1Size] + pos += EntrySize + yield sha1, offset + end end - base_offset = obj_offset - base_offset - offset += i + 1 - else - base_offset = find_object(data) - offset += SHA1Size - end - base, type = unpack_object(base_offset) - delta = unpack_compressed(offset, size) - [patch_delta(base, delta), type] - end - private :unpack_deltified - - def unpack_compressed(offset, destsize) - outdata = "" - @packfile.seek(offset) - zstr = Zlib::Inflate.new - while outdata.size < destsize - indata = @packfile.read(4096) - if indata.size == 0 - raise PackFormatError, 'error reading pack data' + def each_sha1 + # unpacking the offset is quite expensive, so + # we avoid using #each + pos = SHA1Start + @size.times do + sha1 = @idx[pos,SHA1Size] + pos += EntrySize + yield sha1 + end end - outdata += zstr.inflate(indata) - end - if outdata.size > destsize - raise PackFormatError, 'error reading pack data' - end - zstr.close - outdata - end - private :unpack_compressed - def patch_delta(base, delta) - src_size, pos = patch_delta_header_size(delta, 0) - if src_size != base.size - raise PackFormatError, 'invalid delta data' - end + def find_object(sha1) + slot = sha1[0] + first, last = @offsets[slot,2] + while first < last + mid = (first + last) / 2 + midsha1 = @idx[SHA1Start + mid * EntrySize,SHA1Size] + cmp = midsha1 <=> sha1 + + if cmp < 0 + first = mid + 1 + elsif cmp > 0 + last = mid + else + pos = OffsetStart + mid * EntrySize + offset = @idx[pos,OffsetSize].unpack('N')[0] + return offset + end + end + + nil + end + private :find_object - dest_size, pos = patch_delta_header_size(delta, pos) - dest = "" - while pos < delta.size - c = delta[pos] - pos += 1 - if c & 0x80 != 0 - pos -= 1 - cp_off = cp_size = 0 - cp_off = delta[pos += 1] if c & 0x01 != 0 - cp_off |= delta[pos += 1] << 8 if c & 0x02 != 0 - cp_off |= delta[pos += 1] << 16 if c & 0x04 != 0 - cp_off |= delta[pos += 1] << 24 if c & 0x08 != 0 - cp_size = delta[pos += 1] if c & 0x10 != 0 - cp_size |= delta[pos += 1] << 8 if c & 0x20 != 0 - cp_size |= delta[pos += 1] << 16 if c & 0x40 != 0 - cp_size = 0x10000 if cp_size == 0 - pos += 1 - dest += base[cp_off,cp_size] - elsif c != 0 - dest += delta[pos,c] - pos += c - else - raise PackFormatError, 'invalid delta data' + def parse_object(offset) + data, type = unpack_object(offset) + RawObject.new(OBJ_TYPES[type], data) end - end - dest - end - private :patch_delta - - def patch_delta_header_size(delta, pos) - size = 0 - shift = 0 - begin - c = delta[pos] - if c == nil - raise PackFormatError, 'invalid delta header' + protected :parse_object + + def unpack_object(offset) + obj_offset = offset + @packfile.seek(offset) + + c = @packfile.read(1)[0] + size = c & 0xf + type = (c >> 4) & 7 + shift = 4 + offset += 1 + while c & 0x80 != 0 + c = @packfile.read(1)[0] + size |= ((c & 0x7f) << shift) + shift += 7 + offset += 1 + end + + case type + when OBJ_OFS_DELTA, OBJ_REF_DELTA + data, type = unpack_deltified(type, offset, obj_offset, size) + when OBJ_COMMIT, OBJ_TREE, OBJ_BLOB, OBJ_TAG + data = unpack_compressed(offset, size) + else + raise PackFormatError, "invalid type #{type}" + end + [data, type] + end + private :unpack_object + + def unpack_deltified(type, offset, obj_offset, size) + @packfile.seek(offset) + data = @packfile.read(SHA1Size) + + if type == OBJ_OFS_DELTA + i = 0 + c = data[i] + base_offset = c & 0x7f + while c & 0x80 != 0 + c = data[i += 1] + base_offset += 1 + base_offset <<= 7 + base_offset |= c & 0x7f + end + base_offset = obj_offset - base_offset + offset += i + 1 + else + base_offset = find_object(data) + offset += SHA1Size + end + + base, type = unpack_object(base_offset) + delta = unpack_compressed(offset, size) + [patch_delta(base, delta), type] + end + private :unpack_deltified + + def unpack_compressed(offset, destsize) + outdata = "" + @packfile.seek(offset) + zstr = Zlib::Inflate.new + while outdata.size < destsize + indata = @packfile.read(4096) + if indata.size == 0 + raise PackFormatError, 'error reading pack data' + end + outdata += zstr.inflate(indata) + end + if outdata.size > destsize + raise PackFormatError, 'error reading pack data' + end + zstr.close + outdata end - pos += 1 - size |= (c & 0x7f) << shift - shift += 7 - end while c & 0x80 != 0 - [size, pos] + private :unpack_compressed + + def patch_delta(base, delta) + src_size, pos = patch_delta_header_size(delta, 0) + if src_size != base.size + raise PackFormatError, 'invalid delta data' + end + + dest_size, pos = patch_delta_header_size(delta, pos) + dest = "" + while pos < delta.size + c = delta[pos] + pos += 1 + if c & 0x80 != 0 + pos -= 1 + cp_off = cp_size = 0 + cp_off = delta[pos += 1] if c & 0x01 != 0 + cp_off |= delta[pos += 1] << 8 if c & 0x02 != 0 + cp_off |= delta[pos += 1] << 16 if c & 0x04 != 0 + cp_off |= delta[pos += 1] << 24 if c & 0x08 != 0 + cp_size = delta[pos += 1] if c & 0x10 != 0 + cp_size |= delta[pos += 1] << 8 if c & 0x20 != 0 + cp_size |= delta[pos += 1] << 16 if c & 0x40 != 0 + cp_size = 0x10000 if cp_size == 0 + pos += 1 + dest += base[cp_off,cp_size] + elsif c != 0 + dest += delta[pos,c] + pos += c + else + raise PackFormatError, 'invalid delta data' + end + end + dest + end + private :patch_delta + + def patch_delta_header_size(delta, pos) + size = 0 + shift = 0 + begin + c = delta[pos] + if c == nil + raise PackFormatError, 'invalid delta header' + end + pos += 1 + size |= (c & 0x7f) << shift + shift += 7 + end while c & 0x80 != 0 + [size, pos] + end + private :patch_delta_header_size + end end - private :patch_delta_header_size - end -end end + end +end if $0 == __FILE__ ARGV.each do |path| diff --git a/lib/git/raw/object.rb b/lib/git/raw/object.rb index 7e3e618..f10d853 100644 --- a/lib/git/raw/object.rb +++ b/lib/git/raw/object.rb @@ -263,6 +263,6 @@ module Git :tag end end - + end end \ No newline at end of file diff --git a/tests/units/test_raw_internals.rb b/tests/units/test_raw_internals.rb index b135e52..4299a2b 100644 --- a/tests/units/test_raw_internals.rb +++ b/tests/units/test_raw_internals.rb @@ -6,10 +6,30 @@ class TestRawInternals < Test::Unit::TestCase def setup set_file_paths - @git = Git.open(@wdir) end def test_raw_log + g = Git.bare(@wbare) + #g.repack + + c = g.object("HEAD") + puts sha = c.sha + + repo = Git::Raw::Repository.new(@wbare) + while sha do + o = repo.get_raw_object_by_sha1(sha) + c = Git::Raw::Object.from_raw(o) + + sha = c.parent.first + puts sha + end + + g.log(60).each do |c| + puts c.sha + end + + puts c.inspect + end end \ No newline at end of file -- cgit