# Pure-Ruby read access to git object storage (loose objects and pack files).
#
# Provenance: commit d07a2c04c1599fe707831afdd29397cc36e02fa6
#   From:    scott Chacon
#   Date:    Tue, 20 Nov 2007 12:07:46 -0800
#   Subject: added files from the gitrb project, which seems abandoned,
#            but which is great code
#
# The commit created six files (734 insertions):
#   lib/git/raw/git.rb              Git::Repository
#   lib/git/raw/internal/loose.rb   Git::Raw::Internal::LooseStorage
#   lib/git/raw/internal/mmap.rb    Git::Raw::Internal::Mmap (fallback)
#   lib/git/raw/internal/object.rb  Git::Raw::Internal::RawObject
#   lib/git/raw/internal/pack.rb    Git::Raw::Internal::PackStorage
#   lib/git/raw/object.rb           Git::Raw::{UserInfo,Object,Blob,...}
#
# The sections below hold the content of those files in dependency order so
# that this single file loads on its own; the project-local `require 'git/...'`
# lines of the individual files are therefore not needed here.

require 'zlib'
require 'digest/sha1'

# ---------------------------------------------------------------------------
# lib/git/raw/internal/object.rb
# ---------------------------------------------------------------------------
module Git
  module Raw
    module Internal
      # Numeric object type codes as stored in object headers.
      OBJ_NONE   = 0
      OBJ_COMMIT = 1
      OBJ_TREE   = 2
      OBJ_BLOB   = 3
      OBJ_TAG    = 4

      # Maps a numeric type code to its symbol; index 0 (OBJ_NONE) has none.
      OBJ_TYPES = [nil, :commit, :tree, :blob, :tag].freeze

      # An undecoded object: a type symbol plus the raw content bytes.
      class RawObject
        attr_accessor :type, :content

        def initialize(type, content)
          @type = type
          @content = content
        end

        # Returns the binary (20-byte) SHA-1 of "<type> <size>\0<content>".
        # The size is counted in bytes, not characters.
        def sha1
          Digest::SHA1.digest(("%s %d\0" % [@type, @content.bytesize]) + @content)
        end
      end
    end
  end
end

# ---------------------------------------------------------------------------
# lib/git/raw/internal/mmap.rb
# ---------------------------------------------------------------------------
begin
  require 'mmap'
rescue LoadError

  module Git
    module Raw
      module Internal
        # Minimal stand-in used when the 'mmap' library cannot be loaded.
        # It offers indexed reads on an already opened IO-like object
        # (anything responding to #seek and #read).
        class Mmap
          def initialize(file)
            @file = file
            @offset = nil # file position after the last read, if known
          end

          def unmap
            @file = nil
          end

          # Accepts an Integer index (returns that byte as an Integer, or nil
          # at end of file), a Range of byte offsets, or (offset, length);
          # the latter two return a String.
          #
          # Raises RuntimeError for any other kind of index.
          def [](*idx)
            idx = idx[0] if idx.length == 1
            case idx
            when Range
              offset = idx.first
              # Parenthesised: without it the ternary swallowed the whole sum.
              len = idx.last - idx.first + (idx.exclude_end? ? 0 : 1)
            when Integer
              offset = idx
              len = nil
            when Array
              offset, len = idx
            else
              raise RuntimeError, "invalid index param: #{idx.class}"
            end

            # Skip the seek only when the previous read ended exactly here.
            @file.seek(offset) if @offset != offset
            data = @file.read(len || 1)
            # Track what was really consumed so a short read cannot leave a
            # stale position behind.
            @offset = offset + (data ? data.bytesize : 0)

            return data if len
            data && data.getbyte(0)
          end
        end
      end
    end
  end

end # rescue LoadError

# ---------------------------------------------------------------------------
# lib/git/raw/internal/loose.rb
# ---------------------------------------------------------------------------
module Git
  module Raw
    module Internal
      class LooseObjectError < StandardError
      end

      # Reads loose objects stored as <directory>/<2 hex chars>/<38 hex chars>.
      class LooseStorage
        def initialize(directory)
          @directory = directory
        end

        # Looks up an object by its binary SHA-1.
        #
        # Returns a RawObject, or nil when no such file exists.
        # Raises LooseObjectError when the file is not a valid object.
        def [](sha1)
          hex = sha1.unpack("H*")[0]
          return nil if hex.length <= 2 # too short to name an object file

          path = @directory + '/' + hex[0...2] + '/' + hex[2..40]
          begin
            # Binary mode: the file holds compressed bytes, not text.
            buf = File.open(path, 'rb') { |f| f.read }
          rescue Errno::ENOENT
            return nil
          end
          get_raw_object(buf)
        end

        # Decodes the bytes of a loose object file into a RawObject.
        #
        # Two layouts are handled: the "legacy" one, where the whole file is a
        # zlib stream of "<type> <size>\0<content>", and one with a binary
        # type/size header followed by a zlib stream of the content.
        #
        # Raises LooseObjectError on malformed input.
        def get_raw_object(buf)
          if buf.bytesize < 2
            raise LooseObjectError, "object file too small"
          end

          if legacy_loose_object?(buf)
            content = Zlib::Inflate.inflate(buf)
            header, content = content.split(/\0/, 2)
            if !header || !content
              raise LooseObjectError, "invalid object header"
            end
            type, size = header.split(/ /, 2)
            if !%w(blob tree commit tag).include?(type) || size.nil? || size !~ /\A\d+\z/
              raise LooseObjectError, "invalid object header"
            end
            type = type.to_sym
            size = size.to_i
          else
            type, size, used = unpack_object_header_gently(buf)
            content = Zlib::Inflate.inflate(buf[used..-1])
          end
          raise LooseObjectError, "size mismatch" if content.bytesize != size
          RawObject.new(type, content)
        end

        private

        # Parses the binary header: the first byte carries the type in bits
        # 4-6 and the low 4 size bits; while bit 7 is set, each following byte
        # contributes 7 more size bits.
        #
        # Returns [type_symbol, size, bytes_consumed].
        def unpack_object_header_gently(buf)
          used = 0
          c = buf.getbyte(used)
          used += 1

          type = (c >> 4) & 7
          size = c & 15
          shift = 4
          while c & 0x80 != 0
            if buf.bytesize <= used
              raise LooseObjectError, "object file too short"
            end
            c = buf.getbyte(used)
            used += 1

            size += (c & 0x7f) << shift
            shift += 7
          end
          type = OBJ_TYPES[type]
          if ![:blob, :tree, :commit, :tag].include?(type)
            raise LooseObjectError, "invalid loose object type"
          end
          [type, size, used]
        end

        # True when the first two bytes look like a zlib stream header: first
        # byte 0x78 and the 16-bit big-endian word divisible by 31.
        def legacy_loose_object?(buf)
          first = buf.getbyte(0)
          word = (first << 8) + buf.getbyte(1)
          first == 0x78 && word % 31 == 0
        end
      end
    end
  end
end

# ---------------------------------------------------------------------------
# lib/git/raw/internal/pack.rb
# ---------------------------------------------------------------------------
module Git
  module Raw
    module Internal
      class PackFormatError < StandardError
      end

      # Reads objects from a pack file through its index (.idx) file.
      #
      # Index layout handled here: FanOutCount 4-byte big-endian cumulative
      # counts, followed by one EntrySize record per object made of a 4-byte
      # pack offset and the 20-byte SHA-1, ordered by SHA-1.
      class PackStorage
        OBJ_OFS_DELTA = 6
        OBJ_REF_DELTA = 7

        FanOutCount = 256
        SHA1Size = 20
        IdxOffsetSize = 4
        OffsetSize = 4
        OffsetStart = FanOutCount * IdxOffsetSize
        SHA1Start = OffsetStart + OffsetSize
        EntrySize = OffsetSize + SHA1Size

        # First four bytes ("\377tOc") of a version-2 index, read as a
        # big-endian integer.  Such files use a different layout.
        IdxV2Signature = 0xff744f63

        # file - path of the .pack file (a .idx path is accepted too).
        #
        # Raises PackFormatError when the index is truncated, inconsistent or
        # of an unsupported version; both files are closed again in that case.
        def initialize(file)
          if file =~ /\.idx$/
            file = file[0...-3] + 'pack'
          end

          @name = file
          @packfile = @idxfile = @idx = nil
          begin
            @packfile = File.open(file, 'rb')
            @idxfile = File.open(file[0...-4] + 'idx', 'rb')
            @idx = Mmap.new(@idxfile)

            @offsets = [0]
            FanOutCount.times do |i|
              raw = @idx[i * IdxOffsetSize, IdxOffsetSize]
              if raw.nil? || raw.bytesize != IdxOffsetSize
                raise PackFormatError, "pack #{@name} has a truncated index"
              end
              pos = raw.unpack('N')[0]
              if i == 0 && pos == IdxV2Signature
                raise PackFormatError, "pack #{@name} has an unsupported index version"
              end
              if pos < @offsets[i]
                raise PackFormatError, "pack #@name has discontinuous index #{i}"
              end
              @offsets << pos
            end

            @size = @offsets[-1]
          rescue
            close # do not leak the handles opened above
            raise
          end
        end

        def close
          @packfile.close if @packfile && !@packfile.closed?
          @idx.unmap if @idx
          @idxfile.close if @idxfile && !@idxfile.closed?
        end

        # Returns the RawObject for a binary SHA-1, or nil if this pack does
        # not contain it.
        def [](sha1)
          offset = find_object(sha1)
          return nil if !offset
          parse_object(offset)
        end

        # Yields (binary sha1, pack offset) for every index entry.
        def each_entry
          pos = OffsetStart
          @size.times do
            offset = @idx[pos, OffsetSize].unpack('N')[0]
            sha1 = @idx[pos + OffsetSize, SHA1Size]
            pos += EntrySize
            yield sha1, offset
          end
        end

        # Yields the binary sha1 of every index entry.
        def each_sha1
          # unpacking the offset is quite expensive, so
          # we avoid using #each_entry
          pos = SHA1Start
          @size.times do
            sha1 = @idx[pos, SHA1Size]
            pos += EntrySize
            yield sha1
          end
        end

        # Binary search in the index, narrowed by the fan-out slot of the
        # first SHA-1 byte.  Returns the pack offset or nil.
        def find_object(sha1)
          slot = sha1.getbyte(0)
          return nil if slot.nil?

          first, last = @offsets[slot, 2]
          while first < last
            mid = (first + last) / 2
            midsha1 = @idx[SHA1Start + mid * EntrySize, SHA1Size]
            cmp = midsha1 <=> sha1

            if cmp < 0
              first = mid + 1
            elsif cmp > 0
              last = mid
            else
              pos = OffsetStart + mid * EntrySize
              return @idx[pos, OffsetSize].unpack('N')[0]
            end
          end

          nil
        end
        private :find_object

        def parse_object(offset)
          data, type = unpack_object(offset)
          RawObject.new(OBJ_TYPES[type], data)
        end
        protected :parse_object

        # Reads the object stored at +offset+ in the pack.
        # Returns [content, numeric type]; deltas are resolved, so the type is
        # always that of the final base object.
        def unpack_object(offset)
          obj_offset = offset
          @packfile.seek(offset)

          # Header: type in bits 4-6, low 4 size bits, then 7 size bits per
          # continuation byte.
          c = read_pack_byte
          size = c & 0xf
          type = (c >> 4) & 7
          shift = 4
          offset += 1
          while c & 0x80 != 0
            c = read_pack_byte
            size |= ((c & 0x7f) << shift)
            shift += 7
            offset += 1
          end

          case type
          when OBJ_OFS_DELTA, OBJ_REF_DELTA
            data, type = unpack_deltified(type, offset, obj_offset, size)
          when OBJ_COMMIT, OBJ_TREE, OBJ_BLOB, OBJ_TAG
            data = unpack_compressed(offset, size)
          else
            raise PackFormatError, "invalid type #{type}"
          end
          [data, type]
        end
        private :unpack_object

        # Reads one byte from the pack as an Integer; EOF is a format error.
        def read_pack_byte
          byte = @packfile.read(1)
          if byte.nil? || byte.empty?
            raise PackFormatError, 'error reading pack data'
          end
          byte.getbyte(0)
        end
        private :read_pack_byte

        # Resolves a delta object whose base reference starts at +offset+.
        # +obj_offset+ is where the delta object itself starts; +size+ is the
        # inflated size of the delta data.
        def unpack_deltified(type, offset, obj_offset, size)
          @packfile.seek(offset)
          data = @packfile.read(SHA1Size)
          if data.nil? || data.empty?
            raise PackFormatError, 'error reading pack data'
          end

          if type == OBJ_OFS_DELTA
            # Variable-length distance back from this object to its base.
            i = 0
            c = data.getbyte(i)
            base_offset = c & 0x7f
            while c & 0x80 != 0
              c = data.getbyte(i += 1)
              raise PackFormatError, 'invalid delta base offset' if c.nil?
              base_offset += 1
              base_offset <<= 7
              base_offset |= c & 0x7f
            end
            base_offset = obj_offset - base_offset
            # A base at or after the delta itself would recurse forever.
            if base_offset < 0 || base_offset >= obj_offset
              raise PackFormatError, 'invalid delta base offset'
            end
            offset += i + 1
          else
            if data.bytesize != SHA1Size
              raise PackFormatError, 'error reading pack data'
            end
            base_offset = find_object(data)
            if base_offset.nil?
              raise PackFormatError,
                    "delta base #{data.unpack('H*')[0]} not found in pack #{@name}"
            end
            offset += SHA1Size
          end

          base, type = unpack_object(base_offset)
          delta = unpack_compressed(offset, size)
          [patch_delta(base, delta), type]
        end
        private :unpack_deltified

        # Inflates the zlib stream starting at +offset+ until +destsize+ bytes
        # have been produced.
        def unpack_compressed(offset, destsize)
          outdata = String.new
          @packfile.seek(offset)
          zstr = Zlib::Inflate.new
          begin
            while outdata.bytesize < destsize
              indata = @packfile.read(4096)
              if indata.nil? || indata.empty?
                raise PackFormatError, 'error reading pack data'
              end
              outdata << zstr.inflate(indata)
            end
            if outdata.bytesize > destsize
              raise PackFormatError, 'error reading pack data'
            end
          ensure
            zstr.close
          end
          outdata
        end
        private :unpack_compressed

        # Applies +delta+ to +base+ and returns the resulting bytes.
        #
        # The delta starts with the expected base size and result size, then a
        # list of commands: a byte with bit 7 set copies a range of +base+
        # (bits 0-3 select which offset bytes follow, bits 4-6 which size
        # bytes; size 0 means 0x10000); any other non-zero byte N inserts the
        # next N delta bytes.
        #
        # Raises PackFormatError when the delta is inconsistent.
        def patch_delta(base, delta)
          src_size, pos = patch_delta_header_size(delta, 0)
          if src_size != base.bytesize
            raise PackFormatError, 'invalid delta data'
          end

          dest_size, pos = patch_delta_header_size(delta, pos)
          dest = String.new
          while pos < delta.bytesize
            c = delta.getbyte(pos)
            pos += 1
            if c & 0x80 != 0
              cp_off, pos = read_delta_operand(delta, pos, c, [0x01, 0x02, 0x04, 0x08])
              cp_size, pos = read_delta_operand(delta, pos, c, [0x10, 0x20, 0x40])
              cp_size = 0x10000 if cp_size == 0
              if cp_off + cp_size > base.bytesize
                raise PackFormatError, 'invalid delta data'
              end
              dest << base.byteslice(cp_off, cp_size)
            elsif c != 0
              if pos + c > delta.bytesize
                raise PackFormatError, 'invalid delta data'
              end
              dest << delta.byteslice(pos, c)
              pos += c
            else
              raise PackFormatError, 'invalid delta data'
            end
          end

          # The header announces the result size; a mismatch means corruption.
          if dest.bytesize != dest_size
            raise PackFormatError, 'invalid delta data'
          end
          dest
        end
        private :patch_delta

        # Reads a little-endian copy operand: for each flag set in +cmd+ one
        # byte is consumed and placed at the matching byte position.
        # Returns [value, new_pos].
        def read_delta_operand(delta, pos, cmd, flags)
          value = 0
          flags.each_with_index do |flag, i|
            next if cmd & flag == 0
            byte = delta.getbyte(pos)
            raise PackFormatError, 'invalid delta data' if byte.nil?
            value |= byte << (8 * i)
            pos += 1
          end
          [value, pos]
        end
        private :read_delta_operand

        # Reads a variable-length size (7 bits per byte, least significant
        # group first, bit 7 = "more follows").  Returns [size, new_pos].
        def patch_delta_header_size(delta, pos)
          size = 0
          shift = 0
          loop do
            c = delta.getbyte(pos)
            if c.nil?
              raise PackFormatError, 'invalid delta header'
            end
            pos += 1
            size |= (c & 0x7f) << shift
            shift += 7
            break if c & 0x80 == 0
          end
          [size, pos]
        end
        private :patch_delta_header_size
      end
    end
  end
end

# ---------------------------------------------------------------------------
# lib/git/raw/object.rb
# ---------------------------------------------------------------------------
module Git
  module Raw

    # class for author/committer/tagger lines
    class UserInfo
      attr_accessor :name, :email, :date, :offset

      # str - "Name <email> <unix seconds> <+|-><hhmm>"
      #
      # Raises RuntimeError when +str+ does not have that shape.
      def initialize(str)
        # Leading zeros of the offset are skipped by the pattern so that
        # Integer() does not treat the rest as octal.
        m = /^(.*?) <(.*)> (\d+) ([+-])0*(\d+?)$/.match(str)
        if !m
          raise RuntimeError, "invalid header '%s' in commit" % str
        end
        @name = m[1]
        @email = m[2]
        @date = Time.at(Integer(m[3]))
        @offset = (m[4] == "-" ? -1 : 1) * Integer(m[5])
      end

      def to_s
        "%s <%s> %s %+05d" % [@name, @email, @date.to_i, @offset]
      end
    end

    # base class for all git objects (blob, tree, commit, tag)
    class Object
      attr_accessor :repository

      # Builds the matching subclass instance from a raw object (anything
      # responding to #type and #content).
      def self.from_raw(rawobject, repository = nil)
        case rawobject.type
        when :blob
          Blob.from_raw(rawobject, repository)
        when :tree
          Tree.from_raw(rawobject, repository)
        when :commit
          Commit.from_raw(rawobject, repository)
        when :tag
          Tag.from_raw(rawobject, repository)
        else
          raise RuntimeError, "got invalid object-type"
        end
      end

      def initialize
        raise NotImplementedError, "abstract class"
      end

      def type
        raise NotImplementedError, "abstract class"
      end

      def raw_content
        raise NotImplementedError, "abstract class"
      end

      # Hex SHA-1 of "<type> <byte size>\0<raw content>".
      def sha1
        content = raw_content
        Digest::SHA1.hexdigest(("%s %d\0" % [type, content.bytesize]) + content)
      end
    end

    class Blob < Object
      attr_accessor :content

      def self.from_raw(rawobject, repository = nil)
        new(rawobject.content, repository)
      end

      def initialize(content, repository = nil)
        @content = content
        @repository = repository
      end

      def type
        :blob
      end

      def raw_content
        @content
      end
    end

    # One entry of a tree: "<octal mode> <name>\0<20-byte sha1>".
    class DirectoryEntry
      S_IFMT  = 00170000
      S_IFLNK =  0120000
      S_IFREG =  0100000
      S_IFDIR =  0040000

      attr_accessor :mode, :name, :sha1

      # buf - the raw bytes of exactly one entry.
      #
      # Raises RuntimeError when +buf+ is malformed or its mode is not a
      # link, directory or regular file.
      def initialize(buf)
        m = /\A([0-7]+) ([^\0]*)\0(.{20})\z/m.match(buf)
        if !m
          raise RuntimeError, "invalid directory entry"
        end
        @mode = m[1].to_i(8)
        @name = m[2]
        @sha1 = m[3].unpack("H*")[0]

        if ![S_IFLNK, S_IFDIR, S_IFREG].include?(@mode & S_IFMT)
          raise RuntimeError, "unknown type for directory entry"
        end
      end

      # Returns :link, :directory or :file according to the mode bits.
      def type
        case @mode & S_IFMT
        when S_IFLNK
          @type = :link
        when S_IFDIR
          @type = :directory
        when S_IFREG
          @type = :file
        else
          raise RuntimeError, "unknown type for directory entry"
        end
      end

      # Replaces the file-type bits of the mode; permission bits are kept.
      def type=(type)
        case type
        when :link
          @mode = (@mode & ~S_IFMT) | S_IFLNK
        when :directory
          @mode = (@mode & ~S_IFMT) | S_IFDIR
        when :file
          @mode = (@mode & ~S_IFMT) | S_IFREG
        else
          raise RuntimeError, "invalid type"
        end
        @type = type
      end

      def raw
        "%o %s\0%s" % [@mode, @name, [@sha1].pack("H*")]
      end
    end

    class Tree < Object
      attr_accessor :entry

      def self.from_raw(rawobject, repository = nil)
        entries = []
        rawobject.content.scan(/\d+ .*?\0.{20}/m) do |raw|
          entries << DirectoryEntry.new(raw)
        end
        new(entries, repository)
      end

      def initialize(entries = [], repository = nil)
        @entry = entries
        @repository = repository
      end

      def type
        :tree
      end

      def raw_content
        # TODO: sort correctly
        #@entry.sort { |a,b| a.name <=> b.name }.
        @entry.map { |e| e.raw }.join
      end
    end

    class Commit < Object
      attr_accessor :author, :committer, :tree, :parent, :message

      # Raises RuntimeError when tree, author or committer is missing.
      # Unknown headers are reported with `warn` and otherwise ignored.
      def self.from_raw(rawobject, repository = nil)
        parent = []
        tree = author = committer = nil

        headers, message = rawobject.content.split(/\n\n/, 2)
        headers = headers.to_s.split(/\n/).map { |header| header.split(/ /, 2) }
        headers.each do |key, value|
          case key
          when "tree"
            tree = value
          when "parent"
            parent.push(value)
          when "author"
            author = UserInfo.new(value)
          when "committer"
            committer = UserInfo.new(value)
          else
            warn "unknown header '%s' in commit %s" % \
              [key, rawobject.sha1.unpack("H*")[0]]
          end
        end
        unless tree && author && committer
          raise RuntimeError, "incomplete raw commit object"
        end
        new(tree, parent, author, committer, message, repository)
      end

      def initialize(tree, parent, author, committer, message, repository = nil)
        @tree = tree
        @author = author
        @parent = parent
        @committer = committer
        @message = message
        @repository = repository
      end

      def type
        :commit
      end

      def raw_content
        "tree %s\n%sauthor %s\ncommitter %s\n\n" % [
          @tree,
          @parent.map { |i| "parent %s\n" % i }.join,
          @author, @committer] + @message.to_s
      end
    end

    class Tag < Object
      # `type` is writable only: it stores the type of the tagged object,
      # while the reader below reports this object's own type (:tag).
      attr_accessor :object, :tag, :tagger, :message
      attr_writer :type

      # Raises RuntimeError when object, type, tag or tagger is missing or
      # the type is not one of blob/tree/commit/tag.
      def self.from_raw(rawobject, repository = nil)
        # Declared up front: variables first assigned inside the block below
        # would not be visible after it.
        object = type = tag = tagger = nil

        headers, message = rawobject.content.split(/\n\n/, 2)
        headers = headers.to_s.split(/\n/).map { |header| header.split(/ /, 2) }
        headers.each do |key, value|
          case key
          when "object"
            object = value
          when "type"
            if !["blob", "tree", "commit", "tag"].include?(value)
              raise RuntimeError, "invalid type in tag"
            end
            type = value.to_sym
          when "tag"
            tag = value
          when "tagger"
            tagger = UserInfo.new(value)
          else
            warn "unknown header '%s' in tag %s" % \
              [key, rawobject.sha1.unpack("H*")[0]]
          end
        end
        # Checked once all headers were seen, not after each one.
        unless object && type && tag && tagger
          raise RuntimeError, "incomplete raw tag object"
        end

        parsed = new(object, type, tag, tagger, repository)
        parsed.message = message
        parsed
      end

      def initialize(object, type, tag, tagger, repository = nil)
        @object = object
        @type = type
        @tag = tag
        @tagger = tagger
        @repository = repository
      end

      def raw_content
        "object %s\ntype %s\ntag %s\ntagger %s\n\n" % \
          [@object, @type, @tag, @tagger] + @message.to_s
      end

      def type
        :tag
      end
    end

  end
end

# ---------------------------------------------------------------------------
# lib/git/raw/git.rb
# ---------------------------------------------------------------------------
module Git
  # Object lookup for one git directory: pack files under objects/pack plus
  # loose objects under objects/.
  class Repository
    def initialize(git_dir)
      @git_dir = git_dir
      @loose = Raw::Internal::LooseStorage.new(git_path("objects"))
      @packs = []
      initpacks
    end

    def git_path(path)
      "#{@git_dir}/#{path}"
    end

    # sha1 - hex object name.
    # Returns a Raw::Blob/Tree/Commit/Tag, or nil when the object is unknown.
    def get_object_by_sha1(sha1)
      raw = get_raw_object_by_sha1(sha1)
      return nil if !raw
      Raw::Object.from_raw(raw, self)
    end

    # sha1 - hex object name.
    # Returns a Raw::Internal::RawObject, or nil when the object is unknown.
    def get_raw_object_by_sha1(sha1)
      sha1 = [sha1].pack("H*")

      # try packs
      found = find_in_packs(sha1)
      return found if found

      # try loose storage
      found = @loose[sha1]
      return found if found

      # try packs again, maybe the object got packed in the meantime
      initpacks
      find_in_packs(sha1)
    end

    # Closes all open packs and re-opens every *.pack found in objects/pack.
    # A repository without that directory simply has no packs.
    def initpacks
      @packs.each { |pack| pack.close }
      @packs = []

      pack_dir = git_path("objects/pack/")
      return unless File.directory?(pack_dir)

      Dir.open(pack_dir) do |dir|
        dir.each do |entry|
          next unless entry =~ /\.pack$/i
          @packs << Raw::Internal::PackStorage.new(git_path("objects/pack/" + entry))
        end
      end
    end

    # Returns the first hit for a binary sha1 among the open packs, or nil.
    def find_in_packs(sha1)
      @packs.each do |pack|
        found = pack[sha1]
        return found if found
      end
      nil
    end
    private :find_in_packs
  end
end

# ---------------------------------------------------------------------------
# Command-line driver (merged from loose.rb and pack.rb): for each argument,
# a directory is listed as a loose object store, anything else as a pack
# file.  Prints "<hex sha1> <type>" per object.
# ---------------------------------------------------------------------------
if $0 == __FILE__
  require 'find'
  ARGV.each do |path|
    if File.directory?(path)
      storage = Git::Raw::Internal::LooseStorage.new(path)
      Find.find(path) do |p|
        m = %r{/([0-9a-f]{2})/([0-9a-f]{38})$}.match(p)
        next if !m
        obj = storage[[m[1] + m[2]].pack("H*")]
        puts "%s %s" % [obj.sha1.unpack("H*")[0], obj.type]
      end
    else
      storage = Git::Raw::Internal::PackStorage.new(path)
      storage.each_sha1 do |sha1|
        obj = storage[sha1]
        puts "%s %s" % [obj.sha1.unpack('H*')[0], obj.type]
      end
      storage.close
    end
  end
end