summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/git/raw/git.rb63
-rw-r--r--lib/git/raw/internal/loose.rb96
-rw-r--r--lib/git/raw/internal/mmap.rb44
-rw-r--r--lib/git/raw/internal/object.rb23
-rw-r--r--lib/git/raw/internal/pack.rb240
-rw-r--r--lib/git/raw/object.rb268
-rw-r--r--tests/units/test_raw_internals.rb15
7 files changed, 749 insertions, 0 deletions
diff --git a/lib/git/raw/git.rb b/lib/git/raw/git.rb
new file mode 100644
index 0000000..004e795
--- /dev/null
+++ b/lib/git/raw/git.rb
@@ -0,0 +1,63 @@
+require 'git/object'
+require 'git/raw/internal/loose'
+require 'git/raw/internal/object'
+require 'git/raw/internal/pack'
+require 'git/raw/object'
+
+module Git
+ class Repository
+ def initialize(git_dir)
+ @git_dir = git_dir
+ @loose = Internal::LooseStorage.new(git_path("objects"))
+ @packs = []
+ initpacks
+ end
+
+ def git_path(path)
+ return "#@git_dir/#{path}"
+ end
+
+ def get_object_by_sha1(sha1)
+ r = get_raw_object_by_sha1(sha1)
+ return nil if !r
+ Object.from_raw(r, self)
+ end
+
+ def get_raw_object_by_sha1(sha1)
+ sha1 = [sha1].pack("H*")
+
+ # try packs
+ @packs.each do |pack|
+ o = pack[sha1]
+ return o if o
+ end
+
+ # try loose storage
+ o = @loose[sha1]
+ return o if o
+
+ # try packs again, maybe the object got packed in the meantime
+ initpacks
+ @packs.each do |pack|
+ o = pack[sha1]
+ return o if o
+ end
+
+ nil
+ end
+
+ def initpacks
+ @packs.each do |pack|
+ pack.close
+ end
+ @packs = []
+ Dir.open(git_path("objects/pack/")) do |dir|
+ dir.each do |entry|
+ if entry =~ /\.pack$/i
+ @packs << Git::Internal::PackStorage.new(git_path("objects/pack/" \
+ + entry))
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/git/raw/internal/loose.rb b/lib/git/raw/internal/loose.rb
new file mode 100644
index 0000000..0e4020c
--- /dev/null
+++ b/lib/git/raw/internal/loose.rb
@@ -0,0 +1,96 @@
+require 'zlib'
+require 'digest/sha1'
+
+require 'git/raw/internal/object'
+
+module Git module Raw module Internal
+ class LooseObjectError < StandardError
+ end
+
+ class LooseStorage
+ def initialize(directory)
+ @directory = directory
+ end
+
+ def [](sha1)
+ sha1 = sha1.unpack("H*")[0]
+
+ path = @directory+'/'+sha1[0...2]+'/'+sha1[2..40]
+ begin
+ get_raw_object(File.read(path))
+ rescue Errno::ENOENT
+ nil
+ end
+ end
+
+ def get_raw_object(buf)
+ if buf.length < 2
+ raise LooseObjectError, "object file too small"
+ end
+
+ if legacy_loose_object?(buf)
+ content = Zlib::Inflate.inflate(buf)
+ header, content = content.split(/\0/, 2)
+ if !header || !content
+ raise LooseObjectError, "invalid object header"
+ end
+ type, size = header.split(/ /, 2)
+ if !%w(blob tree commit tag).include?(type) || size !~ /^\d+$/
+ raise LooseObjectError, "invalid object header"
+ end
+ type = type.to_sym
+ size = size.to_i
+ else
+ type, size, used = unpack_object_header_gently(buf)
+ content = Zlib::Inflate.inflate(buf[used..-1])
+ end
+ raise LooseObjectError, "size mismatch" if content.length != size
+ return RawObject.new(type, content)
+ end
+
+ # private
+ def unpack_object_header_gently(buf)
+ used = 0
+ c = buf[used]
+ used += 1
+
+ type = (c >> 4) & 7;
+ size = c & 15;
+ shift = 4;
+ while c & 0x80 != 0
+ if buf.length <= used
+ raise LooseObjectError, "object file too short"
+ end
+ c = buf[used]
+ used += 1
+
+ size += (c & 0x7f) << shift
+ shift += 7
+ end
+ type = OBJ_TYPES[type]
+ if ![:blob, :tree, :commit, :tag].include?(type)
+ raise LooseObjectError, "invalid loose object type"
+ end
+ return [type, size, used]
+ end
+ private :unpack_object_header_gently
+
+ def legacy_loose_object?(buf)
+ word = (buf[0] << 8) + buf[1]
+ buf[0] == 0x78 && word % 31 == 0
+ end
+ private :legacy_loose_object?
+ end
+end end
+
+if $0 == __FILE__
+ require 'find'
+ ARGV.each do |path|
+ storage = Git::Internal::LooseStorage.new(path)
+ Find.find(path) do |p|
+ next if !/\/([0-9a-f]{2})\/([0-9a-f]{38})$/.match(p)
+ obj = storage[[$1+$2].pack("H*")]
+ puts "%s %s" % [obj.sha1.unpack("H*")[0], obj.type]
+ end
+ end
+end
diff --git a/lib/git/raw/internal/mmap.rb b/lib/git/raw/internal/mmap.rb
new file mode 100644
index 0000000..d7390b1
--- /dev/null
+++ b/lib/git/raw/internal/mmap.rb
@@ -0,0 +1,44 @@
+begin
+ require 'mmap'
+rescue LoadError
+
+module Git module Raw module Internal
+ class Mmap
+ def initialize(file)
+ @file = file
+ @offset = nil
+ end
+
+ def unmap
+ @file = nil
+ end
+
+ def [](*idx)
+ idx = idx[0] if idx.length == 1
+ case idx
+ when Range
+ offset = idx.first
+ len = idx.last - idx.first + idx.exclude_end? ? 0 : 1
+ when Fixnum
+ offset = idx
+ len = nil
+ when Array
+ offset, len = idx
+ else
+ raise RuntimeError, "invalid index param: #{idx.class}"
+ end
+ if @offset != offset
+ @file.seek(offset)
+ end
+ @offset = offset + len ? len : 1
+ if not len
+ @file.read(1)[0]
+ else
+ @file.read(len)
+ end
+ end
+ end
+end end
+
+end # rescue LoadError
+
diff --git a/lib/git/raw/internal/object.rb b/lib/git/raw/internal/object.rb
new file mode 100644
index 0000000..b81df2b
--- /dev/null
+++ b/lib/git/raw/internal/object.rb
@@ -0,0 +1,23 @@
+require 'digest/sha1'
+
+module Git module Raw module Internal
+ OBJ_NONE = 0
+ OBJ_COMMIT = 1
+ OBJ_TREE = 2
+ OBJ_BLOB = 3
+ OBJ_TAG = 4
+
+ OBJ_TYPES = [nil, :commit, :tree, :blob, :tag].freeze
+
+ class RawObject
+ attr_accessor :type, :content
+ def initialize(type, content)
+ @type = type
+ @content = content
+ end
+
+ def sha1
+ Digest::SHA1.digest("%s %d\0" % [@type, @content.length] + @content)
+ end
+ end
+end end
diff --git a/lib/git/raw/internal/pack.rb b/lib/git/raw/internal/pack.rb
new file mode 100644
index 0000000..edfeada
--- /dev/null
+++ b/lib/git/raw/internal/pack.rb
@@ -0,0 +1,240 @@
+require 'zlib'
+require 'git/raw/internal/object'
+require 'git/raw/internal/mmap'
+
+module Git module Raw module Internal
+ class PackFormatError < StandardError
+ end
+
+ class PackStorage
+ OBJ_OFS_DELTA = 6
+ OBJ_REF_DELTA = 7
+
+ FanOutCount = 256
+ SHA1Size = 20
+ IdxOffsetSize = 4
+ OffsetSize = 4
+ OffsetStart = FanOutCount * IdxOffsetSize
+ SHA1Start = OffsetStart + OffsetSize
+ EntrySize = OffsetSize + SHA1Size
+
+ def initialize(file)
+ if file =~ /\.idx$/
+ file = file[0...-3] + 'pack'
+ end
+
+ @name = file
+ @packfile = File.open(file)
+ @idxfile = File.open(file[0...-4]+'idx')
+ @idx = Mmap.new(@idxfile)
+
+ @offsets = [0]
+ FanOutCount.times do |i|
+ pos = @idx[i * IdxOffsetSize,IdxOffsetSize].unpack('N')[0]
+ if pos < @offsets[i]
+ raise PackFormatError, "pack #@name has discontinuous index #{i}"
+ end
+ @offsets << pos
+ end
+
+ @size = @offsets[-1]
+ end
+
+ def close
+ @packfile.close
+ @idx.unmap
+ @idxfile.close
+ end
+
+ def [](sha1)
+ offset = find_object(sha1)
+ return nil if !offset
+ return parse_object(offset)
+ end
+
+ def each_entry
+ pos = OffsetStart
+ @size.times do
+ offset = @idx[pos,OffsetSize].unpack('N')[0]
+ sha1 = @idx[pos+OffsetSize,SHA1Size]
+ pos += EntrySize
+ yield sha1, offset
+ end
+ end
+
+ def each_sha1
+ # unpacking the offset is quite expensive, so
+ # we avoid using #each
+ pos = SHA1Start
+ @size.times do
+ sha1 = @idx[pos,SHA1Size]
+ pos += EntrySize
+ yield sha1
+ end
+ end
+
+ def find_object(sha1)
+ slot = sha1[0]
+ first, last = @offsets[slot,2]
+ while first < last
+ mid = (first + last) / 2
+ midsha1 = @idx[SHA1Start + mid * EntrySize,SHA1Size]
+ cmp = midsha1 <=> sha1
+
+ if cmp < 0
+ first = mid + 1
+ elsif cmp > 0
+ last = mid
+ else
+ pos = OffsetStart + mid * EntrySize
+ offset = @idx[pos,OffsetSize].unpack('N')[0]
+ return offset
+ end
+ end
+
+ nil
+ end
+ private :find_object
+
+ def parse_object(offset)
+ data, type = unpack_object(offset)
+ RawObject.new(OBJ_TYPES[type], data)
+ end
+ protected :parse_object
+
+ def unpack_object(offset)
+ obj_offset = offset
+ @packfile.seek(offset)
+
+ c = @packfile.read(1)[0]
+ size = c & 0xf
+ type = (c >> 4) & 7
+ shift = 4
+ offset += 1
+ while c & 0x80 != 0
+ c = @packfile.read(1)[0]
+ size |= ((c & 0x7f) << shift)
+ shift += 7
+ offset += 1
+ end
+
+ case type
+ when OBJ_OFS_DELTA, OBJ_REF_DELTA
+ data, type = unpack_deltified(type, offset, obj_offset, size)
+ when OBJ_COMMIT, OBJ_TREE, OBJ_BLOB, OBJ_TAG
+ data = unpack_compressed(offset, size)
+ else
+ raise PackFormatError, "invalid type #{type}"
+ end
+ [data, type]
+ end
+ private :unpack_object
+
+ def unpack_deltified(type, offset, obj_offset, size)
+ @packfile.seek(offset)
+ data = @packfile.read(SHA1Size)
+
+ if type == OBJ_OFS_DELTA
+ i = 0
+ c = data[i]
+ base_offset = c & 0x7f
+ while c & 0x80 != 0
+ c = data[i += 1]
+ base_offset += 1
+ base_offset <<= 7
+ base_offset |= c & 0x7f
+ end
+ base_offset = obj_offset - base_offset
+ offset += i + 1
+ else
+ base_offset = find_object(data)
+ offset += SHA1Size
+ end
+
+ base, type = unpack_object(base_offset)
+ delta = unpack_compressed(offset, size)
+ [patch_delta(base, delta), type]
+ end
+ private :unpack_deltified
+
+ def unpack_compressed(offset, destsize)
+ outdata = ""
+ @packfile.seek(offset)
+ zstr = Zlib::Inflate.new
+ while outdata.size < destsize
+ indata = @packfile.read(4096)
+ if indata.size == 0
+ raise PackFormatError, 'error reading pack data'
+ end
+ outdata += zstr.inflate(indata)
+ end
+ if outdata.size > destsize
+ raise PackFormatError, 'error reading pack data'
+ end
+ zstr.close
+ outdata
+ end
+ private :unpack_compressed
+
+ def patch_delta(base, delta)
+ src_size, pos = patch_delta_header_size(delta, 0)
+ if src_size != base.size
+ raise PackFormatError, 'invalid delta data'
+ end
+
+ dest_size, pos = patch_delta_header_size(delta, pos)
+ dest = ""
+ while pos < delta.size
+ c = delta[pos]
+ pos += 1
+ if c & 0x80 != 0
+ pos -= 1
+ cp_off = cp_size = 0
+ cp_off = delta[pos += 1] if c & 0x01 != 0
+ cp_off |= delta[pos += 1] << 8 if c & 0x02 != 0
+ cp_off |= delta[pos += 1] << 16 if c & 0x04 != 0
+ cp_off |= delta[pos += 1] << 24 if c & 0x08 != 0
+ cp_size = delta[pos += 1] if c & 0x10 != 0
+ cp_size |= delta[pos += 1] << 8 if c & 0x20 != 0
+ cp_size |= delta[pos += 1] << 16 if c & 0x40 != 0
+ cp_size = 0x10000 if cp_size == 0
+ pos += 1
+ dest += base[cp_off,cp_size]
+ elsif c != 0
+ dest += delta[pos,c]
+ pos += c
+ else
+ raise PackFormatError, 'invalid delta data'
+ end
+ end
+ dest
+ end
+ private :patch_delta
+
+ def patch_delta_header_size(delta, pos)
+ size = 0
+ shift = 0
+ begin
+ c = delta[pos]
+ if c == nil
+ raise PackFormatError, 'invalid delta header'
+ end
+ pos += 1
+ size |= (c & 0x7f) << shift
+ shift += 7
+ end while c & 0x80 != 0
+ [size, pos]
+ end
+ private :patch_delta_header_size
+ end
+end end
+
+if $0 == __FILE__
+ ARGV.each do |path|
+ storage = Git::Internal::PackStorage.new(path)
+ storage.each_sha1 do |sha1|
+ obj = storage[sha1]
+ puts "%s %s" % [obj.sha1.unpack('H*'), obj.type]
+ end
+ end
+end
diff --git a/lib/git/raw/object.rb b/lib/git/raw/object.rb
new file mode 100644
index 0000000..7e3e618
--- /dev/null
+++ b/lib/git/raw/object.rb
@@ -0,0 +1,268 @@
+require 'digest/sha1'
+
+module Git
+ module Raw
+
+ # class for author/committer/tagger lines
+ class UserInfo
+ attr_accessor :name, :email, :date, :offset
+
+ def initialize(str)
+ m = /^(.*?) <(.*)> (\d+) ([+-])0*(\d+?)$/.match(str)
+ if !m
+ raise RuntimeError, "invalid %s header in commit" % key
+ end
+ @name = m[1]
+ @email = m[2]
+ @date = Time.at(Integer(m[3]))
+ @offset = (m[4] == "-" ? -1 : 1)*Integer(m[5])
+ end
+
+ def to_s
+ "%s <%s> %s %+05d" % [@name, @email, @date.to_i, @offset]
+ end
+ end
+
+ # base class for all git objects (blob, tree, commit, tag)
+ class Object
+ attr_accessor :repository
+
+ def Object.from_raw(rawobject, repository = nil)
+ case rawobject.type
+ when :blob
+ return Blob.from_raw(rawobject, repository)
+ when :tree
+ return Tree.from_raw(rawobject, repository)
+ when :commit
+ return Commit.from_raw(rawobject, repository)
+ when :tag
+ return Tag.from_raw(rawobject, repository)
+ else
+ raise RuntimeError, "got invalid object-type"
+ end
+ end
+
+ def initialize
+ raise NotImplemented, "abstract class"
+ end
+
+ def type
+ raise NotImplemented, "abstract class"
+ end
+
+ def raw_content
+ raise NotImplemented, "abstract class"
+ end
+
+ def sha1
+ Digest::SHA1.hexdigest("%s %d\0" % \
+ [self.type, self.raw_content.length] + \
+ self.raw_content)
+ end
+ end
+
+ class Blob < Object
+ attr_accessor :content
+
+ def self.from_raw(rawobject, repository)
+ new(rawobject.content)
+ end
+
+ def initialize(content, repository=nil)
+ @content = content
+ @repository = repository
+ end
+
+ def type
+ :blob
+ end
+
+ def raw_content
+ @content
+ end
+ end
+
+ class DirectoryEntry
+ S_IFMT = 00170000
+ S_IFLNK = 0120000
+ S_IFREG = 0100000
+ S_IFDIR = 0040000
+
+ attr_accessor :mode, :name, :sha1
+ def initialize(buf)
+ m = /^(\d+) (.*)\0(.{20})$/m.match(buf)
+ if !m
+ raise RuntimeError, "invalid directory entry"
+ end
+ @mode = 0
+ m[1].each_byte do |i|
+ @mode = (@mode << 3) | (i-'0'[0])
+ end
+ @name = m[2]
+ @sha1 = m[3].unpack("H*")[0]
+
+ if ![S_IFLNK, S_IFDIR, S_IFREG].include?(@mode & S_IFMT)
+ raise RuntimeError, "unknown type for directory entry"
+ end
+ end
+
+ def type
+ case @mode & S_IFMT
+ when S_IFLNK
+ @type = :link
+ when S_IFDIR
+ @type = :directory
+ when S_IFREG
+ @type = :file
+ else
+ raise RuntimeError, "unknown type for directory entry"
+ end
+ end
+
+ def type=(type)
+ case @type
+ when :link
+ @mode = (@mode & ~S_IFMT) | S_IFLNK
+ when :directory
+ @mode = (@mode & ~S_IFMT) | S_IFDIR
+ when :file
+ @mode = (@mode & ~S_IFMT) | S_IFREG
+ else
+ raise RuntimeError, "invalid type"
+ end
+ end
+
+ def raw
+ "%o %s\0%s" % [@mode, @name, [@sha1].pack("H*")]
+ end
+ end
+
+ class Tree < Object
+ attr_accessor :entry
+
+ def self.from_raw(rawobject, repository=nil)
+ entries = []
+ rawobject.content.scan(/\d+ .*?\0.{20}/m) do |raw|
+ entries << DirectoryEntry.new(raw)
+ end
+ new(entries, repository)
+ end
+
+ def initialize(entries=[], repository = nil)
+ @entry = entries
+ @repository = repository
+ end
+
+ def type
+ :tree
+ end
+
+ def raw_content
+ # TODO: sort correctly
+ #@entry.sort { |a,b| a.name <=> b.name }.
+ @entry.
+ collect { |e| e.raw }.join
+ end
+ end
+
+ class Commit < Object
+ attr_accessor :author, :committer, :tree, :parent, :message
+
+ def self.from_raw(rawobject, repository=nil)
+ parent = []
+ tree = author = committer = nil
+
+ headers, message = rawobject.content.split(/\n\n/, 2)
+ headers = headers.split(/\n/).map { |header| header.split(/ /, 2) }
+ headers.each do |key, value|
+ case key
+ when "tree"
+ tree = value
+ when "parent"
+ parent.push(value)
+ when "author"
+ author = UserInfo.new(value)
+ when "committer"
+ committer = UserInfo.new(value)
+ else
+ warn "unknown header '%s' in commit %s" % \
+ [key, rawobject.sha1.unpack("H*")[0]]
+ end
+ end
+ if not tree && author && committer
+ raise RuntimeError, "incomplete raw commit object"
+ end
+ new(tree, parent, author, committer, message, repository)
+ end
+
+ def initialize(tree, parent, author, committer, message, repository=nil)
+ @tree = tree
+ @author = author
+ @parent = parent
+ @committer = committer
+ @message = message
+ @repository = repository
+ end
+
+ def type
+ :commit
+ end
+
+ def raw_content
+ "tree %s\n%sauthor %s\ncommitter %s\n\n" % [
+ @tree,
+ @parent.collect { |i| "parent %s\n" % i }.join,
+ @author, @committer] + @message
+ end
+ end
+
+ class Tag < Object
+ attr_accessor :object, :type, :tag, :tagger, :message
+
+ def self.from_raw(rawobject, repository=nil)
+ headers, message = rawobject.content.split(/\n\n/, 2)
+ headers = headers.split(/\n/).map { |header| header.split(/ /, 2) }
+ headers.each do |key, value|
+ case key
+ when "object"
+ object = value
+ when "type"
+ if !["blob", "tree", "commit", "tag"].include?(value)
+ raise RuntimeError, "invalid type in tag"
+ end
+ type = value.to_sym
+ when "tag"
+ tag = value
+ when "tagger"
+ tagger = UserInfo.new(value)
+ else
+ warn "unknown header '%s' in tag" % \
+ [key, rawobject.sha1.unpack("H*")[0]]
+ end
+ if not object && type && tag && tagger
+ raise RuntimeError, "incomplete raw tag object"
+ end
+ end
+ new(object, type, tag, tagger, repository)
+ end
+
+ def initialize(object, type, tag, tagger, repository=nil)
+ @object = object
+ @type = type
+ @tag = tag
+ @tagger = tagger
+ @repository = repository
+ end
+
+ def raw_content
+ "object %s\ntype %s\ntag %s\ntagger %s\n\n" % \
+ [@object, @type, @tag, @tagger] + @message
+ end
+
+ def type
+ :tag
+ end
+ end
+
+end
+end \ No newline at end of file
diff --git a/tests/units/test_raw_internals.rb b/tests/units/test_raw_internals.rb
new file mode 100644
index 0000000..b135e52
--- /dev/null
+++ b/tests/units/test_raw_internals.rb
@@ -0,0 +1,15 @@
+#!/usr/bin/env ruby
+
+require File.dirname(__FILE__) + '/../test_helper'
+
+# Test case for the raw internals; it currently contains no assertions.
+class TestRawInternals < Test::Unit::TestCase
+
+ # Opens a Git handle on @wdir before each test.
+ # NOTE(review): set_file_paths and @wdir presumably come from test_helper — confirm.
+ def setup
+ set_file_paths
+ @git = Git.open(@wdir)
+ end
+
+ # TODO: empty placeholder, nothing is exercised yet.
+ def test_raw_log
+ end
+
+end \ No newline at end of file