upload
38
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/__init__.py
Normal file
@@ -0,0 +1,38 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Initialize the object database module"""

import sys
import os

#{ Initialization


def _init_externals():
    """Initialize external projects by putting them into the path"""
    if 'PYOXIDIZER' not in os.environ:
        where = os.path.join(os.path.dirname(__file__), 'ext', 'smmap')
        if os.path.exists(where):
            sys.path.append(where)

        import smmap
        del smmap
        # END handle imports

#} END initialization


_init_externals()

__author__ = "Sebastian Thiel"
__contact__ = "byronimo@gmail.com"
__homepage__ = "https://github.com/gitpython-developers/gitdb"
version_info = (4, 0, 10)
__version__ = '.'.join(str(i) for i in version_info)


# default imports
from gitdb.base import *
from gitdb.db import *
from gitdb.stream import *
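A minimal usage sketch of the package entry point above; the objects path is a placeholder, and GitDB becomes available through the wildcard imports.

import gitdb

print(gitdb.__version__)                          # "4.0.10", built from version_info
odb = gitdb.GitDB("/path/to/repo/.git/objects")   # placeholder path; lookups need a real objects directory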
315
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/base.py
Normal file
@@ -0,0 +1,315 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
"""Module with basic data structures - they are designed to be lightweight and fast"""
|
||||
from gitdb.util import bin_to_hex
|
||||
|
||||
from gitdb.fun import (
|
||||
type_id_to_type_map,
|
||||
type_to_type_id_map
|
||||
)
|
||||
|
||||
__all__ = ('OInfo', 'OPackInfo', 'ODeltaPackInfo',
|
||||
'OStream', 'OPackStream', 'ODeltaPackStream',
|
||||
'IStream', 'InvalidOInfo', 'InvalidOStream')
|
||||
|
||||
#{ ODB Bases
|
||||
|
||||
|
||||
class OInfo(tuple):
|
||||
|
||||
"""Carries information about an object in an ODB, providing information
|
||||
about the binary sha of the object, the type_string as well as the uncompressed size
|
||||
in bytes.
|
||||
|
||||
It can be accessed using tuple notation and using attribute access notation::
|
||||
|
||||
assert dbi[0] == dbi.binsha
|
||||
assert dbi[1] == dbi.type
|
||||
assert dbi[2] == dbi.size
|
||||
|
||||
The type is designed to be as lightweight as possible."""
|
||||
__slots__ = tuple()
|
||||
|
||||
def __new__(cls, sha, type, size):
|
||||
return tuple.__new__(cls, (sha, type, size))
|
||||
|
||||
def __init__(self, *args):
|
||||
tuple.__init__(self)
|
||||
|
||||
#{ Interface
|
||||
@property
|
||||
def binsha(self):
|
||||
""":return: our sha as binary, 20 bytes"""
|
||||
return self[0]
|
||||
|
||||
@property
|
||||
def hexsha(self):
|
||||
""":return: our sha, hex encoded, 40 bytes"""
|
||||
return bin_to_hex(self[0])
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
return self[1]
|
||||
|
||||
@property
|
||||
def type_id(self):
|
||||
return type_to_type_id_map[self[1]]
|
||||
|
||||
@property
|
||||
def size(self):
|
||||
return self[2]
|
||||
#} END interface
|
||||
|
||||
|
||||
class OPackInfo(tuple):
|
||||
|
||||
"""As OInfo, but provides a type_id property to retrieve the numerical type id, and
|
||||
does not include a sha.
|
||||
|
||||
Additionally, the pack_offset is the absolute offset into the packfile at which
|
||||
all object information is located. The data_offset property points to the absolute
|
||||
location in the pack at which that actual data stream can be found."""
|
||||
__slots__ = tuple()
|
||||
|
||||
def __new__(cls, packoffset, type, size):
|
||||
return tuple.__new__(cls, (packoffset, type, size))
|
||||
|
||||
def __init__(self, *args):
|
||||
tuple.__init__(self)
|
||||
|
||||
#{ Interface
|
||||
|
||||
@property
|
||||
def pack_offset(self):
|
||||
return self[0]
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
return type_id_to_type_map[self[1]]
|
||||
|
||||
@property
|
||||
def type_id(self):
|
||||
return self[1]
|
||||
|
||||
@property
|
||||
def size(self):
|
||||
return self[2]
|
||||
|
||||
#} END interface
|
||||
|
||||
|
||||
class ODeltaPackInfo(OPackInfo):
|
||||
|
||||
"""Adds delta specific information,
|
||||
Either the 20 byte sha which points to some object in the database,
|
||||
or the negative offset from the pack_offset, so that pack_offset - delta_info yields
|
||||
the pack offset of the base object"""
|
||||
__slots__ = tuple()
|
||||
|
||||
def __new__(cls, packoffset, type, size, delta_info):
|
||||
return tuple.__new__(cls, (packoffset, type, size, delta_info))
|
||||
|
||||
#{ Interface
|
||||
@property
|
||||
def delta_info(self):
|
||||
return self[3]
|
||||
#} END interface
|
||||
|
||||
|
||||
class OStream(OInfo):
|
||||
|
||||
"""Base for object streams retrieved from the database, providing additional
|
||||
information about the stream.
|
||||
Generally, ODB streams are read-only as objects are immutable"""
|
||||
__slots__ = tuple()
|
||||
|
||||
def __new__(cls, sha, type, size, stream, *args, **kwargs):
|
||||
"""Helps with the initialization of subclasses"""
|
||||
return tuple.__new__(cls, (sha, type, size, stream))
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
tuple.__init__(self)
|
||||
|
||||
#{ Stream Reader Interface
|
||||
|
||||
def read(self, size=-1):
|
||||
return self[3].read(size)
|
||||
|
||||
@property
|
||||
def stream(self):
|
||||
return self[3]
|
||||
|
||||
#} END stream reader interface
|
||||
|
||||
|
||||
class ODeltaStream(OStream):
|
||||
|
||||
"""Uses size info of its stream, delaying reads"""
|
||||
|
||||
def __new__(cls, sha, type, size, stream, *args, **kwargs):
|
||||
"""Helps with the initialization of subclasses"""
|
||||
return tuple.__new__(cls, (sha, type, size, stream))
|
||||
|
||||
#{ Stream Reader Interface
|
||||
|
||||
@property
|
||||
def size(self):
|
||||
return self[3].size
|
||||
|
||||
#} END stream reader interface
|
||||
|
||||
|
||||
class OPackStream(OPackInfo):
|
||||
|
||||
"""Next to pack object information, a stream outputting an undeltified base object
|
||||
is provided"""
|
||||
__slots__ = tuple()
|
||||
|
||||
def __new__(cls, packoffset, type, size, stream, *args):
|
||||
"""Helps with the initialization of subclasses"""
|
||||
return tuple.__new__(cls, (packoffset, type, size, stream))
|
||||
|
||||
#{ Stream Reader Interface
|
||||
def read(self, size=-1):
|
||||
return self[3].read(size)
|
||||
|
||||
@property
|
||||
def stream(self):
|
||||
return self[3]
|
||||
#} END stream reader interface
|
||||
|
||||
|
||||
class ODeltaPackStream(ODeltaPackInfo):
|
||||
|
||||
"""Provides a stream outputting the uncompressed offset delta information"""
|
||||
__slots__ = tuple()
|
||||
|
||||
def __new__(cls, packoffset, type, size, delta_info, stream):
|
||||
return tuple.__new__(cls, (packoffset, type, size, delta_info, stream))
|
||||
|
||||
#{ Stream Reader Interface
|
||||
def read(self, size=-1):
|
||||
return self[4].read(size)
|
||||
|
||||
@property
|
||||
def stream(self):
|
||||
return self[4]
|
||||
#} END stream reader interface
|
||||
|
||||
|
||||
class IStream(list):
|
||||
|
||||
"""Represents an input content stream to be fed into the ODB. It is mutable to allow
|
||||
the ODB to record information about the operation's outcome right in this instance.
|
||||
|
||||
It provides interfaces for the OStream and a StreamReader to allow the instance
|
||||
to blend in without prior conversion.
|
||||
|
||||
The only method your content stream must support is 'read'"""
|
||||
__slots__ = tuple()
|
||||
|
||||
def __new__(cls, type, size, stream, sha=None):
|
||||
return list.__new__(cls, (sha, type, size, stream, None))
|
||||
|
||||
def __init__(self, type, size, stream, sha=None):
|
||||
list.__init__(self, (sha, type, size, stream, None))
|
||||
|
||||
#{ Interface
|
||||
@property
|
||||
def hexsha(self):
|
||||
""":return: our sha, hex encoded, 40 bytes"""
|
||||
return bin_to_hex(self[0])
|
||||
|
||||
def _error(self):
|
||||
""":return: the error that occurred when processing the stream, or None"""
|
||||
return self[4]
|
||||
|
||||
def _set_error(self, exc):
|
||||
"""Set this input stream to the given exc, may be None to reset the error"""
|
||||
self[4] = exc
|
||||
|
||||
error = property(_error, _set_error)
|
||||
|
||||
#} END interface
|
||||
|
||||
#{ Stream Reader Interface
|
||||
|
||||
def read(self, size=-1):
|
||||
"""Implements a simple stream reader interface, passing the read call on
|
||||
to our internal stream"""
|
||||
return self[3].read(size)
|
||||
|
||||
#} END stream reader interface
|
||||
|
||||
#{ interface
|
||||
|
||||
def _set_binsha(self, binsha):
|
||||
self[0] = binsha
|
||||
|
||||
def _binsha(self):
|
||||
return self[0]
|
||||
|
||||
binsha = property(_binsha, _set_binsha)
|
||||
|
||||
def _type(self):
|
||||
return self[1]
|
||||
|
||||
def _set_type(self, type):
|
||||
self[1] = type
|
||||
|
||||
type = property(_type, _set_type)
|
||||
|
||||
def _size(self):
|
||||
return self[2]
|
||||
|
||||
def _set_size(self, size):
|
||||
self[2] = size
|
||||
|
||||
size = property(_size, _set_size)
|
||||
|
||||
def _stream(self):
|
||||
return self[3]
|
||||
|
||||
def _set_stream(self, stream):
|
||||
self[3] = stream
|
||||
|
||||
stream = property(_stream, _set_stream)
|
||||
|
||||
#} END odb info interface
|
||||
|
||||
|
||||
class InvalidOInfo(tuple):
|
||||
|
||||
"""Carries information about a sha identifying an object which is invalid in
|
||||
the queried database. The exception attribute provides more information about
|
||||
the cause of the issue"""
|
||||
__slots__ = tuple()
|
||||
|
||||
def __new__(cls, sha, exc):
|
||||
return tuple.__new__(cls, (sha, exc))
|
||||
|
||||
def __init__(self, sha, exc):
|
||||
tuple.__init__(self, (sha, exc))
|
||||
|
||||
@property
|
||||
def binsha(self):
|
||||
return self[0]
|
||||
|
||||
@property
|
||||
def hexsha(self):
|
||||
return bin_to_hex(self[0])
|
||||
|
||||
@property
|
||||
def error(self):
|
||||
""":return: exception instance explaining the failure"""
|
||||
return self[1]
|
||||
|
||||
|
||||
class InvalidOStream(InvalidOInfo):
|
||||
|
||||
"""Carries information about an invalid ODB stream"""
|
||||
__slots__ = tuple()
|
||||
|
||||
#} END ODB Bases
|
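A short sketch of the tuple/attribute duality described in the OInfo docstring, and of IStream's mutability; the 20-byte sha is a made-up placeholder.

from io import BytesIO
from gitdb.base import IStream, OInfo

info = OInfo(b"\x00" * 20, b"blob", 11)
assert info[0] == info.binsha and info[1] == info.type and info[2] == info.size

istream = IStream(b"blob", 11, BytesIO(b"hello world"))
assert istream.binsha is None        # normally filled in by ObjectDBW.store()
istream.binsha = b"\x00" * 20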
@@ -0,0 +1,4 @@
BYTE_SPACE = b' '
NULL_BYTE = b'\0'
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = NULL_BYTE * 20
@@ -0,0 +1,11 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php

from gitdb.db.base import *
from gitdb.db.loose import *
from gitdb.db.mem import *
from gitdb.db.pack import *
from gitdb.db.git import *
from gitdb.db.ref import *
278
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/db/base.py
Normal file
@@ -0,0 +1,278 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
"""Contains implementations of database retrieveing objects"""
|
||||
from gitdb.util import (
|
||||
join,
|
||||
LazyMixin,
|
||||
hex_to_bin
|
||||
)
|
||||
|
||||
from gitdb.utils.encoding import force_text
|
||||
from gitdb.exc import (
|
||||
BadObject,
|
||||
AmbiguousObjectName
|
||||
)
|
||||
|
||||
from itertools import chain
|
||||
from functools import reduce
|
||||
|
||||
|
||||
__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'CompoundDB', 'CachingDB')
|
||||
|
||||
|
||||
class ObjectDBR:
|
||||
|
||||
"""Defines an interface for object database lookup.
|
||||
Objects are identified by their 20 byte binary sha"""
|
||||
|
||||
def __contains__(self, sha):
|
||||
return self.has_object(sha)
|
||||
|
||||
#{ Query Interface
|
||||
def has_object(self, sha):
|
||||
"""
|
||||
Whether the object identified by the given 20 bytes
|
||||
binary sha is contained in the database
|
||||
|
||||
:return: True if the object identified by the given 20 bytes
|
||||
binary sha is contained in the database"""
|
||||
raise NotImplementedError("To be implemented in subclass")
|
||||
|
||||
def info(self, sha):
|
||||
""" :return: OInfo instance
|
||||
:param sha: bytes binary sha
|
||||
:raise BadObject:"""
|
||||
raise NotImplementedError("To be implemented in subclass")
|
||||
|
||||
def stream(self, sha):
|
||||
""":return: OStream instance
|
||||
:param sha: 20 bytes binary sha
|
||||
:raise BadObject:"""
|
||||
raise NotImplementedError("To be implemented in subclass")
|
||||
|
||||
def size(self):
|
||||
""":return: amount of objects in this database"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def sha_iter(self):
|
||||
"""Return iterator yielding 20 byte shas for all objects in this data base"""
|
||||
raise NotImplementedError()
|
||||
|
||||
#} END query interface
|
||||
|
||||
|
||||
class ObjectDBW:
|
||||
|
||||
"""Defines an interface to create objects in the database"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._ostream = None
|
||||
|
||||
#{ Edit Interface
|
||||
def set_ostream(self, stream):
|
||||
"""
|
||||
Adjusts the stream to which all data should be sent when storing new objects
|
||||
|
||||
:param stream: if not None, the stream to use, if None the default stream
|
||||
will be used.
|
||||
:return: previously installed stream, or None if there was no override
|
||||
:raise TypeError: if the stream doesn't have the supported functionality"""
|
||||
cstream = self._ostream
|
||||
self._ostream = stream
|
||||
return cstream
|
||||
|
||||
def ostream(self):
|
||||
"""
|
||||
Return the output stream
|
||||
|
||||
:return: overridden output stream this instance will write to, or None
|
||||
if it will write to the default stream"""
|
||||
return self._ostream
|
||||
|
||||
def store(self, istream):
|
||||
"""
|
||||
Create a new object in the database
|
||||
:return: the input istream object with its sha set to its corresponding value
|
||||
|
||||
:param istream: IStream compatible instance. If its sha is already set
|
||||
to a value, the object will just be stored in our database format,
|
||||
in which case the input stream is expected to be in object format ( header + contents ).
|
||||
:raise IOError: if data could not be written"""
|
||||
raise NotImplementedError("To be implemented in subclass")
|
||||
|
||||
#} END edit interface
|
||||
|
||||
|
||||
class FileDBBase:
|
||||
|
||||
"""Provides basic facilities to retrieve files of interest, including
|
||||
caching facilities to help mapping hexsha's to objects"""
|
||||
|
||||
def __init__(self, root_path):
|
||||
"""Initialize this instance to look for its files at the given root path
|
||||
All subsequent operations will be relative to this path
|
||||
:raise InvalidDBRoot:
|
||||
**Note:** The base will not perform any accessibility checking as the base
|
||||
might not yet be accessible, but become accessible before the first
|
||||
access."""
|
||||
super().__init__()
|
||||
self._root_path = root_path
|
||||
|
||||
#{ Interface
|
||||
def root_path(self):
|
||||
""":return: path at which this db operates"""
|
||||
return self._root_path
|
||||
|
||||
def db_path(self, rela_path):
|
||||
"""
|
||||
:return: the given relative path relative to our database root, allowing
|
||||
to potentially access datafiles"""
|
||||
return join(self._root_path, force_text(rela_path))
|
||||
#} END interface
|
||||
|
||||
|
||||
class CachingDB:
|
||||
|
||||
"""A database which uses caches to speed-up access"""
|
||||
|
||||
#{ Interface
|
||||
def update_cache(self, force=False):
|
||||
"""
|
||||
Call this method if the underlying data changed to trigger an update
|
||||
of the internal caching structures.
|
||||
|
||||
:param force: if True, the update must be performed. Otherwise the implementation
|
||||
may decide not to perform an update if it thinks nothing has changed.
|
||||
:return: True if an update was performed as something had indeed changed"""
|
||||
|
||||
# END interface
|
||||
|
||||
|
||||
def _databases_recursive(database, output):
|
||||
"""Fill output list with database from db, in order. Deals with Loose, Packed
|
||||
and compound databases."""
|
||||
if isinstance(database, CompoundDB):
|
||||
dbs = database.databases()
|
||||
output.extend(db for db in dbs if not isinstance(db, CompoundDB))
|
||||
for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
|
||||
_databases_recursive(cdb, output)
|
||||
else:
|
||||
output.append(database)
|
||||
# END handle database type
|
||||
|
||||
|
||||
class CompoundDB(ObjectDBR, LazyMixin, CachingDB):
|
||||
|
||||
"""A database which delegates calls to sub-databases.
|
||||
|
||||
Databases are stored in the lazy-loaded _dbs attribute.
|
||||
Define _set_cache_ to update it with your databases"""
|
||||
|
||||
def _set_cache_(self, attr):
|
||||
if attr == '_dbs':
|
||||
self._dbs = list()
|
||||
elif attr == '_db_cache':
|
||||
self._db_cache = dict()
|
||||
else:
|
||||
super()._set_cache_(attr)
|
||||
|
||||
def _db_query(self, sha):
|
||||
""":return: database containing the given 20 byte sha
|
||||
:raise BadObject:"""
|
||||
# most databases use binary representations, prevent converting
|
||||
# it every time a database is being queried
|
||||
try:
|
||||
return self._db_cache[sha]
|
||||
except KeyError:
|
||||
pass
|
||||
# END first level cache
|
||||
|
||||
for db in self._dbs:
|
||||
if db.has_object(sha):
|
||||
self._db_cache[sha] = db
|
||||
return db
|
||||
# END for each database
|
||||
raise BadObject(sha)
|
||||
|
||||
#{ ObjectDBR interface
|
||||
|
||||
def has_object(self, sha):
|
||||
try:
|
||||
self._db_query(sha)
|
||||
return True
|
||||
except BadObject:
|
||||
return False
|
||||
# END handle exceptions
|
||||
|
||||
def info(self, sha):
|
||||
return self._db_query(sha).info(sha)
|
||||
|
||||
def stream(self, sha):
|
||||
return self._db_query(sha).stream(sha)
|
||||
|
||||
def size(self):
|
||||
""":return: total size of all contained databases"""
|
||||
return reduce(lambda x, y: x + y, (db.size() for db in self._dbs), 0)
|
||||
|
||||
def sha_iter(self):
|
||||
return chain(*(db.sha_iter() for db in self._dbs))
|
||||
|
||||
#} END object DBR Interface
|
||||
|
||||
#{ Interface
|
||||
|
||||
def databases(self):
|
||||
""":return: tuple of database instances we use for lookups"""
|
||||
return tuple(self._dbs)
|
||||
|
||||
def update_cache(self, force=False):
|
||||
# something might have changed, clear everything
|
||||
self._db_cache.clear()
|
||||
stat = False
|
||||
for db in self._dbs:
|
||||
if isinstance(db, CachingDB):
|
||||
stat |= db.update_cache(force)
|
||||
# END if is caching db
|
||||
# END for each database to update
|
||||
return stat
|
||||
|
||||
def partial_to_complete_sha_hex(self, partial_hexsha):
|
||||
"""
|
||||
:return: 20 byte binary sha1 from the given less-than-40 byte hexsha (bytes or str)
|
||||
:param partial_hexsha: hexsha with less than 40 byte
|
||||
:raise AmbiguousObjectName: """
|
||||
databases = list()
|
||||
_databases_recursive(self, databases)
|
||||
partial_hexsha = force_text(partial_hexsha)
|
||||
len_partial_hexsha = len(partial_hexsha)
|
||||
if len_partial_hexsha % 2 != 0:
|
||||
partial_binsha = hex_to_bin(partial_hexsha + "0")
|
||||
else:
|
||||
partial_binsha = hex_to_bin(partial_hexsha)
|
||||
# END assure successful binary conversion
|
||||
|
||||
candidate = None
|
||||
for db in databases:
|
||||
full_bin_sha = None
|
||||
try:
|
||||
if hasattr(db, 'partial_to_complete_sha_hex'):
|
||||
full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
|
||||
else:
|
||||
full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
|
||||
# END handle database type
|
||||
except BadObject:
|
||||
continue
|
||||
# END ignore bad objects
|
||||
if full_bin_sha:
|
||||
if candidate and candidate != full_bin_sha:
|
||||
raise AmbiguousObjectName(partial_hexsha)
|
||||
candidate = full_bin_sha
|
||||
# END handle candidate
|
||||
# END for each db
|
||||
if not candidate:
|
||||
raise BadObject(partial_binsha)
|
||||
return candidate
|
||||
|
||||
#} END interface
|
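As an illustration of the read interface defined above, a minimal dict-backed ObjectDBR sketch (illustrative only; the stream() method is omitted).

from gitdb.db.base import ObjectDBR
from gitdb.base import OInfo
from gitdb.exc import BadObject


class DictDB(ObjectDBR):
    def __init__(self):
        self._objects = {}            # binsha -> (type, data)

    def has_object(self, sha):
        return sha in self._objects

    def info(self, sha):
        try:
            typ, data = self._objects[sha]
        except KeyError as e:
            raise BadObject(sha) from e
        return OInfo(sha, typ, len(data))

    def size(self):
        return len(self._objects)

    def sha_iter(self):
        return iter(self._objects)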
85
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/db/git.py
Normal file
@@ -0,0 +1,85 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
from gitdb.db.base import (
|
||||
CompoundDB,
|
||||
ObjectDBW,
|
||||
FileDBBase
|
||||
)
|
||||
|
||||
from gitdb.db.loose import LooseObjectDB
|
||||
from gitdb.db.pack import PackedDB
|
||||
from gitdb.db.ref import ReferenceDB
|
||||
|
||||
from gitdb.exc import InvalidDBRoot
|
||||
|
||||
import os
|
||||
|
||||
__all__ = ('GitDB', )
|
||||
|
||||
|
||||
class GitDB(FileDBBase, ObjectDBW, CompoundDB):
|
||||
|
||||
"""A git-style object database, which contains all objects in the 'objects'
|
||||
subdirectory
|
||||
|
||||
``IMPORTANT``: The usage of this implementation is highly discouraged as it fails to release file-handles.
|
||||
This can be a problem with long-running processes and/or big repositories.
|
||||
"""
|
||||
# Configuration
|
||||
PackDBCls = PackedDB
|
||||
LooseDBCls = LooseObjectDB
|
||||
ReferenceDBCls = ReferenceDB
|
||||
|
||||
# Directories
|
||||
packs_dir = 'pack'
|
||||
loose_dir = ''
|
||||
alternates_dir = os.path.join('info', 'alternates')
|
||||
|
||||
def __init__(self, root_path):
|
||||
"""Initialize ourselves on a git objects directory"""
|
||||
super().__init__(root_path)
|
||||
|
||||
def _set_cache_(self, attr):
|
||||
if attr == '_dbs' or attr == '_loose_db':
|
||||
self._dbs = list()
|
||||
loose_db = None
|
||||
for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
|
||||
(self.loose_dir, self.LooseDBCls),
|
||||
(self.alternates_dir, self.ReferenceDBCls)):
|
||||
path = self.db_path(subpath)
|
||||
if os.path.exists(path):
|
||||
self._dbs.append(dbcls(path))
|
||||
if dbcls is self.LooseDBCls:
|
||||
loose_db = self._dbs[-1]
|
||||
# END remember loose db
|
||||
# END check path exists
|
||||
# END for each db type
|
||||
|
||||
# should have at least one subdb
|
||||
if not self._dbs:
|
||||
raise InvalidDBRoot(self.root_path())
|
||||
# END handle error
|
||||
|
||||
# the first one should have the store method
|
||||
assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality"
|
||||
|
||||
# finally set the value
|
||||
self._loose_db = loose_db
|
||||
else:
|
||||
super()._set_cache_(attr)
|
||||
# END handle attrs
|
||||
|
||||
#{ ObjectDBW interface
|
||||
|
||||
def store(self, istream):
|
||||
return self._loose_db.store(istream)
|
||||
|
||||
def ostream(self):
|
||||
return self._loose_db.ostream()
|
||||
|
||||
def set_ostream(self, ostream):
|
||||
return self._loose_db.set_ostream(ostream)
|
||||
|
||||
#} END objectdbw interface
|
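Hedged usage sketch for GitDB; the repository path and hexsha below are placeholders.

from gitdb import GitDB
from gitdb.util import hex_to_bin

odb = GitDB("/path/to/repo/.git/objects")
binsha = hex_to_bin("0123456789abcdef0123456789abcdef01234567")
if odb.has_object(binsha):
    print(odb.info(binsha).size)      # OInfo lookup
    data = odb.stream(binsha).read()  # OStream yields the decompressed payload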
258
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/db/loose.py
Normal file
@@ -0,0 +1,258 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
from gitdb.db.base import (
|
||||
FileDBBase,
|
||||
ObjectDBR,
|
||||
ObjectDBW
|
||||
)
|
||||
|
||||
|
||||
from gitdb.exc import (
|
||||
BadObject,
|
||||
AmbiguousObjectName
|
||||
)
|
||||
|
||||
from gitdb.stream import (
|
||||
DecompressMemMapReader,
|
||||
FDCompressedSha1Writer,
|
||||
FDStream,
|
||||
Sha1Writer
|
||||
)
|
||||
|
||||
from gitdb.base import (
|
||||
OStream,
|
||||
OInfo
|
||||
)
|
||||
|
||||
from gitdb.util import (
|
||||
file_contents_ro_filepath,
|
||||
ENOENT,
|
||||
hex_to_bin,
|
||||
bin_to_hex,
|
||||
exists,
|
||||
chmod,
|
||||
isdir,
|
||||
isfile,
|
||||
remove,
|
||||
mkdir,
|
||||
rename,
|
||||
dirname,
|
||||
basename,
|
||||
join
|
||||
)
|
||||
|
||||
from gitdb.fun import (
|
||||
chunk_size,
|
||||
loose_object_header_info,
|
||||
write_object,
|
||||
stream_copy
|
||||
)
|
||||
|
||||
from gitdb.utils.encoding import force_bytes
|
||||
|
||||
import tempfile
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
__all__ = ('LooseObjectDB', )
|
||||
|
||||
|
||||
class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
|
||||
|
||||
"""A database which operates on loose object files"""
|
||||
|
||||
# CONFIGURATION
|
||||
# chunks in which data will be copied between streams
|
||||
stream_chunk_size = chunk_size
|
||||
|
||||
# On windows we need to keep it writable, otherwise it cannot be removed
|
||||
# either
|
||||
new_objects_mode = int("444", 8)
|
||||
if os.name == 'nt':
|
||||
new_objects_mode = int("644", 8)
|
||||
|
||||
def __init__(self, root_path):
|
||||
super().__init__(root_path)
|
||||
self._hexsha_to_file = dict()
|
||||
# Additional Flags - might be set to 0 after the first failure
|
||||
# Depending on the root, this might work for some mounts, for others not, which
|
||||
# is why it is per instance
|
||||
self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
|
||||
|
||||
#{ Interface
|
||||
def object_path(self, hexsha):
|
||||
"""
|
||||
:return: path at which the object with the given hexsha would be stored,
|
||||
relative to the database root"""
|
||||
return join(hexsha[:2], hexsha[2:])
|
||||
|
||||
def readable_db_object_path(self, hexsha):
|
||||
"""
|
||||
:return: readable object path to the object identified by hexsha
|
||||
:raise BadObject: If the object file does not exist"""
|
||||
try:
|
||||
return self._hexsha_to_file[hexsha]
|
||||
except KeyError:
|
||||
pass
|
||||
# END ignore cache misses
|
||||
|
||||
# try filesystem
|
||||
path = self.db_path(self.object_path(hexsha))
|
||||
if exists(path):
|
||||
self._hexsha_to_file[hexsha] = path
|
||||
return path
|
||||
# END handle cache
|
||||
raise BadObject(hexsha)
|
||||
|
||||
def partial_to_complete_sha_hex(self, partial_hexsha):
|
||||
""":return: 20 byte binary sha1 string which matches the given name uniquely
|
||||
:param name: hexadecimal partial name (bytes or ascii string)
|
||||
:raise AmbiguousObjectName:
|
||||
:raise BadObject: """
|
||||
candidate = None
|
||||
for binsha in self.sha_iter():
|
||||
if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)):
|
||||
# it can't ever find the same object twice
|
||||
if candidate is not None:
|
||||
raise AmbiguousObjectName(partial_hexsha)
|
||||
candidate = binsha
|
||||
# END for each object
|
||||
if candidate is None:
|
||||
raise BadObject(partial_hexsha)
|
||||
return candidate
|
||||
|
||||
#} END interface
|
||||
|
||||
def _map_loose_object(self, sha):
|
||||
"""
|
||||
:return: memory map of that file to allow random read access
|
||||
:raise BadObject: if object could not be located"""
|
||||
db_path = self.db_path(self.object_path(bin_to_hex(sha)))
|
||||
try:
|
||||
return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
|
||||
except OSError as e:
|
||||
if e.errno != ENOENT:
|
||||
# try again without noatime
|
||||
try:
|
||||
return file_contents_ro_filepath(db_path)
|
||||
except OSError as new_e:
|
||||
raise BadObject(sha) from new_e
|
||||
# didn't work because of our flag, don't try it again
|
||||
self._fd_open_flags = 0
|
||||
else:
|
||||
raise BadObject(sha) from e
|
||||
# END handle error
|
||||
# END exception handling
|
||||
|
||||
def set_ostream(self, stream):
|
||||
""":raise TypeError: if the stream does not support the Sha1Writer interface"""
|
||||
if stream is not None and not isinstance(stream, Sha1Writer):
|
||||
raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
|
||||
return super().set_ostream(stream)
|
||||
|
||||
def info(self, sha):
|
||||
m = self._map_loose_object(sha)
|
||||
try:
|
||||
typ, size = loose_object_header_info(m)
|
||||
return OInfo(sha, typ, size)
|
||||
finally:
|
||||
if hasattr(m, 'close'):
|
||||
m.close()
|
||||
# END assure release of system resources
|
||||
|
||||
def stream(self, sha):
|
||||
m = self._map_loose_object(sha)
|
||||
type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
|
||||
return OStream(sha, type, size, stream)
|
||||
|
||||
def has_object(self, sha):
|
||||
try:
|
||||
self.readable_db_object_path(bin_to_hex(sha))
|
||||
return True
|
||||
except BadObject:
|
||||
return False
|
||||
# END check existence
|
||||
|
||||
def store(self, istream):
|
||||
"""note: The sha we produce will be hex by nature"""
|
||||
tmp_path = None
|
||||
writer = self.ostream()
|
||||
if writer is None:
|
||||
# open a tmp file to write the data to
|
||||
fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
|
||||
|
||||
if istream.binsha is None:
|
||||
writer = FDCompressedSha1Writer(fd)
|
||||
else:
|
||||
writer = FDStream(fd)
|
||||
# END handle direct stream copies
|
||||
# END handle custom writer
|
||||
|
||||
try:
|
||||
try:
|
||||
if istream.binsha is not None:
|
||||
# copy as much as possible, the actual uncompressed item size might
|
||||
# be smaller than the compressed version
|
||||
stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
|
||||
else:
|
||||
# write object with header, we have to make a new one
|
||||
write_object(istream.type, istream.size, istream.read, writer.write,
|
||||
chunk_size=self.stream_chunk_size)
|
||||
# END handle direct stream copies
|
||||
finally:
|
||||
if tmp_path:
|
||||
writer.close()
|
||||
# END assure target stream is closed
|
||||
except:
|
||||
if tmp_path:
|
||||
os.remove(tmp_path)
|
||||
raise
|
||||
# END assure tmpfile removal on error
|
||||
|
||||
hexsha = None
|
||||
if istream.binsha:
|
||||
hexsha = istream.hexsha
|
||||
else:
|
||||
hexsha = writer.sha(as_hex=True)
|
||||
# END handle sha
|
||||
|
||||
if tmp_path:
|
||||
obj_path = self.db_path(self.object_path(hexsha))
|
||||
obj_dir = dirname(obj_path)
|
||||
if not isdir(obj_dir):
|
||||
mkdir(obj_dir)
|
||||
# END handle destination directory
|
||||
# rename onto existing doesn't work on NTFS
|
||||
if isfile(obj_path):
|
||||
remove(tmp_path)
|
||||
else:
|
||||
rename(tmp_path, obj_path)
|
||||
# end rename only if needed
|
||||
|
||||
# make sure it is readable for all! It started out as a rw------- tmp file
|
||||
# but needs to be rw-r--r--
|
||||
chmod(obj_path, self.new_objects_mode)
|
||||
# END handle dry_run
|
||||
|
||||
istream.binsha = hex_to_bin(hexsha)
|
||||
return istream
|
||||
|
||||
def sha_iter(self):
|
||||
# find all files which look like an object, extract sha from there
|
||||
for root, dirs, files in os.walk(self.root_path()):
|
||||
root_base = basename(root)
|
||||
if len(root_base) != 2:
|
||||
continue
|
||||
|
||||
for f in files:
|
||||
if len(f) != 38:
|
||||
continue
|
||||
yield hex_to_bin(root_base + f)
|
||||
# END for each file
|
||||
# END for each walk iteration
|
||||
|
||||
def size(self):
|
||||
return len(tuple(self.sha_iter()))
|
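Sketch of the store() path above: write one loose object into a throwaway directory and check it is present.

import tempfile
from io import BytesIO

from gitdb.base import IStream
from gitdb.db.loose import LooseObjectDB

ldb = LooseObjectDB(tempfile.mkdtemp())
payload = b"loose object payload"
istream = ldb.store(IStream(b"blob", len(payload), BytesIO(payload)))
assert ldb.has_object(istream.binsha)   # stored as <first 2 hex chars>/<remaining 38> under the root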
110
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/db/mem.py
Normal file
@@ -0,0 +1,110 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
"""Contains the MemoryDatabase implementation"""
|
||||
from gitdb.db.loose import LooseObjectDB
|
||||
from gitdb.db.base import (
|
||||
ObjectDBR,
|
||||
ObjectDBW
|
||||
)
|
||||
|
||||
from gitdb.base import (
|
||||
OStream,
|
||||
IStream,
|
||||
)
|
||||
|
||||
from gitdb.exc import (
|
||||
BadObject,
|
||||
UnsupportedOperation
|
||||
)
|
||||
|
||||
from gitdb.stream import (
|
||||
ZippedStoreShaWriter,
|
||||
DecompressMemMapReader,
|
||||
)
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
__all__ = ("MemoryDB", )
|
||||
|
||||
|
||||
class MemoryDB(ObjectDBR, ObjectDBW):
|
||||
|
||||
"""A memory database stores everything to memory, providing fast IO and object
|
||||
retrieval. It should be used to buffer results and obtain SHAs before writing
|
||||
them to the actual physical storage, as it allows querying whether an object already
|
||||
exists in the target storage before introducing actual IO"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self._db = LooseObjectDB("path/doesnt/matter")
|
||||
|
||||
# maps 20 byte shas to their OStream objects
|
||||
self._cache = dict()
|
||||
|
||||
def set_ostream(self, stream):
|
||||
raise UnsupportedOperation("MemoryDB's always stream into memory")
|
||||
|
||||
def store(self, istream):
|
||||
zstream = ZippedStoreShaWriter()
|
||||
self._db.set_ostream(zstream)
|
||||
|
||||
istream = self._db.store(istream)
|
||||
zstream.close() # close to flush
|
||||
zstream.seek(0)
|
||||
|
||||
# don't provide a size, the stream is written in object format, hence the
|
||||
# header needs decompression
|
||||
decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
|
||||
self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
|
||||
|
||||
return istream
|
||||
|
||||
def has_object(self, sha):
|
||||
return sha in self._cache
|
||||
|
||||
def info(self, sha):
|
||||
# we always return streams, which are infos as well
|
||||
return self.stream(sha)
|
||||
|
||||
def stream(self, sha):
|
||||
try:
|
||||
ostream = self._cache[sha]
|
||||
# rewind stream for the next one to read
|
||||
ostream.stream.seek(0)
|
||||
return ostream
|
||||
except KeyError as e:
|
||||
raise BadObject(sha) from e
|
||||
# END exception handling
|
||||
|
||||
def size(self):
|
||||
return len(self._cache)
|
||||
|
||||
def sha_iter(self):
|
||||
return self._cache.keys()
|
||||
|
||||
#{ Interface
|
||||
def stream_copy(self, sha_iter, odb):
|
||||
"""Copy the streams as identified by sha's yielded by sha_iter into the given odb
|
||||
The streams will be copied directly
|
||||
**Note:** the object will only be written if it did not exist in the target db
|
||||
|
||||
:return: amount of streams actually copied into odb. If smaller than the amount
|
||||
of input shas, one or more objects did already exist in odb"""
|
||||
count = 0
|
||||
for sha in sha_iter:
|
||||
if odb.has_object(sha):
|
||||
continue
|
||||
# END check object existence
|
||||
|
||||
ostream = self.stream(sha)
|
||||
# compressed data including header
|
||||
sio = BytesIO(ostream.stream.data())
|
||||
istream = IStream(ostream.type, ostream.size, sio, sha)
|
||||
|
||||
odb.store(istream)
|
||||
count += 1
|
||||
# END for each sha
|
||||
return count
|
||||
#} END interface
|
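Sketch of the in-memory round trip MemoryDB provides, plus stream_copy into a second database; it assumes the cached stream yields the object payload once its header is parsed.

from io import BytesIO

from gitdb.base import IStream
from gitdb.db.mem import MemoryDB

mdb = MemoryDB()
payload = b"buffered object"
istream = mdb.store(IStream(b"blob", len(payload), BytesIO(payload)))
assert mdb.stream(istream.binsha).read() == payload

target = MemoryDB()
copied = mdb.stream_copy(mdb.sha_iter(), target)   # number of objects actually copied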
206
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/db/pack.py
Normal file
@@ -0,0 +1,206 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
"""Module containing a database to deal with packs"""
|
||||
from gitdb.db.base import (
|
||||
FileDBBase,
|
||||
ObjectDBR,
|
||||
CachingDB
|
||||
)
|
||||
|
||||
from gitdb.util import LazyMixin
|
||||
|
||||
from gitdb.exc import (
|
||||
BadObject,
|
||||
UnsupportedOperation,
|
||||
AmbiguousObjectName
|
||||
)
|
||||
|
||||
from gitdb.pack import PackEntity
|
||||
|
||||
from functools import reduce
|
||||
|
||||
import os
|
||||
import glob
|
||||
|
||||
__all__ = ('PackedDB', )
|
||||
|
||||
#{ Utilities
|
||||
|
||||
|
||||
class PackedDB(FileDBBase, ObjectDBR, CachingDB, LazyMixin):
|
||||
|
||||
"""A database operating on a set of object packs"""
|
||||
|
||||
# sort the priority list every N queries
|
||||
# Higher values are better, performance tests don't show this has
|
||||
# any effect, but it should have one
|
||||
_sort_interval = 500
|
||||
|
||||
def __init__(self, root_path):
|
||||
super().__init__(root_path)
|
||||
# list of lists with three items:
|
||||
# * hits - number of times the pack was hit with a request
|
||||
# * entity - Pack entity instance
|
||||
# * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
|
||||
# self._entities = list() # lazy loaded list
|
||||
self._hit_count = 0 # amount of hits
|
||||
self._st_mtime = 0 # last modification data of our root path
|
||||
|
||||
def _set_cache_(self, attr):
|
||||
if attr == '_entities':
|
||||
self._entities = list()
|
||||
self.update_cache(force=True)
|
||||
# END handle entities initialization
|
||||
|
||||
def _sort_entities(self):
|
||||
self._entities.sort(key=lambda l: l[0], reverse=True)
|
||||
|
||||
def _pack_info(self, sha):
|
||||
""":return: tuple(entity, index) for an item at the given sha
|
||||
:param sha: 20 or 40 byte sha
|
||||
:raise BadObject:
|
||||
**Note:** This method is not thread-safe, but may be hit in multi-threaded
|
||||
operation. The worst thing that can happen though is a counter that
|
||||
was not incremented, or the list being in the wrong order. So we save
|
||||
the time for locking here; let's see how that goes"""
|
||||
# presort ?
|
||||
if self._hit_count % self._sort_interval == 0:
|
||||
self._sort_entities()
|
||||
# END update sorting
|
||||
|
||||
for item in self._entities:
|
||||
index = item[2](sha)
|
||||
if index is not None:
|
||||
item[0] += 1 # one hit for you
|
||||
self._hit_count += 1 # general hit count
|
||||
return (item[1], index)
|
||||
# END index found in pack
|
||||
# END for each item
|
||||
|
||||
# no hit, see whether we have to update packs
|
||||
# NOTE: considering packs don't change very often, we save this call
|
||||
# and leave it to the super-caller to trigger that
|
||||
raise BadObject(sha)
|
||||
|
||||
#{ Object DB Read
|
||||
|
||||
def has_object(self, sha):
|
||||
try:
|
||||
self._pack_info(sha)
|
||||
return True
|
||||
except BadObject:
|
||||
return False
|
||||
# END exception handling
|
||||
|
||||
def info(self, sha):
|
||||
entity, index = self._pack_info(sha)
|
||||
return entity.info_at_index(index)
|
||||
|
||||
def stream(self, sha):
|
||||
entity, index = self._pack_info(sha)
|
||||
return entity.stream_at_index(index)
|
||||
|
||||
def sha_iter(self):
|
||||
for entity in self.entities():
|
||||
index = entity.index()
|
||||
sha_by_index = index.sha
|
||||
for index in range(index.size()):
|
||||
yield sha_by_index(index)
|
||||
# END for each index
|
||||
# END for each entity
|
||||
|
||||
def size(self):
|
||||
sizes = [item[1].index().size() for item in self._entities]
|
||||
return reduce(lambda x, y: x + y, sizes, 0)
|
||||
|
||||
#} END object db read
|
||||
|
||||
#{ object db write
|
||||
|
||||
def store(self, istream):
|
||||
"""Storing individual objects is not feasible as a pack is designed to
|
||||
hold multiple objects. Writing or rewriting packs for single objects is
|
||||
inefficient"""
|
||||
raise UnsupportedOperation()
|
||||
|
||||
#} END object db write
|
||||
|
||||
#{ Interface
|
||||
|
||||
def update_cache(self, force=False):
|
||||
"""
|
||||
Update our cache with the actually existing packs on disk. Add new ones,
|
||||
and remove deleted ones. We keep the unchanged ones
|
||||
|
||||
:param force: If True, the cache will be updated even though the directory
|
||||
does not appear to have changed according to its modification timestamp.
|
||||
:return: True if the packs have been updated so there is new information,
|
||||
False if there was no change to the pack database"""
|
||||
stat = os.stat(self.root_path())
|
||||
if not force and stat.st_mtime <= self._st_mtime:
|
||||
return False
|
||||
# END abort early on no change
|
||||
self._st_mtime = stat.st_mtime
|
||||
|
||||
# packs are supposed to be prefixed with pack- by git-convention
|
||||
# get all pack files, figure out what changed
|
||||
pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
|
||||
our_pack_files = {item[1].pack().path() for item in self._entities}
|
||||
|
||||
# new packs
|
||||
for pack_file in (pack_files - our_pack_files):
|
||||
# init the hit-counter/priority with the size, a good measure for hit-
|
||||
# probability. It's implemented so that only 12 bytes will be read
|
||||
entity = PackEntity(pack_file)
|
||||
self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
|
||||
# END for each new packfile
|
||||
|
||||
# removed packs
|
||||
for pack_file in (our_pack_files - pack_files):
|
||||
del_index = -1
|
||||
for i, item in enumerate(self._entities):
|
||||
if item[1].pack().path() == pack_file:
|
||||
del_index = i
|
||||
break
|
||||
# END found index
|
||||
# END for each entity
|
||||
assert del_index != -1
|
||||
del(self._entities[del_index])
|
||||
# END for each removed pack
|
||||
|
||||
# reinitialize priorities
|
||||
self._sort_entities()
|
||||
return True
|
||||
|
||||
def entities(self):
|
||||
""":return: list of pack entities operated upon by this database"""
|
||||
return [item[1] for item in self._entities]
|
||||
|
||||
def partial_to_complete_sha(self, partial_binsha, canonical_length):
|
||||
""":return: 20 byte sha as inferred by the given partial binary sha
|
||||
:param partial_binsha: binary sha with less than 20 bytes
|
||||
:param canonical_length: length of the corresponding canonical representation.
|
||||
It is required as binary sha's cannot display whether the original hex sha
|
||||
had an odd or even number of characters
|
||||
:raise AmbiguousObjectName:
|
||||
:raise BadObject: """
|
||||
candidate = None
|
||||
for item in self._entities:
|
||||
item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
|
||||
if item_index is not None:
|
||||
sha = item[1].index().sha(item_index)
|
||||
if candidate and candidate != sha:
|
||||
raise AmbiguousObjectName(partial_binsha)
|
||||
candidate = sha
|
||||
# END handle full sha could be found
|
||||
# END for each entity
|
||||
|
||||
if candidate:
|
||||
return candidate
|
||||
|
||||
# still not found ?
|
||||
raise BadObject(partial_binsha)
|
||||
|
||||
#} END interface
|
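Hedged sketch for PackedDB; the directory is a placeholder for a real .git/objects/pack folder containing pack-*.pack/.idx pairs.

from gitdb.db.pack import PackedDB

pdb = PackedDB("/path/to/repo/.git/objects/pack")
pdb.update_cache(force=True)                        # scan pack-*.pack files on disk
print(pdb.size(), "objects in", len(pdb.entities()), "packs")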
82
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/db/ref.py
Normal file
@@ -0,0 +1,82 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
import codecs
|
||||
from gitdb.db.base import (
|
||||
CompoundDB,
|
||||
)
|
||||
|
||||
__all__ = ('ReferenceDB', )
|
||||
|
||||
|
||||
class ReferenceDB(CompoundDB):
|
||||
|
||||
"""A database consisting of database referred to in a file"""
|
||||
|
||||
# Configuration
|
||||
# Specifies the object database to use for the paths found in the alternates
|
||||
# file. If None, it defaults to the GitDB
|
||||
ObjectDBCls = None
|
||||
|
||||
def __init__(self, ref_file):
|
||||
super().__init__()
|
||||
self._ref_file = ref_file
|
||||
|
||||
def _set_cache_(self, attr):
|
||||
if attr == '_dbs':
|
||||
self._dbs = list()
|
||||
self._update_dbs_from_ref_file()
|
||||
else:
|
||||
super()._set_cache_(attr)
|
||||
# END handle attrs
|
||||
|
||||
def _update_dbs_from_ref_file(self):
|
||||
dbcls = self.ObjectDBCls
|
||||
if dbcls is None:
|
||||
# late import
|
||||
from gitdb.db.git import GitDB
|
||||
dbcls = GitDB
|
||||
# END get db type
|
||||
|
||||
# try to get as many as possible, don't fail if some are unavailable
|
||||
ref_paths = list()
|
||||
try:
|
||||
with codecs.open(self._ref_file, 'r', encoding="utf-8") as f:
|
||||
ref_paths = [l.strip() for l in f]
|
||||
except OSError:
|
||||
pass
|
||||
# END handle alternates
|
||||
|
||||
ref_paths_set = set(ref_paths)
|
||||
cur_ref_paths_set = {db.root_path() for db in self._dbs}
|
||||
|
||||
# remove existing
|
||||
for path in (cur_ref_paths_set - ref_paths_set):
|
||||
for i, db in enumerate(self._dbs[:]):
|
||||
if db.root_path() == path:
|
||||
del(self._dbs[i])
|
||||
continue
|
||||
# END del matching db
|
||||
# END for each path to remove
|
||||
|
||||
# add new
|
||||
# sort them to maintain order
|
||||
added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
|
||||
for path in added_paths:
|
||||
try:
|
||||
db = dbcls(path)
|
||||
# force an update to verify path
|
||||
if isinstance(db, CompoundDB):
|
||||
db.databases()
|
||||
# END verification
|
||||
self._dbs.append(db)
|
||||
except Exception:
|
||||
# ignore invalid paths or issues
|
||||
pass
|
||||
# END for each path to add
|
||||
|
||||
def update_cache(self, force=False):
|
||||
# re-read alternates and update databases
|
||||
self._update_dbs_from_ref_file()
|
||||
return super().update_cache(force)
|
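Sketch for ReferenceDB with a hand-written alternates-style file; the listed path is a placeholder, and invalid entries are silently skipped per the code above.

import os
import tempfile

from gitdb.db.ref import ReferenceDB

alt = os.path.join(tempfile.mkdtemp(), "alternates")
with open(alt, "w", encoding="utf-8") as fp:
    fp.write("/path/to/other/repo/.git/objects\n")

rdb = ReferenceDB(alt)
print(rdb.databases())   # empty tuple here, since the placeholder path is not a valid object database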
46
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/exc.py
Normal file
@@ -0,0 +1,46 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module with common exceptions"""
from gitdb.util import to_hex_sha


class ODBError(Exception):
    """All errors thrown by the object database"""


class InvalidDBRoot(ODBError):
    """Thrown if an object database cannot be initialized at the given path"""


class BadObject(ODBError):
    """The object with the given SHA does not exist. Instantiate with the
    failed sha"""

    def __str__(self):
        return "BadObject: %s" % to_hex_sha(self.args[0])


class BadName(ODBError):
    """A name provided to rev_parse wasn't understood"""

    def __str__(self):
        return "Ref '%s' did not resolve to an object" % self.args[0]


class ParseError(ODBError):
    """Thrown if the parsing of a file failed due to an invalid format"""


class AmbiguousObjectName(ODBError):
    """Thrown if a possibly shortened name does not uniquely represent a single object
    in the database"""


class BadObjectType(ODBError):
    """The object had an unsupported type"""


class UnsupportedOperation(ODBError):
    """Thrown if the given operation cannot be supported by the object database"""
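Sketch: failed lookups surface as BadObject carrying the offending sha (an all-zero placeholder here).

from gitdb.db.mem import MemoryDB
from gitdb.exc import BadObject

db = MemoryDB()
try:
    db.stream(b"\x00" * 20)
except BadObject as e:
    print(e)             # rendered via to_hex_sha on the stored sha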
704
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/fun.py
Normal file
@@ -0,0 +1,704 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
"""Contains basic c-functions which usually contain performance critical code
|
||||
Keeping this code separate from the beginning makes it easier to out-source
|
||||
it into c later, if required"""
|
||||
|
||||
import zlib
|
||||
from gitdb.util import byte_ord
|
||||
decompressobj = zlib.decompressobj
|
||||
|
||||
import mmap
|
||||
from itertools import islice
|
||||
from functools import reduce
|
||||
|
||||
from gitdb.const import NULL_BYTE, BYTE_SPACE
|
||||
from gitdb.utils.encoding import force_text
|
||||
from gitdb.typ import (
|
||||
str_blob_type,
|
||||
str_commit_type,
|
||||
str_tree_type,
|
||||
str_tag_type,
|
||||
)
|
||||
|
||||
from io import StringIO
|
||||
|
||||
# INVARIANTS
|
||||
OFS_DELTA = 6
|
||||
REF_DELTA = 7
|
||||
delta_types = (OFS_DELTA, REF_DELTA)
|
||||
|
||||
type_id_to_type_map = {
|
||||
0: b'', # EXT 1
|
||||
1: str_commit_type,
|
||||
2: str_tree_type,
|
||||
3: str_blob_type,
|
||||
4: str_tag_type,
|
||||
5: b'', # EXT 2
|
||||
OFS_DELTA: "OFS_DELTA", # OFFSET DELTA
|
||||
REF_DELTA: "REF_DELTA" # REFERENCE DELTA
|
||||
}
|
||||
|
||||
type_to_type_id_map = {
|
||||
str_commit_type: 1,
|
||||
str_tree_type: 2,
|
||||
str_blob_type: 3,
|
||||
str_tag_type: 4,
|
||||
"OFS_DELTA": OFS_DELTA,
|
||||
"REF_DELTA": REF_DELTA,
|
||||
}
|
||||
|
||||
# used when dealing with larger streams
|
||||
chunk_size = 1000 * mmap.PAGESIZE
|
||||
|
||||
__all__ = ('is_loose_object', 'loose_object_header_info', 'msb_size', 'pack_object_header_info',
|
||||
'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data',
|
||||
'is_equal_canonical_sha', 'connect_deltas', 'DeltaChunkList', 'create_pack_object_header')
|
||||
|
||||
|
||||
#{ Structures
|
||||
|
||||
def _set_delta_rbound(d, size):
|
||||
"""Truncate the given delta to the given size
|
||||
:param size: size relative to our target offset, may not be 0, must be smaller or equal
|
||||
to our size
|
||||
:return: d"""
|
||||
d.ts = size
|
||||
|
||||
# NOTE: data is truncated automatically when applying the delta
|
||||
# MUST NOT DO THIS HERE
|
||||
return d
|
||||
|
||||
|
||||
def _move_delta_lbound(d, bytes):
|
||||
"""Move the delta by the given amount of bytes, reducing its size so that its
|
||||
right bound stays static
|
||||
:param bytes: amount of bytes to move, must be smaller than delta size
|
||||
:return: d"""
|
||||
if bytes == 0:
|
||||
return
|
||||
|
||||
d.to += bytes
|
||||
d.so += bytes
|
||||
d.ts -= bytes
|
||||
if d.data is not None:
|
||||
d.data = d.data[bytes:]
|
||||
# END handle data
|
||||
|
||||
return d
|
||||
|
||||
|
||||
def delta_duplicate(src):
|
||||
return DeltaChunk(src.to, src.ts, src.so, src.data)
|
||||
|
||||
|
||||
def delta_chunk_apply(dc, bbuf, write):
|
||||
"""Apply own data to the target buffer
|
||||
:param bbuf: buffer providing source bytes for copy operations
|
||||
:param write: write method to call with data to write"""
|
||||
if dc.data is None:
|
||||
# COPY DATA FROM SOURCE
|
||||
write(bbuf[dc.so:dc.so + dc.ts])
|
||||
else:
|
||||
# APPEND DATA
|
||||
# what's faster: if + 4 function calls or just a write with a slice ?
|
||||
# Considering data can be larger than 127 bytes now, it should be worth it
|
||||
if dc.ts < len(dc.data):
|
||||
write(dc.data[:dc.ts])
|
||||
else:
|
||||
write(dc.data)
|
||||
# END handle truncation
|
||||
# END handle chunk mode
|
||||
|
||||
|
||||
class DeltaChunk:
|
||||
|
||||
"""Represents a piece of a delta, it can either add new data, or copy existing
|
||||
one from a source buffer"""
|
||||
__slots__ = (
|
||||
'to', # start offset in the target buffer in bytes
|
||||
'ts', # size of this chunk in the target buffer in bytes
|
||||
'so', # start offset in the source buffer in bytes or None
|
||||
'data', # chunk of bytes to be added to the target buffer,
|
||||
# DeltaChunkList to use as base, or None
|
||||
)
|
||||
|
||||
def __init__(self, to, ts, so, data):
|
||||
self.to = to
|
||||
self.ts = ts
|
||||
self.so = so
|
||||
self.data = data
|
||||
|
||||
def __repr__(self):
|
||||
return "DeltaChunk(%i, %i, %s, %s)" % (self.to, self.ts, self.so, self.data or "")
|
||||
|
||||
#{ Interface
|
||||
|
||||
def rbound(self):
|
||||
return self.to + self.ts
|
||||
|
||||
def has_data(self):
|
||||
""":return: True if the instance has data to add to the target stream"""
|
||||
return self.data is not None
|
||||
|
||||
#} END interface
|
||||
|
||||
|
||||
def _closest_index(dcl, absofs):
|
||||
""":return: index at which the given absofs should be inserted. The index points
|
||||
to the DeltaChunk with a target buffer absofs that equals or is greater than
|
||||
absofs.
|
||||
**Note:** global method for performance only, it belongs to DeltaChunkList"""
|
||||
lo = 0
|
||||
hi = len(dcl)
|
||||
while lo < hi:
|
||||
mid = (lo + hi) // 2  # integer division: mid is used as a list index
|
||||
dc = dcl[mid]
|
||||
if dc.to > absofs:
|
||||
hi = mid
|
||||
elif dc.rbound() > absofs or dc.to == absofs:
|
||||
return mid
|
||||
else:
|
||||
lo = mid + 1
|
||||
# END handle bound
|
||||
# END for each delta absofs
|
||||
return len(dcl) - 1
|
||||
|
||||
|
||||
def delta_list_apply(dcl, bbuf, write):
|
||||
"""Apply the chain's changes and write the final result using the passed
|
||||
write function.
|
||||
:param bbuf: base buffer containing the base of all deltas contained in this
|
||||
list. It will only be used if the chunk in question does not have a base
|
||||
chain.
|
||||
:param write: function taking a string of bytes to write to the output"""
|
||||
for dc in dcl:
|
||||
delta_chunk_apply(dc, bbuf, write)
|
||||
# END for each dc
|
||||
|
||||
|
||||
def delta_list_slice(dcl, absofs, size, ndcl):
|
||||
""":return: Subsection of this list at the given absolute offset, with the given
|
||||
size in bytes.
|
||||
:return: None"""
|
||||
cdi = _closest_index(dcl, absofs) # delta start index
|
||||
cd = dcl[cdi]
|
||||
slen = len(dcl)
|
||||
lappend = ndcl.append
|
||||
|
||||
if cd.to != absofs:
|
||||
tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data)
|
||||
_move_delta_lbound(tcd, absofs - cd.to)
|
||||
tcd.ts = min(tcd.ts, size)
|
||||
lappend(tcd)
|
||||
size -= tcd.ts
|
||||
cdi += 1
|
||||
# END lbound overlap handling
|
||||
|
||||
while cdi < slen and size:
|
||||
# are we larger than the current block
|
||||
cd = dcl[cdi]
|
||||
if cd.ts <= size:
|
||||
lappend(DeltaChunk(cd.to, cd.ts, cd.so, cd.data))
|
||||
size -= cd.ts
|
||||
else:
|
||||
tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data)
|
||||
tcd.ts = size
|
||||
lappend(tcd)
|
||||
size -= tcd.ts
|
||||
break
|
||||
# END handle size
|
||||
cdi += 1
|
||||
# END for each chunk
|
||||
|
||||
|
||||
class DeltaChunkList(list):
|
||||
|
||||
"""List with special functionality to deal with DeltaChunks.
|
||||
There are two types of lists we represent. The one was created bottom-up, working
|
||||
towards the latest delta, the other kind was created top-down, working from the
|
||||
latest delta down to the earliest ancestor. This attribute is queryable
|
||||
after all processing with is_reversed."""
|
||||
|
||||
__slots__ = tuple()
|
||||
|
||||
def rbound(self):
|
||||
""":return: rightmost extend in bytes, absolute"""
|
||||
if len(self) == 0:
|
||||
return 0
|
||||
return self[-1].rbound()
|
||||
|
||||
def lbound(self):
|
||||
""":return: leftmost byte at which this chunklist starts"""
|
||||
if len(self) == 0:
|
||||
return 0
|
||||
return self[0].to
|
||||
|
||||
def size(self):
|
||||
""":return: size of bytes as measured by our delta chunks"""
|
||||
return self.rbound() - self.lbound()
|
||||
|
||||
def apply(self, bbuf, write):
|
||||
"""Only used by public clients, internally we only use the global routines
|
||||
for performance"""
|
||||
return delta_list_apply(self, bbuf, write)
|
||||
|
||||
def compress(self):
|
||||
"""Alter the list to reduce the amount of nodes. Currently we concatenate
|
||||
add-chunks
|
||||
:return: self"""
|
||||
slen = len(self)
|
||||
if slen < 2:
|
||||
return self
|
||||
i = 0
|
||||
|
||||
first_data_index = None
|
||||
while i < slen:
|
||||
dc = self[i]
|
||||
i += 1
|
||||
if dc.data is None:
|
||||
if first_data_index is not None and i - 2 - first_data_index > 1:
|
||||
# if first_data_index is not None:
|
||||
nd = StringIO() # new data
|
||||
so = self[first_data_index].to # start offset in target buffer
|
||||
for x in range(first_data_index, i - 1):
|
||||
xdc = self[x]
|
||||
nd.write(xdc.data[:xdc.ts])
|
||||
# END collect data
|
||||
|
||||
del(self[first_data_index:i - 1])
|
||||
buf = nd.getvalue()
|
||||
self.insert(first_data_index, DeltaChunk(so, len(buf), 0, buf))
|
||||
|
||||
slen = len(self)
|
||||
i = first_data_index + 1
|
||||
|
||||
# END concatenate data
|
||||
first_data_index = None
|
||||
continue
|
||||
# END skip non-data chunks
|
||||
|
||||
if first_data_index is None:
|
||||
first_data_index = i - 1
|
||||
# END iterate list
|
||||
|
||||
# if slen_orig != len(self):
|
||||
# print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100)
|
||||
return self
|
||||
|
||||
def check_integrity(self, target_size=-1):
|
||||
"""Verify the list has non-overlapping chunks only, and the total size matches
|
||||
target_size
|
||||
:param target_size: if not -1, the total size of the chain must be target_size
|
||||
:raise AssertionError: if the size doesn't match"""
|
||||
if target_size > -1:
|
||||
assert self[-1].rbound() == target_size
|
||||
assert reduce(lambda x, y: x + y, (d.ts for d in self), 0) == target_size
|
||||
# END target size verification
|
||||
|
||||
if len(self) < 2:
|
||||
return
|
||||
|
||||
# check data
|
||||
for dc in self:
|
||||
assert dc.ts > 0
|
||||
if dc.has_data():
|
||||
assert len(dc.data) >= dc.ts
|
||||
# END for each dc
|
||||
|
||||
left = islice(self, 0, len(self) - 1)
|
||||
right = iter(self)
|
||||
next(right)
|
||||
# this is very pythonic - we might just have used index-based access here,
# but this could actually be faster
|
||||
for lft, rgt in zip(left, right):
|
||||
assert lft.rbound() == rgt.to
|
||||
assert lft.to + lft.ts == rgt.to
|
||||
# END for each pair
|
||||
|
||||
|
||||
class TopdownDeltaChunkList(DeltaChunkList):
|
||||
|
||||
"""Represents a list which is generated by feeding its ancestor streams one by
|
||||
one"""
|
||||
__slots__ = tuple()
|
||||
|
||||
def connect_with_next_base(self, bdcl):
|
||||
"""Connect this chain with the next level of our base delta chunklist.
|
||||
The goal in this game is to mark as many of our chunks rigid, hence they
|
||||
cannot be changed by any of the upcoming bases anymore. Once all our
|
||||
chunks are marked like that, we can stop all processing
|
||||
:param bdcl: data chunk list being one of our bases. They must be fed in
|
||||
consecutively and in order, towards the earliest ancestor delta
|
||||
:return: True if processing was done. Use it to abort processing of
|
||||
remaining streams if False is returned"""
|
||||
nfc = 0 # number of frozen chunks
|
||||
dci = 0 # delta chunk index
|
||||
slen = len(self) # len of self
|
||||
ccl = list() # temporary list
|
||||
while dci < slen:
|
||||
dc = self[dci]
|
||||
dci += 1
|
||||
|
||||
# all add-chunks which are already topmost don't need additional processing
|
||||
if dc.data is not None:
|
||||
nfc += 1
|
||||
continue
|
||||
# END skip add chunks
|
||||
|
||||
# copy chunks
|
||||
# integrate the portion of the base list into ourselves. Lists
|
||||
# don't support efficient insertion ( just one at a time ), but for now
# we live with it. Internally, it's all just a 32/64bit pointer, and
|
||||
# the portions of moved memory should be smallish. Maybe we just rebuild
|
||||
# ourselves in order to reduce the amount of insertions ...
|
||||
del(ccl[:])
|
||||
delta_list_slice(bdcl, dc.so, dc.ts, ccl)
|
||||
|
||||
# move the target bounds into place to match with our chunk
|
||||
ofs = dc.to - dc.so
|
||||
for cdc in ccl:
|
||||
cdc.to += ofs
|
||||
# END update target bounds
|
||||
|
||||
if len(ccl) == 1:
|
||||
self[dci - 1] = ccl[0]
|
||||
else:
|
||||
# maybe try to compute the expenses here, and pick the right algorithm
|
||||
# It would normally be faster than copying everything physically though
|
||||
# TODO: Use a deque here, and decide by the index whether to extend
|
||||
# or extend left !
|
||||
post_dci = self[dci:]
|
||||
del(self[dci - 1:]) # include deletion of dc
|
||||
self.extend(ccl)
|
||||
self.extend(post_dci)
|
||||
|
||||
slen = len(self)
|
||||
dci += len(ccl) - 1 # deleted dc, added rest
|
||||
|
||||
# END handle chunk replacement
|
||||
# END for each chunk
|
||||
|
||||
if nfc == slen:
|
||||
return False
|
||||
# END handle completeness
|
||||
return True
|
||||
|
||||
|
||||
#} END structures
|
||||
|
||||
#{ Routines
|
||||
|
||||
def is_loose_object(m):
|
||||
"""
|
||||
:return: True if the file contained in memory map m appears to be a loose object.
|
||||
Only the first two bytes are needed"""
|
||||
b0, b1 = map(ord, m[:2])
|
||||
word = (b0 << 8) + b1
|
||||
return b0 == 0x78 and (word % 31) == 0
|
||||
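# Illustrative sketch, not part of gitdb: the loose-object test above relies on the
# zlib header convention (RFC 1950) - the first byte is 0x78 for the default window
# size, and the first two bytes, read as a big-endian 16 bit value, are divisible by 31.
# A minimal self-contained check mirroring is_loose_object():
def _example_loose_object_header_check():
    import zlib
    compressed = zlib.compress(b'blob 11\x00hello world')  # a zlib-deflated loose object
    b0, b1 = compressed[0], compressed[1]                   # py3: indexing bytes yields ints
    word = (b0 << 8) + b1
    return b0 == 0x78 and word % 31 == 0                    # True for default zlib settings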
|
||||
|
||||
def loose_object_header_info(m):
|
||||
"""
|
||||
:return: tuple(type_string, uncompressed_size_in_bytes) the type string of the
|
||||
object as well as its uncompressed size in bytes.
|
||||
:param m: memory map from which to read the compressed object data"""
|
||||
decompress_size = 8192 # is used in cgit as well
|
||||
hdr = decompressobj().decompress(m, decompress_size)
|
||||
type_name, size = hdr[:hdr.find(NULL_BYTE)].split(BYTE_SPACE)
|
||||
|
||||
return type_name, int(size)
|
||||
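# Illustrative sketch, not part of gitdb: loose_object_header_info() only needs the
# first few decompressed bytes, so feeding it a freshly compressed object works directly.
def _example_loose_object_header_info():
    import zlib
    compressed = zlib.compress(b'blob 11\x00hello world')
    assert loose_object_header_info(compressed) == (b'blob', 11)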
|
||||
|
||||
def pack_object_header_info(data):
|
||||
"""
|
||||
:return: tuple(type_id, uncompressed_size_in_bytes, byte_offset)
|
||||
The type_id should be interpreted according to the ``type_id_to_type_map`` map
|
||||
The byte-offset specifies the start of the actual zlib compressed datastream
|
||||
:param data: random-access memory, like a string or memory map"""
|
||||
c = byte_ord(data[0]) # first byte
|
||||
i = 1 # next char to read
|
||||
type_id = (c >> 4) & 7 # numeric type
|
||||
size = c & 15 # starting size
|
||||
s = 4 # starting bit-shift size
|
||||
while c & 0x80:
|
||||
c = byte_ord(data[i])
|
||||
i += 1
|
||||
size += (c & 0x7f) << s
|
||||
s += 7
|
||||
# END character loop
|
||||
# end performance at expense of maintenance ...
|
||||
return (type_id, size, i)
|
||||
|
||||
|
||||
def create_pack_object_header(obj_type, obj_size):
|
||||
"""
|
||||
:return: bytes defining the pack header, comprised of the object type
and its uncompressed size in bytes
|
||||
|
||||
:param obj_type: pack type_id of the object
|
||||
:param obj_size: uncompressed size in bytes of the following object stream"""
|
||||
c = 0 # 1 byte
|
||||
hdr = bytearray() # output string
|
||||
|
||||
c = (obj_type << 4) | (obj_size & 0xf)
|
||||
obj_size >>= 4
|
||||
while obj_size:
|
||||
hdr.append(c | 0x80)
|
||||
c = obj_size & 0x7f
|
||||
obj_size >>= 7
|
||||
# END until size is consumed
|
||||
hdr.append(c)
|
||||
# end handle interpreter
|
||||
return hdr
|
||||
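# Illustrative sketch, not part of gitdb: create_pack_object_header() and
# pack_object_header_info() above are inverses of each other, which allows a simple
# round-trip check (type id 3 is a blob in the pack format).
def _example_pack_header_roundtrip():
    hdr = create_pack_object_header(3, 1000)        # bytearray([0xb8, 0x3e])
    type_id, size, offset = pack_object_header_info(bytes(hdr))
    assert (type_id, size, offset) == (3, 1000, len(hdr))
    return hdr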
|
||||
|
||||
def msb_size(data, offset=0):
|
||||
"""
|
||||
:return: tuple(absolute_offset_past_size, size) - read the MSB size from the given random
access data starting at the given byte offset
|
||||
size = 0
|
||||
i = 0
|
||||
l = len(data)
|
||||
hit_msb = False
|
||||
while i < l:
|
||||
c = data[i + offset]
|
||||
size |= (c & 0x7f) << i * 7
|
||||
i += 1
|
||||
if not c & 0x80:
|
||||
hit_msb = True
|
||||
break
|
||||
# END check msb bit
|
||||
# END while in range
|
||||
# end performance ...
|
||||
if not hit_msb:
|
||||
raise AssertionError("Could not find terminating MSB byte in data stream")
|
||||
return i + offset, size
|
||||
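# Illustrative sketch, not part of gitdb: the MSB size encoding used in delta headers
# stores 7 bits per byte, least significant group first, with the high bit flagging
# continuation. 1000 == 0b111_1101000 therefore encodes as 0xE8 0x07:
def _example_msb_size():
    end_offset, size = msb_size(bytes([0xE8, 0x07]))
    assert (end_offset, size) == (2, 1000)
    return size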
|
||||
|
||||
def loose_object_header(type, size):
|
||||
"""
|
||||
:return: bytes representing the loose object header, which is immediately
|
||||
followed by the content stream of size 'size'"""
|
||||
return ('%s %i\0' % (force_text(type), size)).encode('ascii')
|
||||
|
||||
|
||||
def write_object(type, size, read, write, chunk_size=chunk_size):
|
||||
"""
|
||||
Write the object as identified by type, size and source_stream into the
|
||||
target_stream
|
||||
|
||||
:param type: type string of the object
|
||||
:param size: amount of bytes to write from source_stream
|
||||
:param read: read method of a stream providing the content data
|
||||
:param write: write method of the output stream
|
||||
:return: actual number of bytes written to the stream, including the header"""
|
||||
tbw = 0 # total num bytes written
|
||||
|
||||
# WRITE HEADER: type SP size NULL
|
||||
tbw += write(loose_object_header(type, size))
|
||||
tbw += stream_copy(read, write, size, chunk_size)
|
||||
|
||||
return tbw
|
||||
|
||||
|
||||
def stream_copy(read, write, size, chunk_size):
|
||||
"""
|
||||
Copy a stream up to size bytes using the provided read and write methods,
|
||||
in chunks of chunk_size
|
||||
|
||||
**Note:** it's much like the stream_copy utility, but operates just using methods"""
|
||||
dbw = 0 # num data bytes written
|
||||
|
||||
# WRITE ALL DATA UP TO SIZE
|
||||
while True:
|
||||
cs = min(chunk_size, size - dbw)
|
||||
# NOTE: not all write methods return the amount of written bytes, like
|
||||
# mmap.write. It's bad, but we just deal with it ... perhaps it's not
# even less efficient
|
||||
# data_len = write(read(cs))
|
||||
# dbw += data_len
|
||||
data = read(cs)
|
||||
data_len = len(data)
|
||||
dbw += data_len
|
||||
write(data)
|
||||
if data_len < cs or dbw == size:
|
||||
break
|
||||
# END check for stream end
|
||||
# END duplicate data
|
||||
return dbw
|
||||
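# Illustrative sketch, not part of gitdb: write_object() above combines
# loose_object_header() with stream_copy() to produce an uncompressed loose object.
# BytesIO stands in for the source and target streams here.
def _example_write_object():
    from io import BytesIO
    payload = b'hello world'
    source, target = BytesIO(payload), BytesIO()
    total = write_object('blob', len(payload), source.read, target.write, chunk_size=8192)
    assert target.getvalue() == b'blob 11\x00hello world'
    assert total == len(target.getvalue())
    return total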
|
||||
|
||||
def connect_deltas(dstreams):
|
||||
"""
|
||||
Read the condensed delta chunk information from each delta stream and merge it
into a list of existing delta chunks
|
||||
|
||||
:param dstreams: iterable of delta stream objects, the delta to be applied last
|
||||
comes first, then all its ancestors in order
|
||||
:return: DeltaChunkList, containing all operations to apply"""
|
||||
tdcl = None # topmost dcl
|
||||
|
||||
dcl = tdcl = TopdownDeltaChunkList()
|
||||
for dsi, ds in enumerate(dstreams):
|
||||
# print "Stream", dsi
|
||||
db = ds.read()
|
||||
delta_buf_size = ds.size
|
||||
|
||||
# read header
|
||||
i, base_size = msb_size(db)
|
||||
i, target_size = msb_size(db, i)
|
||||
|
||||
# interpret opcodes
|
||||
tbw = 0 # amount of target bytes written
|
||||
while i < delta_buf_size:
|
||||
c = ord(db[i])
|
||||
i += 1
|
||||
if c & 0x80:
|
||||
cp_off, cp_size = 0, 0
|
||||
if (c & 0x01):
|
||||
cp_off = ord(db[i])
|
||||
i += 1
|
||||
if (c & 0x02):
|
||||
cp_off |= (ord(db[i]) << 8)
|
||||
i += 1
|
||||
if (c & 0x04):
|
||||
cp_off |= (ord(db[i]) << 16)
|
||||
i += 1
|
||||
if (c & 0x08):
|
||||
cp_off |= (ord(db[i]) << 24)
|
||||
i += 1
|
||||
if (c & 0x10):
|
||||
cp_size = ord(db[i])
|
||||
i += 1
|
||||
if (c & 0x20):
|
||||
cp_size |= (ord(db[i]) << 8)
|
||||
i += 1
|
||||
if (c & 0x40):
|
||||
cp_size |= (ord(db[i]) << 16)
|
||||
i += 1
|
||||
|
||||
if not cp_size:
|
||||
cp_size = 0x10000
|
||||
|
||||
rbound = cp_off + cp_size
|
||||
if (rbound < cp_size or
|
||||
rbound > base_size):
|
||||
break
|
||||
|
||||
dcl.append(DeltaChunk(tbw, cp_size, cp_off, None))
|
||||
tbw += cp_size
|
||||
elif c:
|
||||
# NOTE: in C, the data chunks should probably be concatenated here.
|
||||
# In python, we do it as a post-process
|
||||
dcl.append(DeltaChunk(tbw, c, 0, db[i:i + c]))
|
||||
i += c
|
||||
tbw += c
|
||||
else:
|
||||
raise ValueError("unexpected delta opcode 0")
|
||||
# END handle command byte
|
||||
# END while processing delta data
|
||||
|
||||
dcl.compress()
|
||||
|
||||
# merge the lists !
|
||||
if dsi > 0:
|
||||
if not tdcl.connect_with_next_base(dcl):
|
||||
break
|
||||
# END handle merge
|
||||
|
||||
# prepare next base
|
||||
dcl = DeltaChunkList()
|
||||
# END for each delta stream
|
||||
|
||||
return tdcl
|
||||
|
||||
|
||||
def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
|
||||
"""
|
||||
Apply data from a delta buffer using a source buffer to the target file
|
||||
|
||||
:param src_buf: random access data from which the delta was created
|
||||
:param src_buf_size: size of the source buffer in bytes
|
||||
:param delta_buf_size: size for the delta buffer in bytes
|
||||
:param delta_buf: random access delta data
|
||||
:param write: write method taking a chunk of bytes
|
||||
|
||||
**Note:** transcribed to python from the similar routine in patch-delta.c"""
|
||||
i = 0
|
||||
db = delta_buf
|
||||
while i < delta_buf_size:
|
||||
c = db[i]
|
||||
i += 1
|
||||
if c & 0x80:
|
||||
cp_off, cp_size = 0, 0
|
||||
if (c & 0x01):
|
||||
cp_off = db[i]
|
||||
i += 1
|
||||
if (c & 0x02):
|
||||
cp_off |= (db[i] << 8)
|
||||
i += 1
|
||||
if (c & 0x04):
|
||||
cp_off |= (db[i] << 16)
|
||||
i += 1
|
||||
if (c & 0x08):
|
||||
cp_off |= (db[i] << 24)
|
||||
i += 1
|
||||
if (c & 0x10):
|
||||
cp_size = db[i]
|
||||
i += 1
|
||||
if (c & 0x20):
|
||||
cp_size |= (db[i] << 8)
|
||||
i += 1
|
||||
if (c & 0x40):
|
||||
cp_size |= (db[i] << 16)
|
||||
i += 1
|
||||
|
||||
if not cp_size:
|
||||
cp_size = 0x10000
|
||||
|
||||
rbound = cp_off + cp_size
|
||||
if (rbound < cp_size or
|
||||
rbound > src_buf_size):
|
||||
break
|
||||
write(src_buf[cp_off:cp_off + cp_size])
|
||||
elif c:
|
||||
write(db[i:i + c])
|
||||
i += c
|
||||
else:
|
||||
raise ValueError("unexpected delta opcode 0")
|
||||
# END handle command byte
|
||||
# END while processing delta data
|
||||
|
||||
# yes, lets use the exact same error message that git uses :)
|
||||
assert i == delta_buf_size, "delta replay has gone wild"
|
||||
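# Illustrative sketch, not part of gitdb: a hand-built delta for apply_delta_data()
# above. The opcode stream (without the two MSB size headers) copies six bytes from
# the source buffer and then inserts five literal bytes.
def _example_apply_delta_data():
    from io import BytesIO
    source = b'hello world'
    delta = bytes([0x90, 0x06, 0x05]) + b'gitdb'   # 0x90: copy, one size byte follows; 0x05: insert 5 bytes
    out = BytesIO()
    apply_delta_data(source, len(source), delta, len(delta), out.write)
    assert out.getvalue() == b'hello gitdb'
    return out.getvalue()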
|
||||
|
||||
def is_equal_canonical_sha(canonical_length, match, sha1):
|
||||
"""
|
||||
:return: True if the given partial binary sha ``match`` matches the start of the 20 byte binary ``sha1``
The comparison will take the canonical_length of the match sha into account,
hence for uneven canonical representations only the high 4 bits of the last byte are compared
|
||||
:param match: less than 20 byte sha
|
||||
:param sha1: 20 byte sha"""
|
||||
binary_length = canonical_length // 2
|
||||
if match[:binary_length] != sha1[:binary_length]:
|
||||
return False
|
||||
|
||||
if canonical_length - binary_length and \
|
||||
(byte_ord(match[-1]) ^ byte_ord(sha1[len(match) - 1])) & 0xf0:
|
||||
return False
|
||||
# END handle uneven canonical length
|
||||
return True
|
||||
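# Illustrative sketch, not part of gitdb: for an uneven canonical (hex) length the
# last byte of the partial sha only carries a meaningful high nibble, which is what
# is_equal_canonical_sha() above compares.
def _example_is_equal_canonical_sha():
    sha1 = bytes(range(20))                                     # stand-in 20 byte binary sha
    assert is_equal_canonical_sha(4, sha1[:2], sha1)            # even length: plain prefix match
    assert is_equal_canonical_sha(5, sha1[:2] + b'\x0f', sha1)  # odd length: low nibble is padding
    assert not is_equal_canonical_sha(5, sha1[:2] + b'\x1f', sha1)
    return True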
|
||||
#} END routines
|
||||
|
||||
|
||||
try:
|
||||
from gitdb_speedups._perf import connect_deltas
|
||||
except ImportError:
|
||||
pass
|
1031
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/pack.py
Normal file
File diff suppressed because it is too large
730
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/stream.py
Normal file
@@ -0,0 +1,730 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
import mmap
|
||||
import os
|
||||
import sys
|
||||
import zlib
|
||||
|
||||
from gitdb.fun import (
|
||||
msb_size,
|
||||
stream_copy,
|
||||
apply_delta_data,
|
||||
connect_deltas,
|
||||
delta_types
|
||||
)
|
||||
|
||||
from gitdb.util import (
|
||||
allocate_memory,
|
||||
LazyMixin,
|
||||
make_sha,
|
||||
write,
|
||||
close,
|
||||
)
|
||||
|
||||
from gitdb.const import NULL_BYTE, BYTE_SPACE
|
||||
from gitdb.utils.encoding import force_bytes
|
||||
|
||||
has_perf_mod = False
|
||||
try:
|
||||
from gitdb_speedups._perf import apply_delta as c_apply_delta
|
||||
has_perf_mod = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
__all__ = ('DecompressMemMapReader', 'FDCompressedSha1Writer', 'DeltaApplyReader',
|
||||
'Sha1Writer', 'FlexibleSha1Writer', 'ZippedStoreShaWriter', 'FDCompressedSha1Writer',
|
||||
'FDStream', 'NullStream')
|
||||
|
||||
|
||||
#{ RO Streams
|
||||
|
||||
class DecompressMemMapReader(LazyMixin):
|
||||
|
||||
"""Reads data in chunks from a memory map and decompresses it. The client sees
|
||||
only the uncompressed data, respective file-like read calls are handling on-demand
|
||||
buffered decompression accordingly
|
||||
|
||||
A constraint on the total size of bytes is activated, simulating
|
||||
a logical file within a possibly larger physical memory area
|
||||
|
||||
To read efficiently, you clearly don't want to read individual bytes, instead,
|
||||
read a few kilobytes at least.
|
||||
|
||||
**Note:** The chunk-size should be carefully selected as it will involve quite a bit
of string copying due to the way zlib is implemented. It's very wasteful,
hence we try to find a good tradeoff between allocation time and the number of
times we actually allocate. A custom zlib implementation would be good here
to better support streamed reading - it would only need to keep the mmap
and decompress it into chunks, that's all ... """
|
||||
__slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close',
|
||||
'_cbr', '_phi')
|
||||
|
||||
max_read_size = 512 * 1024 # currently unused
|
||||
|
||||
def __init__(self, m, close_on_deletion, size=None):
|
||||
"""Initialize with mmap for stream reading
|
||||
:param m: must be content data - use new if you have object data and no size"""
|
||||
self._m = m
|
||||
self._zip = zlib.decompressobj()
|
||||
self._buf = None # buffer of decompressed bytes
|
||||
self._buflen = 0 # length of bytes in buffer
|
||||
if size is not None:
|
||||
self._s = size # size of uncompressed data to read in total
|
||||
self._br = 0 # num uncompressed bytes read
|
||||
self._cws = 0 # start byte of compression window
|
||||
self._cwe = 0 # end byte of compression window
|
||||
self._cbr = 0 # number of compressed bytes read
|
||||
self._phi = False # is True if we parsed the header info
|
||||
self._close = close_on_deletion # close the memmap on deletion ?
|
||||
|
||||
def _set_cache_(self, attr):
|
||||
assert attr == '_s'
|
||||
# only happens for size, which is a marker to indicate we still
|
||||
# have to parse the header from the stream
|
||||
self._parse_header_info()
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
|
||||
def _parse_header_info(self):
|
||||
"""If this stream contains object data, parse the header info and skip the
|
||||
stream to a point where each read will yield object content
|
||||
|
||||
:return: parsed type_string, size"""
|
||||
# read header
|
||||
# should really be enough, cgit uses 8192 I believe
|
||||
# And for good reason !! This needs to be that high for the header to be read correctly in all cases
|
||||
maxb = 8192
|
||||
self._s = maxb
|
||||
hdr = self.read(maxb)
|
||||
hdrend = hdr.find(NULL_BYTE)
|
||||
typ, size = hdr[:hdrend].split(BYTE_SPACE)
|
||||
size = int(size)
|
||||
self._s = size
|
||||
|
||||
# adjust internal state to match actual header length that we ignore
|
||||
# The buffer will be depleted first on future reads
|
||||
self._br = 0
|
||||
hdrend += 1
|
||||
self._buf = BytesIO(hdr[hdrend:])
|
||||
self._buflen = len(hdr) - hdrend
|
||||
|
||||
self._phi = True
|
||||
|
||||
return typ, size
|
||||
|
||||
#{ Interface
|
||||
|
||||
@classmethod
|
||||
def new(self, m, close_on_deletion=False):
|
||||
"""Create a new DecompressMemMapReader instance for acting as a read-only stream
|
||||
This method parses the object header from m and returns the parsed
|
||||
type and size, as well as the created stream instance.
|
||||
|
||||
:param m: memory map on which to operate. It must be object data ( header + contents )
|
||||
:param close_on_deletion: if True, the memory map will be closed once we are
|
||||
being deleted"""
|
||||
inst = DecompressMemMapReader(m, close_on_deletion, 0)
|
||||
typ, size = inst._parse_header_info()
|
||||
return typ, size, inst
|
||||
|
||||
def data(self):
|
||||
""":return: random access compatible data we are working on"""
|
||||
return self._m
|
||||
|
||||
def close(self):
|
||||
"""Close our underlying stream of compressed bytes if this was allowed during initialization
|
||||
:return: True if we closed the underlying stream
|
||||
:note: can be called safely
|
||||
"""
|
||||
if self._close:
|
||||
if hasattr(self._m, 'close'):
|
||||
self._m.close()
|
||||
self._close = False
|
||||
# END handle resource freeing
|
||||
|
||||
def compressed_bytes_read(self):
|
||||
"""
|
||||
:return: number of compressed bytes read. This includes the bytes it
|
||||
took to decompress the header ( if there was one )"""
|
||||
# ABSTRACT: When decompressing a byte stream, it can be that the first
|
||||
# x bytes which were requested match the first x bytes in the loosely
|
||||
# compressed datastream. This is the worst-case assumption that the reader
|
||||
# does, it assumes that it will get at least X bytes from X compressed bytes
|
||||
# in all cases.
|
||||
# The caveat is that the object, according to our known uncompressed size,
|
||||
# is already complete, but there are still some bytes left in the compressed
|
||||
# stream that contribute to the amount of compressed bytes.
|
||||
# How can we know that we are truly done, and have read all bytes we need
|
||||
# to read ?
|
||||
# Without help, we cannot know, as we need to obtain the status of the
|
||||
# decompression. If it is not finished, we need to decompress more data
|
||||
# until it is finished, to yield the actual number of compressed bytes
|
||||
# belonging to the decompressed object
|
||||
# We are using a custom zlib module for this, if its not present,
|
||||
# we try to put in additional bytes up for decompression if feasible
|
||||
# and check for the unused_data.
|
||||
|
||||
# Only scrub the stream forward if we are officially done with the
|
||||
# bytes we were to have.
|
||||
if self._br == self._s and not self._zip.unused_data:
|
||||
# manipulate the bytes-read to allow our own read method to continue
|
||||
# but keep the window at its current position
|
||||
self._br = 0
|
||||
if hasattr(self._zip, 'status'):
|
||||
while self._zip.status == zlib.Z_OK:
|
||||
self.read(mmap.PAGESIZE)
|
||||
# END scrub-loop custom zlib
|
||||
else:
|
||||
# pass in additional pages, until we have unused data
|
||||
while not self._zip.unused_data and self._cbr != len(self._m):
|
||||
self.read(mmap.PAGESIZE)
|
||||
# END scrub-loop default zlib
|
||||
# END handle stream scrubbing
|
||||
|
||||
# reset bytes read, just to be sure
|
||||
self._br = self._s
|
||||
# END handle stream scrubbing
|
||||
|
||||
# unused data ends up in the unconsumed tail, which was removed
|
||||
# from the count already
|
||||
return self._cbr
|
||||
|
||||
#} END interface
|
||||
|
||||
def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)):
|
||||
"""Allows to reset the stream to restart reading
|
||||
:raise ValueError: If offset and whence are not 0"""
|
||||
if offset != 0 or whence != getattr(os, 'SEEK_SET', 0):
|
||||
raise ValueError("Can only seek to position 0")
|
||||
# END handle offset
|
||||
|
||||
self._zip = zlib.decompressobj()
|
||||
self._br = self._cws = self._cwe = self._cbr = 0
|
||||
if self._phi:
|
||||
self._phi = False
|
||||
del(self._s) # trigger header parsing on first access
|
||||
# END skip header
|
||||
|
||||
def read(self, size=-1):
|
||||
if size < 1:
|
||||
size = self._s - self._br
|
||||
else:
|
||||
size = min(size, self._s - self._br)
|
||||
# END clamp size
|
||||
|
||||
if size == 0:
|
||||
return b''
|
||||
# END handle depletion
|
||||
|
||||
# deplete the buffer, then just continue using the decompress object
|
||||
# which has an own buffer. We just need this to transparently parse the
|
||||
# header from the zlib stream
|
||||
dat = b''
|
||||
if self._buf:
|
||||
if self._buflen >= size:
|
||||
# have enough data
|
||||
dat = self._buf.read(size)
|
||||
self._buflen -= size
|
||||
self._br += size
|
||||
return dat
|
||||
else:
|
||||
dat = self._buf.read() # ouch, duplicates data
|
||||
size -= self._buflen
|
||||
self._br += self._buflen
|
||||
|
||||
self._buflen = 0
|
||||
self._buf = None
|
||||
# END handle buffer len
|
||||
# END handle buffer
|
||||
|
||||
# decompress some data
|
||||
# Abstract: zlib needs to operate on chunks of our memory map ( which may
|
||||
# be large ), as it will otherwise and always fill in the 'unconsumed_tail'
|
||||
# attribute which possibly reads our whole map to the end, forcing
|
||||
# everything to be read from disk even though just a portion was requested.
|
||||
# As this would be a nogo, we workaround it by passing only chunks of data,
|
||||
# moving the window into the memory map along as we decompress, which keeps
|
||||
# the tail smaller than our chunk-size. This causes 'only' the chunk to be
|
||||
# copied once, and another copy of a part of it when it creates the unconsumed
|
||||
# tail. We have to use it to hand in the appropriate amount of bytes during
|
||||
# the next read.
|
||||
tail = self._zip.unconsumed_tail
|
||||
if tail:
|
||||
# move the window, make it as large as size demands. For code-clarity,
|
||||
# we just take the chunk from our map again instead of reusing the unconsumed
|
||||
# tail. The latter one would save some memory copying, but we could end up
|
||||
# with not getting enough data uncompressed, so we had to sort that out as well.
|
||||
# Now we just assume the worst case, hence the data is uncompressed and the window
|
||||
# needs to be as large as the uncompressed bytes we want to read.
|
||||
self._cws = self._cwe - len(tail)
|
||||
self._cwe = self._cws + size
|
||||
else:
|
||||
cws = self._cws
|
||||
self._cws = self._cwe
|
||||
self._cwe = cws + size
|
||||
# END handle tail
|
||||
|
||||
# if window is too small, make it larger so zip can decompress something
|
||||
if self._cwe - self._cws < 8:
|
||||
self._cwe = self._cws + 8
|
||||
# END adjust winsize
|
||||
|
||||
# takes a slice, but doesn't copy the data, it says ...
|
||||
indata = self._m[self._cws:self._cwe]
|
||||
|
||||
# get the actual window end to be sure we don't use it for computations
|
||||
self._cwe = self._cws + len(indata)
|
||||
dcompdat = self._zip.decompress(indata, size)
|
||||
# update the amount of compressed bytes read
|
||||
# We feed possibly overlapping chunks, which is why the unconsumed tail
|
||||
# has to be taken into consideration, as well as the unused data
|
||||
# if we hit the end of the stream
|
||||
# NOTE: Behavior changed in PY2.7 onward, which requires special handling to make the tests work properly.
|
||||
# They are thorough, and I assume it is truly working.
|
||||
# Why is this logic as convoluted as it is ? Please look at the table in
|
||||
# https://github.com/gitpython-developers/gitdb/issues/19 to learn about the test-results.
|
||||
# Basically, on py2.6, you want to use branch 1, whereas on all other python version, the second branch
|
||||
# will be the one that works.
|
||||
# However, the zlib VERSIONs as well as the platform check is used to further match the entries in the
|
||||
# table in the github issue. This is it ... it was the only way I could make this work everywhere.
|
||||
# IT's CERTAINLY GOING TO BITE US IN THE FUTURE ... .
|
||||
if zlib.ZLIB_VERSION in ('1.2.7', '1.2.5') and not sys.platform == 'darwin':
|
||||
unused_datalen = len(self._zip.unconsumed_tail)
|
||||
else:
|
||||
unused_datalen = len(self._zip.unconsumed_tail) + len(self._zip.unused_data)
|
||||
# # end handle very special case ...
|
||||
|
||||
self._cbr += len(indata) - unused_datalen
|
||||
self._br += len(dcompdat)
|
||||
|
||||
if dat:
|
||||
dcompdat = dat + dcompdat
|
||||
# END prepend our cached data
|
||||
|
||||
# it can happen, depending on the compression, that we get less bytes
|
||||
# than ordered as it needs the final portion of the data as well.
|
||||
# Recursively resolve that.
|
||||
# Note: dcompdat can be empty even though we still appear to have bytes
|
||||
# to read, if we are called by compressed_bytes_read - it manipulates
|
||||
# us to empty the stream
|
||||
if dcompdat and (len(dcompdat) - len(dat)) < size and self._br < self._s:
|
||||
dcompdat += self.read(size - len(dcompdat))
|
||||
# END handle special case
|
||||
return dcompdat
|
||||
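# Illustrative sketch, not part of gitdb: DecompressMemMapReader.new() parses the
# loose-object header and then serves the decompressed payload. A plain bytes object
# works in place of a memory map for this small demonstration.
def _example_decompress_mem_map_reader():
    raw = zlib.compress(b'blob 11\x00hello world')
    typ, size, stream = DecompressMemMapReader.new(raw)
    assert (typ, size) == (b'blob', 11)
    assert stream.read() == b'hello world'
    return typ, size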
|
||||
|
||||
class DeltaApplyReader(LazyMixin):
|
||||
|
||||
"""A reader which dynamically applies pack deltas to a base object, keeping the
|
||||
memory demands to a minimum.
|
||||
|
||||
The size of the final object is only obtainable once all deltas have been
|
||||
applied, unless it is retrieved from a pack index.
|
||||
|
||||
The uncompressed Delta has the following layout (MSB being a most significant
|
||||
bit encoded dynamic size):
|
||||
|
||||
* MSB Source Size - the size of the base against which the delta was created
|
||||
* MSB Target Size - the size of the resulting data after the delta was applied
|
||||
* A list of one byte commands (cmd) which are followed by a specific protocol:
|
||||
|
||||
* cmd & 0x80 - copy delta_data[offset:offset+size]
|
||||
|
||||
* Followed by an encoded offset into the delta data
|
||||
* Followed by an encoded size of the chunk to copy
|
||||
|
||||
* cmd & 0x7f - insert
|
||||
|
||||
* insert cmd bytes from the delta buffer into the output stream
|
||||
|
||||
* cmd == 0 - invalid operation ( or error in delta stream )
|
||||
"""
|
||||
__slots__ = (
|
||||
"_bstream", # base stream to which to apply the deltas
|
||||
"_dstreams", # tuple of delta stream readers
|
||||
"_mm_target", # memory map of the delta-applied data
|
||||
"_size", # actual number of bytes in _mm_target
|
||||
"_br" # number of bytes read
|
||||
)
|
||||
|
||||
#{ Configuration
|
||||
k_max_memory_move = 250 * 1000 * 1000
|
||||
#} END configuration
|
||||
|
||||
def __init__(self, stream_list):
|
||||
"""Initialize this instance with a list of streams, the first stream being
|
||||
the delta to apply on top of all following deltas, the last stream being the
|
||||
base object onto which to apply the deltas"""
|
||||
assert len(stream_list) > 1, "Need at least one delta and one base stream"
|
||||
|
||||
self._bstream = stream_list[-1]
|
||||
self._dstreams = tuple(stream_list[:-1])
|
||||
self._br = 0
|
||||
|
||||
def _set_cache_too_slow_without_c(self, attr):
|
||||
# the direct algorithm is fastest and most direct if there is only one
|
||||
# delta. Also, the extra overhead might not be worth it for items smaller
|
||||
# than X - definitely the case in python, every function call costs
|
||||
# huge amounts of time
|
||||
# if len(self._dstreams) * self._bstream.size < self.k_max_memory_move:
|
||||
if len(self._dstreams) == 1:
|
||||
return self._set_cache_brute_(attr)
|
||||
|
||||
# Aggregate all deltas into one delta in reverse order. Hence we take
|
||||
# the last delta, and reverse-merge its ancestor delta, until we receive
|
||||
# the final delta data stream.
|
||||
dcl = connect_deltas(self._dstreams)
|
||||
|
||||
# call len directly, as the (optional) c version doesn't implement the sequence
|
||||
# protocol
|
||||
if dcl.rbound() == 0:
|
||||
self._size = 0
|
||||
self._mm_target = allocate_memory(0)
|
||||
return
|
||||
# END handle empty list
|
||||
|
||||
self._size = dcl.rbound()
|
||||
self._mm_target = allocate_memory(self._size)
|
||||
|
||||
bbuf = allocate_memory(self._bstream.size)
|
||||
stream_copy(self._bstream.read, bbuf.write, self._bstream.size, 256 * mmap.PAGESIZE)
|
||||
|
||||
# APPLY CHUNKS
|
||||
write = self._mm_target.write
|
||||
dcl.apply(bbuf, write)
|
||||
|
||||
self._mm_target.seek(0)
|
||||
|
||||
def _set_cache_brute_(self, attr):
|
||||
"""If we are here, we apply the actual deltas"""
|
||||
# TODO: There should be a special case if there is only one stream
|
||||
# Then the default-git algorithm should perform a tad faster, as the
|
||||
# delta is not peaked into, causing less overhead.
|
||||
buffer_info_list = list()
|
||||
max_target_size = 0
|
||||
for dstream in self._dstreams:
|
||||
buf = dstream.read(512) # read the header information + X
|
||||
offset, src_size = msb_size(buf)
|
||||
offset, target_size = msb_size(buf, offset)
|
||||
buffer_info_list.append((buf[offset:], offset, src_size, target_size))
|
||||
max_target_size = max(max_target_size, target_size)
|
||||
# END for each delta stream
|
||||
|
||||
# sanity check - the first delta to apply should have the same source
|
||||
# size as our actual base stream
|
||||
base_size = self._bstream.size
|
||||
target_size = max_target_size
|
||||
|
||||
# if we have more than 1 delta to apply, we will swap buffers, hence we must
|
||||
# assure that all buffers we use are large enough to hold all the results
|
||||
if len(self._dstreams) > 1:
|
||||
base_size = target_size = max(base_size, max_target_size)
|
||||
# END adjust buffer sizes
|
||||
|
||||
# Allocate private memory map big enough to hold the first base buffer
|
||||
# We need random access to it
|
||||
bbuf = allocate_memory(base_size)
|
||||
stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE)
|
||||
|
||||
# allocate memory map large enough for the largest (intermediate) target
|
||||
# We will use it as scratch space for all delta ops. If the final
|
||||
# target buffer is smaller than our allocated space, we just use parts
|
||||
# of it upon return.
|
||||
tbuf = allocate_memory(target_size)
|
||||
|
||||
# for each delta to apply, memory map the decompressed delta and
|
||||
# work on the op-codes to reconstruct everything.
|
||||
# For the actual copying, we use a seek and write pattern of buffer
|
||||
# slices.
|
||||
final_target_size = None
|
||||
for (dbuf, offset, src_size, target_size), dstream in zip(reversed(buffer_info_list), reversed(self._dstreams)):
|
||||
# allocate a buffer to hold all delta data - fill in the data for
|
||||
# fast access. We do this as we know that reading individual bytes
|
||||
# from our stream would be slower than necessary ( although possible )
|
||||
# The dbuf buffer contains commands after the first two MSB sizes, the
|
||||
# offset specifies the amount of bytes read to get the sizes.
|
||||
ddata = allocate_memory(dstream.size - offset)
|
||||
ddata.write(dbuf)
|
||||
# read the rest from the stream. The size we give is larger than necessary
|
||||
stream_copy(dstream.read, ddata.write, dstream.size, 256 * mmap.PAGESIZE)
|
||||
|
||||
#######################################################################
|
||||
if 'c_apply_delta' in globals():
|
||||
c_apply_delta(bbuf, ddata, tbuf)
|
||||
else:
|
||||
apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
|
||||
#######################################################################
|
||||
|
||||
# finally, swap out source and target buffers. The target is now the
|
||||
# base for the next delta to apply
|
||||
bbuf, tbuf = tbuf, bbuf
|
||||
bbuf.seek(0)
|
||||
tbuf.seek(0)
|
||||
final_target_size = target_size
|
||||
# END for each delta to apply
|
||||
|
||||
# its already seeked to 0, constrain it to the actual size
|
||||
# NOTE: in the end of the loop, it swaps buffers, hence our target buffer
|
||||
# is not tbuf, but bbuf !
|
||||
self._mm_target = bbuf
|
||||
self._size = final_target_size
|
||||
|
||||
#{ Configuration
|
||||
if not has_perf_mod:
|
||||
_set_cache_ = _set_cache_brute_
|
||||
else:
|
||||
_set_cache_ = _set_cache_too_slow_without_c
|
||||
|
||||
#} END configuration
|
||||
|
||||
def read(self, count=0):
|
||||
bl = self._size - self._br # bytes left
|
||||
if count < 1 or count > bl:
|
||||
count = bl
|
||||
# NOTE: we could check for certain size limits, and possibly
|
||||
# return buffers instead of strings to prevent byte copying
|
||||
data = self._mm_target.read(count)
|
||||
self._br += len(data)
|
||||
return data
|
||||
|
||||
def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)):
|
||||
"""Allows to reset the stream to restart reading
|
||||
|
||||
:raise ValueError: If offset and whence are not 0"""
|
||||
if offset != 0 or whence != getattr(os, 'SEEK_SET', 0):
|
||||
raise ValueError("Can only seek to position 0")
|
||||
# END handle offset
|
||||
self._br = 0
|
||||
self._mm_target.seek(0)
|
||||
|
||||
#{ Interface
|
||||
|
||||
@classmethod
|
||||
def new(cls, stream_list):
|
||||
"""
|
||||
Convert the given list of streams into a stream which resolves deltas
|
||||
when reading from it.
|
||||
|
||||
:param stream_list: two or more stream objects, first stream is a Delta
|
||||
to the object that you want to resolve, followed by N additional delta
|
||||
streams. The list's last stream must be a non-delta stream.
|
||||
|
||||
:return: Non-Delta OPackStream object whose stream can be used to obtain
|
||||
the decompressed resolved data
|
||||
:raise ValueError: if the stream list cannot be handled"""
|
||||
if len(stream_list) < 2:
|
||||
raise ValueError("Need at least two streams")
|
||||
# END single object special handling
|
||||
|
||||
if stream_list[-1].type_id in delta_types:
|
||||
raise ValueError(
|
||||
"Cannot resolve deltas if there is no base object stream, last one was type: %s" % stream_list[-1].type)
|
||||
# END check stream
|
||||
return cls(stream_list)
|
||||
|
||||
#} END interface
|
||||
|
||||
#{ OInfo like Interface
|
||||
|
||||
@property
|
||||
def type(self):
|
||||
return self._bstream.type
|
||||
|
||||
@property
|
||||
def type_id(self):
|
||||
return self._bstream.type_id
|
||||
|
||||
@property
|
||||
def size(self):
|
||||
""":return: number of uncompressed bytes in the stream"""
|
||||
return self._size
|
||||
|
||||
#} END oinfo like interface
|
||||
|
||||
|
||||
#} END RO streams
|
||||
|
||||
|
||||
#{ W Streams
|
||||
|
||||
class Sha1Writer:
|
||||
|
||||
"""Simple stream writer which produces a sha whenever you like as it degests
|
||||
everything it is supposed to write"""
|
||||
__slots__ = "sha1"
|
||||
|
||||
def __init__(self):
|
||||
self.sha1 = make_sha()
|
||||
|
||||
#{ Stream Interface
|
||||
|
||||
def write(self, data):
|
||||
""":raise IOError: If not all bytes could be written
|
||||
:param data: byte object
|
||||
:return: length of incoming data"""
|
||||
|
||||
self.sha1.update(data)
|
||||
|
||||
return len(data)
|
||||
|
||||
# END stream interface
|
||||
|
||||
#{ Interface
|
||||
|
||||
def sha(self, as_hex=False):
|
||||
""":return: sha so far
|
||||
:param as_hex: if True, sha will be hex-encoded, binary otherwise"""
|
||||
if as_hex:
|
||||
return self.sha1.hexdigest()
|
||||
return self.sha1.digest()
|
||||
|
||||
#} END interface
|
||||
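# Illustrative sketch, not part of gitdb: Sha1Writer simply accumulates a digest of
# everything passed to write(), so hashing a loose-object byte stream looks like this.
def _example_sha1_writer():
    writer = Sha1Writer()
    writer.write(b'blob 11\x00hello world')   # header + data, the way git hashes objects
    return writer.sha(as_hex=True)            # same value `git hash-object` would print for this blob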
|
||||
|
||||
class FlexibleSha1Writer(Sha1Writer):
|
||||
|
||||
"""Writer producing a sha1 while passing on the written bytes to the given
|
||||
write function"""
|
||||
__slots__ = 'writer'
|
||||
|
||||
def __init__(self, writer):
|
||||
Sha1Writer.__init__(self)
|
||||
self.writer = writer
|
||||
|
||||
def write(self, data):
|
||||
Sha1Writer.write(self, data)
|
||||
self.writer(data)
|
||||
|
||||
|
||||
class ZippedStoreShaWriter(Sha1Writer):
|
||||
|
||||
"""Remembers everything someone writes to it and generates a sha"""
|
||||
__slots__ = ('buf', 'zip')
|
||||
|
||||
def __init__(self):
|
||||
Sha1Writer.__init__(self)
|
||||
self.buf = BytesIO()
|
||||
self.zip = zlib.compressobj(zlib.Z_BEST_SPEED)
|
||||
|
||||
def __getattr__(self, attr):
|
||||
return getattr(self.buf, attr)
|
||||
|
||||
def write(self, data):
|
||||
alen = Sha1Writer.write(self, data)
|
||||
self.buf.write(self.zip.compress(data))
|
||||
|
||||
return alen
|
||||
|
||||
def close(self):
|
||||
self.buf.write(self.zip.flush())
|
||||
|
||||
def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)):
|
||||
"""Seeking currently only supports to rewind written data
|
||||
Multiple writes are not supported"""
|
||||
if offset != 0 or whence != getattr(os, 'SEEK_SET', 0):
|
||||
raise ValueError("Can only seek to position 0")
|
||||
# END handle offset
|
||||
self.buf.seek(0)
|
||||
|
||||
def getvalue(self):
|
||||
""":return: string value from the current stream position to the end"""
|
||||
return self.buf.getvalue()
|
||||
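# Illustrative sketch, not part of gitdb: ZippedStoreShaWriter keeps the compressed
# bytes in memory while digesting the uncompressed input, so both views stay available.
def _example_zipped_store_sha_writer():
    writer = ZippedStoreShaWriter()
    writer.write(b'some object data')
    writer.close()                                    # flush the compressor
    assert zlib.decompress(writer.getvalue()) == b'some object data'
    return writer.sha(as_hex=True)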
|
||||
|
||||
class FDCompressedSha1Writer(Sha1Writer):
|
||||
|
||||
"""Digests data written to it, making the sha available, then compress the
|
||||
data and write it to the file descriptor
|
||||
|
||||
**Note:** operates on raw file descriptors
|
||||
**Note:** for this to work, you have to use the close-method of this instance"""
|
||||
__slots__ = ("fd", "sha1", "zip")
|
||||
|
||||
# default exception
|
||||
exc = IOError("Failed to write all bytes to filedescriptor")
|
||||
|
||||
def __init__(self, fd):
|
||||
super().__init__()
|
||||
self.fd = fd
|
||||
self.zip = zlib.compressobj(zlib.Z_BEST_SPEED)
|
||||
|
||||
#{ Stream Interface
|
||||
|
||||
def write(self, data):
|
||||
""":raise IOError: If not all bytes could be written
|
||||
:return: length of incoming data"""
|
||||
self.sha1.update(data)
|
||||
cdata = self.zip.compress(data)
|
||||
bytes_written = write(self.fd, cdata)
|
||||
|
||||
if bytes_written != len(cdata):
|
||||
raise self.exc
|
||||
|
||||
return len(data)
|
||||
|
||||
def close(self):
|
||||
remainder = self.zip.flush()
|
||||
if write(self.fd, remainder) != len(remainder):
|
||||
raise self.exc
|
||||
return close(self.fd)
|
||||
|
||||
#} END stream interface
|
||||
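# Illustrative sketch, not part of gitdb: FDCompressedSha1Writer works on a raw file
# descriptor, digesting the plain bytes while writing their zlib-compressed form. A
# standard-library temporary file is used here purely for illustration.
def _example_fd_compressed_sha1_writer():
    import tempfile
    fd, path = tempfile.mkstemp()
    writer = FDCompressedSha1Writer(fd)
    writer.write(b'blob 11\x00hello world')
    sha = writer.sha(as_hex=True)
    writer.close()                                    # flushes zlib and closes the descriptor
    with open(path, 'rb') as handle:
        assert zlib.decompress(handle.read()) == b'blob 11\x00hello world'
    os.remove(path)
    return sha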
|
||||
|
||||
class FDStream:
|
||||
|
||||
"""A simple wrapper providing the most basic functions on a file descriptor
|
||||
with the fileobject interface. Cannot use os.fdopen as the resulting stream
|
||||
takes ownership"""
|
||||
__slots__ = ("_fd", '_pos')
|
||||
|
||||
def __init__(self, fd):
|
||||
self._fd = fd
|
||||
self._pos = 0
|
||||
|
||||
def write(self, data):
|
||||
self._pos += len(data)
|
||||
os.write(self._fd, data)
|
||||
|
||||
def read(self, count=0):
|
||||
if count == 0:
# no file path is stored on this wrapper, so determine the size from the file descriptor itself
count = os.fstat(self._fd).st_size
|
||||
# END handle read everything
|
||||
|
||||
bytes = os.read(self._fd, count)
|
||||
self._pos += len(bytes)
|
||||
return bytes
|
||||
|
||||
def fileno(self):
|
||||
return self._fd
|
||||
|
||||
def tell(self):
|
||||
return self._pos
|
||||
|
||||
def close(self):
|
||||
close(self._fd)
|
||||
|
||||
|
||||
class NullStream:
|
||||
|
||||
"""A stream that does nothing but providing a stream interface.
|
||||
Use it like /dev/null"""
|
||||
__slots__ = tuple()
|
||||
|
||||
def read(self, size=0):
|
||||
return ''
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
def write(self, data):
|
||||
return len(data)
|
||||
|
||||
|
||||
#} END W streams
|
@@ -0,0 +1,4 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
192
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/test/lib.py
Normal file
@@ -0,0 +1,192 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
"""Utilities used in ODB testing"""
|
||||
from gitdb import OStream
|
||||
|
||||
import sys
|
||||
import random
|
||||
from array import array
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
import glob
|
||||
import unittest
|
||||
import tempfile
|
||||
import shutil
|
||||
import os
|
||||
import gc
|
||||
import logging
|
||||
from functools import wraps
|
||||
|
||||
|
||||
#{ Bases
|
||||
|
||||
class TestBase(unittest.TestCase):
|
||||
"""Base class for all tests
|
||||
|
||||
TestCase providing access to readonly repositories using the following member variables.
|
||||
|
||||
* gitrepopath
|
||||
|
||||
* read-only base path of the git source repository, i.e. .../git/.git
|
||||
"""
|
||||
|
||||
#{ Invariants
|
||||
k_env_git_repo = "GITDB_TEST_GIT_REPO_BASE"
|
||||
#} END invariants
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
try:
|
||||
super().setUpClass()
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
cls.gitrepopath = os.environ.get(cls.k_env_git_repo)
|
||||
if not cls.gitrepopath:
|
||||
logging.info(
|
||||
"You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository", cls.k_env_git_repo)
|
||||
ospd = os.path.dirname
|
||||
cls.gitrepopath = os.path.join(ospd(ospd(ospd(__file__))), '.git')
|
||||
# end assure gitrepo is set
|
||||
assert cls.gitrepopath.endswith('.git')
|
||||
|
||||
|
||||
#} END bases
|
||||
|
||||
#{ Decorators
|
||||
|
||||
def with_rw_directory(func):
|
||||
"""Create a temporary directory which can be written to, remove it if the
|
||||
test succeeds, but leave it otherwise to aid additional debugging"""
|
||||
|
||||
def wrapper(self):
|
||||
path = tempfile.mktemp(prefix=func.__name__)
|
||||
os.mkdir(path)
|
||||
keep = False
|
||||
try:
|
||||
try:
|
||||
return func(self, path)
|
||||
except Exception:
|
||||
sys.stderr.write(f"Test {type(self).__name__}.{func.__name__} failed, output is at {path!r}\n")
|
||||
keep = True
|
||||
raise
|
||||
finally:
|
||||
# Need to collect here to be sure all handles have been closed. It appears
|
||||
# a windows-only issue. In fact things should be deleted, as well as
|
||||
# memory maps closed, once objects go out of scope. For some reason
|
||||
# though this is not the case here unless we collect explicitly.
|
||||
if not keep:
|
||||
gc.collect()
|
||||
shutil.rmtree(path)
|
||||
# END handle exception
|
||||
# END wrapper
|
||||
|
||||
wrapper.__name__ = func.__name__
|
||||
return wrapper
|
||||
|
||||
|
||||
def with_packs_rw(func):
|
||||
"""Function that provides a path into which the packs for testing should be
|
||||
copied. Will pass on the path to the actual function afterwards"""
|
||||
|
||||
def wrapper(self, path):
|
||||
src_pack_glob = fixture_path('packs/*')
|
||||
copy_files_globbed(src_pack_glob, path, hard_link_ok=True)
|
||||
return func(self, path)
|
||||
# END wrapper
|
||||
|
||||
wrapper.__name__ = func.__name__
|
||||
return wrapper
|
||||
|
||||
#} END decorators
|
||||
|
||||
#{ Routines
|
||||
|
||||
|
||||
def fixture_path(relapath=''):
|
||||
""":return: absolute path into the fixture directory
|
||||
:param relapath: relative path into the fixtures directory, or ''
|
||||
to obtain the fixture directory itself"""
|
||||
return os.path.join(os.path.dirname(__file__), 'fixtures', relapath)
|
||||
|
||||
|
||||
def copy_files_globbed(source_glob, target_dir, hard_link_ok=False):
|
||||
"""Copy all files found according to the given source glob into the target directory
|
||||
:param hard_link_ok: if True, hard links will be created if possible. Otherwise
|
||||
the files will be copied"""
|
||||
for src_file in glob.glob(source_glob):
|
||||
if hard_link_ok and hasattr(os, 'link'):
|
||||
target = os.path.join(target_dir, os.path.basename(src_file))
|
||||
try:
|
||||
os.link(src_file, target)
|
||||
except OSError:
|
||||
shutil.copy(src_file, target_dir)
|
||||
# END handle cross device links ( and resulting failure )
|
||||
else:
|
||||
shutil.copy(src_file, target_dir)
|
||||
# END try hard link
|
||||
# END for each file to copy
|
||||
|
||||
|
||||
def make_bytes(size_in_bytes, randomize=False):
|
||||
""":return: string with given size in bytes
|
||||
:param randomize: try to produce a very random stream"""
|
||||
actual_size = size_in_bytes // 4
|
||||
producer = range(actual_size)
|
||||
if randomize:
|
||||
producer = list(producer)
|
||||
random.shuffle(producer)
|
||||
# END randomize
|
||||
a = array('i', producer)
|
||||
return a.tobytes()
|
||||
|
||||
|
||||
def make_object(type, data):
|
||||
""":return: bytes resembling an uncompressed object"""
|
||||
odata = "blob %i\0" % len(data)
|
||||
return odata.encode("ascii") + data
|
||||
|
||||
|
||||
def make_memory_file(size_in_bytes, randomize=False):
|
||||
""":return: tuple(size_of_stream, stream)
|
||||
:param randomize: try to produce a very random stream"""
|
||||
d = make_bytes(size_in_bytes, randomize)
|
||||
return len(d), BytesIO(d)
|
||||
|
||||
#} END routines
|
||||
|
||||
#{ Stream Utilities
|
||||
|
||||
|
||||
class DummyStream:
|
||||
|
||||
def __init__(self):
|
||||
self.was_read = False
|
||||
self.bytes = 0
|
||||
self.closed = False
|
||||
|
||||
def read(self, size):
|
||||
self.was_read = True
|
||||
self.bytes = size
|
||||
|
||||
def close(self):
|
||||
self.closed = True
|
||||
|
||||
def _assert(self):
|
||||
assert self.was_read
|
||||
|
||||
|
||||
class DeriveTest(OStream):
|
||||
|
||||
def __init__(self, sha, type, size, stream, *args, **kwargs):
|
||||
self.myarg = kwargs.pop('myarg')
|
||||
self.args = args
|
||||
|
||||
def _assert(self):
|
||||
assert self.args
|
||||
assert self.myarg
|
||||
|
||||
#} END stream utilities
|
@@ -0,0 +1,105 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
"""Test for object db"""
|
||||
from gitdb.test.lib import (
|
||||
TestBase,
|
||||
DummyStream,
|
||||
DeriveTest,
|
||||
)
|
||||
|
||||
from gitdb import (
|
||||
OInfo,
|
||||
OPackInfo,
|
||||
ODeltaPackInfo,
|
||||
OStream,
|
||||
OPackStream,
|
||||
ODeltaPackStream,
|
||||
IStream
|
||||
)
|
||||
from gitdb.util import (
|
||||
NULL_BIN_SHA
|
||||
)
|
||||
|
||||
from gitdb.typ import (
|
||||
str_blob_type
|
||||
)
|
||||
|
||||
|
||||
class TestBaseTypes(TestBase):
|
||||
|
||||
def test_streams(self):
|
||||
# test info
|
||||
sha = NULL_BIN_SHA
|
||||
s = 20
|
||||
blob_id = 3
|
||||
|
||||
info = OInfo(sha, str_blob_type, s)
|
||||
assert info.binsha == sha
|
||||
assert info.type == str_blob_type
|
||||
assert info.type_id == blob_id
|
||||
assert info.size == s
|
||||
|
||||
# test pack info
|
||||
# provides type_id
|
||||
pinfo = OPackInfo(0, blob_id, s)
|
||||
assert pinfo.type == str_blob_type
|
||||
assert pinfo.type_id == blob_id
|
||||
assert pinfo.pack_offset == 0
|
||||
|
||||
dpinfo = ODeltaPackInfo(0, blob_id, s, sha)
|
||||
assert dpinfo.type == str_blob_type
|
||||
assert dpinfo.type_id == blob_id
|
||||
assert dpinfo.delta_info == sha
|
||||
assert dpinfo.pack_offset == 0
|
||||
|
||||
# test ostream
|
||||
stream = DummyStream()
|
||||
ostream = OStream(*(info + (stream, )))
|
||||
assert ostream.stream is stream
|
||||
ostream.read(15)
|
||||
stream._assert()
|
||||
assert stream.bytes == 15
|
||||
ostream.read(20)
|
||||
assert stream.bytes == 20
|
||||
|
||||
# test packstream
|
||||
postream = OPackStream(*(pinfo + (stream, )))
|
||||
assert postream.stream is stream
|
||||
postream.read(10)
|
||||
stream._assert()
|
||||
assert stream.bytes == 10
|
||||
|
||||
# test deltapackstream
|
||||
dpostream = ODeltaPackStream(*(dpinfo + (stream, )))
|
||||
assert dpostream.stream is stream
|
||||
dpostream.read(5)
|
||||
stream._assert()
|
||||
assert stream.bytes == 5
|
||||
|
||||
# derive with own args
|
||||
DeriveTest(sha, str_blob_type, s, stream, 'mine', myarg=3)._assert()
|
||||
|
||||
# test istream
|
||||
istream = IStream(str_blob_type, s, stream)
|
||||
assert istream.binsha is None
|
||||
istream.binsha = sha
|
||||
assert istream.binsha == sha
|
||||
|
||||
assert len(istream.binsha) == 20
|
||||
assert len(istream.hexsha) == 40
|
||||
|
||||
assert istream.size == s
|
||||
istream.size = s * 2
|
||||
assert istream.size == s * 2
|
||||
assert istream.type == str_blob_type
|
||||
istream.type = "something"
|
||||
assert istream.type == "something"
|
||||
assert istream.stream is stream
|
||||
istream.stream = None
|
||||
assert istream.stream is None
|
||||
|
||||
assert istream.error is None
|
||||
istream.error = Exception()
|
||||
assert isinstance(istream.error, Exception)
|
@@ -0,0 +1,43 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
"""Module with examples from the tutorial section of the docs"""
|
||||
import os
|
||||
from gitdb.test.lib import TestBase
|
||||
from gitdb import IStream
|
||||
from gitdb.db import LooseObjectDB
|
||||
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
class TestExamples(TestBase):
|
||||
|
||||
def test_base(self):
|
||||
ldb = LooseObjectDB(os.path.join(self.gitrepopath, 'objects'))
|
||||
|
||||
for sha1 in ldb.sha_iter():
|
||||
oinfo = ldb.info(sha1)
|
||||
ostream = ldb.stream(sha1)
|
||||
assert oinfo[:3] == ostream[:3]
|
||||
|
||||
assert len(ostream.read()) == ostream.size
|
||||
assert ldb.has_object(oinfo.binsha)
|
||||
# END for each sha in database
|
||||
# assure we close all files
|
||||
try:
|
||||
del(ostream)
|
||||
del(oinfo)
|
||||
except UnboundLocalError:
|
||||
pass
|
||||
# END ignore exception if there are no loose objects
|
||||
|
||||
data = b"my data"
|
||||
istream = IStream("blob", len(data), BytesIO(data))
|
||||
|
||||
# the object does not yet have a sha
|
||||
assert istream.binsha is None
|
||||
ldb.store(istream)
|
||||
# now the sha is set
|
||||
assert len(istream.binsha) == 20
|
||||
assert ldb.has_object(istream.binsha)
|
@@ -0,0 +1,249 @@
|
||||
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
||||
#
|
||||
# This module is part of GitDB and is released under
|
||||
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
||||
"""Test everything about packs reading and writing"""
|
||||
from gitdb.test.lib import (
|
||||
TestBase,
|
||||
with_rw_directory,
|
||||
fixture_path
|
||||
)
|
||||
|
||||
from gitdb.stream import DeltaApplyReader
|
||||
|
||||
from gitdb.pack import (
|
||||
PackEntity,
|
||||
PackIndexFile,
|
||||
PackFile
|
||||
)

from gitdb.base import (
    OInfo,
    OStream,
)

from gitdb.fun import delta_types
from gitdb.exc import UnsupportedOperation
from gitdb.util import to_bin_sha

import pytest

import os
import tempfile


#{ Utilities
def bin_sha_from_filename(filename):
    return to_bin_sha(os.path.splitext(os.path.basename(filename))[0][5:])
#} END utilities


class TestPack(TestBase):

    packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67)
    packindexfile_v2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx'), 2, 30)
    packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42)
    packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2])
    packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2])
    packfile_v2_3_ascii = (
        fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2])

    def _assert_index_file(self, index, version, size):
        assert index.packfile_checksum() != index.indexfile_checksum()
        assert len(index.packfile_checksum()) == 20
        assert len(index.indexfile_checksum()) == 20
        assert index.version() == version
        assert index.size() == size
        assert len(index.offsets()) == size

        # get all data of all objects
        for oidx in range(index.size()):
            sha = index.sha(oidx)
            assert oidx == index.sha_to_index(sha)

            entry = index.entry(oidx)
            assert len(entry) == 3

            assert entry[0] == index.offset(oidx)
            assert entry[1] == sha
            assert entry[2] == index.crc(oidx)

            # verify partial sha
            for l in (4, 8, 11, 17, 20):
                assert index.partial_sha_to_index(sha[:l], l * 2) == oidx

        # END for each object index in indexfile
        self.assertRaises(ValueError, index.partial_sha_to_index, "\0", 2)

    def _assert_pack_file(self, pack, version, size):
        assert pack.version() == 2
        assert pack.size() == size
        assert len(pack.checksum()) == 20

        num_obj = 0
        for obj in pack.stream_iter():
            num_obj += 1
            info = pack.info(obj.pack_offset)
            stream = pack.stream(obj.pack_offset)

            assert info.pack_offset == stream.pack_offset
            assert info.type_id == stream.type_id
            assert hasattr(stream, 'read')

            # it should be possible to read from both streams
            assert obj.read() == stream.read()

            streams = pack.collect_streams(obj.pack_offset)
            assert streams

            # read the stream
            try:
                dstream = DeltaApplyReader.new(streams)
            except ValueError:
                # ignore these, old git versions use only ref deltas,
                # which we haven't resolved ( as we are without an index )
                # Also ignore non-delta streams
                continue
            # END get deltastream

            # read all
            data = dstream.read()
            assert len(data) == dstream.size

            # test seek
            dstream.seek(0)
            assert dstream.read() == data

            # read chunks
            # NOTE: the current implementation is safe, it basically transfers
            # all calls to the underlying memory map

        # END for each object
        assert num_obj == size

    def test_pack_index(self):
        # check version 1 and 2
        for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2):
            index = PackIndexFile(indexfile)
            self._assert_index_file(index, version, size)
        # END run tests
    def test_pack(self):
        # there is this special version 3, but apparently it's just like version 2 ...
        for packfile, version, size in (self.packfile_v2_3_ascii, self.packfile_v2_1, self.packfile_v2_2):
            pack = PackFile(packfile)
            self._assert_pack_file(pack, version, size)
        # END for each pack to test

    @with_rw_directory
    def test_pack_entity(self, rw_dir):
        pack_objs = list()
        for packinfo, indexinfo in ((self.packfile_v2_1, self.packindexfile_v1),
                                    (self.packfile_v2_2, self.packindexfile_v2),
                                    (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
            packfile, version, size = packinfo
            indexfile, version, size = indexinfo
            entity = PackEntity(packfile)
            assert entity.pack().path() == packfile
            assert entity.index().path() == indexfile
            pack_objs.extend(entity.stream_iter())

            count = 0
            for info, stream in zip(entity.info_iter(), entity.stream_iter()):
                count += 1
                assert info.binsha == stream.binsha
                assert len(info.binsha) == 20
                assert info.type_id == stream.type_id
                assert info.size == stream.size

                # we return fully resolved items, which is implied by the sha centric access
                assert not info.type_id in delta_types

                # try all calls
                assert len(entity.collect_streams(info.binsha))
                oinfo = entity.info(info.binsha)
                assert isinstance(oinfo, OInfo)
                assert oinfo.binsha is not None
                ostream = entity.stream(info.binsha)
                assert isinstance(ostream, OStream)
                assert ostream.binsha is not None

                # verify the stream
                try:
                    assert entity.is_valid_stream(info.binsha, use_crc=True)
                except UnsupportedOperation:
                    pass
                # END ignore version issues
                assert entity.is_valid_stream(info.binsha, use_crc=False)
            # END for each info, stream tuple
            assert count == size

        # END for each entity

        # pack writing - write all packs into one
        # index path can be None
        pack_path1 = tempfile.mktemp('', "pack1", rw_dir)
        pack_path2 = tempfile.mktemp('', "pack2", rw_dir)
        index_path = tempfile.mktemp('', 'index', rw_dir)
        iteration = 0

        def rewind_streams():
            for obj in pack_objs:
                obj.stream.seek(0)
        # END utility
        for ppath, ipath, num_obj in zip((pack_path1, pack_path2),
                                         (index_path, None),
                                         (len(pack_objs), None)):
            iwrite = None
            if ipath:
                ifile = open(ipath, 'wb')
                iwrite = ifile.write
            # END handle ipath

            # make sure we rewind the streams ... we work on the same objects over and over again
            if iteration > 0:
                rewind_streams()
            # END rewind streams
            iteration += 1

            with open(ppath, 'wb') as pfile:
                pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj)
            assert os.path.getsize(ppath) > 100

            # verify pack
            pf = PackFile(ppath)
            assert pf.size() == len(pack_objs)
            assert pf.version() == PackFile.pack_version_default
            assert pf.checksum() == pack_sha
            pf.close()

            # verify index
            if ipath is not None:
                ifile.close()
                assert os.path.getsize(ipath) > 100
                idx = PackIndexFile(ipath)
                assert idx.version() == PackIndexFile.index_version_default
                assert idx.packfile_checksum() == pack_sha
                assert idx.indexfile_checksum() == index_sha
                assert idx.size() == len(pack_objs)
                idx.close()
            # END verify files exist
        # END for each packpath, indexpath pair

        # verify the packs thoroughly
        rewind_streams()
        entity = PackEntity.create(pack_objs, rw_dir)
        count = 0
        for info in entity.info_iter():
            count += 1
            for use_crc in range(2):
                assert entity.is_valid_stream(info.binsha, use_crc)
            # END for each crc mode
        # END for each info
        assert count == len(pack_objs)
        entity.close()

    def test_pack_64(self):
        # TODO: hex-edit a pack helping us to verify that we can handle 64 bit offsets,
        # of course without really needing such a huge pack
        pytest.skip('not implemented')
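
# A hedged usage sketch, not part of the test suite above: it condenses the
# PackIndexFile/PackFile calls the assertions exercise - look an object up via
# the index, then read its stream out of the matching pack. The helper name and
# the use of the first fixture pair are illustrative only.
def _example_lookup_first_object(packfile=TestPack.packfile_v2_1[0],
                                 indexfile=TestPack.packindexfile_v1[0]):
    index = PackIndexFile(indexfile)
    pack = PackFile(packfile)
    sha = index.sha(0)                               # binary sha of the first entry
    offset = index.offset(index.sha_to_index(sha))   # byte offset inside the pack
    return pack.stream(offset).read()                # decompressed stream data at that offset
# END example sketch
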
@@ -0,0 +1,164 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Test for object db"""

from gitdb.test.lib import (
    TestBase,
    DummyStream,
    make_bytes,
    make_object,
    fixture_path
)

from gitdb import (
    DecompressMemMapReader,
    FDCompressedSha1Writer,
    LooseObjectDB,
    Sha1Writer,
    MemoryDB,
    IStream,
)
from gitdb.util import hex_to_bin

import zlib
from gitdb.typ import (
    str_blob_type
)

import tempfile
import os
from io import BytesIO


class TestStream(TestBase):

    """Test stream classes"""

    data_sizes = (15, 10000, 1000 * 1024 + 512)

    def _assert_stream_reader(self, stream, cdata, rewind_stream=lambda s: None):
        """Make stream tests - the orig_stream is seekable, allowing it to be
        rewound and reused
        :param cdata: the data we expect to read from stream, the contents
        :param rewind_stream: function called to rewind the stream to make it ready
            for reuse"""
        ns = 10
        assert len(cdata) > ns - 1, "Data must be larger than %i, was %i" % (ns, len(cdata))

        # read in small steps
        ss = len(cdata) // ns
        for i in range(ns):
            data = stream.read(ss)
            chunk = cdata[i * ss:(i + 1) * ss]
            assert data == chunk
        # END for each step
        rest = stream.read()
        if rest:
            assert rest == cdata[-len(rest):]
        # END handle rest

        if isinstance(stream, DecompressMemMapReader):
            assert len(stream.data()) == stream.compressed_bytes_read()
        # END handle special type

        rewind_stream(stream)

        # read everything
        rdata = stream.read()
        assert rdata == cdata

        if isinstance(stream, DecompressMemMapReader):
            assert len(stream.data()) == stream.compressed_bytes_read()
        # END handle special type
    def test_decompress_reader(self):
        for close_on_deletion in range(2):
            for with_size in range(2):
                for ds in self.data_sizes:
                    cdata = make_bytes(ds, randomize=False)

                    # zdata = zipped actual data
                    # cdata = original content data

                    # create reader
                    if with_size:
                        # need object data
                        zdata = zlib.compress(make_object(str_blob_type, cdata))
                        typ, size, reader = DecompressMemMapReader.new(zdata, close_on_deletion)
                        assert size == len(cdata)
                        assert typ == str_blob_type

                        # even if we don't set the size, it will be set automatically on first read
                        test_reader = DecompressMemMapReader(zdata, close_on_deletion=False)
                        assert test_reader._s == len(cdata)
                    else:
                        # here we need content data
                        zdata = zlib.compress(cdata)
                        reader = DecompressMemMapReader(zdata, close_on_deletion, len(cdata))
                        assert reader._s == len(cdata)
                    # END get reader

                    self._assert_stream_reader(reader, cdata, lambda r: r.seek(0))

                    # put in a dummy stream for closing
                    dummy = DummyStream()
                    reader._m = dummy

                    assert not dummy.closed
                    del(reader)
                    assert dummy.closed == close_on_deletion
                # END for each datasize
            # END whether size should be used
        # END whether stream should be closed when deleted

    def test_sha_writer(self):
        writer = Sha1Writer()
        assert 2 == writer.write(b"hi")
        assert len(writer.sha(as_hex=1)) == 40
        assert len(writer.sha(as_hex=0)) == 20

        # make sure it does something ;)
        prev_sha = writer.sha()
        writer.write(b"hi again")
        assert writer.sha() != prev_sha

    def test_compressed_writer(self):
        for ds in self.data_sizes:
            fd, path = tempfile.mkstemp()
            ostream = FDCompressedSha1Writer(fd)
            data = make_bytes(ds, randomize=False)

            # for now, just a single write, code doesn't care about chunking
            assert len(data) == ostream.write(data)
            ostream.close()

            # it's closed already
            self.assertRaises(OSError, os.close, fd)

            # read everything back, compare to data we zip
            fd = os.open(path, os.O_RDONLY | getattr(os, 'O_BINARY', 0))
            written_data = os.read(fd, os.path.getsize(path))
            assert len(written_data) == os.path.getsize(path)
            os.close(fd)
            assert written_data == zlib.compress(data, 1)  # best speed

            os.remove(path)
        # END for each data size

    def test_decompress_reader_special_case(self):
        odb = LooseObjectDB(fixture_path('objects'))
        mdb = MemoryDB()
        for sha in (b'888401851f15db0eed60eb1bc29dec5ddcace911',
                    b'7bb839852ed5e3a069966281bb08d50012fb309b',):
            ostream = odb.stream(hex_to_bin(sha))

            # if there is a bug, we will be missing one byte exactly!
            data = ostream.read()
            assert len(data) == ostream.size

            # Putting it back in should yield nothing new - after all, we have it already
            dump = mdb.store(IStream(ostream.type, ostream.size, BytesIO(data)))
            assert dump.hexsha == sha
        # end for each loose object sha to test
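
# A hedged usage sketch, not part of the test suite above: it condenses what
# test_decompress_reader exercises - DecompressMemMapReader.new() parses the
# loose-object header out of a zlib stream and exposes type and size. The
# helper name and payload are illustrative only.
def _example_decompress(payload=b"example payload"):
    zdata = zlib.compress(make_object(str_blob_type, payload))
    typ, size, reader = DecompressMemMapReader.new(zdata, False)
    assert typ == str_blob_type and size == len(payload)
    return reader.read()   # the original payload bytes
# END example sketch
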
@@ -0,0 +1,100 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Test for object db"""
import tempfile
import os

from gitdb.test.lib import TestBase
from gitdb.util import (
    to_hex_sha,
    to_bin_sha,
    NULL_HEX_SHA,
    LockedFD
)


class TestUtils(TestBase):

    def test_basics(self):
        assert to_hex_sha(NULL_HEX_SHA) == NULL_HEX_SHA
        assert len(to_bin_sha(NULL_HEX_SHA)) == 20
        assert to_hex_sha(to_bin_sha(NULL_HEX_SHA)) == NULL_HEX_SHA.encode("ascii")

    def _cmp_contents(self, file_path, data):
        # raise if data from file at file_path
        # does not match data string
        with open(file_path, "rb") as fp:
            assert fp.read() == data.encode("ascii")

    def test_lockedfd(self):
        my_file = tempfile.mktemp()
        orig_data = "hello"
        new_data = "world"
        with open(my_file, "wb") as my_file_fp:
            my_file_fp.write(orig_data.encode("ascii"))

        try:
            lfd = LockedFD(my_file)
            lockfilepath = lfd._lockfilepath()

            # cannot end before it was started
            self.assertRaises(AssertionError, lfd.rollback)
            self.assertRaises(AssertionError, lfd.commit)

            # open for writing
            assert not os.path.isfile(lockfilepath)
            wfd = lfd.open(write=True)
            assert lfd._fd is wfd
            assert os.path.isfile(lockfilepath)

            # write data and fail
            os.write(wfd, new_data.encode("ascii"))
            lfd.rollback()
            assert lfd._fd is None
            self._cmp_contents(my_file, orig_data)
            assert not os.path.isfile(lockfilepath)

            # additional call doesn't fail
            lfd.commit()
            lfd.rollback()

            # test reading
            lfd = LockedFD(my_file)
            rfd = lfd.open(write=False)
            assert os.read(rfd, len(orig_data)) == orig_data.encode("ascii")

            assert os.path.isfile(lockfilepath)
            # deletion rolls back
            del(lfd)
            assert not os.path.isfile(lockfilepath)

            # write data - concurrently
            lfd = LockedFD(my_file)
            olfd = LockedFD(my_file)
            assert not os.path.isfile(lockfilepath)
            wfdstream = lfd.open(write=True, stream=True)  # this time as stream
            assert os.path.isfile(lockfilepath)
            # another one fails
            self.assertRaises(IOError, olfd.open)

            wfdstream.write(new_data.encode("ascii"))
            lfd.commit()
            assert not os.path.isfile(lockfilepath)
            self._cmp_contents(my_file, new_data)

            # could test automatic _end_writing on destruction
        finally:
            os.remove(my_file)
        # END final cleanup

        # try non-existing file for reading
        lfd = LockedFD(tempfile.mktemp())
        try:
            lfd.open(write=False)
        except OSError:
            assert not os.path.exists(lfd._lockfilepath())
        else:
            self.fail("expected OSError")
        # END handle exceptions
10
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/typ.py
Normal file
@@ -0,0 +1,10 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module containing information about types known to the database"""

str_blob_type = b'blob'
str_commit_type = b'commit'
str_tree_type = b'tree'
str_tag_type = b'tag'
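
# A hedged usage sketch, not part of gitdb itself: it shows the blob type
# string above in the role the stream tests give it, tagging the payload of an
# IStream that is stored into a database. The helper name and sample data are
# illustrative only.
def _example_store_blob(data=b"hello"):
    from io import BytesIO
    from gitdb import IStream, MemoryDB
    istream = IStream(str_blob_type, len(data), BytesIO(data))
    return MemoryDB().store(istream).hexsha   # hex sha identifying the stored blob
# END example sketch
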
398
zero-cost-nas/.eggs/gitdb-4.0.10-py3.8.egg/gitdb/util.py
Normal file
@@ -0,0 +1,398 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
# This module is part of GitDB and is released under
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
import binascii
import os
import mmap
import sys
import time
import errno

from io import BytesIO

from smmap import (
    StaticWindowMapManager,
    SlidingWindowMapManager,
    SlidingWindowMapBuffer
)

# initialize our global memory manager instance
# Use it to free cached (and unused) resources.
mman = SlidingWindowMapManager()
# END handle mman

import hashlib

try:
    from struct import unpack_from
except ImportError:
    from struct import unpack, calcsize
    __calcsize_cache = dict()

    def unpack_from(fmt, data, offset=0):
        try:
            size = __calcsize_cache[fmt]
        except KeyError:
            size = calcsize(fmt)
            __calcsize_cache[fmt] = size
        # END exception handling
        return unpack(fmt, data[offset: offset + size])
    # END own unpack_from implementation
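
# A hedged illustration, not part of gitdb: whichever branch above provided
# unpack_from, it reads struct-formatted fields at a byte offset. The format
# string, buffer and helper name below are made up for the example.
def _example_unpack_from():
    buf = b'\x00' * 4 + b'\x00\x00\x00\x2a'
    return unpack_from('>L', buf, 4)[0]   # big-endian uint32 at offset 4 -> 42
# END example sketch
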

#{ Aliases

hex_to_bin = binascii.a2b_hex
bin_to_hex = binascii.b2a_hex

# errors
ENOENT = errno.ENOENT

# os shortcuts
exists = os.path.exists
mkdir = os.mkdir
chmod = os.chmod
isdir = os.path.isdir
isfile = os.path.isfile
rename = os.rename
dirname = os.path.dirname
basename = os.path.basename
join = os.path.join
read = os.read
write = os.write
close = os.close
fsync = os.fsync


def _retry(func, *args, **kwargs):
    # Wrapper around functions that are problematic on Windows. Sometimes
    # the OS or someone else still has a handle to the file.
    if sys.platform == "win32":
        for _ in range(10):
            try:
                return func(*args, **kwargs)
            except Exception:
                time.sleep(0.1)
        return func(*args, **kwargs)
    else:
        return func(*args, **kwargs)


def remove(*args, **kwargs):
    return _retry(os.remove, *args, **kwargs)


# Backwards compatibility imports
from gitdb.const import (
    NULL_BIN_SHA,
    NULL_HEX_SHA
)

#} END Aliases

#{ compatibility stuff ...


class _RandomAccessBytesIO:

    """Wrapper to provide required functionality in case memory maps cannot or may
    not be used. This is only really required in python 2.4"""
    __slots__ = '_sio'

    def __init__(self, buf=''):
        self._sio = BytesIO(buf)

    def __getattr__(self, attr):
        return getattr(self._sio, attr)

    def __len__(self):
        return len(self.getvalue())

    def __getitem__(self, i):
        return self.getvalue()[i]

    def __getslice__(self, start, end):
        return self.getvalue()[start:end]


def byte_ord(b):
    """
    Return the integer representation of the byte string. This supports Python
    3 byte arrays as well as standard strings.
    """
    try:
        return ord(b)
    except TypeError:
        return b

#} END compatibility stuff ...

#{ Routines


def make_sha(source=b''):
    """A python2.4 workaround for the sha/hashlib module fiasco

    **Note** From the dulwich project """
    try:
        return hashlib.sha1(source)
    except NameError:
        import sha
        sha1 = sha.sha(source)
        return sha1


def allocate_memory(size):
    """:return: a file-protocol accessible memory block of the given size"""
    if size == 0:
        return _RandomAccessBytesIO(b'')
    # END handle empty chunks gracefully

    try:
        return mmap.mmap(-1, size)  # read-write by default
    except OSError:
        # setup real memory instead
        # this of course may fail if the amount of memory is not available in
        # one chunk - would only be the case in python 2.4, being more likely on
        # 32 bit systems.
        return _RandomAccessBytesIO(b"\0" * size)
    # END handle memory allocation


def file_contents_ro(fd, stream=False, allow_mmap=True):
    """:return: read-only contents of the file represented by the file descriptor fd

    :param fd: file descriptor opened for reading
    :param stream: if False, random access is provided, otherwise the stream interface
        is provided.
    :param allow_mmap: if True, it's allowed to map the contents into memory, which
        allows large files to be handled and accessed efficiently. The file-descriptor
        will change its position if this is False"""
    try:
        if allow_mmap:
            # supports stream and random access
            try:
                return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
            except OSError:
                # python 2.4 issue, 0 wants to be the actual size
                return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ)
            # END handle python 2.4
    except OSError:
        pass
    # END exception handling

    # read manually
    contents = os.read(fd, os.fstat(fd).st_size)
    if stream:
        return _RandomAccessBytesIO(contents)
    return contents


def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
    """Get the file contents at filepath as fast as possible

    :return: random access compatible memory of the given filepath
    :param stream: see ``file_contents_ro``
    :param allow_mmap: see ``file_contents_ro``
    :param flags: additional flags to pass to os.open
    :raise OSError: If the file could not be opened

    **Note** for now we don't try to use O_NOATIME directly as the right value needs to be
    shared per database in fact. It only makes a real difference for loose object
    databases anyway, and they use it with the help of the ``flags`` parameter"""
    fd = os.open(filepath, os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags)
    try:
        return file_contents_ro(fd, stream, allow_mmap)
    finally:
        close(fd)
    # END assure file is closed
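
# A hedged usage sketch, not part of gitdb: it shows the intended call pattern
# of file_contents_ro_filepath above. The path comes from the caller and the
# helper name is illustrative only.
def _example_read_file(filepath):
    data = file_contents_ro_filepath(filepath, stream=False, allow_mmap=True)
    return len(data)   # works for both the mmap and the plain-bytes fallback
# END example sketch
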

def sliding_ro_buffer(filepath, flags=0):
    """
    :return: a buffer compatible object which uses our mapped memory manager internally
        ready to read the whole given filepath"""
    return SlidingWindowMapBuffer(mman.make_cursor(filepath), flags=flags)


def to_hex_sha(sha):
    """:return: hexified version of sha"""
    if len(sha) == 40:
        return sha
    return bin_to_hex(sha)


def to_bin_sha(sha):
    if len(sha) == 20:
        return sha
    return hex_to_bin(sha)


#} END routines


#{ Utilities

class LazyMixin:

    """
    Base class providing an interface to lazily retrieve attribute values upon
    first access. If slots are used, memory will only be reserved once the attribute
    is actually accessed and retrieved the first time. All future accesses will
    return the cached value as stored in the instance's dict or slot.
    """

    __slots__ = tuple()

    def __getattr__(self, attr):
        """
        Whenever an attribute is requested that we do not know, we allow it
        to be created and set. Next time the same attribute is requested, it is simply
        returned from our dict/slots. """
        self._set_cache_(attr)
        # will raise in case the cache was not created
        return object.__getattribute__(self, attr)

    def _set_cache_(self, attr):
        """
        This method should be overridden in the derived class.
        It should check whether the attribute named by attr can be created
        and cached. Do nothing if you do not know the attribute, or call your subclass.

        The derived class may create as many additional attributes as it deems
        necessary in case a git command returns more information than represented
        in the single attribute."""
        pass
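
# A hedged sketch of the lazy-attribute contract described above; the subclass
# and its attribute are invented for illustration only and are not part of gitdb.
class _ExampleLazy(LazyMixin):
    __slots__ = '_expensive'

    def _set_cache_(self, attr):
        if attr == '_expensive':
            self._expensive = sum(range(1000))   # computed once, on first access
        else:
            super()._set_cache_(attr)
# _ExampleLazy()._expensive evaluates to 499500 and is then served from the slot.
# END example sketch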

class LockedFD:

    """
    This class facilitates a safe read and write operation to a file on disk.
    If we write to 'file', we obtain a lock file at 'file.lock' and write to
    that instead. If we succeed, the lock file will be renamed to overwrite
    the original file.

    When reading, we obtain a lock file as well, to prevent other writers from
    succeeding while we are reading the file.

    This type handles errors correctly in that it will assure a consistent state
    on destruction.

    **note** with this setup, parallel reading is not possible"""
    __slots__ = ("_filepath", '_fd', '_write')

    def __init__(self, filepath):
        """Initialize an instance with the given filepath"""
        self._filepath = filepath
        self._fd = None
        self._write = None          # if True, we write a file

    def __del__(self):
        # will do nothing if the file descriptor is already closed
        if self._fd is not None:
            self.rollback()

    def _lockfilepath(self):
        return "%s.lock" % self._filepath

    def open(self, write=False, stream=False):
        """
        Open the file descriptor for reading or writing, both in binary mode.

        :param write: if True, the file descriptor will be opened for writing. Otherwise
            it will be opened read-only.
        :param stream: if True, the file descriptor will be wrapped into a simple stream
            object which supports only reading or writing
        :return: fd to read from or write to. It is still maintained by this instance
            and must not be closed directly
        :raise IOError: if the lock could not be retrieved
        :raise OSError: If the actual file could not be opened for reading

        **note** must only be called once"""
        if self._write is not None:
            raise AssertionError("Called %s multiple times" % self.open)

        self._write = write

        # try to open the lock file
        binary = getattr(os, 'O_BINARY', 0)
        lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary
        try:
            fd = os.open(self._lockfilepath(), lockmode, int("600", 8))
            if not write:
                os.close(fd)
            else:
                self._fd = fd
            # END handle file descriptor
        except OSError as e:
            raise OSError("Lock at %r could not be obtained" % self._lockfilepath()) from e
        # END handle lock retrieval

        # open actual file if required
        if self._fd is None:
            # we could specify exclusive here, as we obtained the lock anyway
            try:
                self._fd = os.open(self._filepath, os.O_RDONLY | binary)
            except:
                # assure we release our lockfile
                remove(self._lockfilepath())
                raise
            # END handle lockfile
        # END open descriptor for reading

        if stream:
            # need delayed import
            from gitdb.stream import FDStream
            return FDStream(self._fd)
        else:
            return self._fd
        # END handle stream

    def commit(self):
        """When done writing, call this function to commit your changes into the
        actual file.
        The file descriptor will be closed, and the lockfile handled.

        **Note** can be called multiple times"""
        self._end_writing(successful=True)

    def rollback(self):
        """Abort your operation without any changes. The file descriptor will be
        closed, and the lock released.

        **Note** can be called multiple times"""
        self._end_writing(successful=False)

    def _end_writing(self, successful=True):
        """Handle the lock according to the write mode"""
        if self._write is None:
            raise AssertionError("Cannot end operation if it wasn't started yet")

        if self._fd is None:
            return

        os.close(self._fd)
        self._fd = None

        lockfile = self._lockfilepath()
        if self._write and successful:
            # on windows, rename does not silently overwrite the existing one
            if sys.platform == "win32":
                if isfile(self._filepath):
                    remove(self._filepath)
                # END remove if exists
            # END win32 special handling

            os.rename(lockfile, self._filepath)

            # assure others can at least read the file - the tmpfile left it at rw--
            # We may also write that file, on windows that boils down to a remove-
            # protection as well
            chmod(self._filepath, int("644", 8))
        else:
            # just delete the file so far, we failed
            remove(lockfile)
        # END successful handling

#} END utilities
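
# A hedged usage sketch for the LockedFD type defined above, mirroring the
# write path exercised in test_lockedfd; the helper name, path and payload are
# purely illustrative.
def _example_locked_write(path, data=b"payload"):
    lfd = LockedFD(path)
    fd = lfd.open(write=True)      # creates '<path>.lock' exclusively
    try:
        os.write(fd, data)
    except Exception:
        lfd.rollback()             # drops the lock file, original left untouched
        raise
    else:
        lfd.commit()               # renames the lock file over the original
# END example sketch
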
@@ -0,0 +1,18 @@
def force_bytes(data, encoding="utf-8"):
    if isinstance(data, bytes):
        return data

    if isinstance(data, str):
        return data.encode(encoding)

    return data


def force_text(data, encoding="utf-8"):
    if isinstance(data, str):
        return data

    if isinstance(data, bytes):
        return data.decode(encoding)

    return str(data, encoding)
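
# A hedged round-trip check for the two helpers above; the sample value and the
# helper name are arbitrary and only illustrate the intended encode/decode pairing.
def _example_roundtrip(value="naïve"):
    encoded = force_bytes(value)           # str is encoded with the given codec
    assert isinstance(encoded, bytes)
    return force_text(encoded) == value    # decoding restores the original text
# END example sketch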