Don't double-decompress gzipped HTTP responses
[jelmer/dulwich.git] / dulwich / client.py
1 # client.py -- Implementation of the client side git protocols
2 # Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3 #
4 # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
5 # General Public License as published by the Free Software Foundation; version 2.0
6 # or (at your option) any later version. You can redistribute it and/or
7 # modify it under the terms of either of these two licenses.
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 #
15 # You should have received a copy of the licenses; if not, see
16 # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
17 # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
18 # License, Version 2.0.
19 #
20
21 """Client side support for the Git protocol.
22
23 The Dulwich client supports the following capabilities:
24
25  * thin-pack
26  * multi_ack_detailed
27  * multi_ack
28  * side-band-64k
29  * ofs-delta
30  * quiet
31  * report-status
32  * delete-refs
33
34 Known capabilities that are not supported:
35
36  * shallow
37  * no-progress
38  * include-tag
39 """
40
41 from contextlib import closing
42 from io import BytesIO, BufferedReader
43 import gzip
44 import select
45 import socket
46 import subprocess
47 import sys
48
49 try:
50     from urllib import quote as urlquote
51     from urllib import unquote as urlunquote
52 except ImportError:
53     from urllib.parse import quote as urlquote
54     from urllib.parse import unquote as urlunquote
55
56 try:
57     import urlparse
58 except ImportError:
59     import urllib.parse as urlparse
60
61 import dulwich
62 from dulwich.errors import (
63     GitProtocolError,
64     NotGitRepository,
65     SendPackError,
66     UpdateRefsError,
67     )
68 from dulwich.protocol import (
69     _RBUFSIZE,
70     agent_string,
71     capability_agent,
72     extract_capability_names,
73     CAPABILITY_AGENT,
74     CAPABILITY_DELETE_REFS,
75     CAPABILITY_MULTI_ACK,
76     CAPABILITY_MULTI_ACK_DETAILED,
77     CAPABILITY_OFS_DELTA,
78     CAPABILITY_QUIET,
79     CAPABILITY_REPORT_STATUS,
80     CAPABILITY_SYMREF,
81     CAPABILITY_SIDE_BAND_64K,
82     CAPABILITY_THIN_PACK,
83     CAPABILITIES_REF,
84     KNOWN_RECEIVE_CAPABILITIES,
85     KNOWN_UPLOAD_CAPABILITIES,
86     COMMAND_DONE,
87     COMMAND_HAVE,
88     COMMAND_WANT,
89     SIDE_BAND_CHANNEL_DATA,
90     SIDE_BAND_CHANNEL_PROGRESS,
91     SIDE_BAND_CHANNEL_FATAL,
92     PktLineParser,
93     Protocol,
94     ProtocolFile,
95     TCP_GIT_PORT,
96     ZERO_SHA,
97     extract_capabilities,
98     parse_capability,
99     )
100 from dulwich.pack import (
101     write_pack_data,
102     write_pack_objects,
103     )
104 from dulwich.refs import (
105     read_info_refs,
106     ANNOTATED_TAG_SUFFIX,
107     )
108
109
class InvalidWants(Exception):
    """Raised when requested wants are not among the server provided refs.

    :param wants: Collection of the object SHAs that could not be matched;
        its repr is embedded in the exception message.
    """

    def __init__(self, wants):
        # Wrap ``wants`` in a one-element tuple so that a tuple argument is
        # formatted as a whole instead of being unpacked as the arguments of
        # the %-format (which fails for anything but a 1-tuple).
        Exception.__init__(
            self,
            "requested wants not in server provided refs: %r" % (wants,))
117
118
119 def _fileno_can_read(fileno):
120     """Check if a file descriptor is readable."""
121     return len(select.select([fileno], [], [], 0)[0]) > 0
122
123
def _win32_peek_avail(handle):
    """Wrapper around PeekNamedPipe to check how many bytes are available.

    :param handle: Pipe handle to pass to PeekNamedPipe
    :return: Number of available bytes as reported by PeekNamedPipe
    :raise OSError: if PeekNamedPipe reports failure; the exception carries
        the value of GetLastError()
    """
    # Imported here rather than at module level; presumably because
    # ``windll`` is only available on Windows.
    # GetLastError is taken from ctypes itself: ctypes.wintypes only exposes
    # it on Python 2, so wintypes.GetLastError fails on Python 3.
    from ctypes import GetLastError, byref, wintypes, windll
    c_avail = wintypes.DWORD()
    c_message = wintypes.DWORD()
    success = windll.kernel32.PeekNamedPipe(
        handle, None, 0, None, byref(c_avail),
        byref(c_message))
    if not success:
        raise OSError(GetLastError())
    return c_avail.value
135
136
# Capabilities that are part of both the upload and the receive lists below.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND_64K,
]
# Base set for GitClient._fetch_capabilities.
UPLOAD_CAPABILITIES = [
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
] + COMMON_CAPABILITIES
# Base set for GitClient._send_capabilities.
RECEIVE_CAPABILITIES = [CAPABILITY_REPORT_STATUS] + COMMON_CAPABILITIES
141
142
class ReportStatusParser(object):
    """Collect and validate the status sent with 'report-status'.

    Packets are fed in one at a time through handle_packet(); check() then
    raises if the collected report contains a failure.
    """

    def __init__(self):
        self._ref_statuses = []
        self._ref_status_ok = True
        self._pack_status = None
        self._done = False

    def check(self):
        """Raise an exception if the collected report contains errors.

        :raise SendPackError: Raised when the server could not unpack
        :raise UpdateRefsError: Raised when refs could not be updated
        """
        if self._pack_status not in (b'unpack ok', None):
            raise SendPackError(self._pack_status)
        if self._ref_status_ok:
            return
        ref_status = {}
        succeeded = set()
        for line in self._ref_statuses:
            state, separator, ref = line.partition(b' ')
            if not separator:
                # malformed response, move on to the next one
                continue
            if state == b'ng':
                # "ng <ref> <reason>": record the reason as the status.
                name, has_reason, reason = ref.partition(b' ')
                if has_reason:
                    ref, state = name, reason
            else:
                succeeded.add(ref)
            ref_status[ref] = state
        # TODO(jelmer): don't assume encoding of refs is ascii.
        failed = [
            refname.decode('ascii') for refname in ref_status
            if refname not in succeeded]
        raise UpdateRefsError(
            ', '.join(failed) + ' failed to update', ref_status=ref_status)

    def handle_packet(self, pkt):
        """Record a single packet of the status report.

        The first packet is stored as the pack status, every later one as a
        ref status; None (a flush packet) ends the report.

        :param pkt: Packet contents, or None for a flush packet
        :raise GitProtocolError: Raised when packets are received after a
            flush packet.
        """
        if self._done:
            raise GitProtocolError("received more data after status report")
        if pkt is None:
            self._done = True
            return
        line = pkt.strip()
        if self._pack_status is None:
            self._pack_status = line
            return
        self._ref_statuses.append(line)
        if not line.startswith(b'ok '):
            self._ref_status_ok = False
200
201
def read_pkt_refs(proto):
    """Read the refs (and capabilities) advertised by a server.

    :param proto: Protocol object whose read_pkt_seq() yields the packets
    :return: Tuple with a dictionary mapping ref names to SHAs and a set
        of server capabilities
    :raise GitProtocolError: if the server sent an ERR packet
    """
    capabilities = None
    advertised = {}
    # Receive refs from server
    for pkt in proto.read_pkt_seq():
        sha, ref = pkt.rstrip(b'\n').split(None, 1)
        if sha == b'ERR':
            raise GitProtocolError(ref)
        if capabilities is None:
            # Capabilities are extracted from the first ref line only.
            ref, capabilities = extract_capabilities(ref)
        advertised[ref] = sha

    if not advertised:
        return {}, set()
    if advertised == {CAPABILITIES_REF: ZERO_SHA}:
        # The only entry is the capabilities ref with a zero SHA: report
        # no refs at all.
        advertised = {}
    return advertised, set(capabilities)
219
220
class FetchPackResult(object):
    """Result of a fetch-pack operation.

    Dictionary-style access (indexing, iteration, len(), ``in``, comparison
    with a dict and the methods named in _FORWARDED_ATTRS) is forwarded to
    ``refs`` and emits a DeprecationWarning.

    :var refs: Dictionary with all remote refs
    :var symrefs: Dictionary with remote symrefs
    :var agent: User agent string
    """

    # Dictionary method names that are forwarded to ``refs``.
    _FORWARDED_ATTRS = [
            'clear', 'copy', 'fromkeys', 'get', 'has_key', 'items',
            'iteritems', 'iterkeys', 'itervalues', 'keys', 'pop', 'popitem',
            'setdefault', 'update', 'values', 'viewitems', 'viewkeys',
            'viewvalues']

    def __init__(self, refs, symrefs, agent):
        self.refs = refs
        self.symrefs = symrefs
        self.agent = agent

    def _warn_deprecated(self):
        """Emit the deprecation warning for dictionary-style access."""
        import warnings
        # stacklevel=3 skips this helper and the special method calling it.
        warnings.warn(
            "Use FetchPackResult.refs instead.",
            DeprecationWarning, stacklevel=3)

    def __eq__(self, other):
        if isinstance(other, dict):
            self._warn_deprecated()
            return (self.refs == other)
        if not isinstance(other, FetchPackResult):
            # Let Python fall back to its default comparison instead of
            # failing with AttributeError on unrelated objects.
            return NotImplemented
        return (self.refs == other.refs and
                self.symrefs == other.symrefs and
                self.agent == other.agent)

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, so spell it out.
        if isinstance(other, dict):
            self._warn_deprecated()
            return (self.refs != other)
        if not isinstance(other, FetchPackResult):
            return NotImplemented
        return not (self.refs == other.refs and
                    self.symrefs == other.symrefs and
                    self.agent == other.agent)

    def __contains__(self, name):
        self._warn_deprecated()
        return name in self.refs

    def __getitem__(self, name):
        self._warn_deprecated()
        return self.refs[name]

    def __len__(self):
        self._warn_deprecated()
        return len(self.refs)

    def __iter__(self):
        self._warn_deprecated()
        return iter(self.refs)

    def __getattribute__(self, name):
        # Forward the deprecated dictionary methods to ``refs``; everything
        # else is looked up normally.
        if name in type(self)._FORWARDED_ATTRS:
            self._warn_deprecated()
            return getattr(self.refs, name)
        return super(FetchPackResult, self).__getattribute__(name)

    def __repr__(self):
        return "%s(%r, %r, %r)" % (
                self.__class__.__name__, self.refs, self.symrefs, self.agent)
279
280
281 # TODO(durin42): this doesn't correctly degrade if the server doesn't
282 # support some capabilities. This should work properly with servers
283 # that don't support multi_ack.
class GitClient(object):
    """Git smart server client.

    Base class for the concrete clients. The public entry points
    get_url(), from_parsedurl(), send_pack(), fetch_pack() and get_refs()
    raise NotImplementedError here; the private helpers implement pieces of
    the receive-pack and upload-pack conversations on a Protocol object.
    """

    def __init__(self, thin_packs=True, report_activity=None, quiet=False):
        """Create a new GitClient instance.

        :param thin_packs: Whether or not thin packs should be retrieved
        :param report_activity: Optional callback for reporting transport
            activity.
        :param quiet: Whether to add the 'quiet' capability to the
            capabilities used when sending packs
        """
        self._report_activity = report_activity
        self._report_status_parser = None
        self._fetch_capabilities = set(UPLOAD_CAPABILITIES)
        self._fetch_capabilities.add(capability_agent())
        self._send_capabilities = set(RECEIVE_CAPABILITIES)
        self._send_capabilities.add(capability_agent())
        if quiet:
            self._send_capabilities.add(CAPABILITY_QUIET)
        if not thin_packs:
            self._fetch_capabilities.remove(CAPABILITY_THIN_PACK)

    def get_url(self, path):
        """Retrieves full url to given path.

        :param path: Repository path (as string)
        :return: Url to path (as string)
        """
        raise NotImplementedError(self.get_url)

    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Create an instance of this client from a urlparse.parsed object.

        :param parsedurl: Result of urlparse.urlparse()
        :return: A `GitClient` object
        """
        raise NotImplementedError(cls.from_parsedurl)

    def send_pack(self, path, update_refs, generate_pack_data,
                  progress=None):
        """Upload a pack to a remote repository.

        :param path: Repository path (as bytestring)
        :param update_refs: Function to determine changes to remote refs.
            Receive dict with existing remote refs, returns dict with
            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
        :param generate_pack_data: Function that can return a tuple
            with number of objects and list of pack data to include
        :param progress: Optional progress function

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        :return: new_refs dictionary containing the changes that were made
            {refname: new_ref}, including deleted refs.
        """
        raise NotImplementedError(self.send_pack)

    def fetch(self, path, target, determine_wants=None, progress=None):
        """Fetch into a target repository.

        :param path: Path to fetch from (as bytestring)
        :param target: Target repository to fetch into
        :param determine_wants: Optional function to determine what refs
            to fetch. Receives dictionary of name->sha, should return
            list of shas to fetch. Defaults to all shas.
        :param progress: Optional progress function
        :return: The result of fetch_pack(), describing all remote refs
            (not just those fetched)
        """
        if determine_wants is None:
            determine_wants = target.object_store.determine_wants_all
        if CAPABILITY_THIN_PACK in self._fetch_capabilities:
            # TODO(jelmer): Avoid reading entire file into memory and
            # only processing it after the whole file has been fetched.
            f = BytesIO()

            def commit():
                # Only hand the buffer over if any pack data was received.
                if f.tell():
                    f.seek(0)
                    target.object_store.add_thin_pack(f.read, None)

            def abort():
                pass
        else:
            f, commit, abort = target.object_store.add_pack()
        try:
            result = self.fetch_pack(
                path, determine_wants, target.get_graph_walker(), f.write,
                progress)
        except BaseException:
            abort()
            raise
        else:
            commit()
        return result

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Remote path to fetch from
        :param determine_wants: Function determine what refs
            to fetch. Receives dictionary of name->sha, should return
            list of shas to fetch.
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        :return: FetchPackResult object
        """
        raise NotImplementedError(self.fetch_pack)

    def get_refs(self, path):
        """Retrieve the current refs from a git smart server.

        :param path: Path to the repo to fetch from. (as bytestring)
        """
        raise NotImplementedError(self.get_refs)

    def _parse_status_report(self, proto):
        """Read a status report from proto and raise if it reports failure.

        :param proto: Protocol object to read from
        :raise SendPackError: if the first line is not 'unpack ok'
        :raise UpdateRefsError: if any ref status does not start with 'ok '
        """
        unpack = proto.read_pkt_line().strip()
        if unpack != b'unpack ok':
            st = True
            # flush remaining error data
            while st is not None:
                st = proto.read_pkt_line()
            raise SendPackError(unpack)
        statuses = []
        errs = False
        ref_status = proto.read_pkt_line()
        while ref_status:
            ref_status = ref_status.strip()
            statuses.append(ref_status)
            if not ref_status.startswith(b'ok '):
                errs = True
            ref_status = proto.read_pkt_line()

        if errs:
            ref_status = {}
            ok = set()
            for status in statuses:
                if b' ' not in status:
                    # malformed response, move on to the next one
                    continue
                status, ref = status.split(b' ', 1)

                if status == b'ng':
                    if b' ' in ref:
                        ref, status = ref.split(b' ', 1)
                else:
                    ok.add(ref)
                ref_status[ref] = status
            # Ref names are bytestrings; decode them so the message is built
            # from text only (same approach as ReportStatusParser.check).
            # TODO(jelmer): don't assume encoding of refs is ascii.
            raise UpdateRefsError(', '.join([
                refname.decode('ascii') for refname in ref_status
                if refname not in ok]) +
                ' failed to update', ref_status=ref_status)

    def _read_side_band64k_data(self, proto, channel_callbacks):
        """Read per-channel data.

        This requires the side-band-64k capability.

        :param proto: Protocol object to read from
        :param channel_callbacks: Dictionary mapping channels to packet
            handlers to use. None for a callback discards channel data.
        :raise AssertionError: if data arrives on a channel that is not in
            channel_callbacks
        """
        for pkt in proto.read_pkt_seq():
            # The first byte of each packet is the channel number.
            channel = ord(pkt[:1])
            pkt = pkt[1:]
            try:
                cb = channel_callbacks[channel]
            except KeyError:
                raise AssertionError('Invalid sideband channel %d' % channel)
            else:
                if cb is not None:
                    cb(pkt)

    def _handle_receive_pack_head(self, proto, capabilities, old_refs,
                                  new_refs):
        """Handle the head of a 'git-receive-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param old_refs: Old refs, as received from the server
        :param new_refs: Refs to change
        :raise TypeError: if a ref name or SHA is not a bytestring
        :return: (have, want) tuple
        """
        want = []
        have = [x for x in old_refs.values() if not x == ZERO_SHA]
        sent_capabilities = False

        for refname in new_refs:
            if not isinstance(refname, bytes):
                raise TypeError('refname is not a bytestring: %r' % refname)
            old_sha1 = old_refs.get(refname, ZERO_SHA)
            if not isinstance(old_sha1, bytes):
                raise TypeError('old sha1 for %s is not a bytestring: %r' %
                                (refname, old_sha1))
            new_sha1 = new_refs.get(refname, ZERO_SHA)
            if not isinstance(new_sha1, bytes):
                raise TypeError('new sha1 for %s is not a bytestring: %r' %
                                (refname, new_sha1))

            if old_sha1 != new_sha1:
                if sent_capabilities:
                    proto.write_pkt_line(old_sha1 + b' ' + new_sha1 + b' ' +
                                         refname)
                else:
                    # The capabilities are appended, after a NUL byte, to the
                    # first command only.
                    proto.write_pkt_line(
                        old_sha1 + b' ' + new_sha1 + b' ' + refname + b'\0' +
                        b' '.join(capabilities))
                    sent_capabilities = True
            if new_sha1 not in have and new_sha1 != ZERO_SHA:
                want.append(new_sha1)
        proto.write_pkt_line(None)
        return (have, want)

    def _negotiate_receive_pack_capabilities(self, server_capabilities):
        """Determine the capabilities to use for a receive-pack request.

        :param server_capabilities: Set of capabilities sent by the server
        :return: Set of capabilities both this client and the server offer
        """
        negotiated_capabilities = (
            self._send_capabilities & server_capabilities)
        unknown_capabilities = (  # noqa: F841
            extract_capability_names(server_capabilities) -
            KNOWN_RECEIVE_CAPABILITIES)
        # TODO(jelmer): warn about unknown capabilities
        return negotiated_capabilities

    def _handle_receive_pack_tail(self, proto, capabilities, progress=None):
        """Handle the tail of a 'git-receive-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param progress: Optional progress reporting function
        """
        if CAPABILITY_SIDE_BAND_64K in capabilities:
            if progress is None:
                def progress(x):
                    pass
            channel_callbacks = {SIDE_BAND_CHANNEL_PROGRESS: progress}
            if CAPABILITY_REPORT_STATUS in capabilities:
                # The status report arrives as pkt-lines embedded in the
                # data channel, hence the additional PktLineParser.
                channel_callbacks[SIDE_BAND_CHANNEL_DATA] = PktLineParser(
                    self._report_status_parser.handle_packet).parse
            self._read_side_band64k_data(proto, channel_callbacks)
        else:
            if CAPABILITY_REPORT_STATUS in capabilities:
                for pkt in proto.read_pkt_seq():
                    self._report_status_parser.handle_packet(pkt)
        if self._report_status_parser is not None:
            self._report_status_parser.check()

    def _negotiate_upload_pack_capabilities(self, server_capabilities):
        """Determine the capabilities to use for an upload-pack request.

        :param server_capabilities: Set of capabilities sent by the server
        :return: Tuple with the set of capabilities both sides offer, a
            dictionary with the symrefs announced by the server and the
            agent announced by the server (None if there was none)
        """
        unknown_capabilities = (  # noqa: F841
            extract_capability_names(server_capabilities) -
            KNOWN_UPLOAD_CAPABILITIES)
        # TODO(jelmer): warn about unknown capabilities
        symrefs = {}
        agent = None
        for capability in server_capabilities:
            k, v = parse_capability(capability)
            if k == CAPABILITY_SYMREF:
                (src, dst) = v.split(b':', 1)
                symrefs[src] = dst
            if k == CAPABILITY_AGENT:
                agent = v

        negotiated_capabilities = (
            self._fetch_capabilities & server_capabilities)
        return (negotiated_capabilities, symrefs, agent)

    def _handle_upload_pack_head(self, proto, capabilities, graph_walker,
                                 wants, can_read):
        """Handle the head of a 'git-upload-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param graph_walker: GraphWalker instance to call .ack() on
        :param wants: List of commits to fetch
        :param can_read: function that returns a boolean that indicates
            whether there is extra graph data to read on proto
        """
        assert isinstance(wants, list) and isinstance(wants[0], bytes)
        # The capabilities are sent along with the first want only.
        proto.write_pkt_line(COMMAND_WANT + b' ' + wants[0] + b' ' +
                             b' '.join(capabilities) + b'\n')
        for want in wants[1:]:
            proto.write_pkt_line(COMMAND_WANT + b' ' + want + b'\n')
        proto.write_pkt_line(None)
        have = next(graph_walker)
        while have:
            proto.write_pkt_line(COMMAND_HAVE + b' ' + have + b'\n')
            if can_read():
                pkt = proto.read_pkt_line()
                parts = pkt.rstrip(b'\n').split(b' ')
                if parts[0] == b'ACK':
                    graph_walker.ack(parts[1])
                    if parts[2] in (b'continue', b'common'):
                        pass
                    elif parts[2] == b'ready':
                        # Stop sending haves.
                        break
                    else:
                        raise AssertionError(
                            "%s not in ('continue', 'ready', 'common)" %
                            parts[2])
            have = next(graph_walker)
        proto.write_pkt_line(COMMAND_DONE + b'\n')

    def _handle_upload_pack_tail(self, proto, capabilities, graph_walker,
                                 pack_data, progress=None, rbufsize=_RBUFSIZE):
        """Handle the tail of a 'git-upload-pack' request.

        :param proto: Protocol object to read from
        :param capabilities: List of negotiated capabilities
        :param graph_walker: GraphWalker instance to call .ack() on
        :param pack_data: Function to call with pack data
        :param progress: Optional progress reporting function
        :param rbufsize: Read buffer size
        """
        pkt = proto.read_pkt_line()
        while pkt:
            parts = pkt.rstrip(b'\n').split(b' ')
            if parts[0] == b'ACK':
                graph_walker.ack(parts[1])
            # Keep reading only while the line carries one of the
            # intermediate statuses.
            if len(parts) < 3 or parts[2] not in (
                    b'ready', b'continue', b'common'):
                break
            pkt = proto.read_pkt_line()
        if CAPABILITY_SIDE_BAND_64K in capabilities:
            if progress is None:
                # Just ignore progress data

                def progress(x):
                    pass
            self._read_side_band64k_data(proto, {
                SIDE_BAND_CHANNEL_DATA: pack_data,
                SIDE_BAND_CHANNEL_PROGRESS: progress}
            )
        else:
            # Without side-band the remaining stream is passed to pack_data
            # in chunks until a read returns no data.
            while True:
                data = proto.read(rbufsize)
                if data == b"":
                    break
                pack_data(data)
624
625
def check_wants(wants, refs):
    """Verify that every wanted SHA is offered by one of the refs.

    Refs whose name ends with ANNOTATED_TAG_SUFFIX are not taken into
    account.

    :param wants: Set of object SHAs to fetch
    :param refs: Refs dictionary to check against
    :raise InvalidWants: if some of the wants are not among the refs
    """
    available = set()
    for name, sha in refs.items():
        if name.endswith(ANNOTATED_TAG_SUFFIX):
            continue
        available.add(sha)
    missing = set(wants).difference(available)
    if missing:
        raise InvalidWants(missing)
637
638
class TraditionalGitClient(GitClient):
    """Traditional Git client.

    Implements send_pack(), fetch_pack(), get_refs() and archive() on top
    of the Protocol object returned by _connect(), which subclasses have to
    provide.
    """

    DEFAULT_ENCODING = 'utf-8'

    def __init__(self, path_encoding=DEFAULT_ENCODING, **kwargs):
        """Create a new TraditionalGitClient instance.

        :param path_encoding: Encoding stored as the remote path encoding
        :param kwargs: Passed on to GitClient.__init__
        """
        self._remote_path_encoding = path_encoding
        super(TraditionalGitClient, self).__init__(**kwargs)

    def _connect(self, cmd, path):
        """Create a connection to the server.

        This method is abstract - concrete implementations should
        implement their own variant which connects to the server and
        returns an initialized Protocol object with the service ready
        for use and a can_read function which may be used to see if
        reads would block.

        :param cmd: The git service name to which we should connect.
        :param path: The path we should pass to the service. (as bytestring)
        """
        raise NotImplementedError()

    def send_pack(self, path, update_refs, generate_pack_data,
                  progress=None):
        """Upload a pack to a remote repository.

        :param path: Repository path (as bytestring)
        :param update_refs: Function to determine changes to remote refs.
            Receive dict with existing remote refs, returns dict with
            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
        :param generate_pack_data: Function that can return a tuple with
            number of objects and pack data to upload.
        :param progress: Optional callback called with progress updates

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        :return: new_refs dictionary containing the changes that were made
            {refname: new_ref}, including deleted refs.
        """
        proto, unused_can_read = self._connect(b'receive-pack', path)
        with proto:
            old_refs, server_capabilities = read_pkt_refs(proto)
            negotiated_capabilities = \
                self._negotiate_receive_pack_capabilities(server_capabilities)
            if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
                self._report_status_parser = ReportStatusParser()
            else:
                # Do not reuse the parser (and the statuses it collected)
                # of an earlier send_pack() call on this client.
                self._report_status_parser = None
            report_status_parser = self._report_status_parser

            try:
                new_refs = orig_new_refs = update_refs(dict(old_refs))
            except BaseException:
                proto.write_pkt_line(None)
                raise

            # Checked before anything iterates over the new refs.
            if new_refs is None:
                proto.write_pkt_line(None)
                return old_refs

            if CAPABILITY_DELETE_REFS not in server_capabilities:
                # Server does not support deletions. Fail later.
                new_refs = dict(orig_new_refs)
                for ref, sha in orig_new_refs.items():
                    if sha == ZERO_SHA:
                        if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
                            # Status lines have the form
                            # "ng <refname> <reason>".
                            report_status_parser._ref_statuses.append(
                                b'ng ' + ref +
                                b' remote does not support deleting refs')
                            report_status_parser._ref_status_ok = False
                        del new_refs[ref]

            if len(new_refs) == 0 and len(orig_new_refs):
                # NOOP - Original new refs filtered out by policy
                proto.write_pkt_line(None)
                if report_status_parser is not None:
                    report_status_parser.check()
                return old_refs

            (have, want) = self._handle_receive_pack_head(
                proto, negotiated_capabilities, old_refs, new_refs)
            # Nothing to send and every requested ref already has the
            # requested value on the server.
            if (not want and
                    set(new_refs.items()).issubset(set(old_refs.items()))):
                return new_refs
            pack_data_count, pack_data = generate_pack_data(
                have, want,
                ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities))

            # A pack is written if there are objects to send or if any ref
            # is set to a new, non-zero value.
            dowrite = bool(pack_data_count)
            dowrite = dowrite or any(old_refs.get(ref) != sha
                                     for (ref, sha) in new_refs.items()
                                     if sha != ZERO_SHA)
            if dowrite:
                write_pack_data(proto.write_file(), pack_data_count, pack_data)

            self._handle_receive_pack_tail(
                proto, negotiated_capabilities, progress)
            return new_refs

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Remote path to fetch from
        :param determine_wants: Function determine what refs
            to fetch. Receives dictionary of name->sha, should return
            list of shas to fetch.
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        :raise InvalidWants: if a requested SHA is not among the remote refs
        :return: FetchPackResult object
        """
        proto, can_read = self._connect(b'upload-pack', path)
        with proto:
            refs, server_capabilities = read_pkt_refs(proto)
            negotiated_capabilities, symrefs, agent = (
                    self._negotiate_upload_pack_capabilities(
                            server_capabilities))

            if refs is None:
                proto.write_pkt_line(None)
                return FetchPackResult(refs, symrefs, agent)

            try:
                wants = determine_wants(refs)
            except BaseException:
                proto.write_pkt_line(None)
                raise
            if wants is not None:
                wants = [cid for cid in wants if cid != ZERO_SHA]
            if not wants:
                # Nothing requested: end the conversation with a flush.
                proto.write_pkt_line(None)
                return FetchPackResult(refs, symrefs, agent)
            check_wants(wants, refs)
            self._handle_upload_pack_head(
                proto, negotiated_capabilities, graph_walker, wants, can_read)
            self._handle_upload_pack_tail(
                proto, negotiated_capabilities, graph_walker, pack_data,
                progress)
            return FetchPackResult(refs, symrefs, agent)

    def get_refs(self, path):
        """Retrieve the current refs from a git smart server.

        :param path: Path to the repo to fetch from
        :return: Dictionary mapping ref names to SHAs
        """
        # stock `git ls-remote` uses upload-pack
        proto, _ = self._connect(b'upload-pack', path)
        with proto:
            refs, _ = read_pkt_refs(proto)
            proto.write_pkt_line(None)
            return refs

    def archive(self, path, committish, write_data, progress=None,
                write_error=None, format=None, subdirs=None, prefix=None):
        """Request an archive through the 'upload-archive' service.

        Returns without reading any data if the server answers NACK.

        :param path: Repository path, passed on to _connect()
        :param committish: Commit-ish to archive (bytestring), sent as an
            argument
        :param write_data: Callback called with the archive data
        :param progress: Optional callback for progress data; discarded
            if None
        :param write_error: Optional callback for data on the fatal
            channel; discarded if None
        :param format: Optional bytestring, sent as '--format=' argument
        :param subdirs: Optional iterable of bytestrings, each sent as an
            additional argument
        :param prefix: Optional bytestring, sent as '--prefix=' argument
        :raise GitProtocolError: if the server answers with an ERR line
        :raise AssertionError: if the server response is not understood
        """
        proto, can_read = self._connect(b'upload-archive', path)
        with proto:
            if format is not None:
                proto.write_pkt_line(b"argument --format=" + format)
            proto.write_pkt_line(b"argument " + committish)
            if subdirs is not None:
                for subdir in subdirs:
                    proto.write_pkt_line(b"argument " + subdir)
            if prefix is not None:
                proto.write_pkt_line(b"argument --prefix=" + prefix)
            proto.write_pkt_line(None)
            pkt = proto.read_pkt_line()
            if pkt == b"NACK\n":
                return
            elif pkt == b"ACK\n":
                pass
            elif pkt.startswith(b"ERR "):
                raise GitProtocolError(pkt[4:].rstrip(b"\n"))
            else:
                raise AssertionError("invalid response %r" % pkt)
            # The ACK has to be followed by a flush packet.
            ret = proto.read_pkt_line()
            if ret is not None:
                raise AssertionError("expected pkt tail")
            self._read_side_band64k_data(proto, {
                SIDE_BAND_CHANNEL_DATA: write_data,
                SIDE_BAND_CHANNEL_PROGRESS: progress,
                SIDE_BAND_CHANNEL_FATAL: write_error})
818
819
class TCPGitClient(TraditionalGitClient):
    """A Git Client that works over TCP directly (i.e. git://)."""

    def __init__(self, host, port=None, **kwargs):
        """Create a new TCPGitClient.

        :param host: Host name to connect to
        :param port: Port to connect to; TCP_GIT_PORT is used when None
        :param kwargs: Passed through to the parent class constructor
        """
        if port is None:
            port = TCP_GIT_PORT
        self._host = host
        self._port = port
        super(TCPGitClient, self).__init__(**kwargs)

    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Build a client from the hostname and port of a parsed URL."""
        return cls(parsedurl.hostname, port=parsedurl.port, **kwargs)

    def get_url(self, path):
        """Return the git:// URL for a path on this host.

        The port is only included when it differs from TCP_GIT_PORT.
        """
        netloc = self._host
        if self._port is not None and self._port != TCP_GIT_PORT:
            netloc += ":%d" % self._port
        return urlparse.urlunsplit(("git", netloc, path, '', ''))

    def _connect(self, cmd, path):
        """Open a TCP connection and send the initial service command.

        Every address returned by ``getaddrinfo`` is tried in turn until
        one of them accepts the connection.

        :param cmd: Service name as bytes (sent prefixed with ``git-``)
        :param path: Remote path; encoded with the remote path encoding
            when it is not already bytes
        :raises TypeError: if cmd is not a bytestring
        :raises socket.error: the error of the last failed attempt, or a
            "no address found" error when there was nothing to try
        :return: Tuple of (Protocol, callable telling whether the socket
            can be read)
        """
        if not isinstance(cmd, bytes):
            raise TypeError(cmd)
        if not isinstance(path, bytes):
            path = path.encode(self._remote_path_encoding)
        sockaddrs = socket.getaddrinfo(
            self._host, self._port, socket.AF_UNSPEC, socket.SOCK_STREAM)
        s = None
        err = socket.error("no address found for %s" % self._host)
        for (family, socktype, protonum, canonname, sockaddr) in sockaddrs:
            s = socket.socket(family, socktype, protonum)
            s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
            try:
                s.connect(sockaddr)
                break
            except socket.error as e:
                # Python 3 unbinds the "as" target when the except clause
                # ends, so the error is copied to a name that survives
                # until the ``raise`` below.
                err = e
                s.close()
                s = None
        if s is None:
            raise err
        # -1 means system default buffering
        rfile = s.makefile('rb', -1)
        # 0 means unbuffered
        wfile = s.makefile('wb', 0)

        def close():
            rfile.close()
            wfile.close()
            s.close()

        proto = Protocol(rfile.read, wfile.write, close,
                         report_activity=self._report_activity)
        if path.startswith(b"/~"):
            path = path[1:]
        # TODO(jelmer): Alternative to ascii?
        proto.send_cmd(
            b'git-' + cmd, path, b'host=' + self._host.encode('ascii'))
        return proto, lambda: _fileno_can_read(s)
879
880
class SubprocessWrapper(object):
    """Expose the pipes of a subprocess through a socket-like interface."""

    def __init__(self, proc):
        """Wrap *proc*, an object with ``stdin`` and ``stdout`` streams.

        ``read`` and ``write`` are bound directly to the streams; on
        anything other than Python 2 the stdout stream is first wrapped
        in a BufferedReader.
        """
        self.proc = proc
        running_python2 = sys.version_info[0] == 2
        self.read = (
            proc.stdout.read if running_python2
            else BufferedReader(proc.stdout).read)
        self.write = proc.stdin.write

    def can_read(self):
        """Tell whether the subprocess' stdout can be read.

        Delegates to ``_win32_peek_avail`` on win32 and to
        ``_fileno_can_read`` everywhere else.
        """
        if sys.platform != 'win32':
            return _fileno_can_read(self.proc.stdout.fileno())
        from msvcrt import get_osfhandle
        os_handle = get_osfhandle(self.proc.stdout.fileno())
        return _win32_peek_avail(os_handle) != 0

    def close(self):
        """Close the subprocess' pipes and wait for it to exit."""
        child = self.proc
        child.stdin.close()
        child.stdout.close()
        # stderr is only closed when it is set to something truthy.
        if child.stderr:
            child.stderr.close()
        child.wait()
906
907
def find_git_command():
    """Find command to run for system Git (usually C Git).

    :return: The command as an argv-style list of strings
    """
    if sys.platform != 'win32':
        return ['git']

    # On win32 the executable may be a .exe, .bat or .cmd file.
    try:  # to avoid overhead
        import win32api
    except ImportError:  # run through cmd.exe with some overhead
        return ['cmd', '/c', 'git']
    _status, executable = win32api.FindExecutable('git')
    return [executable]
921
922
class SubprocessGitClient(TraditionalGitClient):
    """Git client that talks to a server using a subprocess."""

    def __init__(self, **kwargs):
        """Create a new SubprocessGitClient.

        :param stderr: Optional keyword argument used as ``stderr`` for the
            spawned subprocess; it is not forwarded to the parent class
        :param kwargs: Remaining arguments for the parent class constructor
        """
        self._connection = None
        self._stderr = kwargs.pop('stderr', None)
        super(SubprocessGitClient, self).__init__(**kwargs)

    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Build a client; nothing from the parsed URL itself is used."""
        return cls(**kwargs)

    # Command (argv prefix, as a list) used to invoke git. When None, the
    # result of find_git_command() is used.
    git_command = None

    def _connect(self, service, path):
        """Spawn ``git <service> <path>`` and wrap its pipes.

        :param service: Service name as bytes
        :param path: Repository path; decoded with the remote path encoding
            when given as bytes
        :raises TypeError: if service is not a bytestring
        :return: Tuple of (Protocol, callable telling whether the subprocess
            can be read from)
        """
        if not isinstance(service, bytes):
            raise TypeError(service)
        if isinstance(path, bytes):
            path = path.decode(self._remote_path_encoding)
        # Honour an overridden ``git_command``; the local name used to be
        # left unbound in that case, which raised UnboundLocalError.
        git_command = self.git_command
        if git_command is None:
            git_command = find_git_command()
        argv = list(git_command) + [service.decode('ascii'), path]
        p = SubprocessWrapper(
            subprocess.Popen(argv, bufsize=0, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=self._stderr))
        return Protocol(p.read, p.write, p.close,
                        report_activity=self._report_activity), p.can_read
954
955
class LocalGitClient(GitClient):
    """Git Client that just uses a local Repo."""

    def __init__(self, thin_packs=True, report_activity=None, config=None):
        """Create a new LocalGitClient instance.

        :param thin_packs: Whether or not thin packs should be retrieved
            (ignored by this client)
        :param report_activity: Optional callback for reporting transport
            activity.
        :param config: Accepted but not used by this constructor
        """
        self._report_activity = report_activity
        # Ignore the thin_packs argument

    def get_url(self, path):
        """Return the file:// URL for a path."""
        return urlparse.urlunsplit(('file', '', path, '', ''))

    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Build a client; nothing from the parsed URL itself is used."""
        return cls(**kwargs)

    @classmethod
    def _open_repo(cls, path):
        """Open the repository at path, wrapped so ``with`` closes it.

        :param path: Repository path; decoded with the filesystem encoding
            when it is not a ``str``
        """
        from dulwich.repo import Repo
        if not isinstance(path, str):
            path = path.decode(sys.getfilesystemencoding())
        return closing(Repo(path))

    def send_pack(self, path, update_refs, generate_pack_data,
                  progress=None):
        """Upload a pack to a remote repository.

        :param path: Repository path (as bytestring)
        :param update_refs: Function to determine changes to remote refs.
            Receive dict with existing remote refs, returns dict with
            changed refs (name -> sha, where sha=ZERO_SHA for deletions).
            Returning None aborts the push.
        :param generate_pack_data: Function that can return a tuple
            with number of items and pack data to upload.
        :param progress: Optional progress function

        :raises SendPackError: if server rejects the pack data
        :raises UpdateRefsError: if the server supports report-status
                                 and rejects ref updates
        :return: new_refs dictionary containing the changes that were made
            {refname: new_ref}, including deleted refs. When update_refs
            returned None, the existing refs are returned instead.
        """
        if not progress:
            def progress(x):
                pass

        with self._open_repo(path) as target:
            old_refs = target.get_refs()
            new_refs = update_refs(dict(old_refs))
            if new_refs is None:
                # update_refs is aborting the push; mirror what
                # HttpGitClient.send_pack does instead of failing on
                # ``None.items()`` below.
                return old_refs

            have = [sha1 for sha1 in old_refs.values() if sha1 != ZERO_SHA]
            want = []
            # Set-based membership keeps this linear; ``have`` and ``want``
            # stay lists because they are handed to generate_pack_data.
            seen = set(have)
            for new_sha1 in new_refs.values():
                if new_sha1 != ZERO_SHA and new_sha1 not in seen:
                    seen.add(new_sha1)
                    want.append(new_sha1)

            if (not want and
                    set(new_refs.items()).issubset(set(old_refs.items()))):
                return new_refs

            target.object_store.add_pack_data(
                *generate_pack_data(have, want, ofs_delta=True))

            for refname, new_sha1 in new_refs.items():
                old_sha1 = old_refs.get(refname, ZERO_SHA)
                if new_sha1 != ZERO_SHA:
                    if not target.refs.set_if_equals(
                            refname, old_sha1, new_sha1):
                        progress('unable to set %s to %s' %
                                 (refname, new_sha1))
                else:
                    if not target.refs.remove_if_equals(refname, old_sha1):
                        progress('unable to remove %s' % refname)

        return new_refs

    def fetch(self, path, target, determine_wants=None, progress=None):
        """Fetch into a target repository.

        :param path: Path to fetch from (as bytestring)
        :param target: Target repository to fetch into
        :param determine_wants: Optional function determine what refs
            to fetch. Receives dictionary of name->sha, should return
            list of shas to fetch. Defaults to all shas.
        :param progress: Optional progress function
        :return: FetchPackResult object
        """
        with self._open_repo(path) as r:
            refs = r.fetch(target, determine_wants=determine_wants,
                           progress=progress)
            return FetchPackResult(refs, r.refs.get_symrefs(),
                                   agent_string())

    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                   progress=None):
        """Retrieve a pack from a git smart server.

        :param path: Remote path to fetch from
        :param determine_wants: Function determine what refs
            to fetch. Receives dictionary of name->sha, should return
            list of shas to fetch.
        :param graph_walker: Object with next() and ack().
        :param pack_data: Callback called for each bit of data in the pack
        :param progress: Callback for progress reports (strings)
        :return: FetchPackResult object
        """
        with self._open_repo(path) as r:
            objects_iter = r.fetch_objects(
                determine_wants, graph_walker, progress)
            symrefs = r.refs.get_symrefs()
            agent = agent_string()

            # Did the process short-circuit (e.g. in a stateless RPC call)?
            # Note that the client still expects a 0-object pack in most cases.
            if objects_iter is None:
                return FetchPackResult(None, symrefs, agent)
            protocol = ProtocolFile(None, pack_data)
            write_pack_objects(protocol, objects_iter)
            return FetchPackResult(r.get_refs(), symrefs, agent)

    def get_refs(self, path):
        """Retrieve the current refs from the repository at path."""

        with self._open_repo(path) as target:
            return target.get_refs()
1086
1087
# Git client class to use for local access; LocalGitClient unless rebound.
default_local_git_client_cls = LocalGitClient
1090
1091
class SSHVendor(object):
    """A client side SSH implementation."""

    def connect_ssh(self, host, command, username=None, port=None,
                    password=None, key_filename=None):
        """Deprecated alias that warns and forwards to ``run_command``.

        Takes the same arguments as ``run_command`` and returns its result.
        """
        # This function was deprecated in 0.9.1
        import warnings
        warnings.warn(
            "SSHVendor.connect_ssh has been renamed to SSHVendor.run_command",
            DeprecationWarning)
        forwarded = dict(username=username, port=port, password=password,
                         key_filename=key_filename)
        return self.run_command(host, command, **forwarded)

    def run_command(self, host, command, username=None, port=None,
                    password=None, key_filename=None):
        """Connect to an SSH server.

        Run a command remotely and return a file-like object for interaction
        with the remote command.

        This base implementation always raises NotImplementedError.

        :param host: Host name
        :param command: Command to run (as argv array)
        :param username: Optional name of user to log in as
        :param port: Optional SSH port to use
        :param password: Optional ssh password for login or private key
        :param key_filename: Optional path to private keyfile
        """
        raise NotImplementedError(self.run_command)
1120
1121
class StrangeHostname(Exception):
    """Refusing to connect to strange SSH hostname."""

    def __init__(self, hostname):
        # The offending hostname becomes the exception's only argument.
        super(StrangeHostname, self).__init__(hostname)
1127
1128
class SubprocessSSHVendor(SSHVendor):
    """SSH vendor that shells out to the local 'ssh' command."""

    def run_command(self, host, command, username=None, port=None,
                    password=None, key_filename=None):
        """Run *command* on *host* through the ``ssh`` executable.

        :param host: Host name
        :param command: Command passed to ssh as its final argument
        :param username: Optional user name, prepended as ``user@``
        :param port: Optional port, passed with ``-p``
        :param password: Must be None; passwords are not supported
        :param key_filename: Optional key file, passed with ``-i``
        :raises NotImplementedError: if a password is given
        :raises StrangeHostname: if the destination starts with ``-``
        :return: SubprocessWrapper around the spawned process
        """
        if password is not None:
            raise NotImplementedError(
                "Setting password not supported by SubprocessSSHVendor.")

        argv = ['ssh', '-x']
        if port:
            argv += ['-p', str(port)]
        if key_filename:
            argv += ['-i', str(key_filename)]

        destination = '%s@%s' % (username, host) if username else host
        # A destination starting with a dash is rejected rather than
        # handed to ssh.
        if destination.startswith('-'):
            raise StrangeHostname(hostname=destination)
        argv += [destination, command]

        child = subprocess.Popen(argv, bufsize=0,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
        return SubprocessWrapper(child)
1157
1158
class PLinkSSHVendor(SSHVendor):
    """SSH vendor that shells out to the local 'plink' command."""

    def run_command(self, host, command, username=None, port=None,
                    password=None, key_filename=None):
        """Run *command* on *host* through the ``plink`` executable.

        :param host: Host name
        :param command: Command passed to plink as its final argument
        :param username: Optional user name, prepended as ``user@``
        :param port: Optional port, passed with ``-P``
        :param password: Optional password, passed with ``-pw`` (a warning
            is emitted because it shows up in the process list)
        :param key_filename: Optional key file, passed with ``-i``
        :raises StrangeHostname: if the destination starts with ``-``
        :return: SubprocessWrapper around the spawned process
        """
        executable = 'plink.exe' if sys.platform == 'win32' else 'plink'
        argv = [executable, '-ssh']

        if password is not None:
            import warnings
            warnings.warn(
                "Invoking PLink with a password exposes the password in the "
                "process list.")
            argv += ['-pw', str(password)]
        if port:
            argv += ['-P', str(port)]
        if key_filename:
            argv += ['-i', str(key_filename)]

        destination = '%s@%s' % (username, host) if username else host
        if destination.startswith('-'):
            raise StrangeHostname(hostname=destination)
        argv += [destination, command]

        child = subprocess.Popen(argv, bufsize=0,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
        return SubprocessWrapper(child)
1193
1194
def ParamikoSSHVendor(**kwargs):
    """Deprecated shim for ``dulwich.contrib.paramiko_vendor``.

    Emits a DeprecationWarning, then builds and returns the relocated
    ParamikoSSHVendor with the given keyword arguments.
    """
    import warnings
    warnings.warn(
        "ParamikoSSHVendor has been moved to dulwich.contrib.paramiko_vendor.",
        DeprecationWarning)
    from dulwich.contrib.paramiko_vendor import (
        ParamikoSSHVendor as relocated_vendor)
    return relocated_vendor(**kwargs)
1202
1203
# Factory called by SSHGitClient when no vendor is passed to it.
# Can be overridden by users
get_ssh_vendor = SubprocessSSHVendor
1206
1207
class SSHGitClient(TraditionalGitClient):
    """Git client that runs the git services through an SSH vendor."""

    def __init__(self, host, port=None, username=None, vendor=None,
                 config=None, password=None, key_filename=None, **kwargs):
        """Create a new SSHGitClient.

        :param host: Host name to connect to
        :param port: Optional SSH port
        :param username: Optional user name
        :param vendor: Optional SSH vendor; ``get_ssh_vendor()`` is used
            when None
        :param config: Accepted but not used by this constructor
        :param password: Optional password handed to the vendor
        :param key_filename: Optional key file handed to the vendor
        :param kwargs: Passed through to the parent class constructor
        """
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.key_filename = key_filename
        super(SSHGitClient, self).__init__(**kwargs)
        # Maps a service name (bytes) to the remote command to run for it.
        self.alternative_paths = {}
        if vendor is not None:
            self.ssh_vendor = vendor
        else:
            self.ssh_vendor = get_ssh_vendor()

    def get_url(self, path):
        """Return the ssh:// URL for a path on this host."""
        netloc = self.host
        if self.port is not None:
            netloc += ":%d" % self.port

        if self.username is not None:
            netloc = urlquote(self.username, '@/:') + "@" + netloc

        return urlparse.urlunsplit(('ssh', netloc, path, '', ''))

    @classmethod
    def from_parsedurl(cls, parsedurl, **kwargs):
        """Build a client from host, port and username of a parsed URL."""
        return cls(host=parsedurl.hostname, port=parsedurl.port,
                   username=parsedurl.username, **kwargs)

    def _get_cmd_path(self, cmd):
        """Return the remote command for a service (default ``git-<cmd>``)."""
        cmd = self.alternative_paths.get(cmd, b'git-' + cmd)
        assert isinstance(cmd, bytes)
        return cmd

    def _connect(self, cmd, path):
        """Run the remote git service for *path* through the SSH vendor.

        :param cmd: Service name as bytes
        :param path: Remote path; decoded with the remote path encoding
            when given as bytes
        :raises TypeError: if cmd is not a bytestring
        :return: Tuple of (Protocol, the connection's ``can_read``)
        """
        if not isinstance(cmd, bytes):
            raise TypeError(cmd)
        if isinstance(path, bytes):
            path = path.decode(self._remote_path_encoding)
        if path.startswith("/~"):
            path = path[1:]
        # The path is embedded in a command line inside single quotes, so
        # a single quote in the path has to be written as '\'' or it would
        # end the quoting early. Paths without quotes are unaffected.
        quoted_path = "'" + path.replace("'", "'\\''") + "'"
        argv = (self._get_cmd_path(cmd).decode(self._remote_path_encoding) +
                " " + quoted_path)
        kwargs = {}
        if self.password is not None:
            kwargs['password'] = self.password
        if self.key_filename is not None:
            kwargs['key_filename'] = self.key_filename
        con = self.ssh_vendor.run_command(
            self.host, argv, port=self.port, username=self.username,
            **kwargs)
        return (Protocol(con.read, con.write, con.close,
                         report_activity=self._report_activity),
                con.can_read)
1264
1265
def default_user_agent_string():
    """Return the default User-agent value, built from dulwich's version."""
    # Start user agent with "git/", because GitHub requires this. :-( See
    # https://github.com/jelmer/dulwich/issues/562 for details.
    version = ".".join(map(str, dulwich.__version__))
    return "git/dulwich/%s" % version
1270
1271
def default_urllib3_manager(config, **override_kwargs):
    """Return `urllib3` connection pool manager.

    Honour detected proxy configurations.

    :param config: `dulwich.config.ConfigDict` instance with Git configuration,
        or None to use defaults only.
    :param override_kwargs: Additional arguments for the urllib3 manager;
        these take precedence over values derived from the configuration.
    :return: `urllib3.ProxyManager` instance for proxy configurations,
        `urllib3.PoolManager` otherwise.
    """
    proxy_server = user_agent = None
    ca_certs = ssl_verify = None

    if config is not None:
        try:
            proxy_server = config.get(b"http", b"proxy")
        except KeyError:
            pass
        try:
            user_agent = config.get(b"http", b"useragent")
        except KeyError:
            pass

        # TODO(jelmer): Support per-host settings
        try:
            ssl_verify = config.get_boolean(b"http", b"sslVerify")
        except KeyError:
            ssl_verify = True

        # http.sslCAInfo holds a path to a certificate file, so it has to
        # be read as a plain value rather than parsed as a boolean.
        try:
            ca_certs = config.get(b"http", b"sslCAInfo")
        except KeyError:
            ca_certs = None

    if user_agent is None:
        user_agent = default_user_agent_string()

    headers = {"User-agent": user_agent}

    kwargs = {}
    # Verification is only switched off by an explicit False; an unset
    # value (None) defaults to SSL verification.
    if ssl_verify is False:
        kwargs['cert_reqs'] = 'CERT_NONE'
    else:
        kwargs['cert_reqs'] = "CERT_REQUIRED"

    if ca_certs is not None:
        kwargs['ca_certs'] = ca_certs
    kwargs.update(override_kwargs)

    # Try really hard to find a SSL certificate path
    if 'ca_certs' not in kwargs and kwargs.get('cert_reqs') != 'CERT_NONE':
        try:
            import certifi
        except ImportError:
            pass
        else:
            kwargs['ca_certs'] = certifi.where()

    import urllib3

    if proxy_server is not None:
        # `urllib3` requires a `str` object in both Python 2 and 3, while
        # `ConfigDict` coerces entries to `bytes` on Python 3. Compensate.
        if not isinstance(proxy_server, str):
            proxy_server = proxy_server.decode()
        manager = urllib3.ProxyManager(proxy_server, headers=headers,
                                       **kwargs)
    else:
        manager = urllib3.PoolManager(headers=headers, **kwargs)

    return manager
1346
1347
1348 class HttpGitClient(GitClient):
1349
1350     def __init__(self, base_url, dumb=None, pool_manager=None, config=None,
1351                  username=None, password=None, **kwargs):
1352         self._base_url = base_url.rstrip("/") + "/"
1353         self._username = username
1354         self._password = password
1355         self.dumb = dumb
1356
1357         if pool_manager is None:
1358             self.pool_manager = default_urllib3_manager(config)
1359         else:
1360             self.pool_manager = pool_manager
1361
1362         if username is not None:
1363             # No escaping needed: ":" is not allowed in username:
1364             # https://tools.ietf.org/html/rfc2617#section-2
1365             credentials = "%s:%s" % (username, password)
1366             import urllib3.util
1367             basic_auth = urllib3.util.make_headers(basic_auth=credentials)
1368             self.pool_manager.headers.update(basic_auth)
1369
1370         GitClient.__init__(self, **kwargs)
1371
1372     def get_url(self, path):
1373         return self._get_url(path).rstrip("/")
1374
1375     @classmethod
1376     def from_parsedurl(cls, parsedurl, **kwargs):
1377         password = parsedurl.password
1378         if password is not None:
1379             kwargs['password'] = urlunquote(password)
1380         username = parsedurl.username
1381         if username is not None:
1382             kwargs['username'] = urlunquote(username)
1383         # TODO(jelmer): This also strips the username
1384         parsedurl = parsedurl._replace(netloc=parsedurl.hostname)
1385         return cls(urlparse.urlunparse(parsedurl), **kwargs)
1386
1387     def __repr__(self):
1388         return "%s(%r, dumb=%r)" % (
1389             type(self).__name__, self._base_url, self.dumb)
1390
1391     def _get_url(self, path):
1392         if not isinstance(path, str):
1393             # TODO(jelmer): this is unrelated to the local filesystem;
1394             # This is not necessarily the right encoding to decode the path
1395             # with.
1396             path = path.decode(sys.getfilesystemencoding())
1397         return urlparse.urljoin(self._base_url, path).rstrip("/") + "/"
1398
1399     def _http_request(self, url, headers=None, data=None,
1400                       allow_compression=False):
1401         """Perform HTTP request.
1402
1403         :param url: Request URL.
1404         :param headers: Optional custom headers to override defaults.
1405         :param data: Request data.
1406         :param allow_compression: Allow GZipped communication.
1407         :return: Tuple (`response`, `read`), where response is an `urllib3`
1408             response object with additional `content_type` and
1409             `redirect_location` properties, and `read` is a consumable read
1410             method for the response data.
1411         """
1412         req_headers = self.pool_manager.headers.copy()
1413         if headers is not None:
1414             req_headers.update(headers)
1415         req_headers["Pragma"] = "no-cache"
1416         if allow_compression:
1417             req_headers["Accept-Encoding"] = "gzip"
1418         else:
1419             req_headers["Accept-Encoding"] = "identity"
1420
1421         if data is None:
1422             resp = self.pool_manager.request("GET", url, headers=req_headers)
1423         else:
1424             resp = self.pool_manager.request("POST", url, headers=req_headers,
1425                                              body=data)
1426
1427         if resp.status == 404:
1428             raise NotGitRepository()
1429         elif resp.status != 200:
1430             raise GitProtocolError("unexpected http resp %d for %s" %
1431                                    (resp.status, url))
1432
1433         # TODO: Optimization available by adding `preload_content=False` to the
1434         # request and just passing the `read` method on instead of going via
1435         # `BytesIO`, if we can guarantee that the entire response is consumed
1436         # before issuing the next to still allow for connection reuse from the
1437         # pool.
1438         read = BytesIO(resp.data).read
1439
1440         resp.content_type = resp.getheader("Content-Type")
1441         resp.redirect_location = resp.get_redirect_location()
1442
1443         return resp, read
1444
1445     def _discover_references(self, service, base_url):
1446         assert base_url[-1] == "/"
1447         tail = "info/refs"
1448         headers = {"Accept": "*/*"}
1449         if self.dumb is not True:
1450             tail += "?service=%s" % service.decode('ascii')
1451         url = urlparse.urljoin(base_url, tail)
1452         resp, read = self._http_request(url, headers, allow_compression=True)
1453
1454         if resp.redirect_location:
1455             # Something changed (redirect!), so let's update the base URL
1456             if not resp.redirect_location.endswith(tail):
1457                 raise GitProtocolError(
1458                         "Redirected from URL %s to URL %s without %s" % (
1459                             url, resp.redirect_location, tail))
1460             base_url = resp.redirect_location[:-len(tail)]
1461
1462         try:
1463             self.dumb = not resp.content_type.startswith("application/x-git-")
1464             if not self.dumb:
1465                 proto = Protocol(read, None)
1466                 # The first line should mention the service
1467                 try:
1468                     [pkt] = list(proto.read_pkt_seq())
1469                 except ValueError:
1470                     raise GitProtocolError(
1471                         "unexpected number of packets received")
1472                 if pkt.rstrip(b'\n') != (b'# service=' + service):
1473                     raise GitProtocolError(
1474                         "unexpected first line %r from smart server" % pkt)
1475                 return read_pkt_refs(proto) + (base_url, )
1476             else:
1477                 return read_info_refs(resp), set(), base_url
1478         finally:
1479             resp.close()
1480
1481     def _smart_request(self, service, url, data):
1482         assert url[-1] == "/"
1483         url = urlparse.urljoin(url, service)
1484         result_content_type = "application/x-%s-result" % service
1485         headers = {
1486             "Content-Type": "application/x-%s-request" % service,
1487             "Accept": result_content_type,
1488             "Content-Length": str(len(data)),
1489         }
1490         resp, read = self._http_request(url, headers, data)
1491         if resp.content_type != result_content_type:
1492             raise GitProtocolError("Invalid content-type from server: %s"
1493                                    % resp.content_type)
1494         return resp, read
1495
1496     def send_pack(self, path, update_refs, generate_pack_data,
1497                   progress=None):
1498         """Upload a pack to a remote repository.
1499
1500         :param path: Repository path (as bytestring)
1501         :param update_refs: Function to determine changes to remote refs.
1502             Receive dict with existing remote refs, returns dict with
1503             changed refs (name -> sha, where sha=ZERO_SHA for deletions)
1504         :param generate_pack_data: Function that can return a tuple
1505             with number of elements and pack data to upload.
1506         :param progress: Optional progress function
1507
1508         :raises SendPackError: if server rejects the pack data
1509         :raises UpdateRefsError: if the server supports report-status
1510                                  and rejects ref updates
1511         :return: new_refs dictionary containing the changes that were made
1512             {refname: new_ref}, including deleted refs.
1513         """
1514         url = self._get_url(path)
1515         old_refs, server_capabilities, url = self._discover_references(
1516             b"git-receive-pack", url)
1517         negotiated_capabilities = self._negotiate_receive_pack_capabilities(
1518                 server_capabilities)
1519         negotiated_capabilities.add(capability_agent())
1520
1521         if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
1522             self._report_status_parser = ReportStatusParser()
1523
1524         new_refs = update_refs(dict(old_refs))
1525         if new_refs is None:
1526             # Determine wants function is aborting the push.
1527             return old_refs
1528         if self.dumb:
1529             raise NotImplementedError(self.fetch_pack)
1530         req_data = BytesIO()
1531         req_proto = Protocol(None, req_data.write)
1532         (have, want) = self._handle_receive_pack_head(
1533             req_proto, negotiated_capabilities, old_refs, new_refs)
1534         if not want and set(new_refs.items()).issubset(set(old_refs.items())):
1535             return new_refs
1536         pack_data_count, pack_data = generate_pack_data(
1537                 have, want,
1538                 ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities))
1539         if pack_data_count:
1540             write_pack_data(req_proto.write_file(), pack_data_count, pack_data)
1541         resp, read = self._smart_request("git-receive-pack", url,
1542                                          data=req_data.getvalue())
1543         try:
1544             resp_proto = Protocol(read, None)
1545             self._handle_receive_pack_tail(
1546                 resp_proto, negotiated_capabilities, progress)
1547             return new_refs
1548         finally:
1549             resp.close()
1550
1551     def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
1552                    progress=None):
1553         """Retrieve a pack from a git smart server.
1554
1555         :param determine_wants: Callback that returns list of commits to fetch
1556         :param graph_walker: Object with next() and ack().
1557         :param pack_data: Callback called for each bit of data in the pack
1558         :param progress: Callback for progress reports (strings)
1559         :return: FetchPackResult object
1560         """
1561         url = self._get_url(path)
1562         refs, server_capabilities, url = self._discover_references(
1563             b"git-upload-pack", url)
1564         negotiated_capabilities, symrefs, agent = (
1565                 self._negotiate_upload_pack_capabilities(
1566                         server_capabilities))
1567         wants = determine_wants(refs)
1568         if wants is not None:
1569             wants = [cid for cid in wants if cid != ZERO_SHA]
1570         if not wants:
1571             return FetchPackResult(refs, symrefs, agent)
1572         if self.dumb:
1573             raise NotImplementedError(self.send_pack)
1574         check_wants(wants, refs)
1575         req_data = BytesIO()
1576         req_proto = Protocol(None, req_data.write)
1577         self._handle_upload_pack_head(
1578                 req_proto, negotiated_capabilities, graph_walker, wants,
1579                 lambda: False)
1580         resp, read = self._smart_request(
1581             "git-upload-pack", url, data=req_data.getvalue())
1582         try:
1583             resp_proto = Protocol(read, None)
1584             self._handle_upload_pack_tail(
1585                 resp_proto, negotiated_capabilities, graph_walker, pack_data,
1586                 progress)
1587             return FetchPackResult(refs, symrefs, agent)
1588         finally:
1589             resp.close()
1590
1591     def get_refs(self, path):
1592         """Retrieve the current refs from a git smart server."""
1593         url = self._get_url(path)
1594         refs, _, _ = self._discover_references(
1595             b"git-upload-pack", url)
1596         return refs
1597
1598
def get_transport_and_path_from_url(url, config=None, **kwargs):
    """Pick the git client class matching a URL's scheme.

    :param url: URL to open (a unicode string)
    :param config: Optional config object (only handed to the HTTP client)
    :param thin_packs: Whether or not thin packs should be retrieved
    :param report_activity: Optional callback for reporting transport
        activity.
    :raises ValueError: if the URL scheme is not recognised
    :return: Tuple with client instance and relative path.
    """
    parts = urlparse.urlparse(url)
    scheme = parts.scheme
    if scheme == 'git':
        client = TCPGitClient.from_parsedurl(parts, **kwargs)
    elif scheme in ('git+ssh', 'ssh'):
        client = SSHGitClient.from_parsedurl(parts, **kwargs)
    elif scheme in ('http', 'https'):
        client = HttpGitClient.from_parsedurl(
            parts, config=config, **kwargs)
    elif scheme == 'file':
        client = default_local_git_client_cls.from_parsedurl(
            parts, **kwargs)
    else:
        raise ValueError("unknown scheme '%s'" % scheme)
    return client, parts.path
1623
1624
def parse_rsync_url(location):
    """Parse a rsync-style URL.

    :param location: String of the form ``[user@]host:path``
    :raises ValueError: if location contains no ':'
    :return: Tuple of (user, host, path); user is None when the part before
        the first ':' holds no '@'
    """
    if ':' not in location:
        raise ValueError('not a valid rsync-style URL')
    # Everything up to the first colon names the host (and maybe the user);
    # the remainder is the path, which may itself contain ':' or '@'.
    netloc, path = location.split(':', 1)
    if '@' in netloc:
        # Split on the last '@' so the user part may contain '@' itself.
        user, host = netloc.rsplit('@', 1)
    else:
        user, host = None, netloc
    return (user, host, path)
1642
1643
def get_transport_and_path(location, **kwargs):
    """Obtain a git client from a URL, rsync-style location or local path.

    :param location: URL or path (a string)
    :param config: Optional config object
    :param thin_packs: Whether or not thin packs should be retrieved
    :param report_activity: Optional callback for reporting transport
        activity.
    :return: Tuple with client instance and relative path.
    """
    def local_client():
        # Fallback: treat the location as a path on the local filesystem.
        return default_local_git_client_cls(**kwargs), location

    # A proper URL with a known scheme takes precedence.
    try:
        return get_transport_and_path_from_url(location, **kwargs)
    except ValueError:
        pass

    # Drive-letter paths ("C:\...") contain a colon and must be caught
    # before the rsync-style parsing below.
    on_windows = sys.platform == 'win32'
    if on_windows and location[0].isalpha() and location[1:3] == ':\\':
        return local_client()

    try:
        (username, hostname, path) = parse_rsync_url(location)
    except ValueError:
        # Neither a URL nor rsync-style: assume it's a local path.
        return local_client()
    return SSHGitClient(hostname, username=username, **kwargs), path