index.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. # -*- coding: utf-8 -*-
  2. #
  3. # Copyright (C) 2013 Vinay Sajip.
  4. # Licensed to the Python Software Foundation under a contributor agreement.
  5. # See LICENSE.txt and CONTRIBUTORS.txt.
  6. #
  7. import hashlib
  8. import logging
  9. import os
  10. import shutil
  11. import subprocess
  12. import tempfile
  13. try:
  14. from threading import Thread
  15. except ImportError:
  16. from dummy_threading import Thread
  17. from . import DistlibException
  18. from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
  19. urlparse, build_opener, string_types)
  20. from .util import cached_property, zip_dir, ServerProxy
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Index URL used by PackageIndex when no explicit URL is supplied.
DEFAULT_INDEX = 'https://pypi.python.org/pypi'
# Name of the default authentication realm.
# NOTE(review): not referenced in the visible code; read_configuration()
# uses the literal 'pypi' instead -- confirm whether it should use this.
DEFAULT_REALM = 'pypi'
  24. class PackageIndex(object):
  25. """
  26. This class represents a package index compatible with PyPI, the Python
  27. Package Index.
  28. """
  29. boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'
  30. def __init__(self, url=None):
  31. """
  32. Initialise an instance.
  33. :param url: The URL of the index. If not specified, the URL for PyPI is
  34. used.
  35. """
  36. self.url = url or DEFAULT_INDEX
  37. self.read_configuration()
  38. scheme, netloc, path, params, query, frag = urlparse(self.url)
  39. if params or query or frag or scheme not in ('http', 'https'):
  40. raise DistlibException('invalid repository: %s' % self.url)
  41. self.password_handler = None
  42. self.ssl_verifier = None
  43. self.gpg = None
  44. self.gpg_home = None
  45. self.rpc_proxy = None
  46. with open(os.devnull, 'w') as sink:
  47. for s in ('gpg2', 'gpg'):
  48. try:
  49. rc = subprocess.check_call([s, '--version'], stdout=sink,
  50. stderr=sink)
  51. if rc == 0:
  52. self.gpg = s
  53. break
  54. except OSError:
  55. pass
  56. def _get_pypirc_command(self):
  57. """
  58. Get the distutils command for interacting with PyPI configurations.
  59. :return: the command.
  60. """
  61. from distutils.core import Distribution
  62. from distutils.config import PyPIRCCommand
  63. d = Distribution()
  64. return PyPIRCCommand(d)
  65. def read_configuration(self):
  66. """
  67. Read the PyPI access configuration as supported by distutils, getting
  68. PyPI to do the acutal work. This populates ``username``, ``password``,
  69. ``realm`` and ``url`` attributes from the configuration.
  70. """
  71. # get distutils to do the work
  72. c = self._get_pypirc_command()
  73. c.repository = self.url
  74. cfg = c._read_pypirc()
  75. self.username = cfg.get('username')
  76. self.password = cfg.get('password')
  77. self.realm = cfg.get('realm', 'pypi')
  78. self.url = cfg.get('repository', self.url)
  79. def save_configuration(self):
  80. """
  81. Save the PyPI access configuration. You must have set ``username`` and
  82. ``password`` attributes before calling this method.
  83. Again, distutils is used to do the actual work.
  84. """
  85. self.check_credentials()
  86. # get distutils to do the work
  87. c = self._get_pypirc_command()
  88. c._store_pypirc(self.username, self.password)
  89. def check_credentials(self):
  90. """
  91. Check that ``username`` and ``password`` have been set, and raise an
  92. exception if not.
  93. """
  94. if self.username is None or self.password is None:
  95. raise DistlibException('username and password must be set')
  96. pm = HTTPPasswordMgr()
  97. _, netloc, _, _, _, _ = urlparse(self.url)
  98. pm.add_password(self.realm, netloc, self.username, self.password)
  99. self.password_handler = HTTPBasicAuthHandler(pm)
  100. def register(self, metadata):
  101. """
  102. Register a distribution on PyPI, using the provided metadata.
  103. :param metadata: A :class:`Metadata` instance defining at least a name
  104. and version number for the distribution to be
  105. registered.
  106. :return: The HTTP response received from PyPI upon submission of the
  107. request.
  108. """
  109. self.check_credentials()
  110. metadata.validate()
  111. d = metadata.todict()
  112. d[':action'] = 'verify'
  113. request = self.encode_request(d.items(), [])
  114. response = self.send_request(request)
  115. d[':action'] = 'submit'
  116. request = self.encode_request(d.items(), [])
  117. return self.send_request(request)
  118. def _reader(self, name, stream, outbuf):
  119. """
  120. Thread runner for reading lines of from a subprocess into a buffer.
  121. :param name: The logical name of the stream (used for logging only).
  122. :param stream: The stream to read from. This will typically a pipe
  123. connected to the output stream of a subprocess.
  124. :param outbuf: The list to append the read lines to.
  125. """
  126. while True:
  127. s = stream.readline()
  128. if not s:
  129. break
  130. s = s.decode('utf-8').rstrip()
  131. outbuf.append(s)
  132. logger.debug('%s: %s' % (name, s))
  133. stream.close()
  134. def get_sign_command(self, filename, signer, sign_password,
  135. keystore=None):
  136. """
  137. Return a suitable command for signing a file.
  138. :param filename: The pathname to the file to be signed.
  139. :param signer: The identifier of the signer of the file.
  140. :param sign_password: The passphrase for the signer's
  141. private key used for signing.
  142. :param keystore: The path to a directory which contains the keys
  143. used in verification. If not specified, the
  144. instance's ``gpg_home`` attribute is used instead.
  145. :return: The signing command as a list suitable to be
  146. passed to :class:`subprocess.Popen`.
  147. """
  148. cmd = [self.gpg, '--status-fd', '2', '--no-tty']
  149. if keystore is None:
  150. keystore = self.gpg_home
  151. if keystore:
  152. cmd.extend(['--homedir', keystore])
  153. if sign_password is not None:
  154. cmd.extend(['--batch', '--passphrase-fd', '0'])
  155. td = tempfile.mkdtemp()
  156. sf = os.path.join(td, os.path.basename(filename) + '.asc')
  157. cmd.extend(['--detach-sign', '--armor', '--local-user',
  158. signer, '--output', sf, filename])
  159. logger.debug('invoking: %s', ' '.join(cmd))
  160. return cmd, sf
  161. def run_command(self, cmd, input_data=None):
  162. """
  163. Run a command in a child process , passing it any input data specified.
  164. :param cmd: The command to run.
  165. :param input_data: If specified, this must be a byte string containing
  166. data to be sent to the child process.
  167. :return: A tuple consisting of the subprocess' exit code, a list of
  168. lines read from the subprocess' ``stdout``, and a list of
  169. lines read from the subprocess' ``stderr``.
  170. """
  171. kwargs = {
  172. 'stdout': subprocess.PIPE,
  173. 'stderr': subprocess.PIPE,
  174. }
  175. if input_data is not None:
  176. kwargs['stdin'] = subprocess.PIPE
  177. stdout = []
  178. stderr = []
  179. p = subprocess.Popen(cmd, **kwargs)
  180. # We don't use communicate() here because we may need to
  181. # get clever with interacting with the command
  182. t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout))
  183. t1.start()
  184. t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr))
  185. t2.start()
  186. if input_data is not None:
  187. p.stdin.write(input_data)
  188. p.stdin.close()
  189. p.wait()
  190. t1.join()
  191. t2.join()
  192. return p.returncode, stdout, stderr
  193. def sign_file(self, filename, signer, sign_password, keystore=None):
  194. """
  195. Sign a file.
  196. :param filename: The pathname to the file to be signed.
  197. :param signer: The identifier of the signer of the file.
  198. :param sign_password: The passphrase for the signer's
  199. private key used for signing.
  200. :param keystore: The path to a directory which contains the keys
  201. used in signing. If not specified, the instance's
  202. ``gpg_home`` attribute is used instead.
  203. :return: The absolute pathname of the file where the signature is
  204. stored.
  205. """
  206. cmd, sig_file = self.get_sign_command(filename, signer, sign_password,
  207. keystore)
  208. rc, stdout, stderr = self.run_command(cmd,
  209. sign_password.encode('utf-8'))
  210. if rc != 0:
  211. raise DistlibException('sign command failed with error '
  212. 'code %s' % rc)
  213. return sig_file
  214. def upload_file(self, metadata, filename, signer=None, sign_password=None,
  215. filetype='sdist', pyversion='source', keystore=None):
  216. """
  217. Upload a release file to the index.
  218. :param metadata: A :class:`Metadata` instance defining at least a name
  219. and version number for the file to be uploaded.
  220. :param filename: The pathname of the file to be uploaded.
  221. :param signer: The identifier of the signer of the file.
  222. :param sign_password: The passphrase for the signer's
  223. private key used for signing.
  224. :param filetype: The type of the file being uploaded. This is the
  225. distutils command which produced that file, e.g.
  226. ``sdist`` or ``bdist_wheel``.
  227. :param pyversion: The version of Python which the release relates
  228. to. For code compatible with any Python, this would
  229. be ``source``, otherwise it would be e.g. ``3.2``.
  230. :param keystore: The path to a directory which contains the keys
  231. used in signing. If not specified, the instance's
  232. ``gpg_home`` attribute is used instead.
  233. :return: The HTTP response received from PyPI upon submission of the
  234. request.
  235. """
  236. self.check_credentials()
  237. if not os.path.exists(filename):
  238. raise DistlibException('not found: %s' % filename)
  239. metadata.validate()
  240. d = metadata.todict()
  241. sig_file = None
  242. if signer:
  243. if not self.gpg:
  244. logger.warning('no signing program available - not signed')
  245. else:
  246. sig_file = self.sign_file(filename, signer, sign_password,
  247. keystore)
  248. with open(filename, 'rb') as f:
  249. file_data = f.read()
  250. md5_digest = hashlib.md5(file_data).hexdigest()
  251. sha256_digest = hashlib.sha256(file_data).hexdigest()
  252. d.update({
  253. ':action': 'file_upload',
  254. 'protcol_version': '1',
  255. 'filetype': filetype,
  256. 'pyversion': pyversion,
  257. 'md5_digest': md5_digest,
  258. 'sha256_digest': sha256_digest,
  259. })
  260. files = [('content', os.path.basename(filename), file_data)]
  261. if sig_file:
  262. with open(sig_file, 'rb') as f:
  263. sig_data = f.read()
  264. files.append(('gpg_signature', os.path.basename(sig_file),
  265. sig_data))
  266. shutil.rmtree(os.path.dirname(sig_file))
  267. request = self.encode_request(d.items(), files)
  268. return self.send_request(request)
  269. def upload_documentation(self, metadata, doc_dir):
  270. """
  271. Upload documentation to the index.
  272. :param metadata: A :class:`Metadata` instance defining at least a name
  273. and version number for the documentation to be
  274. uploaded.
  275. :param doc_dir: The pathname of the directory which contains the
  276. documentation. This should be the directory that
  277. contains the ``index.html`` for the documentation.
  278. :return: The HTTP response received from PyPI upon submission of the
  279. request.
  280. """
  281. self.check_credentials()
  282. if not os.path.isdir(doc_dir):
  283. raise DistlibException('not a directory: %r' % doc_dir)
  284. fn = os.path.join(doc_dir, 'index.html')
  285. if not os.path.exists(fn):
  286. raise DistlibException('not found: %r' % fn)
  287. metadata.validate()
  288. name, version = metadata.name, metadata.version
  289. zip_data = zip_dir(doc_dir).getvalue()
  290. fields = [(':action', 'doc_upload'),
  291. ('name', name), ('version', version)]
  292. files = [('content', name, zip_data)]
  293. request = self.encode_request(fields, files)
  294. return self.send_request(request)
  295. def get_verify_command(self, signature_filename, data_filename,
  296. keystore=None):
  297. """
  298. Return a suitable command for verifying a file.
  299. :param signature_filename: The pathname to the file containing the
  300. signature.
  301. :param data_filename: The pathname to the file containing the
  302. signed data.
  303. :param keystore: The path to a directory which contains the keys
  304. used in verification. If not specified, the
  305. instance's ``gpg_home`` attribute is used instead.
  306. :return: The verifying command as a list suitable to be
  307. passed to :class:`subprocess.Popen`.
  308. """
  309. cmd = [self.gpg, '--status-fd', '2', '--no-tty']
  310. if keystore is None:
  311. keystore = self.gpg_home
  312. if keystore:
  313. cmd.extend(['--homedir', keystore])
  314. cmd.extend(['--verify', signature_filename, data_filename])
  315. logger.debug('invoking: %s', ' '.join(cmd))
  316. return cmd
  317. def verify_signature(self, signature_filename, data_filename,
  318. keystore=None):
  319. """
  320. Verify a signature for a file.
  321. :param signature_filename: The pathname to the file containing the
  322. signature.
  323. :param data_filename: The pathname to the file containing the
  324. signed data.
  325. :param keystore: The path to a directory which contains the keys
  326. used in verification. If not specified, the
  327. instance's ``gpg_home`` attribute is used instead.
  328. :return: True if the signature was verified, else False.
  329. """
  330. if not self.gpg:
  331. raise DistlibException('verification unavailable because gpg '
  332. 'unavailable')
  333. cmd = self.get_verify_command(signature_filename, data_filename,
  334. keystore)
  335. rc, stdout, stderr = self.run_command(cmd)
  336. if rc not in (0, 1):
  337. raise DistlibException('verify command failed with error '
  338. 'code %s' % rc)
  339. return rc == 0
  340. def download_file(self, url, destfile, digest=None, reporthook=None):
  341. """
  342. This is a convenience method for downloading a file from an URL.
  343. Normally, this will be a file from the index, though currently
  344. no check is made for this (i.e. a file can be downloaded from
  345. anywhere).
  346. The method is just like the :func:`urlretrieve` function in the
  347. standard library, except that it allows digest computation to be
  348. done during download and checking that the downloaded data
  349. matched any expected value.
  350. :param url: The URL of the file to be downloaded (assumed to be
  351. available via an HTTP GET request).
  352. :param destfile: The pathname where the downloaded file is to be
  353. saved.
  354. :param digest: If specified, this must be a (hasher, value)
  355. tuple, where hasher is the algorithm used (e.g.
  356. ``'md5'``) and ``value`` is the expected value.
  357. :param reporthook: The same as for :func:`urlretrieve` in the
  358. standard library.
  359. """
  360. if digest is None:
  361. digester = None
  362. logger.debug('No digest specified')
  363. else:
  364. if isinstance(digest, (list, tuple)):
  365. hasher, digest = digest
  366. else:
  367. hasher = 'md5'
  368. digester = getattr(hashlib, hasher)()
  369. logger.debug('Digest specified: %s' % digest)
  370. # The following code is equivalent to urlretrieve.
  371. # We need to do it this way so that we can compute the
  372. # digest of the file as we go.
  373. with open(destfile, 'wb') as dfp:
  374. # addinfourl is not a context manager on 2.x
  375. # so we have to use try/finally
  376. sfp = self.send_request(Request(url))
  377. try:
  378. headers = sfp.info()
  379. blocksize = 8192
  380. size = -1
  381. read = 0
  382. blocknum = 0
  383. if "content-length" in headers:
  384. size = int(headers["Content-Length"])
  385. if reporthook:
  386. reporthook(blocknum, blocksize, size)
  387. while True:
  388. block = sfp.read(blocksize)
  389. if not block:
  390. break
  391. read += len(block)
  392. dfp.write(block)
  393. if digester:
  394. digester.update(block)
  395. blocknum += 1
  396. if reporthook:
  397. reporthook(blocknum, blocksize, size)
  398. finally:
  399. sfp.close()
  400. # check that we got the whole file, if we can
  401. if size >= 0 and read < size:
  402. raise DistlibException(
  403. 'retrieval incomplete: got only %d out of %d bytes'
  404. % (read, size))
  405. # if we have a digest, it must match.
  406. if digester:
  407. actual = digester.hexdigest()
  408. if digest != actual:
  409. raise DistlibException('%s digest mismatch for %s: expected '
  410. '%s, got %s' % (hasher, destfile,
  411. digest, actual))
  412. logger.debug('Digest verified: %s', digest)
  413. def send_request(self, req):
  414. """
  415. Send a standard library :class:`Request` to PyPI and return its
  416. response.
  417. :param req: The request to send.
  418. :return: The HTTP response from PyPI (a standard library HTTPResponse).
  419. """
  420. handlers = []
  421. if self.password_handler:
  422. handlers.append(self.password_handler)
  423. if self.ssl_verifier:
  424. handlers.append(self.ssl_verifier)
  425. opener = build_opener(*handlers)
  426. return opener.open(req)
  427. def encode_request(self, fields, files):
  428. """
  429. Encode fields and files for posting to an HTTP server.
  430. :param fields: The fields to send as a list of (fieldname, value)
  431. tuples.
  432. :param files: The files to send as a list of (fieldname, filename,
  433. file_bytes) tuple.
  434. """
  435. # Adapted from packaging, which in turn was adapted from
  436. # http://code.activestate.com/recipes/146306
  437. parts = []
  438. boundary = self.boundary
  439. for k, values in fields:
  440. if not isinstance(values, (list, tuple)):
  441. values = [values]
  442. for v in values:
  443. parts.extend((
  444. b'--' + boundary,
  445. ('Content-Disposition: form-data; name="%s"' %
  446. k).encode('utf-8'),
  447. b'',
  448. v.encode('utf-8')))
  449. for key, filename, value in files:
  450. parts.extend((
  451. b'--' + boundary,
  452. ('Content-Disposition: form-data; name="%s"; filename="%s"' %
  453. (key, filename)).encode('utf-8'),
  454. b'',
  455. value))
  456. parts.extend((b'--' + boundary + b'--', b''))
  457. body = b'\r\n'.join(parts)
  458. ct = b'multipart/form-data; boundary=' + boundary
  459. headers = {
  460. 'Content-type': ct,
  461. 'Content-length': str(len(body))
  462. }
  463. return Request(self.url, body, headers)
  464. def search(self, terms, operator=None):
  465. if isinstance(terms, string_types):
  466. terms = {'name': terms}
  467. if self.rpc_proxy is None:
  468. self.rpc_proxy = ServerProxy(self.url, timeout=3.0)
  469. return self.rpc_proxy.search(terms, operator or 'and')