Merge pull request #520 from reaperhulk/pbkdf2-openssl

PBKDF2 Support (OpenSSL backend)
diff --git a/cryptography/hazmat/backends/interfaces.py b/cryptography/hazmat/backends/interfaces.py
index 4fbb348..53c7518 100644
--- a/cryptography/hazmat/backends/interfaces.py
+++ b/cryptography/hazmat/backends/interfaces.py
@@ -65,3 +65,19 @@
         """
         Create a HashContext for calculating a message authentication code.
         """
+
+
+class PBKDF2HMACBackend(six.with_metaclass(abc.ABCMeta)):
+    @abc.abstractmethod
+    def pbkdf2_hmac_supported(self, algorithm):
+        """
+        Return True if the hash algorithm is supported for PBKDF2 by this
+        backend.
+        """
+
+    @abc.abstractmethod
+    def derive_pbkdf2_hmac(self, algorithm, length, salt, iterations,
+                           key_material):
+        """
+        Return length bytes derived from provided PBKDF2 parameters.
+        """
diff --git a/cryptography/hazmat/backends/openssl/backend.py b/cryptography/hazmat/backends/openssl/backend.py
index d8d4669..cf931da 100644
--- a/cryptography/hazmat/backends/openssl/backend.py
+++ b/cryptography/hazmat/backends/openssl/backend.py
@@ -20,9 +20,9 @@
     UnsupportedAlgorithm, InvalidTag, InternalError
 )
 from cryptography.hazmat.backends.interfaces import (
-    CipherBackend, HashBackend, HMACBackend
+    CipherBackend, HashBackend, HMACBackend, PBKDF2HMACBackend
 )
-from cryptography.hazmat.primitives import interfaces
+from cryptography.hazmat.primitives import interfaces, hashes
 from cryptography.hazmat.primitives.ciphers.algorithms import (
     AES, Blowfish, Camellia, TripleDES, ARC4,
 )
@@ -35,6 +35,7 @@
 @utils.register_interface(CipherBackend)
 @utils.register_interface(HashBackend)
 @utils.register_interface(HMACBackend)
+@utils.register_interface(PBKDF2HMACBackend)
 class Backend(object):
     """
     OpenSSL API binding interfaces.
@@ -133,6 +134,49 @@
     def create_symmetric_decryption_ctx(self, cipher, mode):
         return _CipherContext(self, cipher, mode, _CipherContext._DECRYPT)
 
+    def pbkdf2_hmac_supported(self, algorithm):
+        if self._lib.Cryptography_HAS_PBKDF2_HMAC:
+            return self.hmac_supported(algorithm)
+        else:
+            # OpenSSL < 1.0.0 has an explicit PBKDF2-HMAC-SHA1 function,
+            # so if the PBKDF2_HMAC function is missing we only support
+            # SHA1 via PBKDF2_HMAC_SHA1.
+            return isinstance(algorithm, hashes.SHA1)
+
+    def derive_pbkdf2_hmac(self, algorithm, length, salt, iterations,
+                           key_material):
+        buf = self._ffi.new("char[]", length)
+        if self._lib.Cryptography_HAS_PBKDF2_HMAC:
+            evp_md = self._lib.EVP_get_digestbyname(
+                algorithm.name.encode("ascii"))
+            assert evp_md != self._ffi.NULL
+            res = self._lib.PKCS5_PBKDF2_HMAC(
+                key_material,
+                len(key_material),
+                salt,
+                len(salt),
+                iterations,
+                evp_md,
+                length,
+                buf
+            )
+            assert res == 1
+        else:
+            # OpenSSL < 1.0.0
+            assert isinstance(algorithm, hashes.SHA1)
+            res = self._lib.PKCS5_PBKDF2_HMAC_SHA1(
+                key_material,
+                len(key_material),
+                salt,
+                len(salt),
+                iterations,
+                length,
+                buf
+            )
+            assert res == 1
+
+        return self._ffi.buffer(buf)[:]
+
     def _handle_error(self, mode):
         code = self._lib.ERR_get_error()
         if not code and isinstance(mode, GCM):
diff --git a/cryptography/hazmat/primitives/kdf/__init__.py b/cryptography/hazmat/primitives/kdf/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/cryptography/hazmat/primitives/kdf/__init__.py
diff --git a/cryptography/hazmat/primitives/kdf/pbkdf2.py b/cryptography/hazmat/primitives/kdf/pbkdf2.py
new file mode 100644
index 0000000..71b8821
--- /dev/null
+++ b/cryptography/hazmat/primitives/kdf/pbkdf2.py
@@ -0,0 +1,66 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import six
+
+from cryptography import utils
+from cryptography.exceptions import (
+    InvalidKey, UnsupportedAlgorithm, AlreadyFinalized
+)
+from cryptography.hazmat.primitives import constant_time, interfaces
+
+
+@utils.register_interface(interfaces.KeyDerivationFunction)
+class PBKDF2HMAC(object):
+    def __init__(self, algorithm, length, salt, iterations, backend):
+        if not backend.pbkdf2_hmac_supported(algorithm):
+            raise UnsupportedAlgorithm(
+                "{0} is not supported for PBKDF2 by this backend".format(
+                    algorithm.name)
+            )
+        self._used = False
+        self._algorithm = algorithm
+        self._length = length
+        if isinstance(salt, six.text_type):
+            raise TypeError(
+                "Unicode-objects must be encoded before using them as key "
+                "material."
+            )
+        self._salt = salt
+        self._iterations = iterations
+        self._backend = backend
+
+    def derive(self, key_material):
+        if self._used:
+            raise AlreadyFinalized("PBKDF2 instances can only be used once")
+        self._used = True
+
+        if isinstance(key_material, six.text_type):
+            raise TypeError(
+                "Unicode-objects must be encoded before using them as key "
+                "material."
+            )
+        return self._backend.derive_pbkdf2_hmac(
+            self._algorithm,
+            self._length,
+            self._salt,
+            self._iterations,
+            key_material
+        )
+
+    def verify(self, key_material, expected_key):
+        derived_key = self.derive(key_material)
+        if not constant_time.bytes_eq(derived_key, expected_key):
+            raise InvalidKey("Keys do not match.")
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 5a8e980..f401fe7 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -14,6 +14,7 @@
 * Improved thread-safety for the OpenSSL backend.
 * Fixed compilation on systems where OpenSSL's ``ec.h`` header is not
   available, such as CentOS.
+* Added :class:`~cryptography.hazmat.primitives.kdf.pbkdf2.PBKDF2HMAC`.
 
 0.1 - 2014-01-08
 ~~~~~~~~~~~~~~~~
diff --git a/docs/hazmat/backends/interfaces.rst b/docs/hazmat/backends/interfaces.rst
index dc24f84..49e4c88 100644
--- a/docs/hazmat/backends/interfaces.rst
+++ b/docs/hazmat/backends/interfaces.rst
@@ -131,3 +131,44 @@
 
         :returns:
             :class:`~cryptography.hazmat.primitives.interfaces.HashContext`
+
+
+.. class:: PBKDF2HMACBackend
+
+    .. versionadded:: 0.2
+
+    A backend with methods for PBKDF2 key derivation using HMAC as the PRF.
+
+    .. method:: pbkdf2_hmac_supported(algorithm)
+
+        Check if the specified ``algorithm`` is supported by this backend.
+
+        :param algorithm: An instance of a
+            :class:`~cryptography.hazmat.primitives.interfaces.HashAlgorithm`
+            provider.
+
+        :returns: ``True`` if the specified ``algorithm`` is supported for
+            PBKDF2 HMAC by this backend, otherwise ``False``.
+
+    .. method:: derive_pbkdf2_hmac(algorithm, length, salt, iterations,
+                                   key_material)
+
+        :param algorithm: An instance of a
+            :class:`~cryptography.hazmat.primitives.interfaces.HashAlgorithm`
+            provider.
+
+        :param int length: The desired length of the derived key. Maximum is
+            (2\ :sup:`32` - 1) * ``algorithm.digest_size``.
+
+        :param bytes salt: A salt.
+
+        :param int iterations: The number of iterations of the hash function
+            to perform. This can be used to control the length of time the
+            operation takes. Higher numbers help mitigate brute force attacks
+            against derived keys.
+
+        :param bytes key_material: The key material to use as a basis for
+            the derived key. This is typically a password.
+
+        :return bytes: Derived key.
+
diff --git a/docs/hazmat/primitives/index.rst b/docs/hazmat/primitives/index.rst
index b115fdb..bde0739 100644
--- a/docs/hazmat/primitives/index.rst
+++ b/docs/hazmat/primitives/index.rst
@@ -10,5 +10,6 @@
     hmac
     symmetric-encryption
     padding
+    key-derivation-functions
     constant-time
     interfaces
diff --git a/docs/hazmat/primitives/key-derivation-functions.rst b/docs/hazmat/primitives/key-derivation-functions.rst
new file mode 100644
index 0000000..529f441
--- /dev/null
+++ b/docs/hazmat/primitives/key-derivation-functions.rst
@@ -0,0 +1,125 @@
+.. hazmat::
+
+Key Derivation Functions
+========================
+
+.. currentmodule:: cryptography.hazmat.primitives.kdf
+
+Key derivation functions derive bytes suitable for cryptographic operations
+from passwords or other data sources using a pseudo-random function (PRF).
+Different KDFs are suitable for different tasks such as:
+
+* Cryptographic key derivation
+
+    Deriving a key suitable for use as input to an encryption algorithm.
+    Typically this means taking a password and running it through an algorithm
+    such as :class:`~cryptography.hazmat.primitives.kdf.pbkdf2.PBKDF2HMAC` or HKDF.
+    This process is typically known as `key stretching`_.
+
+* Password storage
+
+    When storing passwords you want to use an algorithm that is computationally
+    intensive. Legitimate users will only need to compute it once (for example,
+    taking the user's password, running it through the KDF, then comparing it
+    to the stored value), while attackers will need to do it billions of times.
+    Ideal password storage KDFs will be demanding on both computational and
+    memory resources.
+
+.. currentmodule:: cryptography.hazmat.primitives.kdf.pbkdf2
+
+.. class:: PBKDF2HMAC(algorithm, length, salt, iterations, backend)
+
+    .. versionadded:: 0.2
+
+    `PBKDF2`_ (Password Based Key Derivation Function 2) is typically used for
+    deriving a cryptographic key from a password. It may also be used for
+    key storage, but an alternate key storage KDF such as `scrypt`_ is generally
+    considered a better solution.
+
+    This class conforms to the
+    :class:`~cryptography.hazmat.primitives.interfaces.KeyDerivationFunction`
+    interface.
+
+    .. doctest::
+
+        >>> import os
+        >>> from cryptography.hazmat.primitives import hashes
+        >>> from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
+        >>> from cryptography.hazmat.backends import default_backend
+        >>> backend = default_backend()
+        >>> salt = os.urandom(16)
+        >>> # derive
+        >>> kdf = PBKDF2HMAC(
+        ...     algorithm=hashes.SHA256(),
+        ...     length=32,
+        ...     salt=salt,
+        ...     iterations=100000,
+        ...     backend=backend
+        ... )
+        >>> key = kdf.derive(b"my great password")
+        >>> # verify
+        >>> kdf = PBKDF2HMAC(
+        ...     algorithm=hashes.SHA256(),
+        ...     length=32,
+        ...     salt=salt,
+        ...     iterations=100000,
+        ...     backend=backend
+        ... )
+        >>> kdf.verify(b"my great password", key)
+
+    :param algorithm: An instance of a
+        :class:`~cryptography.hazmat.primitives.interfaces.HashAlgorithm`
+        provider.
+    :param int length: The desired length of the derived key. Maximum is
+        (2\ :sup:`32` - 1) * ``algorithm.digest_size``.
+    :param bytes salt: A salt. `NIST SP 800-132`_ recommends 128-bits or
+        longer.
+    :param int iterations: The number of iterations of the hash function to
+        perform. This can be used to control the length of time the operation
+        takes. Higher numbers help mitigate brute force attacks against derived
+        keys. See OWASP's `Password Storage Cheat Sheet`_ for more
+        detailed recommendations if you intend to use this for password storage.
+    :param backend: A
+        :class:`~cryptography.hazmat.backends.interfaces.PBKDF2HMACBackend`
+        provider.
+
+    .. method:: derive(key_material)
+
+        :param bytes key_material: The input key material. For PBKDF2 this
+            should be a password.
+        :return bytes: the derived key.
+        :raises cryptography.exceptions.AlreadyFinalized: This is raised when
+                                                          :meth:`derive` or
+                                                          :meth:`verify` is
+                                                          called more than
+                                                          once.
+
+        This generates and returns a new key from the supplied password.
+
+    .. method:: verify(key_material, expected_key)
+
+        :param bytes key_material: The input key material. This is the same as
+                                   ``key_material`` in :meth:`derive`.
+        :param bytes expected_key: The expected result of deriving a new key,
+                                   this is the same as the return value of
+                                   :meth:`derive`.
+        :raises cryptography.exceptions.InvalidKey: This is raised when the
+                                                    derived key does not match
+                                                    the expected key.
+        :raises cryptography.exceptions.AlreadyFinalized: This is raised when
+                                                          :meth:`derive` or
+                                                          :meth:`verify` is
+                                                          called more than
+                                                          once.
+
+        This checks whether deriving a new key from the supplied
+        ``key_material`` generates the same key as the ``expected_key``, and
+        raises an exception if they do not match. This can be used for
+        checking whether the password a user provides matches the stored derived
+        key.
+
+.. _`NIST SP 800-132`: http://csrc.nist.gov/publications/nistpubs/800-132/nist-sp800-132.pdf
+.. _`Password Storage Cheat Sheet`: https://www.owasp.org/index.php/Password_Storage_Cheat_Sheet
+.. _`PBKDF2`: http://en.wikipedia.org/wiki/PBKDF2
+.. _`scrypt`: http://en.wikipedia.org/wiki/Scrypt
+.. _`key stretching`: http://en.wikipedia.org/wiki/Key_stretching
diff --git a/pytest.ini b/pytest.ini
index 36d4edc..2a1b6e9 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,7 +1,8 @@
 [pytest]
 addopts = -r s
 markers =
-    hmac: this test requires a backend providing HMACBackend
     cipher: this test requires a backend providing CipherBackend
     hash: this test requires a backend providing HashBackend
+    hmac: this test requires a backend providing HMACBackend
+    pbkdf2hmac: this test requires a backend providing PBKDF2HMACBackend
     supported: parametrized test requiring only_if and skip_message
diff --git a/tests/conftest.py b/tests/conftest.py
index a9acb54..ecad1b2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2,7 +2,7 @@
 
 from cryptography.hazmat.backends import _ALL_BACKENDS
 from cryptography.hazmat.backends.interfaces import (
-    HMACBackend, CipherBackend, HashBackend
+    HMACBackend, CipherBackend, HashBackend, PBKDF2HMACBackend
 )
 
 from .utils import check_for_iface, check_backend_support, select_backends
@@ -21,6 +21,7 @@
     check_for_iface("hmac", HMACBackend, item)
     check_for_iface("cipher", CipherBackend, item)
     check_for_iface("hash", HashBackend, item)
+    check_for_iface("pbkdf2hmac", PBKDF2HMACBackend, item)
     check_backend_support(item)
 
 
diff --git a/tests/hazmat/primitives/test_pbkdf2hmac.py b/tests/hazmat/primitives/test_pbkdf2hmac.py
new file mode 100644
index 0000000..6ad225a
--- /dev/null
+++ b/tests/hazmat/primitives/test_pbkdf2hmac.py
@@ -0,0 +1,69 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import pytest
+import six
+
+from cryptography import utils
+from cryptography.exceptions import (
+    InvalidKey, UnsupportedAlgorithm, AlreadyFinalized
+)
+from cryptography.hazmat.primitives import hashes, interfaces
+from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
+from cryptography.hazmat.backends import default_backend
+
+
+@utils.register_interface(interfaces.HashAlgorithm)
+class DummyHash(object):
+    name = "dummy-hash"
+
+
+class TestPBKDF2HMAC(object):
+    def test_already_finalized(self):
+        kdf = PBKDF2HMAC(hashes.SHA1(), 20, b"salt", 10, default_backend())
+        kdf.derive(b"password")
+        with pytest.raises(AlreadyFinalized):
+            kdf.derive(b"password2")
+
+        kdf = PBKDF2HMAC(hashes.SHA1(), 20, b"salt", 10, default_backend())
+        key = kdf.derive(b"password")
+        with pytest.raises(AlreadyFinalized):
+            kdf.verify(b"password", key)
+
+        kdf = PBKDF2HMAC(hashes.SHA1(), 20, b"salt", 10, default_backend())
+        kdf.verify(b"password", key)
+        with pytest.raises(AlreadyFinalized):
+            kdf.verify(b"password", key)
+
+    def test_unsupported_algorithm(self):
+        with pytest.raises(UnsupportedAlgorithm):
+            PBKDF2HMAC(DummyHash(), 20, b"salt", 10, default_backend())
+
+    def test_invalid_key(self):
+        kdf = PBKDF2HMAC(hashes.SHA1(), 20, b"salt", 10, default_backend())
+        key = kdf.derive(b"password")
+
+        kdf = PBKDF2HMAC(hashes.SHA1(), 20, b"salt", 10, default_backend())
+        with pytest.raises(InvalidKey):
+            kdf.verify(b"password2", key)
+
+    def test_unicode_error_with_salt(self):
+        with pytest.raises(TypeError):
+            PBKDF2HMAC(hashes.SHA1(), 20, six.u("salt"), 10, default_backend())
+
+    def test_unicode_error_with_key_material(self):
+        kdf = PBKDF2HMAC(hashes.SHA1(), 20, b"salt", 10, default_backend())
+        with pytest.raises(TypeError):
+            kdf.derive(six.u("unicode here"))
diff --git a/tests/hazmat/primitives/test_pbkdf2hmac_vectors.py b/tests/hazmat/primitives/test_pbkdf2hmac_vectors.py
new file mode 100644
index 0000000..cbd4cc9
--- /dev/null
+++ b/tests/hazmat/primitives/test_pbkdf2hmac_vectors.py
@@ -0,0 +1,37 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import pytest
+
+from cryptography.hazmat.primitives import hashes
+
+from .utils import generate_pbkdf2_test
+from ...utils import load_nist_vectors
+
+
+@pytest.mark.supported(
+    only_if=lambda backend: backend.pbkdf2_hmac_supported(hashes.SHA1()),
+    skip_message="Does not support SHA1 for PBKDF2HMAC",
+)
+@pytest.mark.pbkdf2hmac
+class TestPBKDF2HMAC_SHA1(object):
+    test_pbkdf2_sha1 = generate_pbkdf2_test(
+        load_nist_vectors,
+        "KDF",
+        [
+            "rfc-6070-PBKDF2-SHA1.txt",
+        ],
+        hashes.SHA1(),
+    )
diff --git a/tests/hazmat/primitives/utils.py b/tests/hazmat/primitives/utils.py
index f27afe4..6b1d055 100644
--- a/tests/hazmat/primitives/utils.py
+++ b/tests/hazmat/primitives/utils.py
@@ -4,6 +4,7 @@
 import pytest
 
 from cryptography.hazmat.primitives import hashes, hmac
+from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
 from cryptography.hazmat.primitives.ciphers import Cipher
 from cryptography.exceptions import (
     AlreadyFinalized, NotYetFinalized, AlreadyUpdated, InvalidTag,
@@ -211,6 +212,30 @@
     assert h.finalize() == binascii.unhexlify(md.encode("ascii"))
 
 
+def generate_pbkdf2_test(param_loader, path, file_names, algorithm):
+    all_params = _load_all_params(path, file_names, param_loader)
+
+    @pytest.mark.parametrize("params", all_params)
+    def test_pbkdf2(self, backend, params):
+        pbkdf2_test(backend, algorithm, params)
+    return test_pbkdf2
+
+
+def pbkdf2_test(backend, algorithm, params):
+    # Password and salt can contain \0, which stands for a null character.
+    # The NIST vector loader already converts the literal \0 sequence into a
+    # real null character, so the loaded values are passed through unchanged.
+    kdf = PBKDF2HMAC(
+        algorithm,
+        int(params["length"]),
+        params["salt"],
+        int(params["iterations"]),
+        backend
+    )
+    derived_key = kdf.derive(params["password"])
+    assert binascii.hexlify(derived_key) == params["derived_key"]
+
+
 def generate_aead_exception_test(cipher_factory, mode_factory):
     def test_aead_exception(self, backend):
         aead_exception_test(backend, cipher_factory, mode_factory)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index f852f3a..8ecb33f 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -180,6 +180,25 @@
     ]
 
 
+def test_load_nist_vectors_with_null_chars():
+    vector_data = textwrap.dedent("""
+    COUNT = 0
+    KEY = thing\\0withnulls
+
+    COUNT = 1
+    KEY = 00000000000000000000000000000000
+    """).splitlines()
+
+    assert load_nist_vectors(vector_data) == [
+        {
+            "key": b"thing\x00withnulls",
+        },
+        {
+            "key": b"00000000000000000000000000000000",
+        },
+    ]
+
+
 def test_load_cryptrec_vectors():
     vector_data = textwrap.dedent("""
     # Vectors taken from http://info.isl.ntt.co.jp/crypt/eng/camellia/
diff --git a/tests/utils.py b/tests/utils.py
index 507bc42..5c0e524 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -89,6 +89,10 @@
         # Build our data using a simple Key = Value format
         name, value = [c.strip() for c in line.split("=")]
 
+        # Some tests (PBKDF2) contain \0, which should be interpreted as a
+        # null character rather than literal.
+        value = value.replace("\\0", "\0")
+
         # COUNT is a special token that indicates a new block of data
         if name.upper() == "COUNT":
             test_data = {}