Made hashing efficient for large files
diff --git a/rsa/bigfile.py b/rsa/bigfile.py
new file mode 100644
index 0000000..02c0b53
--- /dev/null
+++ b/rsa/bigfile.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2011 Sybren A. Stüvel <sybren@stuvel.eu>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Large file support
+
+ - break a file into smaller blocks, and encrypt them, and store the
+ encrypted blocks in another file.
+
+ - take such an encrypted file, decrypt its blocks, and reconstruct the
+ original file.
+
+The encrypted file format is as follows, where || denotes byte concatenation:
+
+ FILE := VERSION || BLOCK || BLOCK ...
+
+ BLOCK := LENGTH || DATA
+
+ LENGTH := varint-encoded length of the subsequent data. Varint comes from
+ Google Protobuf, and encodes an integer into a variable number of bytes.
+ Each byte uses the 7 lowest bits to encode the value. The highest bit set
+ to 1 indicates the next byte is also part of the varint. The last byte will
+ have this bit set to 0.
+
+This file format is called the VARBLOCK format, in line with the varint format
+used to denote the block sizes.
+
+'''
+
+from rsa import key, common, pkcs1, varblock
+
+def encrypt_bigfile(infile, outfile, pub_key):
+ '''Encrypts a file, writing it to 'outfile' in VARBLOCK format.
+
+ :param infile: file-like object to read the cleartext from
+ :param outfile: file-like object to write the crypto in VARBLOCK format to
+ :param pub_key: :py:class:`rsa.PublicKey` to encrypt with
+
+ '''
+
+ if not isinstance(pub_key, key.PublicKey):
+ raise TypeError('Public key required, but got %r' % pub_key)
+
+ key_bytes = common.bit_size(pub_key.n) // 8
+ blocksize = key_bytes - 11 # keep space for PKCS#1 padding
+
+ # Write the version number to the VARBLOCK file
+ outfile.write(chr(varblock.VARBLOCK_VERSION))
+
+ # Encrypt and write each block
+ for block in varblock.yield_fixedblocks(infile, blocksize):
+ crypto = pkcs1.encrypt(block, pub_key)
+
+ varblock.write_varint(outfile, len(crypto))
+ outfile.write(crypto)
+
+def decrypt_bigfile(infile, outfile, priv_key):
+ '''Decrypts an encrypted VARBLOCK file, writing it to 'outfile'
+
+ :param infile: file-like object to read the crypto in VARBLOCK format from
+ :param outfile: file-like object to write the cleartext to
+ :param priv_key: :py:class:`rsa.PrivateKey` to decrypt with
+
+ '''
+
+ if not isinstance(priv_key, key.PrivateKey):
+ raise TypeError('Private key required, but got %r' % priv_key)
+
+ for block in varblock.yield_varblocks(infile):
+ cleartext = pkcs1.decrypt(block, priv_key)
+ outfile.write(cleartext)
+
diff --git a/rsa/pkcs1.py b/rsa/pkcs1.py
index 7612b27..b81629e 100644
--- a/rsa/pkcs1.py
+++ b/rsa/pkcs1.py
@@ -31,7 +31,7 @@
import hashlib
import os
-from rsa import common, transform, core
+from rsa import common, transform, core, varblock
# ASN.1 codes that describe the hash algorithm used.
HASH_ASN1 = {
@@ -224,7 +224,9 @@
Hashes the message, then signs the hash with the given key. This is known
as a "detached signature", because the message itself isn't altered.
- :param message: the message to sign
+ :param message: the message to sign. Can be an 8-bit string or a file-like
+ object. If ``message`` has a ``read()`` method, it is assumed to be a
+ file-like object.
:param priv_key: the :py:class:`rsa.PrivateKey` to sign with
:param hash: the hash method used on the message. Use 'MD5', 'SHA-1',
'SHA-256', 'SHA-384' or 'SHA-512'.
@@ -258,7 +260,9 @@
The hash method is detected automatically from the signature.
- :param message: the signed message
+ :param message: the signed message. Can be an 8-bit string or a file-like
+ object. If ``message`` has a ``read()`` method, it is assumed to be a
+ file-like object.
:param signature: the signature block, as created with ``sign(...)``.
:param pub_key: the :py:class:`rsa.PublicKey` of the person signing the message.
:raise VerificationError: when the signature doesn't match the message.
@@ -289,14 +293,30 @@
raise VerificationError('Verification failed')
def _hash(message, method_name):
- '''Returns the message digest.'''
+ '''Returns the message digest.
+
+ :param message: the message to hash. Can be an 8-bit string or a file-like
+ object. If ``message`` has a ``read()`` method, it is assumed to be a
+ file-like object.
+ :param method_name: the hash method, must be a key of
+ :py:const:`HASH_METHODS`.
+
+ '''
if method_name not in HASH_METHODS:
raise ValueError('Invalid hash method: %s' % method_name)
method = HASH_METHODS[method_name]
hasher = method()
- hasher.update(message)
+
+ if hasattr(message, 'read') and hasattr(message.read, '__call__'):
+ # read as 1K blocks
+ for block in varblock.yield_fixedblocks(message, 1024):
+ hasher.update(block)
+ else:
+ # hash the message object itself.
+ hasher.update(message)
+
return hasher.digest()
diff --git a/rsa/blocks.py b/rsa/varblock.py
similarity index 69%
rename from rsa/blocks.py
rename to rsa/varblock.py
index fed247e..b8bd899 100644
--- a/rsa/blocks.py
+++ b/rsa/varblock.py
@@ -13,15 +13,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-'''Large file support
+'''VARBLOCK file support
- - break a file into smaller blocks, and encrypt them, and store the
- encrypted blocks in another file.
-
- - take such an encrypted files, decrypt its blocks, and reconstruct the
- original file.
-
-The encrypted file format is as follows, where || denotes byte concatenation:
+The VARBLOCK file format is as follows, where || denotes byte concatenation:
FILE := VERSION || BLOCK || BLOCK ...
@@ -38,8 +32,6 @@
'''
-from rsa import key, common, pkcs1
-
VARBLOCK_VERSION = 1
def read_varint(infile):
@@ -155,45 +147,3 @@
if read_bytes < blocksize:
break
-
-def encrypt_bigfile(infile, outfile, pub_key):
- '''Encrypts a file, writing it to 'outfile' in VARBLOCK format.
-
- :param infile: file-like object to read the cleartext from
- :param outfile: file-like object to write the crypto in VARBLOCK format to
- :param pub_key: :py:class:`rsa.PublicKey` to encrypt with
-
- '''
-
- if not isinstance(pub_key, key.PublicKey):
- raise TypeError('Public key required, but got %r' % pub_key)
-
- key_bytes = common.bit_size(pub_key.n) // 8
- blocksize = key_bytes - 11 # keep space for PKCS#1 padding
-
- # Write the version number to the VARBLOCK file
- outfile.write(chr(VARBLOCK_VERSION))
-
- # Encrypt and write each block
- for block in yield_fixedblocks(infile, blocksize):
- crypto = pkcs1.encrypt(block, pub_key)
-
- write_varint(outfile, len(crypto))
- outfile.write(crypto)
-
-def decrypt_bigfile(infile, outfile, priv_key):
- '''Decrypts an encrypted VARBLOCK file, writing it to 'outfile'
-
- :param infile: file-like object to read the crypto in VARBLOCK format from
- :param outfile: file-like object to write the cleartext to
- :param priv_key: :py:class:`rsa.PrivateKey` to decrypt with
-
- '''
-
- if not isinstance(priv_key, key.PrivateKey):
- raise TypeError('Private key required, but got %r' % priv_key)
-
- for block in yield_varblocks(infile):
- cleartext = pkcs1.decrypt(block, priv_key)
- outfile.write(cleartext)
-
diff --git a/tests/test_bigfile.py b/tests/test_bigfile.py
new file mode 100644
index 0000000..ffca5b0
--- /dev/null
+++ b/tests/test_bigfile.py
@@ -0,0 +1,37 @@
+'''Tests bigfile operations.'''
+
+from StringIO import StringIO
+import unittest
+
+import rsa
+from rsa import bigfile, varblock
+
+class BigfileTest(unittest.TestCase):
+
+ def test_encrypt_decrypt_bigfile(self):
+
+ # Expected block size + 11 bytes padding
+ pub_key, priv_key = rsa.newkeys((6 + 11) * 8)
+
+ # Encrypt the file
+ message = '123456Sybren'
+ infile = StringIO(message)
+ outfile = StringIO()
+
+ bigfile.encrypt_bigfile(infile, outfile, pub_key)
+
+ # Test
+ crypto = outfile.getvalue()
+
+ cryptfile = StringIO(crypto)
+ clearfile = StringIO()
+
+ bigfile.decrypt_bigfile(cryptfile, clearfile, priv_key)
+ self.assertEquals(clearfile.getvalue(), message)
+
+ # We have 2x6 bytes in the message, so that should result in two
+ # blocks.
+ cryptfile.seek(0)
+ varblocks = list(varblock.yield_varblocks(cryptfile))
+ self.assertEqual(2, len(varblocks))
+
diff --git a/tests/test_blocks.py b/tests/test_blocks.py
deleted file mode 100644
index 22d6500..0000000
--- a/tests/test_blocks.py
+++ /dev/null
@@ -1,106 +0,0 @@
-'''Tests block operations.'''
-
-from StringIO import StringIO
-import unittest
-
-import rsa
-from rsa import blocks
-
-class VarintTest(unittest.TestCase):
-
- def test_read_varint(self):
-
- encoded = '\xac\x02crummy'
- infile = StringIO(encoded)
-
- (decoded, read) = blocks.read_varint(infile)
-
- # Test the returned values
- self.assertEqual(300, decoded)
- self.assertEqual(2, read)
-
- # The rest of the file should be untouched
- self.assertEqual('crummy', infile.read())
-
- def test_read_zero(self):
-
- encoded = '\x00crummy'
- infile = StringIO(encoded)
-
- (decoded, read) = blocks.read_varint(infile)
-
- # Test the returned values
- self.assertEqual(0, decoded)
- self.assertEqual(1, read)
-
- # The rest of the file should be untouched
- self.assertEqual('crummy', infile.read())
-
- def test_write_varint(self):
-
- expected = '\xac\x02'
- outfile = StringIO()
-
- written = blocks.write_varint(outfile, 300)
-
- # Test the returned values
- self.assertEqual(expected, outfile.getvalue())
- self.assertEqual(2, written)
-
-
- def test_write_zero(self):
-
- outfile = StringIO()
- written = blocks.write_varint(outfile, 0)
-
- # Test the returned values
- self.assertEqual('\x00', outfile.getvalue())
- self.assertEqual(1, written)
-
-
-class VarblockTest(unittest.TestCase):
-
- def test_yield_varblock(self):
- infile = StringIO('\x01\x0512345\x06Sybren')
-
- varblocks = list(blocks.yield_varblocks(infile))
- self.assertEqual(['12345', 'Sybren'], varblocks)
-
-class FixedblockTest(unittest.TestCase):
-
- def test_yield_fixedblock(self):
-
- infile = StringIO('123456Sybren')
-
- fixedblocks = list(blocks.yield_fixedblocks(infile, 6))
- self.assertEqual(['123456', 'Sybren'], fixedblocks)
-
-class BigfileTest(unittest.TestCase):
-
- def test_encrypt_decrypt_bigfile(self):
-
- # Expected block size + 11 bytes padding
- pub_key, priv_key = rsa.newkeys((6 + 11) * 8)
-
- # Encrypt the file
- message = '123456Sybren'
- infile = StringIO(message)
- outfile = StringIO()
-
- blocks.encrypt_bigfile(infile, outfile, pub_key)
-
- # Test
- crypto = outfile.getvalue()
-
- cryptfile = StringIO(crypto)
- clearfile = StringIO()
-
- blocks.decrypt_bigfile(cryptfile, clearfile, priv_key)
- self.assertEquals(clearfile.getvalue(), message)
-
- # We have 2x6 bytes in the message, so that should result in two
- # blocks.
- cryptfile.seek(0)
- varblocks = list(blocks.yield_varblocks(cryptfile))
- self.assertEqual(2, len(varblocks))
-
diff --git a/tests/test_varblock.py b/tests/test_varblock.py
new file mode 100644
index 0000000..d8addb4
--- /dev/null
+++ b/tests/test_varblock.py
@@ -0,0 +1,77 @@
+'''Tests varblock operations.'''
+
+from StringIO import StringIO
+import unittest
+
+import rsa
+from rsa import varblock
+
+class VarintTest(unittest.TestCase):
+
+ def test_read_varint(self):
+
+ encoded = '\xac\x02crummy'
+ infile = StringIO(encoded)
+
+ (decoded, read) = varblock.read_varint(infile)
+
+ # Test the returned values
+ self.assertEqual(300, decoded)
+ self.assertEqual(2, read)
+
+ # The rest of the file should be untouched
+ self.assertEqual('crummy', infile.read())
+
+ def test_read_zero(self):
+
+ encoded = '\x00crummy'
+ infile = StringIO(encoded)
+
+ (decoded, read) = varblock.read_varint(infile)
+
+ # Test the returned values
+ self.assertEqual(0, decoded)
+ self.assertEqual(1, read)
+
+ # The rest of the file should be untouched
+ self.assertEqual('crummy', infile.read())
+
+ def test_write_varint(self):
+
+ expected = '\xac\x02'
+ outfile = StringIO()
+
+ written = varblock.write_varint(outfile, 300)
+
+ # Test the returned values
+ self.assertEqual(expected, outfile.getvalue())
+ self.assertEqual(2, written)
+
+
+ def test_write_zero(self):
+
+ outfile = StringIO()
+ written = varblock.write_varint(outfile, 0)
+
+ # Test the returned values
+ self.assertEqual('\x00', outfile.getvalue())
+ self.assertEqual(1, written)
+
+
+class VarblockTest(unittest.TestCase):
+
+ def test_yield_varblock(self):
+ infile = StringIO('\x01\x0512345\x06Sybren')
+
+ varblocks = list(varblock.yield_varblocks(infile))
+ self.assertEqual(['12345', 'Sybren'], varblocks)
+
+class FixedblockTest(unittest.TestCase):
+
+ def test_yield_fixedblock(self):
+
+ infile = StringIO('123456Sybren')
+
+ fixedblocks = list(varblock.yield_fixedblocks(infile, 6))
+ self.assertEqual(['123456', 'Sybren'], fixedblocks)
+