# This file is part of GNUnet.
# (C) 2013 Christian Grothoff (and other contributing authors)
#
# GNUnet is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published
# by the Free Software Foundation; either version 3, or (at your
# option) any later version.
#
# GNUnet is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNUnet; see the file COPYING.  If not, write to the
# Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301, USA.
#
# Brief:   Computes GNUNET style Content Hash Key for a given file
# Author:  Sree Harsha Totakura
import getopt
import logging
import os
import sys
from hashlib import sha512

from Crypto.Cipher import AES
# Defaults
DBLOCK_SIZE = 32 * 1024  # Data block size

# Pick a multiple of 2 here to achieve 8-byte alignment!  We also
# probably want DBlocks to have (roughly) the same size as IBlocks.
# With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
# byte = 2 * 512 bits).  DO NOT CHANGE!
CHK_PER_INODE = 256

CHK_HASH_SIZE = 64  # SHA-512 hash = 512 bits = 64 bytes

CHK_QUERY_SIZE = CHK_HASH_SIZE  # Again a SHA-512 hash

GNUNET_FS_URI_PREFIX = "gnunet://fs/"  # FS CHK URI prefix

GNUNET_FS_URI_CHK_INFIX = "chk/"  # FS CHK URI infix
def encode_data_to_string(data):
    """Returns an ASCII encoding of the given data block like
    GNUNET_STRINGS_data_to_string() function.

    data: A bytearray representing the block of data which has to be encoded
    Returns the encoded string. Each output character carries 5 bits of the
    input; the final character is zero-padded when the number of input bits
    is not a multiple of 5.
    """
    echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
    assert isinstance(data, bytearray)
    size = len(data)
    rpos = 0  # index of the next input byte to consume
    bits = 0  # accumulator; its low `vbit` bits are still to be emitted
    vbit = 0  # number of pending bits in the accumulator
    out = []
    while (rpos < size) or (vbit > 0):
        if (rpos < size) and (vbit < 5):
            bits = (bits << 8) | data[rpos]  # eat 8 more bits
            rpos += 1
            vbit += 8
        if vbit < 5:
            # Input exhausted with fewer than 5 bits left over.
            bits <<= (5 - vbit)  # zero-padding
            assert vbit == ((size * 8) % 5)
            vbit = 5
        out.append(echart[(bits >> (vbit - 5)) & 31])
        vbit -= 5
    return "".join(out)
def sha512_hash(data):
    """Returns the sha512 hash of the given data.

    data: byte string to hash
    Returns the raw (binary, 64 byte) digest
    """
    return sha512(data).digest()
class AESKey:
    """Class for AES Keys. Contains the main key and the initialization
    vector."""

    key = None  # The actual AES key
    iv = None  # The initialization vector
    cipher = None  # The cipher object
    KEY_SIZE = 32  # AES 256-bit key = 32 bytes
    IV_SIZE = AES.block_size  # Initialization vector size (= AES block size)

    def __init__(self, passphrase):
        """Creates a new AES key.

        passphrase: string containing the passphrase to get the AES key and
                    initialization vector

        The first KEY_SIZE bytes of the passphrase become the key and the
        following IV_SIZE bytes become the initialization vector. Whatever
        the passphrase cannot fill is left as zero bytes; surplus bytes are
        ignored.
        """
        # bytes(), not str(): on Python 3 str(bytearray) is the repr text,
        # which has the wrong content and length.
        passphrase = bytes(bytearray(passphrase))
        iv_end = self.KEY_SIZE + self.IV_SIZE
        self.key = passphrase[:self.KEY_SIZE].ljust(self.KEY_SIZE, b"\0")
        self.iv = passphrase[self.KEY_SIZE:iv_end].ljust(self.IV_SIZE, b"\0")
        assert len(self.key) == self.KEY_SIZE
        assert len(self.iv) == self.IV_SIZE
def setup_aes_cipher_(aes_key):
    """Initializes the AES object with settings similar to those in GNUnet.

    aes_key: the AESKey object
    Returns the newly initialized AES object
    """
    # CFB mode with a 128 bit segment, keyed with the AESKey's key and IV.
    return AES.new(aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128)
def aes_pad_(data):
    """Adds padding to the data such that the size of the data is a multiple of
    16 bytes.

    data: the data string
    Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
    as padding; data is the new data string with padded bytes at the end
    """
    pad_len = len(data) % 16
    if pad_len != 0:
        pad_len = 16 - pad_len
        # Zero bytes as padding. bytes(), not str(): on Python 3
        # str(bytearray) is the repr text. Concatenate instead of `+=` so a
        # bytearray passed in by the caller is not modified in place.
        data = data + bytes(bytearray(pad_len))
    return (pad_len, data)
def aes_encrypt(aes_key, data):
    """Encrypts the given data using AES.

    aes_key: the AESKey object to use for AES encryption
    data: the data string to encrypt
    Returns the encrypted data; it has the same length as the input
    """
    # The cipher is fed whole 16 byte blocks: pad first, then cut the bytes
    # that correspond to the padding off the result again.
    (pad_len, data) = aes_pad_(data)
    cipher = setup_aes_cipher_(aes_key)
    enc_data = cipher.encrypt(data)
    return enc_data[:len(enc_data) - pad_len]
def aes_decrypt(aes_key, data):
    """Decrypts the given data using AES

    aes_key: the AESKey object to use for AES decryption
    data: the data string to decrypt
    Returns the decrypted data; it has the same length as the input
    """
    # Mirror of aes_encrypt: pad to whole 16 byte blocks, decrypt, then drop
    # the bytes that correspond to the padding.
    (pad_len, data) = aes_pad_(data)
    cipher = setup_aes_cipher_(aes_key)
    ptext = cipher.decrypt(data)
    return ptext[:len(ptext) - pad_len]
class Chk:
    """Class for the content hash key."""

    key = None  # Hash of the plaintext block (CHK_HASH_SIZE bytes)
    query = None  # Hash of the encrypted block (CHK_QUERY_SIZE bytes)
    fsize = None  # Size of the file this key stands for

    def __init__(self, key, query):
        """Creates a new content hash key.

        key: the key part; must be CHK_HASH_SIZE bytes long
        query: the query part; must be CHK_QUERY_SIZE bytes long
        """
        assert len(key) == CHK_HASH_SIZE
        assert len(query) == CHK_QUERY_SIZE
        self.key = key
        self.query = query

    def setSize(self, size):
        """Records the size of the file this key belongs to.

        size: the file size in bytes
        """
        self.fsize = size

    def uri(self):
        """Returns the GNUnet FS URI string of this key, in the form
        <prefix><infix><encoded key>.<encoded query>.<size>
        """
        # str() prints plain digits for Python 2 `long` values as well, so
        # the old repr()-and-strip-the-"L" dance (which referenced the
        # Python 2 only name `long`) is not needed.
        sizestr = str(self.fsize)
        return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
            encode_data_to_string(bytearray(self.key)) + "." + \
            encode_data_to_string(bytearray(self.query)) + "." + \
            sizestr
def compute_depth_(size):
    """Computes the depth of the hash tree.

    size: the size of the file whose tree's depth has to be computed
    Returns the depth of the tree. Always > 0.
    """
    depth = 1
    fl = DBLOCK_SIZE  # payload covered by a tree of the current depth
    # NOTE(review): loop shape re-derived from the surviving lines; confirm
    # against the reference encoder. The C-style overflow guard
    # `(fl * CHK_PER_INODE) < fl` is dropped because Python integers do not
    # wrap around, so it could never trigger.
    while fl < size:
        depth += 1
        fl *= CHK_PER_INODE
    return depth
def compute_tree_size_(depth):
    """Calculate how many bytes of payload a block tree of the given depth MAY
    correspond to at most (this function ignores the fact that some blocks will
    only be present partially due to the total file size cutting some blocks
    off at the end).

    depth: depth of the block. depth==0 is a DBLOCK.
    Returns the number of bytes of payload a subtree of this depth may
    correspond to.
    """
    # One DBLOCK at depth 0; every further level multiplies the coverage by
    # the number of CHKs per IBlock. max() keeps the result an integer
    # (DBLOCK_SIZE) for a depth below zero.
    return DBLOCK_SIZE * CHK_PER_INODE ** max(depth, 0)
def compute_chk_offset_(depth, end_offset):
    """Compute the offset of the CHK for the current block in the IBlock
    above

    depth: depth of the IBlock in the tree (aka overall number of tree levels
           minus depth); 0 == DBLOCK
    end_offset: current offset in the overall file, at the *beginning* of the
                block for DBLOCK (depth == 0), otherwise at the *end* of the
                block (exclusive)
    Returns the offset in the list of CHKs in the above IBlock
    """
    bds = compute_tree_size_(depth)
    if depth > 0:
        # NOTE(review): for IBlocks the offset points just past the block,
        # so step back by one to land inside it; confirm against the
        # reference encoder.
        end_offset -= 1
    # Floor division: the result is used as a list index, and `/` would
    # produce a float on Python 3.
    return (end_offset // bds) % CHK_PER_INODE
def compute_iblock_size_(depth, offset):
    """Compute the size of the current IBLOCK. The encoder is triggering the
    calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
    construction. The IBLOCK maybe a full or a partial IBLOCK, and this
    function is to calculate how long it should be.

    depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
           (this function is for IBLOCKs only!)
    offset: current offset in the payload (!) of the overall file, must be > 0
            (since this function is called at the end of a block).
    Returns the number of elements to be in the corresponding IBlock
    """
    assert depth > 0
    assert offset > 0
    # NOTE(review): body re-derived from the two surviving statements and the
    # docstring; confirm against the reference encoder.
    bds = compute_tree_size_(depth)
    mod = offset % bds
    if mod == 0:
        # Triggered at the end of a full block.
        return CHK_PER_INODE
    # Triggered at the end of the file: count the child blocks that carry
    # payload, rounding a partially filled last child up.
    bds //= CHK_PER_INODE
    ret = mod // bds
    if mod % bds != 0:
        ret += 1
    return ret
def compute_rootchk(readin, size):
    """Returns the content hash key after generating the hash tree for the given
    input stream.

    readin: the stream where to read data from
    size: the size of data to be read
    Returns the Chk of the root block with its size set; None if reading
    from the stream failed
    """
    depth = compute_depth_(size)
    current_depth = 0
    chks = [None] * (depth * CHK_PER_INODE)  # list buffer
    read_offset = 0
    logging.debug("Begining to calculate tree hash with depth: %r", depth)
    while True:
        if depth == current_depth:
            # The whole tree is encoded; the root CHK sits in the first slot
            # of the topmost level.
            off = CHK_PER_INODE * (depth - 1)
            assert chks[off] is not None
            # %r: the query is a raw digest and cannot be concatenated to a
            # str on Python 3.
            logging.debug("Encoding done, reading CHK `%r' from %r\n",
                          chks[off].query, off)
            uri_chk = chks[off]
            assert size == read_offset
            uri_chk.setSize(size)
            return uri_chk
        if 0 == current_depth:
            # DBLOCK: the plaintext is the next chunk of the input.
            pt_size = min(DBLOCK_SIZE, size - read_offset)
            try:
                pt_block = readin.read(pt_size)
            except IOError:
                logging.warning("Error reading input file stream")
                return None
        else:
            # IBLOCK: the plaintext is the concatenation of the CHKs
            # (key + query) collected on the level below.
            pt_elements = compute_iblock_size_(current_depth, read_offset)
            first = (current_depth - 1) * CHK_PER_INODE
            pt_block = b"".join(chk.key + chk.query
                                for chk in chks[first:first + pt_elements])
            pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
        assert len(pt_block) == pt_size
        assert pt_size <= DBLOCK_SIZE
        off = compute_chk_offset_(current_depth, read_offset)
        logging.debug("Encoding data at offset %r and depth %r with block "
                      "size %r and target CHK offset %r",
                      read_offset, current_depth, pt_size,
                      current_depth * CHK_PER_INODE)
        # The block is encrypted with a key derived from its own hash; the
        # CHK pairs that hash with the hash of the ciphertext.
        pt_hash = sha512_hash(pt_block)
        pt_aes_key = AESKey(pt_hash)
        pt_enc = aes_encrypt(pt_aes_key, pt_block)
        pt_enc_hash = sha512_hash(pt_enc)
        chk = Chk(pt_hash, pt_enc_hash)
        chks[(current_depth * CHK_PER_INODE) + off] = chk
        if 0 == current_depth:
            read_offset += pt_size
            # Move up once the input is exhausted or a full IBlock worth of
            # DBLOCKs has been produced.
            if (read_offset == size) or \
                    (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
                current_depth += 1
        else:
            # NOTE(review): compute_chk_offset_ returns a value modulo
            # CHK_PER_INODE, so the first comparison looks unreachable.  Kept
            # unchanged because the resulting keys must match the reference
            # encoder -- confirm there before touching it.
            if (CHK_PER_INODE == off) or (read_offset == size):
                current_depth += 1
            else:
                current_depth = 0
def chkuri_from_path(path):
    """Returns the CHK URI of the file at the given path.

    path: the path of the file whose CHK has to be calculated
    """
    size = os.path.getsize(path)
    # `with` closes the file even when computing the hash tree raises.
    with open(path, "rb") as readin:
        chk = compute_rootchk(readin, size)
    return chk.uri()
def usage():
    """Prints help about using this script."""
    # NOTE(review): column layout of the help text re-created; the wording of
    # the surviving lines is unchanged.
    print("""
Usage: gnunet-chk.py [options] file
Prints the Content Hash Key of given file in GNUNET-style URI.

Options:
    -h, --help                : prints this message
""")
if '__main__' == __name__:
    # Command line entry point: expects exactly one file argument and prints
    # its CHK URI.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.GetoptError as err:
        print(err)
        print("Exception occured")
        usage()
        sys.exit(2)
    for option, value in opts:
        if option in ("-h", "--help"):
            usage()
            sys.exit(0)
    if len(args) != 1:
        print("Incorrect number of arguments passed")
        usage()
        sys.exit(1)
    print(chkuri_from_path(args[0]))