2 # This file is part of GNUnet.
3 # (C) 2013 Christian Grothoff (and other contributing authors)
5 # GNUnet is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 3, or (at your
8 # option) any later version.
10 # GNUnet is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with GNUnet; see the file COPYING. If not, write to the
17 # Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 # Boston, MA 02111-1307, USA.
21 # Brief: Computes GNUNET style Content Hash Key for a given file
22 # Author: Sree Harsha Totakura
import getopt
import logging
import os
import sys
from hashlib import sha512

from Crypto.Cipher import AES
DBLOCK_SIZE = (32 * 1024)  # Data block size

# Pick a multiple of 2 here to achieve 8-byte alignment!  We also
# probably want DBlocks to have (roughly) the same size as IBlocks.
# With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
# byte = 2 * 512 bits).  DO NOT CHANGE!
CHK_PER_INODE = 256

CHK_HASH_SIZE = 64  # SHA-512 hash = 512 bits = 64 bytes

CHK_QUERY_SIZE = CHK_HASH_SIZE  # Again a SHA-512 hash

GNUNET_FS_URI_PREFIX = "gnunet://fs/"  # FS CHK URI prefix

GNUNET_FS_URI_CHK_INFIX = "chk/"  # FS CHK URI infix
def encode_data_to_string(data):
    """Returns an ASCII encoding of the given data block like
    GNUNET_STRINGS_data_to_string() function.

    data: A bytearray (a bytes object is accepted as well) representing the
          block of data which has to be encoded
    Returns the encoded string: one character of the 32-symbol alphabet per
    5 input bits, most significant bits first; the last group is zero-padded
    on the right when the bit count is not a multiple of 5
    Raises TypeError if data is neither a bytearray nor a bytes object
    """
    echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
    if not isinstance(data, (bytearray, bytes)):
        raise TypeError("data must be a bytearray or bytes object")
    data = bytearray(data)  # indexing yields ints on Python 2 and Python 3
    size = len(data)
    rpos = 0   # index of the next input byte to consume
    vbit = 0   # number of valid, not yet emitted bits held in `bits`
    bits = 0   # bit accumulator; only its low `vbit` bits are meaningful
    out = []
    while (rpos < size) or (vbit > 0):
        if (rpos < size) and (vbit < 5):
            bits = (bits << 8) | data[rpos]  # eat 8 more bits
            rpos += 1
            vbit += 8
        if vbit < 5:
            # Input exhausted with a partial group left over.
            bits <<= (5 - vbit)  # zero-padding
            assert (vbit == ((size * 8) % 5))
            vbit = 5
        out.append(echart[(bits >> (vbit - 5)) & 31])
        vbit -= 5
        bits &= (1 << vbit) - 1  # drop emitted bits so the accumulator stays small
    return "".join(out)
def sha512_hash(data):
    """ Returns the sha512 hash of the given data.

    data: the byte string to hash
    Returns the raw (binary, 64 byte) SHA-512 digest
    """
    hash_obj = sha512()
    hash_obj.update(data)
    return hash_obj.digest()
class AESKey:
    """Class for AES Keys. Contains the main key and the initialization
    vector, both derived from a single passphrase.
    """

    key = None                # The actual AES key
    iv = None                 # The initialization vector
    cipher = None             # The cipher object
    KEY_SIZE = 32             # AES 256-bit key = 32 bytes
    IV_SIZE = AES.block_size  # Initialization vector size (= AES block size)

    def __init__(self, passphrase):
        """Creates a new AES key.

        passphrase: string containing the passphrase to get the AES key and
                    initialization vector

        The first KEY_SIZE bytes of the passphrase become the key and the
        following IV_SIZE bytes become the initialization vector.  Whatever
        the passphrase cannot supply is filled with zero bytes; anything
        beyond KEY_SIZE + IV_SIZE bytes is ignored.
        """
        passphrase = bytearray(passphrase)
        key_end = self.KEY_SIZE
        iv_end = key_end + self.IV_SIZE
        key = passphrase[:key_end].ljust(self.KEY_SIZE, b"\x00")
        iv = passphrase[key_end:iv_end].ljust(self.IV_SIZE, b"\x00")
        # bytes() is the byte-string type on Python 2 (where it is str) and on
        # Python 3; str() of a bytearray would give its repr on Python 3.
        self.key = bytes(key)
        self.iv = bytes(iv)
def setup_aes_cipher_(aes_key):
    """Builds the AES cipher object with settings similar to those in GNUnet.

    aes_key: the AESKey object supplying the key and initialization vector
    Returns the newly initialized AES object (CFB mode, 128-bit segments)
    """
    secret = aes_key.key
    init_vector = aes_key.iv
    return AES.new(secret, AES.MODE_CFB, init_vector, segment_size=128)
def aes_pad_(data):
    """Adds padding to the data such that the size of the data is a multiple of
    16 bytes.

    data: the data string (a byte string)
    Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
    as padding; data is the new data string with padded bytes at the end
    """
    # Number of bytes missing up to the next multiple of 16 (0 if aligned).
    pad_len = -len(data) % 16
    if pad_len:
        # Zero bytes, written as a byte-string literal so the concatenation
        # works on both Python 2 and Python 3.
        data += b"\x00" * pad_len
    return (pad_len, data)
def aes_encrypt(aes_key, data):
    """Encrypts the given data using AES.

    aes_key: the AESKey object to use for AES encryption
    data: the data string to encrypt
    Returns the ciphertext, cut back to the length of the unpadded input
    """
    padding, padded = aes_pad_(data)
    ciphertext = setup_aes_cipher_(aes_key).encrypt(padded)
    # The padding only exists to satisfy the cipher's block alignment; the
    # bytes it produced are not part of the result.
    return ciphertext[:-padding] if 0 != padding else ciphertext
def aes_decrypt(aes_key, data):
    """Decrypts the given data using AES

    aes_key: the AESKey object to use for AES decryption
    data: the data string to decrypt
    Returns the plaintext, cut back to the length of the unpadded input
    """
    padding, padded = aes_pad_(data)
    plaintext = setup_aes_cipher_(aes_key).decrypt(padded)
    # Discard the bytes that correspond to the alignment padding.
    return plaintext[:-padding] if 0 != padding else plaintext
class Chk:
    """Class for the content hash key."""

    key = None    # hash of the plaintext block (CHK_HASH_SIZE bytes)
    query = None  # hash of the encrypted block (CHK_QUERY_SIZE bytes)
    fsize = None  # size of the file this CHK stands for; set via setSize()

    def __init__(self, key, query):
        """Creates a content hash key from its two hashes.

        key: byte string of length CHK_HASH_SIZE
        query: byte string of length CHK_QUERY_SIZE
        """
        assert (len(key) == CHK_HASH_SIZE)
        assert (len(query) == CHK_QUERY_SIZE)
        self.key = key
        self.query = query

    def setSize(self, size):
        """Records the size of the file this CHK belongs to.

        size: the file size in bytes
        """
        self.fsize = size

    def uri(self):
        """Returns the GNUnet-style CHK URI:
        <prefix><infix><encoded key>.<encoded query>.<file size>
        """
        # str() renders Python 2 long values without the trailing 'L' that
        # repr() appends, and is also valid on Python 3 where `long` is gone.
        sizestr = str(self.fsize)
        return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
            encode_data_to_string(bytearray(self.key)) + "." + \
            encode_data_to_string(bytearray(self.query)) + "." + \
            sizestr
def compute_depth_(size):
    """Computes the depth of the hash tree.

    size: the size of the file whose tree's depth has to be computed
    Returns the depth of the tree. Always > 0.
    """
    depth = 1
    covered = DBLOCK_SIZE  # payload bytes a tree of the current depth can hold
    while covered < size:
        depth += 1
        # Python integers do not wrap around, so multiplying cannot overflow
        # and no overflow guard is needed here.
        covered *= CHK_PER_INODE
    return depth
def compute_tree_size_(depth):
    """Calculate how many bytes of payload a block tree of the given depth MAY
    correspond to at most (this function ignores the fact that some blocks will
    only be present partially due to the total file size cutting some blocks
    off at the end).

    depth: depth of the block. depth==0 is a DBLOCK.
    Returns the number of bytes of payload a subtree of this depth may
    correspond to.
    """
    payload = DBLOCK_SIZE
    levels_left = depth
    # Every additional level multiplies the reachable payload by the fan-out.
    while levels_left > 0:
        payload = payload * CHK_PER_INODE
        levels_left -= 1
    return payload
def compute_chk_offset_(depth, end_offset):
    """Compute the offset of the CHK for the current block in the IBlock
    above.

    depth: depth of the IBlock in the tree (aka overall number of tree levels
           minus depth); 0 == DBLOCK
    end_offset: current offset in the overall file, at the *beginning* of the
                block for DBLOCK (depth == 0), otherwise at the *end* of the
                block (exclusive)
    Returns the offset in the list of CHKs in the above IBlock
    """
    bds = compute_tree_size_(depth)
    if depth > 0:
        # For IBlocks the offset points just past the block; step back by one
        # so the division lands inside the block itself.
        end_offset -= 1
    # Floor division keeps the result an integer on Python 2 and Python 3.
    ret = end_offset // bds
    return ret % CHK_PER_INODE
def compute_iblock_size_(depth, offset):
    """Compute the size of the current IBLOCK.  The encoder is triggering the
    calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
    construction.  The IBLOCK may be a full or a partial IBLOCK, and this
    function is to calculate how long it should be.

    depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
           (this function is for IBLOCKs only!)
    offset: current offset in the payload (!) of the overall file, must be > 0
            (since this function is called at the end of a block).
    Returns the number of elements to be in the corresponding IBlock
    """
    assert (depth > 0)
    assert (offset > 0)
    bds = compute_tree_size_(depth)
    mod = offset % bds
    if mod == 0:
        # The payload ends exactly on the boundary of this IBlock: it is full.
        return CHK_PER_INODE
    # Partial IBlock: count the children needed to cover `mod` payload bytes,
    # rounding up for a partially filled last child.  Integer arithmetic only,
    # so the result is an int on Python 2 and Python 3.
    child_size = bds // CHK_PER_INODE
    ret, leftover = divmod(mod, child_size)
    if leftover != 0:
        ret += 1
    return ret
def compute_rootchk(readin, size):
    """Returns the content hash key after generating the hash tree for the given
    input stream.

    readin: the stream where to read data from
    size: the size of data to be read
    Returns the Chk object of the tree's root block with its size set, or
    None if reading from the stream failed with an IOError
    """
    depth = compute_depth_(size)
    current_depth = 0
    chks = [None] * (depth * CHK_PER_INODE)  # list buffer
    read_offset = 0
    logging.debug("Begining to calculate tree hash with depth: " + repr(depth))
    while True:
        if depth == current_depth:
            # Every level has been encoded; the root CHK is the first entry
            # of the topmost level.
            off = CHK_PER_INODE * (depth - 1)
            assert (chks[off] is not None)
            # Lazy %-arguments: the query is a raw digest and cannot be
            # concatenated to a text string on Python 3.
            logging.debug("Encoding done, reading CHK `%r' from %r\n",
                          chks[off].query, off)
            uri_chk = chks[off]
            assert (size == read_offset)
            uri_chk.setSize(size)
            return uri_chk
        if 0 == current_depth:
            # DBLOCK: plaintext comes straight from the input stream.
            pt_size = min(DBLOCK_SIZE, size - read_offset)
            try:
                pt_block = readin.read(pt_size)
            except IOError:
                logging.warning("Error reading input file stream")
                return None
        else:
            # IBLOCK: plaintext is the concatenation of the (key, query) pairs
            # of the CHKs collected one level below.
            pt_elements = compute_iblock_size_(current_depth, read_offset)
            first = (current_depth - 1) * CHK_PER_INODE
            pt_block = b"".join(child.key + child.query
                                for child in chks[first:][:pt_elements])
            pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
        assert (len(pt_block) == pt_size)
        assert (pt_size <= DBLOCK_SIZE)
        off = compute_chk_offset_(current_depth, read_offset)
        logging.debug("Encoding data at offset " + repr(read_offset) +
                      " and depth " + repr(current_depth) + " with block "
                      "size " + repr(pt_size) + " and target CHK offset " +
                      repr(current_depth * CHK_PER_INODE))
        # The block is encrypted with a key derived from its own plaintext
        # hash; the CHK pairs that hash with the hash of the ciphertext.
        pt_hash = sha512_hash(pt_block)
        pt_aes_key = AESKey(pt_hash)
        pt_enc = aes_encrypt(pt_aes_key, pt_block)
        pt_enc_hash = sha512_hash(pt_enc)
        chk = Chk(pt_hash, pt_enc_hash)
        chks[(current_depth * CHK_PER_INODE) + off] = chk
        if 0 == current_depth:
            read_offset += pt_size
            # Move up once the input is exhausted or a full IBlock's worth of
            # DBLOCKs has been produced.
            if (read_offset == size) or \
                    (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
                current_depth += 1
        else:
            # NOTE(review): `off` comes from compute_chk_offset_, which reduces
            # its result modulo CHK_PER_INODE, so the first clause looks
            # unreachable.  Kept unchanged -- confirm against the reference
            # encoder before touching it.
            if (CHK_PER_INODE == off) or (read_offset == size):
                current_depth += 1
            else:
                current_depth = 0
def chkuri_from_path(path):
    """Returns the CHK URI of the file at the given path.

    path: the path of the file whose CHK has to be calculated
    Raises IOError if the file's contents could not be read
    """
    size = os.path.getsize(path)
    # The context manager closes the file even when hashing raises.
    with open(path, "rb") as readin:
        chk = compute_rootchk(readin, size)
    if chk is None:
        # compute_rootchk reports a failed read by returning None.
        raise IOError("Error reading input file: " + repr(path))
    return chk.uri()
def usage():
    """Prints help about using this script."""
    # print() with a single argument behaves the same on Python 2 and 3.
    print("""
Usage: gnunet-chk.py [options] file
Prints the Content Hash Key of given file in GNUNET-style URI.

Options:
    -h, --help : prints this message
""")
if '__main__' == __name__:
    # Command line entry point: parse the options, then print the CHK URI of
    # the single file argument.  Exit status: 0 on success or after --help,
    # 1 on a wrong argument count, 2 on an unknown option.
    try:
        opts, args = getopt.getopt(sys.argv[1:],
                                   "h",
                                   ["help"])
    except getopt.GetoptError as err:
        print(err)
        print("Exception occured")
        usage()
        sys.exit(2)
    for option, value in opts:
        if option in ("-h", "--help"):
            usage()
            sys.exit(0)
    if len(args) != 1:
        print("Incorrect number of arguments passed")
        usage()
        sys.exit(1)
    print(chkuri_from_path(args[0]))