2 # This file is part of GNUnet.
3 # (C) 2013, 2018 Christian Grothoff (and other contributing authors)
5 # GNUnet is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 3, or (at your
8 # option) any later version.
10 # GNUnet is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with GNUnet; see the file COPYING. If not, write to the
17 # Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 # Boston, MA 02110-1301, USA.
21 # Brief: Computes GNUNET style Content Hash Key for a given file
22 # Author: Sree Harsha Totakura
from functools import reduce
import getopt
from hashlib import sha512
import logging
import os
import sys

from Crypto.Cipher import AES
DBLOCK_SIZE = 32 * 1024  # Data block size

# Pick a multiple of 2 here to achieve 8-byte alignment!  We also
# probably want DBlocks to have (roughly) the same size as IBlocks.
# With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
# byte = 2 * 512 bits).  DO NOT CHANGE!
# NOTE(review): the assignment itself is not visible in this listing; the
# value is taken from the comment above -- confirm.
CHK_PER_INODE = 256

CHK_HASH_SIZE = 64  # SHA-512 hash = 512 bits = 64 bytes

CHK_QUERY_SIZE = CHK_HASH_SIZE  # Again a SHA-512 hash

GNUNET_FS_URI_PREFIX = "gnunet://fs/"  # FS CHK URI prefix

GNUNET_FS_URI_CHK_INFIX = "chk/"  # FS CHK URI infix
def encode_data_to_string(data):
    """Returns an ASCII encoding of the given data block like
    GNUNET_STRINGS_data_to_string() function.

    The input is consumed as one bit stream, most significant bit first, and
    every group of 5 bits selects one character of a 32-symbol alphabet.  A
    trailing group shorter than 5 bits is zero-padded on the right.

    data: A bytes-like object (bytearray, bytes or memoryview) representing
          the block of data which has to be encoded
    Returns the encoded string; empty input gives an empty string.
    Raises TypeError if data is not bytes-like.
    """
    echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
    if not isinstance(data, (bytes, bytearray, memoryview)):
        raise TypeError("data must be bytes-like, not " + type(data).__name__)
    data = bytearray(data)  # indexing a bytearray always yields ints
    size = len(data)
    vbit = 0  # number of not-yet-emitted bits held in `bits`
    rpos = 0  # index of the next input byte
    bits = 0  # bit accumulator
    out = []
    while (rpos < size) or (vbit > 0):
        if (rpos < size) and (vbit < 5):
            bits = (bits << 8) | data[rpos]  # eat 8 more bits
            rpos += 1
            vbit += 8
        if vbit < 5:
            # Only reachable once the input is exhausted; at this point
            # vbit == (size * 8) % 5.
            bits <<= (5 - vbit)  # zero-padding
            vbit = 5
        out.append(echart[(bits >> (vbit - 5)) & 31])
        vbit -= 5
        # Drop the bits just emitted so the accumulator stays small instead
        # of growing with the input length.
        bits &= (1 << vbit) - 1
    return "".join(out)
def sha512_hash(data):
    """Returns the sha512 hash of the given data.

    data: bytes-like object to hash
    Returns the raw (binary) digest.
    """
    return sha512(data).digest()
class AESKey(object):
    """Class for AES Keys. Contains the main key and the initialization
    vector.

    Both values are cut out of a single passphrase: the first KEY_SIZE bytes
    form the key and the following IV_SIZE bytes form the initialization
    vector.  Whatever the passphrase cannot fill is left as zero bytes.
    """

    key = None  # The actual AES key
    iv = None  # The initialization vector
    cipher = None  # The cipher object
    KEY_SIZE = 32  # AES 256-bit key = 32 bytes
    IV_SIZE = AES.block_size  # Initialization vector size (= AES block size)

    def __init__(self, passphrase):
        """Creates a new AES key.

        passphrase: bytes-like object containing the passphrase to get the
                    AES key and initialization vector
        """
        passphrase = bytearray(passphrase)
        key = passphrase[:self.KEY_SIZE]
        iv = passphrase[self.KEY_SIZE:self.KEY_SIZE + self.IV_SIZE]
        # Zero-fill whatever a short passphrase left empty.
        key += bytearray(self.KEY_SIZE - len(key))
        iv += bytearray(self.IV_SIZE - len(iv))
        # bytes() keeps the raw content on Python 2 and 3 alike; str() of a
        # bytearray is its repr text on Python 3, which has the wrong length.
        self.key = bytes(key)
        self.iv = bytes(iv)
        assert (len(self.key) == self.KEY_SIZE)
        assert (len(self.iv) == self.IV_SIZE)
def setup_aes_cipher_(aes_key):
    """Builds an AES object configured with settings similar to those in
    GNUnet.

    aes_key: the AESKey object supplying the key and initialization vector
    Returns the newly initialized AES object (CFB mode, 128-bit segments)
    """
    key_material = aes_key.key
    init_vector = aes_key.iv
    return AES.new(key_material, AES.MODE_CFB, init_vector, segment_size=128)
def aes_pad_(data):
    """Adds padding to the data such that the size of the data is a multiple of
    16 bytes.

    data: the data (bytes-like) to pad; the caller's object is not modified
    Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
    as padding; data is the new data with zero bytes appended at the end
    """
    pad_len = len(data) % 16
    if 0 != pad_len:
        pad_len = 16 - pad_len
        # Build a new object rather than using +=, which would extend a
        # caller's bytearray in place; a bytes literal (not str) is needed
        # for the concatenation to work on Python 3.
        data = data + b"\x00" * pad_len
    return (pad_len, data)
def aes_encrypt(aes_key, data):
    """Encrypts the given data using AES.

    The data is padded to a multiple of 16 bytes before encryption and the
    ciphertext is cut back to the original length afterwards.

    aes_key: the AESKey object to use for AES encryption
    data: the data (bytes) to encrypt
    Returns the encrypted data, of the same length as the input
    """
    (pad_len, data) = aes_pad_(data)
    cipher = setup_aes_cipher_(aes_key)
    enc_data = cipher.encrypt(data)
    # Guard needed: with pad_len == 0 the slice [:-0] would be empty.
    if 0 != pad_len:
        enc_data = enc_data[:-pad_len]
    return enc_data
def aes_decrypt(aes_key, data):
    """Decrypts the given data using AES.

    The ciphertext is padded to a multiple of 16 bytes before decryption and
    the plaintext is cut back to the original length afterwards.

    aes_key: the AESKey object to use for AES decryption
    data: the data (bytes) to decrypt
    Returns the decrypted data, of the same length as the input
    """
    (pad_len, data) = aes_pad_(data)
    cipher = setup_aes_cipher_(aes_key)
    ptext = cipher.decrypt(data)
    # Guard needed: with pad_len == 0 the slice [:-0] would be empty.
    if 0 != pad_len:
        ptext = ptext[:-pad_len]
    return ptext
class Chk(object):
    """Class for the content hash key."""

    key = None  # Hash of the plaintext block (CHK_HASH_SIZE bytes)
    query = None  # Hash of the encrypted block (CHK_QUERY_SIZE bytes)
    fsize = None  # Size recorded through setSize(); None until then

    def __init__(self, key, query):
        """Creates a content hash key from its two hashes.

        key: the key hash, CHK_HASH_SIZE bytes long
        query: the query hash, CHK_QUERY_SIZE bytes long
        """
        assert (len(key) == CHK_HASH_SIZE)
        assert (len(query) == CHK_QUERY_SIZE)
        self.key = key
        self.query = query

    def setSize(self, size):
        """Records the size that is appended to the URI."""
        self.fsize = size

    # NOTE(review): the def line of this method is missing from the listing;
    # the name `uri` is assumed -- confirm against the callers.
    def uri(self):
        """Returns the GNUnet-style CHK URI: prefix, infix, encoded key,
        encoded query and size, the last three separated by dots.
        """
        # str() gives the plain decimal digits on Python 2 and 3.  The older
        # repr()-then-drop-last-character form was meant to strip the "L"
        # suffix of a Python 2 long; applied to an int it removes the last
        # digit of the size.
        sizestr = str(self.fsize)
        return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
            encode_data_to_string(bytearray(self.key)) + "." + \
            encode_data_to_string(bytearray(self.query)) + "." + \
            sizestr
def compute_depth_(size):
    """Computes the depth of the hash tree.

    size: the size of the file whose tree's depth has to be computed
    Returns the depth of the tree. Always > 0.
    """
    depth = 1
    fl = DBLOCK_SIZE  # payload bytes a tree of the current depth can hold
    while fl < size:
        depth += 1
        # No overflow test on fl * CHK_PER_INODE: Python integers do not
        # wrap around, so such a test could never trigger.
        fl *= CHK_PER_INODE
    return depth
def compute_tree_size_(depth):
    """Calculate how many bytes of payload a block tree of the given depth MAY
    correspond to at most (this function ignores the fact that some blocks will
    only be present partially due to the total file size cutting some blocks
    off at the end).

    depth: depth of the block.  depth==0 is a DBLOCK.
    Returns the number of bytes of payload a subtree of this depth may
    correspond to.
    """
    rsize = DBLOCK_SIZE
    # Every additional level multiplies the capacity by the fan-out.
    for _ in range(depth):
        rsize *= CHK_PER_INODE
    return rsize
def compute_chk_offset_(depth, end_offset):
    """Compute the offset of the CHK for the current block in the IBlock
    above.

    depth: depth of the IBlock in the tree (aka overall number of tree levels
           minus depth); 0 == DBLOCK
    end_offset: current offset in the overall file, at the *beginning* of the
                block for DBLOCK (depth == 0), otherwise at the *end* of the
                block (exclusive)
    Returns the offset in the list of CHKs in the above IBlock
    """
    bds = compute_tree_size_(depth)
    if depth > 0:
        # The offset is exclusive for IBlocks; step back one byte so the
        # division below lands inside the block that just ended.
        end_offset -= 1
    # Floor division: the result is used as a list index, so it has to stay
    # an int on Python 3 as well.
    ret = end_offset // bds
    return ret % CHK_PER_INODE
def compute_iblock_size_(depth, offset):
    """Compute the size of the current IBLOCK.  The encoder is triggering the
    calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
    construction.  The IBLOCK maybe a full or a partial IBLOCK, and this
    function is to calculate how long it should be.

    depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
           (this function is for IBLOCKs only!)
    offset: current offset in the payload (!) of the overall file, must be > 0
            (since this function is called at the end of a block).
    Returns the number of elements to be in the corresponding IBlock
    """
    assert (depth > 0)
    assert (offset > 0)
    bds = compute_tree_size_(depth)
    mod = offset % bds
    if mod == 0:
        # Triggered exactly at the end of a full block.
        return CHK_PER_INODE
    # Triggered at the end of the file: count the children of this IBlock
    # that hold data, rounding a partially filled child up.
    child_bds = bds // CHK_PER_INODE
    ret, remainder = divmod(mod, child_bds)
    if remainder != 0:
        ret += 1
    return ret
def compute_rootchk(readin, size):
    """Returns the content hash key after generating the hash tree for the given
    input stream.

    readin: the stream where to read data from (opened in binary mode)
    size: the size of data to be read
    Returns the Chk of the tree root with its size set, or None when reading
    the stream raised IOError.
    """
    depth = compute_depth_(size)
    current_depth = 0
    # One row of CHK_PER_INODE slots per tree level: row d collects the CHKs
    # of the blocks at depth d, which the IBlock at depth d + 1 then packs.
    chks = [None] * (depth * CHK_PER_INODE)  # list buffer
    read_offset = 0
    logging.debug("Begining to calculate tree hash with depth: %r", depth)
    while True:
        if depth == current_depth:
            # The whole tree is encoded; the root CHK sits in the first slot
            # of the topmost row.
            off = CHK_PER_INODE * (depth - 1)
            assert (chks[off] is not None)
            # %r formatting: the query is raw bytes and cannot be
            # concatenated with str on Python 3.
            logging.debug("Encoding done, reading CHK `%r' from %r\n",
                          chks[off].query, off)
            uri_chk = chks[off]
            assert (size == read_offset)
            uri_chk.setSize(size)
            return uri_chk
        if 0 == current_depth:
            # DBlock: the next chunk of file payload.
            pt_size = min(DBLOCK_SIZE, size - read_offset)
            try:
                pt_block = readin.read(pt_size)
            except IOError:
                logging.warning("Error reading input file stream")
                return None
        else:
            # IBlock: concatenated (key, query) pairs of the level below.
            pt_elements = compute_iblock_size_(current_depth, read_offset)
            row_start = (current_depth - 1) * CHK_PER_INODE
            pt_block = b"".join(
                chk.key + chk.query
                for chk in chks[row_start:row_start + pt_elements])
            pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
        assert (len(pt_block) == pt_size)
        assert (pt_size <= DBLOCK_SIZE)
        off = compute_chk_offset_(current_depth, read_offset)
        logging.debug("Encoding data at offset %r and depth %r with block "
                      "size %r and target CHK offset %r",
                      read_offset, current_depth, pt_size,
                      current_depth * CHK_PER_INODE)
        # The block is encrypted with a key derived from its own hash; the
        # CHK pairs that hash with the hash of the ciphertext.
        pt_hash = sha512_hash(pt_block)
        pt_aes_key = AESKey(pt_hash)
        pt_enc = aes_encrypt(pt_aes_key, pt_block)
        pt_enc_hash = sha512_hash(pt_enc)
        chk = Chk(pt_hash, pt_enc_hash)
        chks[(current_depth * CHK_PER_INODE) + off] = chk
        if 0 == current_depth:
            read_offset += pt_size
            # Move up once the file ends or a full row of DBlocks is done.
            if (read_offset == size) or \
               (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
                current_depth += 1
        else:
            if (CHK_PER_INODE == off) or (read_offset == size):
                current_depth += 1
            else:
                # More payload remains: go back to reading DBlocks.
                current_depth = 0
def chkuri_from_path(path):
    """Returns the CHK URI of the file at the given path.

    path: the path of the file whose CHK has to be calculated
    Raises IOError if compute_rootchk() yields no CHK for the file.
    """
    size = os.path.getsize(path)
    # The context manager closes the file even when hashing raises.
    with open(path, "rb") as readin:
        chk = compute_rootchk(readin, size)
    if chk is None:
        raise IOError("Error reading input file: " + path)
    # NOTE(review): assumes the Chk method producing the URI is named `uri`;
    # its def line is not visible in this listing -- confirm.
    return chk.uri()
# NOTE(review): the def line of this helper is missing from the listing; the
# name `usage` is assumed -- confirm.  The exact column layout of the help
# text is likewise not recoverable from the listing.
def usage():
    """Prints help about using this script."""
    print("""
Usage: gnunet-chk.py [options] file
Prints the Content Hash Key of given file in GNUNET-style URI.

Options:
    -h, --help : prints this message
""")


if '__main__' == __name__:
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.GetoptError as err:
        print(err)
        print("Exception occured")
        usage()
        sys.exit(2)
    for option, value in opts:
        if option in ("-h", "--help"):
            usage()
            sys.exit(0)
    # Exactly one positional argument is expected: the file to hash.
    if len(args) != 1:
        print("Incorrect number of arguments passed")
        usage()
        sys.exit(1)
    print(chkuri_from_path(args[0]))