# This file is part of GNUnet.
# (C) 2013, 2018 Christian Grothoff (and other contributing authors)
#
# GNUnet is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GNUnet is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# SPDX-License-Identifier: AGPL-3.0-or-later
#
# Brief: Computes GNUNET style Content Hash Key for a given file
# Author: Sree Harsha Totakura
import getopt
import logging
import os
import sys
from functools import reduce
from hashlib import sha512

from builtins import str
from builtins import range
from builtins import object
from past.utils import old_div

from Crypto.Cipher import AES
DBLOCK_SIZE = (32 * 1024)  # Data block size

# Pick a multiple of 2 here to achieve 8-byte alignment!  We also
# probably want DBlocks to have (roughly) the same size as IBlocks.
# With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
# byte = 2 * 512 bits).  DO NOT CHANGE!
CHK_PER_INODE = 256  # Number of CHKs per IBlock

CHK_HASH_SIZE = 64  # SHA-512 hash = 512 bits = 64 bytes

CHK_QUERY_SIZE = CHK_HASH_SIZE  # Again a SHA-512 hash

GNUNET_FS_URI_PREFIX = "gnunet://fs/"  # FS CHK URI prefix

GNUNET_FS_URI_CHK_INFIX = "chk/"  # FS CHK URI infix
def encode_data_to_string(data):
    """Returns an ASCII encoding of the given data block like
    GNUNET_STRINGS_data_to_string() function.

    data: A bytearray representing the block of data which has to be encoded
    Returns the encoded string (5 bits of input per output character).
    """
    echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"  # 32-character alphabet
    assert data is not None
    assert (bytearray == type(data))
    size = len(data)
    assert size != 0
    bits = 0  # bit accumulator
    vbit = 0  # number of valid bits currently in the accumulator
    rpos = 0  # read position within `data`
    out = ""
    while (rpos < size) or (vbit > 0):
        if (rpos < size) and (vbit < 5):
            bits = (bits << 8) | data[rpos]  # eat 8 more bits
            rpos += 1
            vbit += 8
        if (vbit < 5):
            bits <<= (5 - vbit)  # zero-padding
            assert (vbit == ((size * 8) % 5))
            vbit = 5
        out += echart[(bits >> (vbit - 5)) & 31]
        vbit -= 5
    assert vbit == 0
    return out
def sha512_hash(data):
    """Returns the sha512 hash of the given data.

    data: bytes-like object to hash
    Returns the raw 64-byte SHA-512 digest.
    """
    hash_obj = sha512()
    hash_obj.update(data)
    return hash_obj.digest()
98 """Class for AES Keys. Contains the main key and the initialization
101 key = None # The actual AES key
102 iv = None # The initialization vector
103 cipher = None # The cipher object
104 KEY_SIZE = 32 # AES 256-bit key = 32 bytes
105 IV_SIZE = AES.block_size # Initialization vector size (= AES block size)
107 def __init__(self, passphrase):
108 """Creates a new AES key.
110 passphrase: string containing the passphrase to get the AES key and
111 initialization vector
113 passphrase = bytearray(passphrase)
114 self.key = bytearray(self.KEY_SIZE)
115 self.iv = bytearray(self.IV_SIZE)
116 if (len(passphrase) > self.KEY_SIZE):
117 self.key = passphrase[:self.KEY_SIZE]
118 passphrase = passphrase[self.KEY_SIZE:]
119 if (len(passphrase) > self.IV_SIZE):
120 self.iv = passphrase[:self.IV_SIZE]
122 self.iv[0:len(passphrase)] = passphrase
124 self.key[0:len(passphrase)] = passphrase
125 self.key = str(self.key)
126 self.iv = str(self.iv)
127 assert (len(self.key) == self.KEY_SIZE)
128 assert (len(self.iv) == self.IV_SIZE)
def setup_aes_cipher_(aes_key):
    """Initializes the AES object with settings similar to those in GNUnet
    (CFB mode with a 128-bit segment size, i.e. full-block feedback).

    aes_key: the AESKey object
    Returns the newly initialized AES cipher object.
    """
    return AES.new(aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128)
141 """Adds padding to the data such that the size of the data is a multiple of
144 data: the data string
145 Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
146 as padding; data is the new data string with padded bytes at the end
148 pad_len = len(data) % 16
150 pad_len = 16 - pad_len
151 pad_bytes = bytearray(15)
152 data += str(pad_bytes[:pad_len])
153 return (pad_len, data)
def aes_encrypt(aes_key, data):
    """Encrypts the given data using AES.

    aes_key: the AESKey object to use for AES encryption
    data: the data string to encrypt
    Returns the ciphertext, truncated back to the original data length.
    """
    (pad_len, data) = aes_pad_(data)
    cipher = setup_aes_cipher_(aes_key)
    enc_data = cipher.encrypt(data)
    if pad_len != 0:
        enc_data = enc_data[:-pad_len]  # drop ciphertext of the pad bytes
    return enc_data
def aes_decrypt(aes_key, data):
    """Decrypts the given data using AES.

    aes_key: the AESKey object to use for AES decryption
    data: the data string to decrypt
    Returns the plaintext, truncated back to the original data length.
    """
    (pad_len, data) = aes_pad_(data)
    cipher = setup_aes_cipher_(aes_key)
    ptext = cipher.decrypt(data)
    if pad_len != 0:
        ptext = ptext[:-pad_len]  # drop plaintext of the pad bytes
    return ptext
185 """Class for the content hash key."""
190 def __init__(self, key, query):
191 assert (len(key) == CHK_HASH_SIZE)
192 assert (len(query) == CHK_QUERY_SIZE)
196 def setSize(self, size):
200 sizestr = repr(self.fsize)
201 if isinstance(self.fsize, int):
202 sizestr = sizestr[:-1]
203 return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
204 encode_data_to_string(bytearray(self.key)) + "." + \
205 encode_data_to_string(bytearray(self.query)) + "." + \
def compute_depth_(size):
    """Computes the depth of the hash tree.

    size: the size of the file whose tree's depth has to be computed
    Returns the depth of the tree.  Always > 0.
    """
    depth = 1
    fl = DBLOCK_SIZE
    while (fl < size):
        depth += 1
        if ((fl * CHK_PER_INODE) < fl):
            # Overflow guard (mirrors the C original; cannot trigger with
            # Python's arbitrary-precision ints, kept for fidelity).
            return depth
        fl = fl * CHK_PER_INODE
    return depth
def compute_tree_size_(depth):
    """Calculate how many bytes of payload a block tree of the given depth MAY
    correspond to at most (this function ignores the fact that some blocks will
    only be present partially due to the total file size cutting some blocks
    short at the end).

    depth: depth of the block.  depth==0 is a DBLOCK.
    Returns the number of bytes of payload a subtree of this depth may
    cover at most.
    """
    rsize = DBLOCK_SIZE
    for cnt in range(0, depth):
        rsize *= CHK_PER_INODE
    return rsize
def compute_chk_offset_(depth, end_offset):
    """Compute the offset of the CHK for the current block in the IBlock
    above.

    depth: depth of the IBlock in the tree (aka overall number of tree levels
           minus depth); 0 == DBLOCK
    end_offset: current offset in the overall file, at the *beginning* of the
                block for DBLOCK (depth == 0), otherwise at the *end* of the
                block (for IBLOCKs)
    Returns the offset in the list of CHKs in the above IBlock.
    """
    bds = compute_tree_size_(depth)
    if depth > 0:
        end_offset -= 1  # round down: for IBlocks the offset is at the end
    # Floor division; operands are non-negative ints here, so `//` matches
    # the previous past.utils.old_div exactly.
    ret = end_offset // bds
    return ret % CHK_PER_INODE
def compute_iblock_size_(depth, offset):
    """Compute the size of the current IBLOCK.  The encoder is triggering the
    calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
    construction.  The IBLOCK maybe a full or a partial IBLOCK, and this
    function is to calculate how long it should be.

    depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
           (this function is for IBLOCKs only!)
    offset: current offset in the payload (!) of the overall file, must be > 0
            (since this function is called at the end of a block).
    Returns the number of elements to be in the corresponding IBlock.
    """
    assert depth > 0
    assert offset > 0
    bds = compute_tree_size_(depth)
    mod = offset % bds
    if mod == 0:
        # The subtree below this IBlock is complete: the IBlock is full.
        ret = CHK_PER_INODE
    else:
        # Partial subtree: count how many children are actually covered.
        bds //= CHK_PER_INODE  # bytes covered per child entry
        ret = mod // bds
        # `!= 0`, not `is not 0`: identity comparison with an int literal
        # relies on CPython small-int caching and is not equality.
        if (mod % bds) != 0:
            ret += 1  # partially filled last child still needs an entry
    return ret
def compute_rootchk(readin, size):
    """Returns the content hash key after generating the hash tree for the
    data read from the given stream.

    readin: the stream where to read data from
    size: the size of data to be read
    Returns the root Chk object, or None on a read error.
    """
    depth = compute_depth_(size)
    current_depth = 0  # current level in the tree we are building
    chks = [None] * (depth * CHK_PER_INODE)  # list buffer for all CHKs
    read_offset = 0  # number of payload bytes consumed so far
    logging.debug("Begining to calculate tree hash with depth: " +
                  repr(depth))
    while True:
        if (depth == current_depth):
            # Reached the root: the single CHK of the top IBlock is the
            # URI's CHK.
            off = CHK_PER_INODE * (depth - 1)
            assert (chks[off] is not None)
            logging.debug("Encoding done, reading CHK `" + chks[off].query +
                          "' from " + repr(off) + "\n")
            uri_chk = chks[off]
            assert (size == read_offset)
            uri_chk.setSize(size)
            return uri_chk
        if (0 == current_depth):
            # Leaf level: read the next DBLOCK from the input stream.
            pt_size = min(DBLOCK_SIZE, size - read_offset)
            try:
                pt_block = readin.read(pt_size)
            except IOError:
                logging.warning("Error reading input file stream")
                return None
        else:
            # Inner level: the block is the concatenated (key, query)
            # pairs of the CHKs from the level below.
            pt_elements = compute_iblock_size_(current_depth, read_offset)
            pt_block = b""
            pt_block = \
                reduce((lambda ba, chk:
                        ba + (chk.key + chk.query)),
                       chks[(current_depth - 1) * CHK_PER_INODE:][:pt_elements],
                       pt_block)
            pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
        assert (len(pt_block) == pt_size)
        assert (pt_size <= DBLOCK_SIZE)
        off = compute_chk_offset_(current_depth, read_offset)
        logging.debug("Encoding data at offset " + repr(read_offset) +
                      " and depth " + repr(current_depth) + " with block "
                      "size " + repr(pt_size) + " and target CHK offset " +
                      repr(current_depth * CHK_PER_INODE))
        # CHK of the block: hash of plaintext keys it, hash of ciphertext
        # queries it.
        pt_hash = sha512_hash(pt_block)
        pt_aes_key = AESKey(pt_hash)
        pt_enc = aes_encrypt(pt_aes_key, pt_block)
        pt_enc_hash = sha512_hash(pt_enc)
        chk = Chk(pt_hash, pt_enc_hash)
        chks[(current_depth * CHK_PER_INODE) + off] = chk
        if (0 == current_depth):
            read_offset += pt_size
            # Climb up when the file is done or the current IBlock is full.
            if (read_offset == size) or \
               (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
                current_depth += 1
        else:
            # Climb further up if this IBlock completed its parent slot,
            # otherwise drop back to the leaves.
            if (CHK_PER_INODE == off) or (read_offset == size):
                current_depth += 1
            else:
                current_depth = 0
def chkuri_from_path(path):
    """Returns the CHK URI of the file at the given path.

    path: the path of the file whose CHK has to be calculated
    Returns the GNUnet FS CHK URI string.
    """
    size = os.path.getsize(path)
    # `with` guarantees the stream is closed even if hashing raises.
    with open(path, "rb") as readin:
        chk = compute_rootchk(readin, size)
    return chk.uri()
361 """Prints help about using this script."""
363 Usage: gnunet-chk.py [options] file
364 Prints the Content Hash Key of given file in GNUNET-style URI.
367 -h, --help : prints this message
if '__main__' == __name__:
    # Parse command-line options; only -h/--help is supported.
    try:
        opts, args = getopt.getopt(sys.argv[1:], "h", ["help"])
    except getopt.GetoptError as err:
        print(err)
        print("Exception occured")
        usage()
        sys.exit(2)
    for option, value in opts:
        if option in ("-h", "--help"):
            usage()
            sys.exit(0)
    # Exactly one positional argument (the file) is required.
    if len(args) != 1:
        print("Incorrect number of arguments passed")
        usage()
        sys.exit(1)
    print(chkuri_from_path(args[0]))