2 # This file is part of GNUnet.
3 # (C) 2013, 2018 Christian Grothoff (and other contributing authors)
5 # GNUnet is free software: you can redistribute it and/or modify it
6 # under the terms of the GNU Affero General Public License as published
7 # by the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # GNUnet is distributed in the hope that it will be useful, but
11 # WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Affero General Public License for more details.
15 # You should have received a copy of the GNU Affero General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 # Brief: Computes GNUNET style Content Hash Key for a given file
20 # Author: Sree Harsha Totakura
from functools import reduce
import getopt
from hashlib import sha512
import logging
import os
import sys

from Crypto.Cipher import AES
DBLOCK_SIZE = (32 * 1024)   # Data block size

# Pick a multiple of 2 here to achieve 8-byte alignment!  We also
# probably want DBlocks to have (roughly) the same size as IBlocks.
# With SHA-512, the optimal value is 32768 byte / 128 byte = 256 (128
# byte = 2 * 512 bits).  DO NOT CHANGE!
CHK_PER_INODE = 256

CHK_HASH_SIZE = 64              # SHA-512 hash = 512 bits = 64 bytes

CHK_QUERY_SIZE = CHK_HASH_SIZE  # Again a SHA-512 hash

GNUNET_FS_URI_PREFIX = "gnunet://fs/"  # FS CHK URI prefix

GNUNET_FS_URI_CHK_INFIX = "chk/"  # FS CHK URI infix
def encode_data_to_string(data):
    """Returns an ASCII encoding of the given data block like
    GNUNET_STRINGS_data_to_string() function.

    The input is consumed as a bit stream, most significant bit first, and
    every group of 5 bits is mapped to one character of a 32-symbol alphabet
    (digits followed by 'A'..'V').  A final partial group is zero-padded.

    data: A bytearray (or bytes) representing the block of data which has to
          be encoded; must not be empty
    Returns the encoded string
    """
    echart = "0123456789ABCDEFGHIJKLMNOPQRSTUV"
    assert data is not None
    assert isinstance(data, (bytes, bytearray))
    data = bytearray(data)  # indexing a bytearray always yields integers
    size = len(data)
    assert 0 != size
    vbit = 0    # number of not yet emitted bits held in `bits`
    rpos = 0    # read position in `data`
    bits = 0    # bit accumulator
    out = []
    while (rpos < size) or (vbit > 0):
        if (rpos < size) and (vbit < 5):
            bits = (bits << 8) | data[rpos]  # eat 8 more bits
            rpos += 1
            vbit += 8
        if vbit < 5:
            # Only reachable once the input is exhausted.
            bits <<= (5 - vbit)  # zero-padding
            assert vbit == ((size * 8) % 5)
            vbit = 5
        out.append(echart[(bits >> (vbit - 5)) & 31])
        vbit -= 5
        bits &= (1 << vbit) - 1  # forget the bits that were just emitted
    assert 0 == vbit
    return "".join(out)
def sha512_hash(data):
    """Returns the sha512 hash of the given data.

    data: the bytes to hash
    Returns the raw (binary) digest
    """
    return sha512(data).digest()
class AESKey:
    """Class for AES Keys. Contains the main key and the initialization
    vector."""

    key = None                  # The actual AES key
    iv = None                   # The initialization vector
    cipher = None               # The cipher object
    KEY_SIZE = 32               # AES 256-bit key = 32 bytes
    IV_SIZE = AES.block_size    # Initialization vector size (= AES block size)

    def __init__(self, passphrase):
        """Creates a new AES key.

        The first KEY_SIZE bytes of the passphrase become the key and the
        following IV_SIZE bytes the initialization vector.  Whatever the
        passphrase cannot fill is left as zero bytes; surplus bytes are
        ignored.

        passphrase: bytes containing the passphrase to get the AES key and
                    initialization vector
        """
        passphrase = bytearray(passphrase)
        key = passphrase[:self.KEY_SIZE].ljust(self.KEY_SIZE, b"\0")
        iv_end = self.KEY_SIZE + self.IV_SIZE
        iv = passphrase[self.KEY_SIZE:iv_end].ljust(self.IV_SIZE, b"\0")
        # bytes(), not str(): on Python 3 str() of a bytearray is its repr,
        # which has the wrong content and the wrong length.
        self.key = bytes(key)
        self.iv = bytes(iv)
        assert (len(self.key) == self.KEY_SIZE)
        assert (len(self.iv) == self.IV_SIZE)
def setup_aes_cipher_(aes_key):
    """Initializes the AES object with settings similar to those in GNUnet.

    The cipher is created in CFB mode with a segment size of 128 bits, using
    the key and initialization vector stored in aes_key.

    aes_key: the AESKey object
    Returns the newly initialized AES object
    """
    return AES.new(aes_key.key, AES.MODE_CFB, aes_key.iv, segment_size=128)
def aes_pad_(data):
    """Adds padding to the data such that the size of the data is a multiple of
    16 bytes.  The padding consists of zero bytes.

    data: the data (bytes) to pad; the caller's object is never modified
    Returns a tuple:(pad_len, data). pad_len denotes the number of bytes added
    as padding; data is the new data with padded bytes at the end
    """
    pad_len = len(data) % 16
    if 0 != pad_len:
        pad_len = 16 - pad_len
        # Build a new object instead of using `+=`, which would extend a
        # bytearray argument in place.
        data = data + b"\0" * pad_len
    return (pad_len, data)
def aes_encrypt(aes_key, data):
    """Encrypts the given data using AES.

    The data is padded to the cipher's block size before encryption and the
    ciphertext is cut back to the original length afterwards, so the result
    is exactly as long as the input.

    aes_key: the AESKey object to use for AES encryption
    data: the data (bytes) to encrypt
    Returns the encrypted data
    """
    (pad_len, padded) = aes_pad_(data)
    cipher = setup_aes_cipher_(aes_key)
    enc_data = cipher.encrypt(padded)
    if 0 != pad_len:
        # A slice ending at -0 would be empty, hence the guard.
        enc_data = enc_data[:-pad_len]
    return enc_data
def aes_decrypt(aes_key, data):
    """Decrypts the given data using AES

    The data is padded to the cipher's block size before decryption and the
    plaintext is cut back to the original length afterwards, so the result
    is exactly as long as the input.

    aes_key: the AESKey object to use for AES decryption
    data: the data (bytes) to decrypt
    Returns the decrypted data
    """
    (pad_len, padded) = aes_pad_(data)
    cipher = setup_aes_cipher_(aes_key)
    ptext = cipher.decrypt(padded)
    if 0 != pad_len:
        # A slice ending at -0 would be empty, hence the guard.
        ptext = ptext[:-pad_len]
    return ptext
class Chk:
    """Class for the content hash key."""

    key = None      # hash of the plaintext block
    query = None    # hash of the encrypted block
    fsize = None    # size recorded through setSize()

    def __init__(self, key, query):
        """Creates a content hash key from its two hashes.

        key: the key hash; must be CHK_HASH_SIZE bytes long
        query: the query hash; must be CHK_QUERY_SIZE bytes long
        """
        assert (len(key) == CHK_HASH_SIZE)
        assert (len(query) == CHK_QUERY_SIZE)
        self.key = key
        self.query = query

    def setSize(self, size):
        """Records the size that is reported in the URI.

        size: the size to record
        """
        self.fsize = size

    def uri(self):
        """Returns the URI string of this content hash key: the FS CHK prefix
        followed by the encoded key, the encoded query and the size, separated
        by dots.
        """
        # str() rather than repr() minus its last character: the latter was
        # only right for Python 2 longs ("123L") and truncates a plain int.
        sizestr = str(self.fsize)
        return GNUNET_FS_URI_PREFIX + GNUNET_FS_URI_CHK_INFIX + \
            encode_data_to_string(bytearray(self.key)) + "." + \
            encode_data_to_string(bytearray(self.query)) + "." + \
            sizestr
def compute_depth_(size):
    """Computes the depth of the hash tree.

    size: the size of the file whose tree's depth has to be computed
    Returns the depth of the tree. Always > 0.
    """
    depth = 1
    fl = DBLOCK_SIZE  # payload covered by a tree of the current depth
    # No overflow check on the multiplication is needed: Python integers
    # have arbitrary precision, so the product can never wrap around.
    while fl < size:
        depth += 1
        fl = fl * CHK_PER_INODE
    return depth
def compute_tree_size_(depth):
    """Calculate how many bytes of payload a block tree of the given depth MAY
    correspond to at most (this function ignores the fact that some blocks will
    only be present partially due to the total file size cutting some blocks
    off at the end).

    depth: depth of the block.  depth==0 is a DBLOCK.
    Returns the number of bytes of payload a subtree of this depth may
    correspond to.
    """
    # Every level multiplies the covered payload by CHK_PER_INODE.  A depth
    # below zero is treated like zero.
    return DBLOCK_SIZE * CHK_PER_INODE ** max(depth, 0)
def compute_chk_offset_(depth, end_offset):
    """Compute the offset of the CHK for the current block in the IBlock
    above.

    depth: depth of the IBlock in the tree (aka overall number of tree levels
           minus depth); 0 == DBLOCK
    end_offset: current offset in the overall file, at the *beginning* of the
                block for DBLOCK (depth == 0), otherwise at the *end* of the
                block (exclusive)
    Returns the offset in the list of CHKs in the above IBlock
    """
    bds = compute_tree_size_(depth)
    if depth > 0:
        # The offset points just past the block; step back into it.
        end_offset -= 1
    # Floor division: the result is used as a list index and must be an int.
    ret = end_offset // bds
    return ret % CHK_PER_INODE
def compute_iblock_size_(depth, offset):
    """Compute the size of the current IBLOCK.  The encoder is triggering the
    calculation of the size of an IBLOCK at the *end* (hence end_offset) of its
    construction.  The IBLOCK maybe a full or a partial IBLOCK, and this
    function is to calculate how long it should be.

    depth: depth of the IBlock in the tree, 0 would be a DBLOCK, must be > 0
           (this function is for IBLOCKs only!)
    offset: current offset in the payload (!) of the overall file, must be > 0
            (since this function is called at the end of a block).
    Returns the number of elements to be in the corresponding IBlock
    """
    assert (depth > 0)
    assert (offset > 0)
    bds = compute_tree_size_(depth)
    mod = offset % bds
    if mod == 0:
        # The offset is a multiple of the payload of a full IBlock.
        return CHK_PER_INODE
    # Payload covered by a single entry of this IBlock.
    bds //= CHK_PER_INODE
    ret = mod // bds
    if (mod % bds) != 0:
        ret += 1  # a partially filled child still needs its own entry
    return ret
def compute_rootchk(readin, size):
    """Returns the content hash key after generating the hash tree for the given
    input stream.

    Data blocks are read from the stream, hashed and encrypted; their CHKs are
    collected level by level into IBlocks, which are processed the same way,
    until the single CHK at the top of the tree is known.

    readin: the stream where to read data from
    size: the size of data to be read
    Returns the root Chk with its size set, or None if reading the stream
    raised an IOError
    """
    depth = compute_depth_(size)
    current_depth = 0
    chks = [None] * (depth * CHK_PER_INODE)  # list buffer
    read_offset = 0
    logging.debug("Begining to calculate tree hash with depth: %r", depth)
    while True:
        if depth == current_depth:
            off = CHK_PER_INODE * (depth - 1)
            assert (chks[off] is not None)
            # The query is binary data, so it is logged via %r instead of
            # being concatenated to a str.
            logging.debug("Encoding done, reading CHK `%r' from %r\n",
                          chks[off].query, off)
            uri_chk = chks[off]
            assert (size == read_offset)
            uri_chk.setSize(size)
            return uri_chk
        if 0 == current_depth:
            pt_size = min(DBLOCK_SIZE, size - read_offset)
            try:
                pt_block = readin.read(pt_size)
            except IOError:
                logging.warning("Error reading input file stream")
                return None
        else:
            # An IBlock is the concatenation of the CHKs (key followed by
            # query) collected on the level below.
            pt_elements = compute_iblock_size_(current_depth, read_offset)
            first = (current_depth - 1) * CHK_PER_INODE
            pt_block = b"".join(chk.key + chk.query
                                for chk in chks[first:first + pt_elements])
            pt_size = pt_elements * (CHK_HASH_SIZE + CHK_QUERY_SIZE)
        assert (len(pt_block) == pt_size)
        assert (pt_size <= DBLOCK_SIZE)
        off = compute_chk_offset_(current_depth, read_offset)
        logging.debug("Encoding data at offset %r and depth %r with block "
                      "size %r and target CHK offset %r",
                      read_offset, current_depth, pt_size,
                      current_depth * CHK_PER_INODE)
        # The key is the hash of the plaintext, the query the hash of the
        # block encrypted with a key derived from that hash.
        pt_hash = sha512_hash(pt_block)
        pt_aes_key = AESKey(pt_hash)
        pt_enc = aes_encrypt(pt_aes_key, pt_block)
        pt_enc_hash = sha512_hash(pt_enc)
        chk = Chk(pt_hash, pt_enc_hash)
        chks[(current_depth * CHK_PER_INODE) + off] = chk
        if 0 == current_depth:
            read_offset += pt_size
            if (read_offset == size) or \
               (0 == (read_offset % (CHK_PER_INODE * DBLOCK_SIZE))):
                current_depth += 1
        else:
            # NOTE(review): `off` comes from a `% CHK_PER_INODE`, so the first
            # comparison looks like it can never hold -- confirm against the
            # reference implementation before changing it.
            if (CHK_PER_INODE == off) or (read_offset == size):
                current_depth += 1
            else:
                current_depth = 0
def chkuri_from_path(path):
    """Returns the CHK URI of the file at the given path.

    path: the path of the file whose CHK has to be calculated
    Raises IOError if the content hash key could not be computed because the
    file could not be read.
    """
    size = os.path.getsize(path)
    # The context manager closes the file even if hashing fails.
    with open(path, "rb") as readin:
        chk = compute_rootchk(readin, size)
    if chk is None:
        raise IOError("Error reading input file `%s'" % path)
    return chk.uri()
def usage():
    """Prints help about using this script."""
    print("""
Usage: gnunet-chk.py [options] file
Prints the Content Hash Key of given file in GNUNET-style URI.

Options:
    -h, --help                : prints this message
""")


def main(argv=None):
    """Parses the command line and prints the CHK URI of the given file.

    argv: the list of command line arguments without the program name;
          defaults to sys.argv[1:]
    Returns the exit status: 0 on success or after printing the help,
    1 if the number of arguments is wrong, 2 if an option is not recognized
    """
    if argv is None:
        argv = sys.argv[1:]
    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError as err:
        print(err)
        print("Exception occured")
        usage()
        return 2
    for option, value in opts:
        if option in ("-h", "--help"):
            usage()
            return 0
    if len(args) != 1:
        print("Incorrect number of arguments passed")
        usage()
        return 1
    print(chkuri_from_path(args[0]))
    return 0


if '__main__' == __name__:
    sys.exit(main())