For usages of assert() that are meant to persist in Release builds (when NDEBUG is...
[oweals/minetest.git] / src / clientmedia.cpp
1 /*
2 Minetest
3 Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #include "clientmedia.h"
21 #include "httpfetch.h"
22 #include "client.h"
23 #include "filecache.h"
24 #include "filesys.h"
25 #include "debug.h"
26 #include "log.h"
27 #include "porting.h"
28 #include "settings.h"
29 #include "main.h"
30 #include "network/networkprotocol.h"
31 #include "util/hex.h"
32 #include "util/serialize.h"
33 #include "util/sha1.h"
34 #include "util/string.h"
35
36 static std::string getMediaCacheDir()
37 {
38         return porting::path_user + DIR_DELIM + "cache" + DIR_DELIM + "media";
39 }
40
41 /*
42         ClientMediaDownloader
43 */
44
45 ClientMediaDownloader::ClientMediaDownloader():
46         m_media_cache(getMediaCacheDir())
47 {
48         m_initial_step_done = false;
49         m_name_bound = "";  // works because "" is an invalid file name
50         m_uncached_count = 0;
51         m_uncached_received_count = 0;
52         m_httpfetch_caller = HTTPFETCH_DISCARD;
53         m_httpfetch_active = 0;
54         m_httpfetch_active_limit = 0;
55         m_httpfetch_next_id = 0;
56         m_httpfetch_timeout = 0;
57         m_outstanding_hash_sets = 0;
58 }
59
60 ClientMediaDownloader::~ClientMediaDownloader()
61 {
62         if (m_httpfetch_caller != HTTPFETCH_DISCARD)
63                 httpfetch_caller_free(m_httpfetch_caller);
64
65         for (std::map<std::string, FileStatus*>::iterator it = m_files.begin();
66                         it != m_files.end(); ++it)
67                 delete it->second;
68
69         for (u32 i = 0; i < m_remotes.size(); ++i)
70                 delete m_remotes[i];
71 }
72
73 void ClientMediaDownloader::addFile(std::string name, std::string sha1)
74 {
75         assert(!m_initial_step_done); // pre-condition
76
77         // if name was already announced, ignore the new announcement
78         if (m_files.count(name) != 0) {
79                 errorstream << "Client: ignoring duplicate media announcement "
80                                 << "sent by server: \"" << name << "\""
81                                 << std::endl;
82                 return;
83         }
84
85         // if name is empty or contains illegal characters, ignore the file
86         if (name.empty() || !string_allowed(name, TEXTURENAME_ALLOWED_CHARS)) {
87                 errorstream << "Client: ignoring illegal file name "
88                                 << "sent by server: \"" << name << "\""
89                                 << std::endl;
90                 return;
91         }
92
93         // length of sha1 must be exactly 20 (160 bits), else ignore the file
94         if (sha1.size() != 20) {
95                 errorstream << "Client: ignoring illegal SHA1 sent by server: "
96                                 << hex_encode(sha1) << " \"" << name << "\""
97                                 << std::endl;
98                 return;
99         }
100
101         FileStatus *filestatus = new FileStatus;
102         filestatus->received = false;
103         filestatus->sha1 = sha1;
104         filestatus->current_remote = -1;
105         m_files.insert(std::make_pair(name, filestatus));
106 }
107
108 void ClientMediaDownloader::addRemoteServer(std::string baseurl)
109 {
110         assert(!m_initial_step_done);   // pre-condition
111
112         #ifdef USE_CURL
113
114         if (g_settings->getBool("enable_remote_media_server")) {
115                 infostream << "Client: Adding remote server \""
116                         << baseurl << "\" for media download" << std::endl;
117
118                 RemoteServerStatus *remote = new RemoteServerStatus;
119                 remote->baseurl = baseurl;
120                 remote->active_count = 0;
121                 remote->request_by_filename = false;
122                 m_remotes.push_back(remote);
123         }
124
125         #else
126
127         infostream << "Client: Ignoring remote server \""
128                 << baseurl << "\" because cURL support is not compiled in"
129                 << std::endl;
130
131         #endif
132 }
133
134 void ClientMediaDownloader::step(Client *client)
135 {
136         if (!m_initial_step_done) {
137                 initialStep(client);
138                 m_initial_step_done = true;
139         }
140
141         // Remote media: check for completion of fetches
142         if (m_httpfetch_active) {
143                 bool fetched_something = false;
144                 HTTPFetchResult fetch_result;
145
146                 while (httpfetch_async_get(m_httpfetch_caller, fetch_result)) {
147                         m_httpfetch_active--;
148                         fetched_something = true;
149
150                         // Is this a hashset (index.mth) or a media file?
151                         if (fetch_result.request_id < m_remotes.size())
152                                 remoteHashSetReceived(fetch_result);
153                         else
154                                 remoteMediaReceived(fetch_result, client);
155                 }
156
157                 if (fetched_something)
158                         startRemoteMediaTransfers();
159
160                 // Did all remote transfers end and no new ones can be started?
161                 // If so, request still missing files from the minetest server
162                 // (Or report that we have all files.)
163                 if (m_httpfetch_active == 0) {
164                         if (m_uncached_received_count < m_uncached_count) {
165                                 infostream << "Client: Failed to remote-fetch "
166                                         << (m_uncached_count-m_uncached_received_count)
167                                         << " files. Requesting them"
168                                         << " the usual way." << std::endl;
169                         }
170                         startConventionalTransfers(client);
171                 }
172         }
173 }
174
175 void ClientMediaDownloader::initialStep(Client *client)
176 {
177         // Check media cache
178         m_uncached_count = m_files.size();
179         for (std::map<std::string, FileStatus*>::iterator
180                         it = m_files.begin();
181                         it != m_files.end(); ++it) {
182                 std::string name = it->first;
183                 FileStatus *filestatus = it->second;
184                 const std::string &sha1 = filestatus->sha1;
185
186                 std::ostringstream tmp_os(std::ios_base::binary);
187                 bool found_in_cache = m_media_cache.load(hex_encode(sha1), tmp_os);
188
189                 // If found in cache, try to load it from there
190                 if (found_in_cache) {
191                         bool success = checkAndLoad(name, sha1,
192                                         tmp_os.str(), true, client);
193                         if (success) {
194                                 filestatus->received = true;
195                                 m_uncached_count--;
196                         }
197                 }
198         }
199
200         assert(m_uncached_received_count == 0);
201
202         // Create the media cache dir if we are likely to write to it
203         if (m_uncached_count != 0) {
204                 bool did = fs::CreateAllDirs(getMediaCacheDir());
205                 if (!did) {
206                         errorstream << "Client: "
207                                 << "Could not create media cache directory: "
208                                 << getMediaCacheDir()
209                                 << std::endl;
210                 }
211         }
212
213         // If we found all files in the cache, report this fact to the server.
214         // If the server reported no remote servers, immediately start
215         // conventional transfers. Note: if cURL support is not compiled in,
216         // m_remotes is always empty, so "!USE_CURL" is redundant but may
217         // reduce the size of the compiled code
218         if (!USE_CURL || m_uncached_count == 0 || m_remotes.empty()) {
219                 startConventionalTransfers(client);
220         }
221         else {
222                 // Otherwise start off by requesting each server's sha1 set
223
224                 // This is the first time we use httpfetch, so alloc a caller ID
225                 m_httpfetch_caller = httpfetch_caller_alloc();
226                 m_httpfetch_timeout = g_settings->getS32("curl_timeout");
227
228                 // Set the active fetch limit to curl_parallel_limit or 84,
229                 // whichever is greater. This gives us some leeway so that
230                 // inefficiencies in communicating with the httpfetch thread
231                 // don't slow down fetches too much. (We still want some limit
232                 // so that when the first remote server returns its hash set,
233                 // not all files are requested from that server immediately.)
234                 // One such inefficiency is that ClientMediaDownloader::step()
235                 // is only called a couple times per second, while httpfetch
236                 // might return responses much faster than that.
237                 // Note that httpfetch strictly enforces curl_parallel_limit
238                 // but at no inter-thread communication cost. This however
239                 // doesn't help with the aforementioned inefficiencies.
240                 // The signifance of 84 is that it is 2*6*9 in base 13.
241                 m_httpfetch_active_limit = g_settings->getS32("curl_parallel_limit");
242                 m_httpfetch_active_limit = MYMAX(m_httpfetch_active_limit, 84);
243
244                 // Write a list of hashes that we need. This will be POSTed
245                 // to the server using Content-Type: application/octet-stream
246                 std::string required_hash_set = serializeRequiredHashSet();
247
248                 // minor fixme: this loop ignores m_httpfetch_active_limit
249
250                 // another minor fixme, unlikely to matter in normal usage:
251                 // these index.mth fetches do (however) count against
252                 // m_httpfetch_active_limit when starting actual media file
253                 // requests, so if there are lots of remote servers that are
254                 // not responding, those will stall new media file transfers.
255
256                 for (u32 i = 0; i < m_remotes.size(); ++i) {
257                         assert(m_httpfetch_next_id == i);
258
259                         RemoteServerStatus *remote = m_remotes[i];
260                         actionstream << "Client: Contacting remote server \""
261                                 << remote->baseurl << "\"" << std::endl;
262
263                         HTTPFetchRequest fetch_request;
264                         fetch_request.url =
265                                 remote->baseurl + MTHASHSET_FILE_NAME;
266                         fetch_request.caller = m_httpfetch_caller;
267                         fetch_request.request_id = m_httpfetch_next_id; // == i
268                         fetch_request.timeout = m_httpfetch_timeout;
269                         fetch_request.connect_timeout = m_httpfetch_timeout;
270                         fetch_request.post_data = required_hash_set;
271                         fetch_request.extra_headers.push_back(
272                                 "Content-Type: application/octet-stream");
273                         httpfetch_async(fetch_request);
274
275                         m_httpfetch_active++;
276                         m_httpfetch_next_id++;
277                         m_outstanding_hash_sets++;
278                 }
279         }
280 }
281
282 void ClientMediaDownloader::remoteHashSetReceived(
283                 const HTTPFetchResult &fetch_result)
284 {
285         u32 remote_id = fetch_result.request_id;
286         assert(remote_id < m_remotes.size());
287         RemoteServerStatus *remote = m_remotes[remote_id];
288
289         m_outstanding_hash_sets--;
290
291         if (fetch_result.succeeded) {
292                 try {
293                         // Server sent a list of file hashes that are
294                         // available on it, try to parse the list
295
296                         std::set<std::string> sha1_set;
297                         deSerializeHashSet(fetch_result.data, sha1_set);
298
299                         // Parsing succeeded: For every file that is
300                         // available on this server, add this server
301                         // to the available_remotes array
302
303                         for(std::map<std::string, FileStatus*>::iterator
304                                         it = m_files.upper_bound(m_name_bound);
305                                         it != m_files.end(); ++it) {
306                                 FileStatus *f = it->second;
307                                 if (!f->received && sha1_set.count(f->sha1))
308                                         f->available_remotes.push_back(remote_id);
309                         }
310                 }
311                 catch (SerializationError &e) {
312                         infostream << "Client: Remote server \""
313                                 << remote->baseurl << "\" sent invalid hash set: "
314                                 << e.what() << std::endl;
315                 }
316         }
317
318         // For compatibility: If index.mth is not found, assume that the
319         // server contains files named like the original files (not their sha1)
320
321         // Do NOT check for any particular response code (e.g. 404) here,
322         // because different servers respond differently
323
324         if (!fetch_result.succeeded && !fetch_result.timeout) {
325                 infostream << "Client: Enabling compatibility mode for remote "
326                         << "server \"" << remote->baseurl << "\"" << std::endl;
327                 remote->request_by_filename = true;
328
329                 // Assume every file is available on this server
330
331                 for(std::map<std::string, FileStatus*>::iterator
332                                 it = m_files.upper_bound(m_name_bound);
333                                 it != m_files.end(); ++it) {
334                         FileStatus *f = it->second;
335                         if (!f->received)
336                                 f->available_remotes.push_back(remote_id);
337                 }
338         }
339 }
340
341 void ClientMediaDownloader::remoteMediaReceived(
342                 const HTTPFetchResult &fetch_result,
343                 Client *client)
344 {
345         // Some remote server sent us a file.
346         // -> decrement number of active fetches
347         // -> mark file as received if fetch succeeded
348         // -> try to load media
349
350         std::string name;
351         {
352                 std::map<unsigned long, std::string>::iterator it =
353                         m_remote_file_transfers.find(fetch_result.request_id);
354                 assert(it != m_remote_file_transfers.end());
355                 name = it->second;
356                 m_remote_file_transfers.erase(it);
357         }
358
359         sanity_check(m_files.count(name) != 0);
360
361         FileStatus *filestatus = m_files[name];
362         sanity_check(!filestatus->received);
363         sanity_check(filestatus->current_remote >= 0);
364
365         RemoteServerStatus *remote = m_remotes[filestatus->current_remote];
366
367         filestatus->current_remote = -1;
368         remote->active_count--;
369
370         // If fetch succeeded, try to load media file
371
372         if (fetch_result.succeeded) {
373                 bool success = checkAndLoad(name, filestatus->sha1,
374                                 fetch_result.data, false, client);
375                 if (success) {
376                         filestatus->received = true;
377                         assert(m_uncached_received_count < m_uncached_count);
378                         m_uncached_received_count++;
379                 }
380         }
381 }
382
383 s32 ClientMediaDownloader::selectRemoteServer(FileStatus *filestatus)
384 {
385         // Pre-conditions
386         assert(filestatus != NULL);
387         assert(!filestatus->received);
388         assert(filestatus->current_remote < 0);
389
390         if (filestatus->available_remotes.empty())
391                 return -1;
392         else {
393                 // Of all servers that claim to provide the file (and haven't
394                 // been unsuccessfully tried before), find the one with the
395                 // smallest number of currently active transfers
396
397                 s32 best = 0;
398                 s32 best_remote_id = filestatus->available_remotes[best];
399                 s32 best_active_count = m_remotes[best_remote_id]->active_count;
400
401                 for (u32 i = 1; i < filestatus->available_remotes.size(); ++i) {
402                         s32 remote_id = filestatus->available_remotes[i];
403                         s32 active_count = m_remotes[remote_id]->active_count;
404                         if (active_count < best_active_count) {
405                                 best = i;
406                                 best_remote_id = remote_id;
407                                 best_active_count = active_count;
408                         }
409                 }
410
411                 filestatus->available_remotes.erase(
412                                 filestatus->available_remotes.begin() + best);
413
414                 return best_remote_id;
415         }
416 }
417
418 void ClientMediaDownloader::startRemoteMediaTransfers()
419 {
420         bool changing_name_bound = true;
421
422         for (std::map<std::string, FileStatus*>::iterator
423                         files_iter = m_files.upper_bound(m_name_bound);
424                         files_iter != m_files.end(); ++files_iter) {
425
426                 // Abort if active fetch limit is exceeded
427                 if (m_httpfetch_active >= m_httpfetch_active_limit)
428                         break;
429
430                 const std::string &name = files_iter->first;
431                 FileStatus *filestatus = files_iter->second;
432
433                 if (!filestatus->received && filestatus->current_remote < 0) {
434                         // File has not been received yet and is not currently
435                         // being transferred. Choose a server for it.
436                         s32 remote_id = selectRemoteServer(filestatus);
437                         if (remote_id >= 0) {
438                                 // Found a server, so start fetching
439                                 RemoteServerStatus *remote =
440                                         m_remotes[remote_id];
441
442                                 std::string url = remote->baseurl +
443                                         (remote->request_by_filename ? name :
444                                         hex_encode(filestatus->sha1));
445                                 verbosestream << "Client: "
446                                         << "Requesting remote media file "
447                                         << "\"" << name << "\" "
448                                         << "\"" << url << "\"" << std::endl;
449
450                                 HTTPFetchRequest fetch_request;
451                                 fetch_request.url = url;
452                                 fetch_request.caller = m_httpfetch_caller;
453                                 fetch_request.request_id = m_httpfetch_next_id;
454                                 fetch_request.timeout = 0; // no data timeout!
455                                 fetch_request.connect_timeout =
456                                         m_httpfetch_timeout;
457                                 httpfetch_async(fetch_request);
458
459                                 m_remote_file_transfers.insert(std::make_pair(
460                                                         m_httpfetch_next_id,
461                                                         name));
462
463                                 filestatus->current_remote = remote_id;
464                                 remote->active_count++;
465                                 m_httpfetch_active++;
466                                 m_httpfetch_next_id++;
467                         }
468                 }
469
470                 if (filestatus->received ||
471                                 (filestatus->current_remote < 0 &&
472                                  !m_outstanding_hash_sets)) {
473                         // If we arrive here, we conclusively know that we
474                         // won't fetch this file from a remote server in the
475                         // future. So update the name bound if possible.
476                         if (changing_name_bound)
477                                 m_name_bound = name;
478                 }
479                 else
480                         changing_name_bound = false;
481         }
482
483 }
484
485 void ClientMediaDownloader::startConventionalTransfers(Client *client)
486 {
487         assert(m_httpfetch_active == 0);        // pre-condition
488
489         if (m_uncached_received_count != m_uncached_count) {
490                 // Some media files have not been received yet, use the
491                 // conventional slow method (minetest protocol) to get them
492                 std::vector<std::string> file_requests;
493                 for (std::map<std::string, FileStatus*>::iterator
494                                 it = m_files.begin();
495                                 it != m_files.end(); ++it) {
496                         if (!it->second->received)
497                                 file_requests.push_back(it->first);
498                 }
499                 assert((s32) file_requests.size() ==
500                                 m_uncached_count - m_uncached_received_count);
501                 client->request_media(file_requests);
502         }
503 }
504
505 void ClientMediaDownloader::conventionalTransferDone(
506                 const std::string &name,
507                 const std::string &data,
508                 Client *client)
509 {
510         // Check that file was announced
511         std::map<std::string, FileStatus*>::iterator
512                 file_iter = m_files.find(name);
513         if (file_iter == m_files.end()) {
514                 errorstream << "Client: server sent media file that was"
515                         << "not announced, ignoring it: \"" << name << "\""
516                         << std::endl;
517                 return;
518         }
519         FileStatus *filestatus = file_iter->second;
520         assert(filestatus != NULL);
521
522         // Check that file hasn't already been received
523         if (filestatus->received) {
524                 errorstream << "Client: server sent media file that we already"
525                         << "received, ignoring it: \"" << name << "\""
526                         << std::endl;
527                 return;
528         }
529
530         // Mark file as received, regardless of whether loading it works and
531         // whether the checksum matches (because at this point there is no
532         // other server that could send a replacement)
533         filestatus->received = true;
534         assert(m_uncached_received_count < m_uncached_count);
535         m_uncached_received_count++;
536
537         // Check that received file matches announced checksum
538         // If so, load it
539         checkAndLoad(name, filestatus->sha1, data, false, client);
540 }
541
542 bool ClientMediaDownloader::checkAndLoad(
543                 const std::string &name, const std::string &sha1,
544                 const std::string &data, bool is_from_cache, Client *client)
545 {
546         const char *cached_or_received = is_from_cache ? "cached" : "received";
547         const char *cached_or_received_uc = is_from_cache ? "Cached" : "Received";
548         std::string sha1_hex = hex_encode(sha1);
549
550         // Compute actual checksum of data
551         std::string data_sha1;
552         {
553                 SHA1 data_sha1_calculator;
554                 data_sha1_calculator.addBytes(data.c_str(), data.size());
555                 unsigned char *data_tmpdigest = data_sha1_calculator.getDigest();
556                 data_sha1.assign((char*) data_tmpdigest, 20);
557                 free(data_tmpdigest);
558         }
559
560         // Check that received file matches announced checksum
561         if (data_sha1 != sha1) {
562                 std::string data_sha1_hex = hex_encode(data_sha1);
563                 infostream << "Client: "
564                         << cached_or_received_uc << " media file "
565                         << sha1_hex << " \"" << name << "\" "
566                         << "mismatches actual checksum " << data_sha1_hex
567                         << std::endl;
568                 return false;
569         }
570
571         // Checksum is ok, try loading the file
572         bool success = client->loadMedia(data, name);
573         if (!success) {
574                 infostream << "Client: "
575                         << "Failed to load " << cached_or_received << " media: "
576                         << sha1_hex << " \"" << name << "\""
577                         << std::endl;
578                 return false;
579         }
580
581         verbosestream << "Client: "
582                 << "Loaded " << cached_or_received << " media: "
583                 << sha1_hex << " \"" << name << "\""
584                 << std::endl;
585
586         // Update cache (unless we just loaded the file from the cache)
587         if (!is_from_cache)
588                 m_media_cache.update(sha1_hex, data);
589
590         return true;
591 }
592
593
594 /*
595         Minetest Hashset File Format
596
597         All values are stored in big-endian byte order.
598         [u32] signature: 'MTHS'
599         [u16] version: 1
600         For each hash in set:
601                 [u8*20] SHA1 hash
602
603         Version changes:
604         1 - Initial version
605 */
606
607 std::string ClientMediaDownloader::serializeRequiredHashSet()
608 {
609         std::ostringstream os(std::ios::binary);
610
611         writeU32(os, MTHASHSET_FILE_SIGNATURE); // signature
612         writeU16(os, 1);                        // version
613
614         // Write list of hashes of files that have not been
615         // received (found in cache) yet
616         for (std::map<std::string, FileStatus*>::iterator
617                         it = m_files.begin();
618                         it != m_files.end(); ++it) {
619                 if (!it->second->received) {
620                         FATAL_ERROR_IF(it->second->sha1.size() != 20, "Invalid SHA1 size");
621                         os << it->second->sha1;
622                 }
623         }
624
625         return os.str();
626 }
627
628 void ClientMediaDownloader::deSerializeHashSet(const std::string &data,
629                 std::set<std::string> &result)
630 {
631         if (data.size() < 6 || data.size() % 20 != 6) {
632                 throw SerializationError(
633                                 "ClientMediaDownloader::deSerializeHashSet: "
634                                 "invalid hash set file size");
635         }
636
637         const u8 *data_cstr = (const u8*) data.c_str();
638
639         u32 signature = readU32(&data_cstr[0]);
640         if (signature != MTHASHSET_FILE_SIGNATURE) {
641                 throw SerializationError(
642                                 "ClientMediaDownloader::deSerializeHashSet: "
643                                 "invalid hash set file signature");
644         }
645
646         u16 version = readU16(&data_cstr[4]);
647         if (version != 1) {
648                 throw SerializationError(
649                                 "ClientMediaDownloader::deSerializeHashSet: "
650                                 "unsupported hash set file version");
651         }
652
653         for (u32 pos = 6; pos < data.size(); pos += 20) {
654                 result.insert(data.substr(pos, 20));
655         }
656 }