3 Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 #include "clientmedia.h"
21 #include "httpfetch.h"
23 #include "clientserver.h"
24 #include "filecache.h"
32 #include "util/serialize.h"
33 #include "util/string.h"
35 static std::string getMediaCacheDir()
37 return porting::path_user + DIR_DELIM + "cache" + DIR_DELIM + "media";
44 ClientMediaDownloader::ClientMediaDownloader():
45 m_media_cache(getMediaCacheDir())
47 m_initial_step_done = false;
48 m_name_bound = ""; // works because "" is an invalid file name
50 m_uncached_received_count = 0;
51 m_httpfetch_caller = HTTPFETCH_DISCARD;
52 m_httpfetch_active = 0;
53 m_httpfetch_active_limit = 0;
54 m_httpfetch_next_id = 0;
55 m_httpfetch_timeout = 0;
56 m_outstanding_hash_sets = 0;
59 ClientMediaDownloader::~ClientMediaDownloader()
61 if (m_httpfetch_caller != HTTPFETCH_DISCARD)
62 httpfetch_caller_free(m_httpfetch_caller);
64 for (std::map<std::string, FileStatus*>::iterator it = m_files.begin();
65 it != m_files.end(); ++it)
68 for (u32 i = 0; i < m_remotes.size(); ++i)
72 void ClientMediaDownloader::addFile(std::string name, std::string sha1)
74 assert(!m_initial_step_done);
76 // if name was already announced, ignore the new announcement
77 if (m_files.count(name) != 0) {
78 errorstream << "Client: ignoring duplicate media announcement "
79 << "sent by server: \"" << name << "\""
84 // if name is empty or contains illegal characters, ignore the file
85 if (name.empty() || !string_allowed(name, TEXTURENAME_ALLOWED_CHARS)) {
86 errorstream << "Client: ignoring illegal file name "
87 << "sent by server: \"" << name << "\""
92 // length of sha1 must be exactly 20 (160 bits), else ignore the file
93 if (sha1.size() != 20) {
94 errorstream << "Client: ignoring illegal SHA1 sent by server: "
95 << hex_encode(sha1) << " \"" << name << "\""
100 FileStatus *filestatus = new FileStatus;
101 filestatus->received = false;
102 filestatus->sha1 = sha1;
103 filestatus->current_remote = -1;
104 m_files.insert(std::make_pair(name, filestatus));
// Registers a remote HTTP media server with base URL `baseurl`.
// Must be called before the first step() (asserted below).
// baseurl is later concatenated directly with the requested file name
// or hash, so it presumably ends in a path separator -- confirm with callers.
107 void ClientMediaDownloader::addRemoteServer(std::string baseurl)
109 assert(!m_initial_step_done);
// Remote servers are only recorded if the user setting allows them
113 if (g_settings->getBool("enable_remote_media_server")) {
114 infostream << "Client: Adding remote server \""
115 << baseurl << "\" for media download" << std::endl;
// A new server starts with no active transfers and in hash-addressed
// mode; remoteHashSetReceived() may switch request_by_filename on.
117 RemoteServerStatus *remote = new RemoteServerStatus;
118 remote->baseurl = baseurl;
119 remote->active_count = 0;
120 remote->request_by_filename = false;
121 m_remotes.push_back(remote);
// NOTE(review): the lines separating the branch above from the message
// below are not visible in this view. Judging by the message text this is
// the path taken when cURL support is compiled out -- confirm the guard.
126 infostream << "Client: Ignoring remote server \""
127 << baseurl << "\" because cURL support is not compiled in"
133 void ClientMediaDownloader::step(Client *client)
135 if (!m_initial_step_done) {
137 m_initial_step_done = true;
140 // Remote media: check for completion of fetches
141 if (m_httpfetch_active) {
142 bool fetched_something = false;
143 HTTPFetchResult fetchresult;
145 while (httpfetch_async_get(m_httpfetch_caller, fetchresult)) {
146 m_httpfetch_active--;
147 fetched_something = true;
149 // Is this a hashset (index.mth) or a media file?
150 if (fetchresult.request_id < m_remotes.size())
151 remoteHashSetReceived(fetchresult);
153 remoteMediaReceived(fetchresult, client);
156 if (fetched_something)
157 startRemoteMediaTransfers();
159 // Did all remote transfers end and no new ones can be started?
160 // If so, request still missing files from the minetest server
161 // (Or report that we have all files.)
162 if (m_httpfetch_active == 0) {
163 if (m_uncached_received_count < m_uncached_count) {
164 infostream << "Client: Failed to remote-fetch "
165 << (m_uncached_count-m_uncached_received_count)
166 << " files. Requesting them"
167 << " the usual way." << std::endl;
169 startConventionalTransfers(client);
// One-time setup performed before any transfer is started: looks every
// announced file up in the local media cache, makes sure the cache
// directory exists if files are still missing, and then either starts
// conventional transfers or requests each remote server's hash set.
// NOTE(review): presumably invoked once from step() under the
// m_initial_step_done guard; the call itself is not visible in this view.
174 void ClientMediaDownloader::initialStep(Client *client)
// Start by counting every announced file as uncached
177 m_uncached_count = m_files.size();
178 for (std::map<std::string, FileStatus*>::iterator
179 it = m_files.begin();
180 it != m_files.end(); ++it) {
181 std::string name = it->first;
182 FileStatus *filestatus = it->second;
183 const std::string &sha1 = filestatus->sha1;
// Cache entries are keyed by the hex-encoded SHA1; the cached bytes
// are read into tmp_os
185 std::ostringstream tmp_os(std::ios_base::binary);
186 bool found_in_cache = m_media_cache.load(hex_encode(sha1), tmp_os);
188 // If found in cache, try to load it from there
189 if (found_in_cache) {
190 bool success = checkAndLoad(name, sha1,
191 tmp_os.str(), true, client);
// NOTE(review): how `success` gates the line below (and whether
// m_uncached_count is adjusted here) is not visible in this view.
193 filestatus->received = true;
// Nothing can have been received over the network yet
199 assert(m_uncached_received_count == 0);
201 // Create the media cache dir if we are likely to write to it
202 if (m_uncached_count != 0) {
203 bool did = fs::CreateAllDirs(getMediaCacheDir());
// NOTE(review): presumably the error below is only logged when `did`
// is false; the condition is not visible in this view.
205 errorstream << "Client: "
206 << "Could not create media cache directory: "
207 << getMediaCacheDir()
212 // If we found all files in the cache, report this fact to the server.
213 // If the server reported no remote servers, immediately start
214 // conventional transfers. Note: if cURL support is not compiled in,
215 // m_remotes is always empty, so "!USE_CURL" is redundant but may
216 // reduce the size of the compiled code
217 if (!USE_CURL || m_uncached_count == 0 || m_remotes.empty()) {
218 startConventionalTransfers(client);
221 // Otherwise start off by requesting each server's sha1 set
223 // This is the first time we use httpfetch, so alloc a caller ID
224 m_httpfetch_caller = httpfetch_caller_alloc();
225 m_httpfetch_timeout = g_settings->getS32("curl_timeout");
227 // Set the active fetch limit to curl_parallel_limit or 84,
228 // whichever is greater. This gives us some leeway so that
229 // inefficiencies in communicating with the httpfetch thread
230 // don't slow down fetches too much. (We still want some limit
231 // so that when the first remote server returns its hash set,
232 // not all files are requested from that server immediately.)
233 // One such inefficiency is that ClientMediaDownloader::step()
234 // is only called a couple times per second, while httpfetch
235 // might return responses much faster than that.
236 // Note that httpfetch strictly enforces curl_parallel_limit
237 // but at no inter-thread communication cost. This however
238 // doesn't help with the aforementioned inefficiencies.
239 // The significance of 84 is that it is 2*6*9 in base 13.
240 m_httpfetch_active_limit = g_settings->getS32("curl_parallel_limit");
241 m_httpfetch_active_limit = MYMAX(m_httpfetch_active_limit, 84);
243 // Write a list of hashes that we need. This will be POSTed
244 // to the server using Content-Type: application/octet-stream
245 std::string required_hash_set = serializeRequiredHashSet();
247 // minor fixme: this loop ignores m_httpfetch_active_limit
249 // another minor fixme, unlikely to matter in normal usage:
250 // these index.mth fetches do (however) count against
251 // m_httpfetch_active_limit when starting actual media file
252 // requests, so if there are lots of remote servers that are
253 // not responding, those will stall new media file transfers.
// One hash set request per remote; the request id equals the remote's
// index in m_remotes, which is how step() tells these replies apart
// from media file replies.
255 for (u32 i = 0; i < m_remotes.size(); ++i) {
256 assert(m_httpfetch_next_id == i);
258 RemoteServerStatus *remote = m_remotes[i];
259 actionstream << "Client: Contacting remote server \""
260 << remote->baseurl << "\"" << std::endl;
262 HTTPFetchRequest fetchrequest;
// NOTE(review): the left-hand side of the expression below is not
// visible in this view; presumably it is assigned to fetchrequest.url.
264 remote->baseurl + MTHASHSET_FILE_NAME;
265 fetchrequest.caller = m_httpfetch_caller;
266 fetchrequest.request_id = m_httpfetch_next_id; // == i
// The same configured timeout is used for data and for connecting
267 fetchrequest.timeout = m_httpfetch_timeout;
268 fetchrequest.connect_timeout = m_httpfetch_timeout;
269 fetchrequest.post_fields = required_hash_set;
270 fetchrequest.extra_headers.push_back(
271 "Content-Type: application/octet-stream");
272 httpfetch_async(fetchrequest);
// Bookkeeping: one more fetch in flight, one more hash set awaited
274 m_httpfetch_active++;
275 m_httpfetch_next_id++;
276 m_outstanding_hash_sets++;
// Handles the reply to a hash set (index.mth) request. The request id of
// such a fetch is the index of the remote server in m_remotes.
// On success the reply is parsed and the server is recorded as a source
// for every still-missing file whose hash it lists. On failure other than
// a timeout the server is switched to compatibility mode (files requested
// by name) and assumed to provide the files.
281 void ClientMediaDownloader::remoteHashSetReceived(
282 const HTTPFetchResult &fetchresult)
284 u32 remote_id = fetchresult.request_id;
285 assert(remote_id < m_remotes.size());
286 RemoteServerStatus *remote = m_remotes[remote_id];
// One fewer hash set reply to wait for
288 m_outstanding_hash_sets--;
290 if (fetchresult.succeeded) {
// NOTE(review): the catch clause further down implies an enclosing try
// block whose opening line is not visible in this view.
292 // Server sent a list of file hashes that are
293 // available on it, try to parse the list
295 std::set<std::string> sha1_set;
296 deSerializeHashSet(fetchresult.data, sha1_set);
298 // Parsing succeeded: For every file that is
299 // available on this server, add this server
300 // to the available_remotes array
// Only files ordered after m_name_bound are considered
302 for(std::map<std::string, FileStatus*>::iterator
303 it = m_files.upper_bound(m_name_bound);
304 it != m_files.end(); ++it) {
305 FileStatus *f = it->second;
306 if (!f->received && sha1_set.count(f->sha1))
307 f->available_remotes.push_back(remote_id);
// A malformed hash set is only logged; the server then simply provides
// no files
310 catch (SerializationError &e) {
311 infostream << "Client: Remote server \""
312 << remote->baseurl << "\" sent invalid hash set: "
313 << e.what() << std::endl;
317 // For compatibility: If index.mth is not found, assume that the
318 // server contains files named like the original files (not their sha1)
320 // Do NOT check for any particular response code (e.g. 404) here,
321 // because different servers respond differently
323 if (!fetchresult.succeeded && !fetchresult.timeout) {
324 infostream << "Client: Enabling compatibility mode for remote "
325 << "server \"" << remote->baseurl << "\"" << std::endl;
326 remote->request_by_filename = true;
328 // Assume every file is available on this server
330 for(std::map<std::string, FileStatus*>::iterator
331 it = m_files.upper_bound(m_name_bound);
332 it != m_files.end(); ++it) {
333 FileStatus *f = it->second;
// NOTE(review): a line between the one above and the one below is not
// visible in this view (possibly a received-check like the one in the
// hash set branch) -- confirm.
335 f->available_remotes.push_back(remote_id);
// Handles the completion of a media file fetch from a remote server:
// releases the transfer slot and, if the fetch succeeded, verifies and
// loads the data via checkAndLoad().
// NOTE(review): the remaining parameter(s) are not visible in this view;
// the body uses `client`, so presumably a trailing `Client *client`.
340 void ClientMediaDownloader::remoteMediaReceived(
341 const HTTPFetchResult &fetchresult,
344 // Some remote server sent us a file.
345 // -> decrement number of active fetches
346 // -> mark file as received if fetch succeeded
347 // -> try to load media
// Look up the transfer record for this request id; it must exist
351 std::map<unsigned long, std::string>::iterator it =
352 m_remote_file_transfers.find(fetchresult.request_id);
353 assert(it != m_remote_file_transfers.end());
// NOTE(review): the declaration/assignment of `name` (used below) is not
// visible in this view; presumably it is taken from it->second before
// the entry is erased.
355 m_remote_file_transfers.erase(it);
358 assert(m_files.count(name) != 0);
360 FileStatus *filestatus = m_files[name];
361 assert(!filestatus->received);
362 assert(filestatus->current_remote >= 0);
364 RemoteServerStatus *remote = m_remotes[filestatus->current_remote];
// The file is no longer being transferred; free the server's slot
366 filestatus->current_remote = -1;
367 remote->active_count--;
369 // If fetch succeeded, try to load media file
371 if (fetchresult.succeeded) {
372 bool success = checkAndLoad(name, filestatus->sha1,
373 fetchresult.data, false, client);
// NOTE(review): how `success` gates the lines below is not visible in
// this view.
375 filestatus->received = true;
376 assert(m_uncached_received_count < m_uncached_count);
377 m_uncached_received_count++;
// Picks the remote server to fetch `filestatus` from: of the servers in
// filestatus->available_remotes, the one with the fewest active
// transfers. The chosen server is removed from available_remotes, and its
// id (index into m_remotes) is returned.
// Preconditions (asserted): the file is not received and not currently
// being transferred.
382 s32 ClientMediaDownloader::selectRemoteServer(FileStatus *filestatus)
384 assert(filestatus != NULL);
385 assert(!filestatus->received);
386 assert(filestatus->current_remote < 0);
// NOTE(review): the statement executed when no remote is available is not
// visible in this view; the caller tests the result with `>= 0`, so
// presumably a negative value is returned.
388 if (filestatus->available_remotes.empty())
391 // Of all servers that claim to provide the file (and haven't
392 // been unsuccessfully tried before), find the one with the
393 // smallest number of currently active transfers
// NOTE(review): the declaration of `best` (index into available_remotes)
// is not visible in this view; the scan below starts at 1, so presumably
// it starts at 0.
396 s32 best_remote_id = filestatus->available_remotes[best];
397 s32 best_active_count = m_remotes[best_remote_id]->active_count;
399 for (u32 i = 1; i < filestatus->available_remotes.size(); ++i) {
400 s32 remote_id = filestatus->available_remotes[i];
401 s32 active_count = m_remotes[remote_id]->active_count;
// Strict comparison: on ties the earlier candidate is kept
402 if (active_count < best_active_count) {
// NOTE(review): the update of `best` itself is not visible here.
404 best_remote_id = remote_id;
405 best_active_count = active_count;
// Drop the chosen server from the candidate list
409 filestatus->available_remotes.erase(
410 filestatus->available_remotes.begin() + best);
412 return best_remote_id;
// Starts remote fetches for files that are neither received nor being
// transferred, until the active fetch limit is reached. Iteration starts
// after m_name_bound; the bound-update logic at the end concerns files
// that will conclusively never be fetched remotely.
416 void ClientMediaDownloader::startRemoteMediaTransfers()
// True while every file visited so far qualified for a name bound update
418 bool changing_name_bound = true;
420 for (std::map<std::string, FileStatus*>::iterator
421 files_iter = m_files.upper_bound(m_name_bound);
422 files_iter != m_files.end(); ++files_iter) {
424 // Abort if active fetch limit is exceeded
// NOTE(review): the statement leaving the loop is not visible in this view.
425 if (m_httpfetch_active >= m_httpfetch_active_limit)
428 const std::string &name = files_iter->first;
429 FileStatus *filestatus = files_iter->second;
431 if (!filestatus->received && filestatus->current_remote < 0) {
432 // File has not been received yet and is not currently
433 // being transferred. Choose a server for it.
434 s32 remote_id = selectRemoteServer(filestatus);
435 if (remote_id >= 0) {
436 // Found a server, so start fetching
437 RemoteServerStatus *remote =
438 m_remotes[remote_id];
// In compatibility mode the file is addressed by name, otherwise by
// its hex-encoded SHA1
440 std::string url = remote->baseurl +
441 (remote->request_by_filename ? name :
442 hex_encode(filestatus->sha1));
443 verbosestream << "Client: "
444 << "Requesting remote media file "
445 << "\"" << name << "\" "
446 << "\"" << url << "\"" << std::endl;
448 HTTPFetchRequest fetchrequest;
449 fetchrequest.url = url;
450 fetchrequest.caller = m_httpfetch_caller;
451 fetchrequest.request_id = m_httpfetch_next_id;
452 fetchrequest.timeout = 0; // no data timeout!
453 fetchrequest.connect_timeout =
// NOTE(review): the value assigned to connect_timeout is not visible in
// this view.
455 httpfetch_async(fetchrequest);
457 m_remote_file_transfers.insert(std::make_pair(
// NOTE(review): the pair's arguments are not visible in this view;
// remoteMediaReceived() looks entries up by request id.
// Bookkeeping for the transfer just started
461 filestatus->current_remote = remote_id;
462 remote->active_count++;
463 m_httpfetch_active++;
464 m_httpfetch_next_id++;
468 if (filestatus->received ||
469 (filestatus->current_remote < 0 &&
470 !m_outstanding_hash_sets)) {
471 // If we arrive here, we conclusively know that we
472 // won't fetch this file from a remote server in the
473 // future. So update the name bound if possible.
// NOTE(review): the statement guarded by this condition is not visible;
// presumably it advances m_name_bound to this file's name -- confirm.
474 if (changing_name_bound)
// NOTE(review): the branch structure around the line below is not
// visible in this view.
478 changing_name_bound = false;
483 void ClientMediaDownloader::startConventionalTransfers(Client *client)
485 assert(m_httpfetch_active == 0);
487 if (m_uncached_received_count != m_uncached_count) {
488 // Some media files have not been received yet, use the
489 // conventional slow method (minetest protocol) to get them
490 std::list<std::string> file_requests;
491 for (std::map<std::string, FileStatus*>::iterator
492 it = m_files.begin();
493 it != m_files.end(); ++it) {
494 if (!it->second->received)
495 file_requests.push_back(it->first);
497 assert((s32) file_requests.size() ==
498 m_uncached_count - m_uncached_received_count);
499 client->request_media(file_requests);
503 void ClientMediaDownloader::conventionalTransferDone(
504 const std::string &name,
505 const std::string &data,
508 // Check that file was announced
509 std::map<std::string, FileStatus*>::iterator
510 file_iter = m_files.find(name);
511 if (file_iter == m_files.end()) {
512 errorstream << "Client: server sent media file that was"
513 << "not announced, ignoring it: \"" << name << "\""
517 FileStatus *filestatus = file_iter->second;
518 assert(filestatus != NULL);
520 // Check that file hasn't already been received
521 if (filestatus->received) {
522 errorstream << "Client: server sent media file that we already"
523 << "received, ignoring it: \"" << name << "\""
528 // Mark file as received, regardless of whether loading it works and
529 // whether the checksum matches (because at this point there is no
530 // other server that could send a replacement)
531 filestatus->received = true;
532 assert(m_uncached_received_count < m_uncached_count);
533 m_uncached_received_count++;
535 // Check that received file matches announced checksum
537 checkAndLoad(name, filestatus->sha1, data, false, client);
// Verifies that `data` hashes to the announced `sha1` (raw 20-byte
// digest), loads it through client->loadMedia() and updates the media
// cache.
//   name:          media file name, used for loading and in log messages
//   is_from_cache: whether `data` came from the local cache; only affects
//                  log wording here and (per the comment below) whether
//                  the cache is updated
// Returns bool. NOTE(review): the return statements are not visible in
// this view; presumably true only if the file was verified and loaded.
540 bool ClientMediaDownloader::checkAndLoad(
541 const std::string &name, const std::string &sha1,
542 const std::string &data, bool is_from_cache, Client *client)
// Wording for log messages, lower and upper case
544 const char *cached_or_received = is_from_cache ? "cached" : "received";
545 const char *cached_or_received_uc = is_from_cache ? "Cached" : "Received";
546 std::string sha1_hex = hex_encode(sha1);
548 // Compute actual checksum of data
549 std::string data_sha1;
551 SHA1 data_sha1_calculator;
552 data_sha1_calculator.addBytes(data.c_str(), data.size());
// The digest buffer returned by getDigest() is copied (20 bytes) and
// then released with free()
553 unsigned char *data_tmpdigest = data_sha1_calculator.getDigest();
554 data_sha1.assign((char*) data_tmpdigest, 20);
555 free(data_tmpdigest);
558 // Check that received file matches announced checksum
559 if (data_sha1 != sha1) {
560 std::string data_sha1_hex = hex_encode(data_sha1);
561 infostream << "Client: "
562 << cached_or_received_uc << " media file "
563 << sha1_hex << " \"" << name << "\" "
564 << "mismatches actual checksum " << data_sha1_hex
569 // Checksum is ok, try loading the file
570 bool success = client->loadMedia(data, name);
// NOTE(review): the condition separating the failure message from the
// success message below is not visible in this view.
572 infostream << "Client: "
573 << "Failed to load " << cached_or_received << " media: "
574 << sha1_hex << " \"" << name << "\""
579 verbosestream << "Client: "
580 << "Loaded " << cached_or_received << " media: "
581 << sha1_hex << " \"" << name << "\""
584 // Update cache (unless we just loaded the file from the cache)
// NOTE(review): the is_from_cache check implied by the comment above is
// not visible in this view.
586 m_media_cache.update(sha1_hex, data);
// Minetest Hashset File Format
//
// All values are stored in big-endian byte order.
// [u32] signature: 'MTHS'
// [u16] version: 1
// For each hash in set:
//     [u8*20] SHA1 hash
605 std::string ClientMediaDownloader::serializeRequiredHashSet()
607 std::ostringstream os(std::ios::binary);
609 writeU32(os, MTHASHSET_FILE_SIGNATURE); // signature
610 writeU16(os, 1); // version
612 // Write list of hashes of files that have not been
613 // received (found in cache) yet
614 for (std::map<std::string, FileStatus*>::iterator
615 it = m_files.begin();
616 it != m_files.end(); ++it) {
617 if (!it->second->received) {
618 assert(it->second->sha1.size() == 20);
619 os << it->second->sha1;
// Parses a serialized hash set (`data`) and inserts every contained
// 20-byte hash into `result`.
// Throws SerializationError if the size, signature or version is invalid.
// NOTE(review): this definition runs to the last line visible in this
// view, so its end cannot be confirmed from here.
626 void ClientMediaDownloader::deSerializeHashSet(const std::string &data,
627 std::set<std::string> &result)
// Valid sizes are a 6-byte header plus a whole number of 20-byte hashes
629 if (data.size() < 6 || data.size() % 20 != 6) {
630 throw SerializationError(
631 "ClientMediaDownloader::deSerializeHashSet: "
632 "invalid hash set file size");
635 const u8 *data_cstr = (const u8*) data.c_str();
// Header: u32 signature at offset 0, u16 version at offset 4
637 u32 signature = readU32(&data_cstr[0]);
638 if (signature != MTHASHSET_FILE_SIGNATURE) {
639 throw SerializationError(
640 "ClientMediaDownloader::deSerializeHashSet: "
641 "invalid hash set file signature");
644 u16 version = readU16(&data_cstr[4]);
// NOTE(review): the condition guarding the throw below is not visible in
// this view; serializeRequiredHashSet() writes version 1, so presumably
// any other version is rejected -- confirm.
646 throw SerializationError(
647 "ClientMediaDownloader::deSerializeHashSet: "
648 "unsupported hash set file version");
// Hashes follow the header back to back, 20 bytes each
651 for (u32 pos = 6; pos < data.size(); pos += 20) {
652 result.insert(data.substr(pos, 20));