3 Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 #include "socket.h" // for select()
21 #include "porting.h" // for sleep_ms(), get_sysinfo(), secure_rand_fill_buf()
22 #include "httpfetch.h"
28 #include "threading/event.h"
30 #include "exceptions.h"
33 #include "util/container.h"
34 #include "util/thread.h"
// Guards g_httpfetch_results; the functions in this file take it through
// MutexAutoLock before touching the map.
39 Mutex g_httpfetch_mutex;
// Per-caller queues of delivered fetch results, keyed by caller ID.
40 std::map<unsigned long, std::queue<HTTPFetchResult> > g_httpfetch_results;
// Random source for httpfetch_caller_alloc_secure(); re-seeded in httpfetch_init().
41 PcgRandom g_callerid_randomness;
// Default-constructs a request: the caller is HTTPFETCH_DISCARD, the timeout
// is read from the "curl_timeout" setting, and the user agent string is
// assembled from the project name, the version hash and porting::get_sysinfo().
// NOTE(review): the embedded numbering skips 44, 46 and 49, so further member
// initializers and the constructor body are presumably not visible here --
// confirm against the full file.
43 HTTPFetchRequest::HTTPFetchRequest() :
45 caller(HTTPFETCH_DISCARD),
47 timeout(g_settings->getS32("curl_timeout")),
// Initialized from the member `timeout`; assumes `timeout` is declared before
// `connect_timeout` in the class -- TODO confirm in the header.
48 connect_timeout(timeout),
50 useragent(std::string(PROJECT_NAME_C "/") + g_version_hash + " (" + porting::get_sysinfo() + ")")
55 static void httpfetch_deliver_result(const HTTPFetchResult &fetch_result)
57 unsigned long caller = fetch_result.caller;
58 if (caller != HTTPFETCH_DISCARD) {
59 MutexAutoLock lock(g_httpfetch_mutex);
60 g_httpfetch_results[caller].push(fetch_result);
// Forward declaration: httpfetch_caller_free() calls this before its
// definition appears further down in the file.
64 static void httpfetch_request_clear(unsigned long caller);
66 unsigned long httpfetch_caller_alloc()
68 MutexAutoLock lock(g_httpfetch_mutex);
70 // Check each caller ID except HTTPFETCH_DISCARD
71 const unsigned long discard = HTTPFETCH_DISCARD;
72 for (unsigned long caller = discard + 1; caller != discard; ++caller) {
73 std::map<unsigned long, std::queue<HTTPFetchResult> >::iterator
74 it = g_httpfetch_results.find(caller);
75 if (it == g_httpfetch_results.end()) {
76 verbosestream << "httpfetch_caller_alloc: allocating "
77 << caller << std::endl;
78 // Access element to create it
79 g_httpfetch_results[caller];
84 FATAL_ERROR("httpfetch_caller_alloc: ran out of caller IDs");
88 unsigned long httpfetch_caller_alloc_secure()
90 MutexAutoLock lock(g_httpfetch_mutex);
92 // Generate random caller IDs and make sure they're not
93 // already used or equal to HTTPFETCH_DISCARD
94 // Give up after 100 tries to prevent infinite loop
99 caller = (((u64) g_callerid_randomness.next()) << 32) |
100 g_callerid_randomness.next();
103 FATAL_ERROR("httpfetch_caller_alloc_secure: ran out of caller IDs");
104 return HTTPFETCH_DISCARD;
106 } while (g_httpfetch_results.find(caller) != g_httpfetch_results.end());
108 verbosestream << "httpfetch_caller_alloc_secure: allocating "
109 << caller << std::endl;
111 // Access element to create it
112 g_httpfetch_results[caller];
116 void httpfetch_caller_free(unsigned long caller)
118 verbosestream<<"httpfetch_caller_free: freeing "
121 httpfetch_request_clear(caller);
122 if (caller != HTTPFETCH_DISCARD) {
123 MutexAutoLock lock(g_httpfetch_mutex);
124 g_httpfetch_results.erase(caller);
128 bool httpfetch_async_get(unsigned long caller, HTTPFetchResult &fetch_result)
130 MutexAutoLock lock(g_httpfetch_mutex);
132 // Check that caller exists
133 std::map<unsigned long, std::queue<HTTPFetchResult> >::iterator
134 it = g_httpfetch_results.find(caller);
135 if (it == g_httpfetch_results.end())
138 // Check that result queue is nonempty
139 std::queue<HTTPFetchResult> &caller_results = it->second;
140 if (caller_results.empty())
144 fetch_result = caller_results.front();
145 caller_results.pop();
150 #include <curl/curl.h>
153 USE_CURL is on: use cURL based httpfetch implementation
/*
	cURL write callback that appends received bytes to a std::ostringstream.

	ptr:      start of the received data
	size:     size of one element
	nmemb:    number of elements
	userdata: the std::ostringstream* registered via CURLOPT_WRITEDATA

	Returns the number of bytes consumed (size * nmemb). If the stream is
	in a failed state after the write, 0 is returned so that a non-empty
	chunk is reported to cURL as a write error instead of being lost
	silently.
*/
static size_t httpfetch_writefunction(
		char *ptr, size_t size, size_t nmemb, void *userdata)
{
	std::ostringstream *stream = static_cast<std::ostringstream *>(userdata);
	const size_t count = size * nmemb;
	stream->write(ptr, static_cast<std::streamsize>(count));
	if (!*stream)
		return 0;
	return count;
}
/*
	cURL write callback that throws the received data away.

	None of the arguments are dereferenced. Returns size * nmemb, i.e. it
	reports the whole chunk as consumed so the transfer continues.
*/
static size_t httpfetch_discardfunction(
		char *ptr, size_t size, size_t nmemb, void *userdata)
{
	(void) ptr;
	(void) userdata;
	return size * nmemb;
}
// NOTE(review): the line that opens the enclosing class is not visible in this
// view; the type is presumably CurlHandlePool (the pool type used by
// HTTPFetchOngoing) -- confirm. Several short lines (method headers, braces,
// returns) are also missing, so the notes below cover only what is shown.
// Easy handles currently held by the pool for reuse.
173 std::list<CURL*> handles;
// Cleanup loop: every handle still held by the pool is released with
// curl_easy_cleanup().
179 for (std::list<CURL*>::iterator it = handles.begin();
180 it != handles.end(); ++it) {
181 curl_easy_cleanup(*it);
// Handle acquisition: an empty pool creates a fresh handle with
// curl_easy_init().
187 if (handles.empty()) {
188 curl = curl_easy_init();
// Presumably reached only when curl_easy_init() returned NULL (the condition
// line is not visible) -- the failure is logged here.
190 errorstream<<"curl_easy_init returned NULL"<<std::endl;
// Otherwise the handle at the front of the list is reused.
// NOTE(review): the statement removing it from the list is not visible -- confirm.
194 curl = handles.front();
// Hands a handle back to the pool by appending it to the list.
199 void free(CURL *handle)
202 handles.push_back(handle);
// State of a single fetch: the request, the cURL easy handle that carries it,
// the buffer collecting the response and the result built from it.
// NOTE(review): access specifiers, the destructor declaration and some members
// (e.g. the `curl` handle returned by getEasyHandle()) are not visible in this
// view -- confirm against the full file.
206 class HTTPFetchOngoing
// Takes a handle from `pool` and configures it for `request`.
209 HTTPFetchOngoing(const HTTPFetchRequest &request, CurlHandlePool *pool);
// Starts the transfer; the result code reports whether that worked.
212 CURLcode start(CURLM *multi);
// Fills in the result from the transfer outcome `res`; callers dereference
// the returned pointer right away.
213 const HTTPFetchResult * complete(CURLcode res);
// Accessors for the stored request and the easy handle.
215 const HTTPFetchRequest &getRequest() const { return request; };
216 const CURL *getEasyHandle() const { return curl; };
// Pool the easy handle was allocated from.
219 CurlHandlePool *pool;
// The request being carried out.
222 HTTPFetchRequest request;
// Result object populated by complete().
223 HTTPFetchResult result;
// Receives the response body through httpfetch_writefunction.
224 std::ostringstream oss;
// Extra request headers in cURL's list form; released in the destructor.
225 struct curl_slist *http_header;
// Builds an ongoing fetch: takes an easy handle from the pool and configures it
// from the request (URL, timeouts, user agent, write callback, GET/POST body,
// extra headers, certificate verification).
// NOTE(review): the embedded numbering shows that many short lines (member
// initializers, the NULL-handle guard, else branches, braces, some arguments)
// are not visible in this view; the notes below cover only the lines shown.
// NOTE(review): cURL documents numeric options as taking a `long`; the literal
// 1 / false arguments and the size() results below are passed through varargs
// as int / size_t. Consider 1L-style arguments -- confirm against the
// curl_easy_setopt documentation.
230 HTTPFetchOngoing::HTTPFetchOngoing(const HTTPFetchRequest &request_,
231 CurlHandlePool *pool_):
// The response buffer is opened in binary mode.
237 oss(std::ios::binary),
// Obtain an easy handle from the pool.
241 curl = pool->alloc();
246 // Set static cURL options
247 curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
248 curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
249 curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
250 curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 1);
// A non-empty "bind_address" setting is passed to cURL as the outgoing interface.
252 std::string bind_address = g_settings->get("bind_address");
253 if (!bind_address.empty()) {
254 curl_easy_setopt(curl, CURLOPT_INTERFACE, bind_address.c_str());
257 #if LIBCURL_VERSION_NUM >= 0x071304
258 // Restrict protocols so that curl vulnerabilities in
259 // other protocols don't affect us.
260 // These settings were introduced in curl 7.19.4.
// NOTE(review): the definition of `protocols` is not visible in this view.
266 curl_easy_setopt(curl, CURLOPT_PROTOCOLS, protocols);
267 curl_easy_setopt(curl, CURLOPT_REDIR_PROTOCOLS, protocols);
270 // Set cURL options based on HTTPFetchRequest
271 curl_easy_setopt(curl, CURLOPT_URL,
272 request.url.c_str());
273 curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS,
275 curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT_MS,
276 request.connect_timeout);
// An empty user agent leaves the option unset.
278 if (request.useragent != "")
279 curl_easy_setopt(curl, CURLOPT_USERAGENT, request.useragent.c_str());
281 // Set up a write callback that writes to the
282 // ostringstream ongoing->oss, unless the data
283 // is to be discarded
284 if (request.caller == HTTPFETCH_DISCARD) {
285 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
286 httpfetch_discardfunction);
287 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
289 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
290 httpfetch_writefunction);
291 curl_easy_setopt(curl, CURLOPT_WRITEDATA, &oss);
294 // Set POST (or GET) data
// Four cases: no post data at all -> GET; multipart -> form built with
// curl_formadd; post_fields only -> url-encoded body; otherwise raw post_data.
295 if (request.post_fields.empty() && request.post_data.empty()) {
296 curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
297 } else if (request.multipart) {
298 curl_httppost *last = NULL;
299 for (StringMap::iterator it = request.post_fields.begin();
300 it != request.post_fields.end(); ++it) {
301 curl_formadd(&post, &last,
302 CURLFORM_NAMELENGTH, it->first.size(),
303 CURLFORM_PTRNAME, it->first.c_str(),
304 CURLFORM_CONTENTSLENGTH, it->second.size(),
305 CURLFORM_PTRCONTENTS, it->second.c_str(),
308 curl_easy_setopt(curl, CURLOPT_HTTPPOST, post);
309 // request.post_fields must now *never* be
310 // modified until CURLOPT_HTTPPOST is cleared
311 } else if (request.post_data.empty()) {
312 curl_easy_setopt(curl, CURLOPT_POST, 1);
// Each field name and value is passed through urlencode() and appended to `str`.
// NOTE(review): the declaration of `str` and the separators are not visible here.
314 for (StringMap::iterator it = request.post_fields.begin();
315 it != request.post_fields.end(); ++it) {
318 str += urlencode(it->first);
320 str += urlencode(it->second);
322 curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
324 curl_easy_setopt(curl, CURLOPT_COPYPOSTFIELDS,
327 curl_easy_setopt(curl, CURLOPT_POST, 1);
328 curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
329 request.post_data.size());
330 curl_easy_setopt(curl, CURLOPT_POSTFIELDS,
331 request.post_data.c_str());
332 // request.post_data must now *never* be
333 // modified until CURLOPT_POSTFIELDS is cleared
335 // Set additional HTTP headers
336 for (std::vector<std::string>::iterator it = request.extra_headers.begin();
337 it != request.extra_headers.end(); ++it) {
338 http_header = curl_slist_append(http_header, it->c_str());
340 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_header);
// Peer certificate verification is switched off when "curl_verify_cert" is false.
342 if (!g_settings->getBool("curl_verify_cert")) {
343 curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, false);
// Starts the transfer, either through the easy interface (curl_easy_perform)
// or by adding the handle to the multi handle `multi_`.
// Returns CURLE_FAILED_INIT when the handle cannot be added to the multi
// handle; the easy path returns the curl_easy_perform() result.
// NOTE(review): the conditions guarding the first two returns are not visible
// in this view -- presumably a missing easy handle and a NULL `multi_`
// respectively; the final success return is also missing. Confirm.
347 CURLcode HTTPFetchOngoing::start(CURLM *multi_)
350 return CURLE_FAILED_INIT;
353 // Easy interface (sync)
354 return curl_easy_perform(curl);
357 // Multi interface (async)
358 CURLMcode mres = curl_multi_add_handle(multi_, curl);
359 if (mres != CURLM_OK) {
360 errorstream << "curl_multi_add_handle"
361 << " returned error code " << mres
363 return CURLE_FAILED_INIT;
365 multi = multi_; // store for curl_multi_remove_handle
369 const HTTPFetchResult * HTTPFetchOngoing::complete(CURLcode res)
371 result.succeeded = (res == CURLE_OK);
372 result.timeout = (res == CURLE_OPERATION_TIMEDOUT);
373 result.data = oss.str();
375 // Get HTTP/FTP response code
376 result.response_code = 0;
377 if (curl && (curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE,
378 &result.response_code) != CURLE_OK)) {
379 // We failed to get a return code, make sure it is still 0
380 result.response_code = 0;
383 if (res != CURLE_OK) {
384 errorstream << request.url << " not found ("
385 << curl_easy_strerror(res) << ")"
386 << " (response code " << result.response_code << ")"
// Tears the fetch down: detaches the easy handle from the multi handle, resets
// the per-request options on the handle and releases the header list.
// NOTE(review): the guards around these steps (presumably NULL checks on
// multi / curl / http_header / post), the release of the multipart form and
// the call returning the handle to the pool are not visible in this view --
// confirm against the full file.
393 HTTPFetchOngoing::~HTTPFetchOngoing()
396 CURLMcode mres = curl_multi_remove_handle(multi, curl);
397 if (mres != CURLM_OK) {
398 errorstream << "curl_multi_remove_handle"
399 << " returned error code " << mres
404 // Set safe options for the reusable cURL handle
// The write target is reset so the handle no longer points at this object's oss.
405 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
406 httpfetch_discardfunction);
407 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
408 curl_easy_setopt(curl, CURLOPT_POSTFIELDS, NULL);
// The header list is detached from the handle before it is freed.
410 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, NULL);
411 curl_slist_free_all(http_header);
414 curl_easy_setopt(curl, CURLOPT_HTTPPOST, NULL);
418 // Store the cURL handle for reuse
// Worker thread that carries out asynchronous fetches through the cURL multi
// interface. Other threads hand it Request items through m_requests.
// NOTE(review): the embedded numbering jumps in many places, so a number of
// short lines (braces, access specifiers, the Request type, some declarations,
// conditions and statements) are not visible in this view. The notes below
// describe only what the visible lines establish.
423 class CurlFetchThread : public Thread
// Request payload; requestClear() fills in only its caller field.
434 HTTPFetchRequest fetch_request;
// Inbox for requests pushed by requestFetch(), requestClear() and the wake-up path.
439 MutexedQueue<Request> m_requests;
// Upper bound on simultaneously ongoing fetches (see processQueued()).
440 size_t m_parallel_limit;
442 // Variables exclusively used within thread
// Fetches that were started and have not yet completed or been aborted.
443 std::vector<HTTPFetchOngoing*> m_all_ongoing;
// Fetches accepted but not started yet.
444 std::list<HTTPFetchRequest> m_queued_fetches;
// Constructor: a parallel_limit of at least 1 is stored as given; the other
// visible assignment sets the limit to 1 (presumably the else branch).
447 CurlFetchThread(int parallel_limit) :
450 if (parallel_limit >= 1)
451 m_parallel_limit = parallel_limit;
453 m_parallel_limit = 1;
// Queues a copy of fetch_request for the worker.
456 void requestFetch(const HTTPFetchRequest &fetch_request)
460 req.fetch_request = fetch_request;
462 m_requests.push_back(req);
// Queues a request that names `caller`.
// NOTE(review): how the request type and `event` are stored is not visible here.
465 void requestClear(unsigned long caller, Event *event)
469 req.fetch_request.caller = caller;
471 m_requests.push_back(req);
// Queues an RT_WAKEUP request (the method header is not visible in this view).
477 req.type = RT_WAKEUP;
479 m_requests.push_back(req);
483 // Handle a request from some other thread
484 // E.g. new fetch; clear fetches for one caller; wake up
485 void processRequest(const Request &req)
487 if (req.type == RT_FETCH) {
488 // New fetch, queue until there are less
489 // than m_parallel_limit ongoing fetches
490 m_queued_fetches.push_back(req.fetch_request);
492 // see processQueued() for what happens next
495 else if (req.type == RT_CLEAR) {
496 unsigned long caller = req.fetch_request.caller;
498 // Abort all ongoing fetches for the caller
// erase() returns the next valid iterator, so the loop header has no increment.
// NOTE(review): the statement destroying the erased fetch and the else-increment
// are not visible here -- confirm.
499 for (std::vector<HTTPFetchOngoing*>::iterator
500 it = m_all_ongoing.begin();
501 it != m_all_ongoing.end();) {
502 if ((*it)->getRequest().caller == caller) {
504 it = m_all_ongoing.erase(it);
510 // Also abort all queued fetches for the caller
511 for (std::list<HTTPFetchRequest>::iterator
512 it = m_queued_fetches.begin();
513 it != m_queued_fetches.end();) {
514 if ((*it).caller == caller)
515 it = m_queued_fetches.erase(it);
520 else if (req.type == RT_WAKEUP) {
521 // Wakeup: Nothing to do, thread is awake at this point
// NOTE(review): the action taken on a non-NULL event is not visible --
// presumably it is signalled so the requesting thread can continue; confirm.
524 if (req.event != NULL)
528 // Start new ongoing fetches if m_parallel_limit allows
529 void processQueued(CurlHandlePool *pool)
531 while (m_all_ongoing.size() < m_parallel_limit &&
532 !m_queued_fetches.empty()) {
533 HTTPFetchRequest request = m_queued_fetches.front();
534 m_queued_fetches.pop_front();
536 // Create ongoing fetch data and make a cURL handle
537 // Set cURL options based on HTTPFetchRequest
538 HTTPFetchOngoing *ongoing =
539 new HTTPFetchOngoing(request, pool);
541 // Initiate the connection (curl_multi_add_handle)
542 CURLcode res = ongoing->start(m_multi);
543 if (res == CURLE_OK) {
544 m_all_ongoing.push_back(ongoing);
// Otherwise (start failed) the failure result is delivered immediately.
// NOTE(review): the cleanup of `ongoing` on this path is not visible -- confirm.
547 httpfetch_deliver_result(*ongoing->complete(res));
553 // Process CURLMsg (indicates completion of a fetch)
554 void processCurlMessage(CURLMsg *msg)
556 // Determine which ongoing fetch the message pertains to
// Linear search by easy handle; `i` keeps the matching index for the code below.
559 for (i = 0; i < m_all_ongoing.size(); ++i) {
560 if (m_all_ongoing[i]->getEasyHandle() == msg->easy_handle) {
565 if (msg->msg == CURLMSG_DONE && found) {
566 // m_all_ongoing[i] succeeded or failed.
567 HTTPFetchOngoing *ongoing = m_all_ongoing[i];
568 httpfetch_deliver_result(*ongoing->complete(msg->data.result));
570 m_all_ongoing.erase(m_all_ongoing.begin() + i);
574 // Wait for a request from another thread, or timeout elapses
575 void waitForRequest(long timeout)
// Only blocks when nothing is queued locally; an ItemNotFoundException from
// the timed pop is swallowed.
577 if (m_queued_fetches.empty()) {
579 Request req = m_requests.pop_front(timeout);
582 catch (ItemNotFoundException &e) {}
586 // Wait until some IO happens, or timeout elapses
587 void waitForIO(long timeout)
// -1 marks "no timeout known yet"; it is replaced by `timeout` further down.
593 long select_timeout = -1;
594 struct timeval select_tv;
597 FD_ZERO(&read_fd_set);
598 FD_ZERO(&write_fd_set);
599 FD_ZERO(&exc_fd_set);
// Ask cURL which descriptors it is waiting on.
601 mres = curl_multi_fdset(m_multi, &read_fd_set,
602 &write_fd_set, &exc_fd_set, &max_fd);
603 if (mres != CURLM_OK) {
604 errorstream<<"curl_multi_fdset"
605 <<" returned error code "<<mres
// Ask cURL how long it is willing to wait.
610 mres = curl_multi_timeout(m_multi, &select_timeout);
611 if (mres != CURLM_OK) {
612 errorstream<<"curl_multi_timeout"
613 <<" returned error code "<<mres
618 // Limit timeout so new requests get through
619 if (select_timeout < 0 || select_timeout > timeout)
620 select_timeout = timeout;
622 if (select_timeout > 0) {
623 // in Winsock it is forbidden to pass three empty
624 // fd_sets to select(), so in that case use sleep_ms
// select_timeout is in milliseconds; it is split into seconds and microseconds.
626 select_tv.tv_sec = select_timeout / 1000;
627 select_tv.tv_usec = (select_timeout % 1000) * 1000;
628 int retval = select(max_fd + 1, &read_fd_set,
629 &write_fd_set, &exc_fd_set,
633 errorstream<<"select returned error code "
634 <<WSAGetLastError()<<std::endl;
636 errorstream<<"select returned error code "
642 sleep_ms(select_timeout);
// Thread main loop (the method header is not visible in this view).
649 DSTACK(FUNCTION_NAME);
653 m_multi = curl_multi_init();
654 if (m_multi == NULL) {
655 errorstream<<"curl_multi_init returned NULL\n";
659 FATAL_ERROR_IF(!m_all_ongoing.empty(), "Expected empty");
661 while (!stopRequested()) {
662 BEGIN_DEBUG_EXCEPTION_HANDLER
665 Handle new async requests
// Drain the inbox, then start as many queued fetches as the limit allows.
668 while (!m_requests.empty()) {
669 Request req = m_requests.pop_frontNoEx();
672 processQueued(&pool);
675 Handle ongoing async requests
678 int still_ongoing = 0;
679 while (curl_multi_perform(m_multi, &still_ongoing) ==
680 CURLM_CALL_MULTI_PERFORM)
684 Handle completed async requests
// Fewer running transfers than tracked fetches means some have finished.
686 if (still_ongoing < (int) m_all_ongoing.size()) {
689 msg = curl_multi_info_read(m_multi, &msgs_in_queue);
690 while (msg != NULL) {
691 processCurlMessage(msg);
692 msg = curl_multi_info_read(m_multi, &msgs_in_queue);
697 If there are ongoing requests, wait for data
698 (with a timeout of 100ms so that new requests
701 If no ongoing requests, wait for a new request.
702 (Possibly an empty request that signals
703 that the thread should be stopped.)
705 if (m_all_ongoing.empty())
706 waitForRequest(100000000);
710 END_DEBUG_EXCEPTION_HANDLER
713 // Call curl_multi_remove_handle and cleanup easy handles
714 for (size_t i = 0; i < m_all_ongoing.size(); ++i) {
715 delete m_all_ongoing[i];
717 m_all_ongoing.clear();
719 m_queued_fetches.clear();
721 CURLMcode mres = curl_multi_cleanup(m_multi);
722 if (mres != CURLM_OK) {
723 errorstream<<"curl_multi_cleanup"
724 <<" returned error code "<<mres
// The single fetch thread; created in httpfetch_init() and deleted in
// httpfetch_cleanup().
732 CurlFetchThread *g_httpfetch_thread = NULL;
734 void httpfetch_init(int parallel_limit)
736 verbosestream<<"httpfetch_init: parallel_limit="<<parallel_limit
739 CURLcode res = curl_global_init(CURL_GLOBAL_DEFAULT);
740 FATAL_ERROR_IF(res != CURLE_OK, "CURL init failed");
742 g_httpfetch_thread = new CurlFetchThread(parallel_limit);
744 // Initialize g_callerid_randomness for httpfetch_caller_alloc_secure
746 porting::secure_rand_fill_buf(randbuf, sizeof(u64) * 2);
747 g_callerid_randomness = PcgRandom(randbuf[0], randbuf[1]);
750 void httpfetch_cleanup()
752 verbosestream<<"httpfetch_cleanup: cleaning up"<<std::endl;
754 g_httpfetch_thread->stop();
755 g_httpfetch_thread->requestWakeUp();
756 g_httpfetch_thread->wait();
757 delete g_httpfetch_thread;
759 curl_global_cleanup();
762 void httpfetch_async(const HTTPFetchRequest &fetch_request)
764 g_httpfetch_thread->requestFetch(fetch_request);
765 if (!g_httpfetch_thread->isRunning())
766 g_httpfetch_thread->start();
// Asks the fetch thread to drop every fetch that belongs to `caller`.
// With a running thread an Event is passed along with the request; otherwise
// the request is queued with a NULL event.
// NOTE(review): the declaration of `event`, any wait on it and the else line
// are not visible in this view -- presumably the running branch blocks until
// the thread has processed the request; confirm against the full file.
769 static void httpfetch_request_clear(unsigned long caller)
771 if (g_httpfetch_thread->isRunning()) {
773 g_httpfetch_thread->requestClear(caller, &event);
776 g_httpfetch_thread->requestClear(caller, NULL);
780 void httpfetch_sync(const HTTPFetchRequest &fetch_request,
781 HTTPFetchResult &fetch_result)
783 // Create ongoing fetch data and make a cURL handle
784 // Set cURL options based on HTTPFetchRequest
786 HTTPFetchOngoing ongoing(fetch_request, &pool);
787 // Do the fetch (curl_easy_perform)
788 CURLcode res = ongoing.start(NULL);
789 // Update fetch result
790 fetch_result = *ongoing.complete(res);
798 Dummy httpfetch implementation that always returns an error.
// Dummy variant of httpfetch_init for builds without cURL.
// NOTE(review): the body is not visible in this view -- presumably empty; confirm.
801 void httpfetch_init(int parallel_limit)
// Dummy variant of httpfetch_cleanup for builds without cURL.
// NOTE(review): the body is not visible in this view -- presumably empty; confirm.
805 void httpfetch_cleanup()
809 void httpfetch_async(const HTTPFetchRequest &fetch_request)
811 errorstream << "httpfetch_async: unable to fetch " << fetch_request.url
812 << " because USE_CURL=0" << std::endl;
814 HTTPFetchResult fetch_result(fetch_request); // sets succeeded = false etc.
815 httpfetch_deliver_result(fetch_result);
// Dummy variant of httpfetch_request_clear for builds without cURL.
// NOTE(review): the body is not visible in this view -- presumably empty; confirm.
818 static void httpfetch_request_clear(unsigned long caller)
822 void httpfetch_sync(const HTTPFetchRequest &fetch_request,
823 HTTPFetchResult &fetch_result)
825 errorstream << "httpfetch_sync: unable to fetch " << fetch_request.url
826 << " because USE_CURL=0" << std::endl;
828 fetch_result = HTTPFetchResult(fetch_request); // sets succeeded = false etc.