/*
Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "httpfetch.h"
#include "porting.h" // for sleep_ms(), get_sysinfo(), secure_rand_fill_buf()
#include <cerrno>
#include <list>
#include <map>
#include <mutex>
#include <queue>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "network/socket.h" // for select()
#include "threading/event.h"
#include "exceptions.h"
#include "util/container.h"
#include "util/thread.h"
40 std::mutex g_httpfetch_mutex;
41 std::map<unsigned long, std::queue<HTTPFetchResult> > g_httpfetch_results;
42 PcgRandom g_callerid_randomness;
44 HTTPFetchRequest::HTTPFetchRequest() :
45 timeout(g_settings->getS32("curl_timeout")),
46 connect_timeout(timeout),
47 useragent(std::string(PROJECT_NAME_C "/") + g_version_hash + " (" + porting::get_sysinfo() + ")")
52 static void httpfetch_deliver_result(const HTTPFetchResult &fetch_result)
54 unsigned long caller = fetch_result.caller;
55 if (caller != HTTPFETCH_DISCARD) {
56 MutexAutoLock lock(g_httpfetch_mutex);
57 g_httpfetch_results[caller].push(fetch_result);
// Forward declaration: needed by httpfetch_caller_free(), defined further down.
static void httpfetch_request_clear(unsigned long caller);
63 unsigned long httpfetch_caller_alloc()
65 MutexAutoLock lock(g_httpfetch_mutex);
67 // Check each caller ID except HTTPFETCH_DISCARD
68 const unsigned long discard = HTTPFETCH_DISCARD;
69 for (unsigned long caller = discard + 1; caller != discard; ++caller) {
70 std::map<unsigned long, std::queue<HTTPFetchResult> >::iterator
71 it = g_httpfetch_results.find(caller);
72 if (it == g_httpfetch_results.end()) {
73 verbosestream << "httpfetch_caller_alloc: allocating "
74 << caller << std::endl;
75 // Access element to create it
76 g_httpfetch_results[caller];
81 FATAL_ERROR("httpfetch_caller_alloc: ran out of caller IDs");
85 unsigned long httpfetch_caller_alloc_secure()
87 MutexAutoLock lock(g_httpfetch_mutex);
89 // Generate random caller IDs and make sure they're not
90 // already used or equal to HTTPFETCH_DISCARD
91 // Give up after 100 tries to prevent infinite loop
96 caller = (((u64) g_callerid_randomness.next()) << 32) |
97 g_callerid_randomness.next();
100 FATAL_ERROR("httpfetch_caller_alloc_secure: ran out of caller IDs");
101 return HTTPFETCH_DISCARD;
103 } while (g_httpfetch_results.find(caller) != g_httpfetch_results.end());
105 verbosestream << "httpfetch_caller_alloc_secure: allocating "
106 << caller << std::endl;
108 // Access element to create it
109 g_httpfetch_results[caller];
113 void httpfetch_caller_free(unsigned long caller)
115 verbosestream<<"httpfetch_caller_free: freeing "
118 httpfetch_request_clear(caller);
119 if (caller != HTTPFETCH_DISCARD) {
120 MutexAutoLock lock(g_httpfetch_mutex);
121 g_httpfetch_results.erase(caller);
125 bool httpfetch_async_get(unsigned long caller, HTTPFetchResult &fetch_result)
127 MutexAutoLock lock(g_httpfetch_mutex);
129 // Check that caller exists
130 std::map<unsigned long, std::queue<HTTPFetchResult> >::iterator
131 it = g_httpfetch_results.find(caller);
132 if (it == g_httpfetch_results.end())
135 // Check that result queue is nonempty
136 std::queue<HTTPFetchResult> &caller_results = it->second;
137 if (caller_results.empty())
141 fetch_result = caller_results.front();
142 caller_results.pop();
147 #include <curl/curl.h>
/*
	USE_CURL is on: use cURL based httpfetch implementation
*/
/*
	cURL write callback that appends the received bytes to a stream.

	ptr:      received data (not NUL-terminated)
	size:     size of one element
	nmemb:    number of elements
	userdata: the std::ostringstream registered via CURLOPT_WRITEDATA

	Returns the number of bytes consumed (size * nmemb).
*/
static size_t httpfetch_writefunction(
		char *ptr, size_t size, size_t nmemb, void *userdata)
{
	std::ostringstream *stream = static_cast<std::ostringstream *>(userdata);
	const size_t count = size * nmemb;
	stream->write(ptr, static_cast<std::streamsize>(count));
	return count;
}
/*
	cURL write callback that throws the received data away.
	Reports every byte as consumed so the transfer keeps going.
*/
static size_t httpfetch_discardfunction(
		char *ptr, size_t size, size_t nmemb, void *userdata)
{
	(void)ptr;
	(void)userdata;
	return size * nmemb;
}
170 std::list<CURL*> handles;
173 CurlHandlePool() = default;
177 for (std::list<CURL*>::iterator it = handles.begin();
178 it != handles.end(); ++it) {
179 curl_easy_cleanup(*it);
185 if (handles.empty()) {
186 curl = curl_easy_init();
188 errorstream<<"curl_easy_init returned NULL"<<std::endl;
192 curl = handles.front();
197 void free(CURL *handle)
200 handles.push_back(handle);
204 class HTTPFetchOngoing
207 HTTPFetchOngoing(const HTTPFetchRequest &request, CurlHandlePool *pool);
210 CURLcode start(CURLM *multi);
211 const HTTPFetchResult * complete(CURLcode res);
213 const HTTPFetchRequest &getRequest() const { return request; };
214 const CURL *getEasyHandle() const { return curl; };
217 CurlHandlePool *pool;
220 HTTPFetchRequest request;
221 HTTPFetchResult result;
222 std::ostringstream oss;
223 struct curl_slist *http_header;
228 HTTPFetchOngoing::HTTPFetchOngoing(const HTTPFetchRequest &request_,
229 CurlHandlePool *pool_):
235 oss(std::ios::binary),
239 curl = pool->alloc();
244 // Set static cURL options
245 curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
246 curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
247 curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
248 curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 3);
249 curl_easy_setopt(curl, CURLOPT_ENCODING, "gzip");
251 std::string bind_address = g_settings->get("bind_address");
252 if (!bind_address.empty()) {
253 curl_easy_setopt(curl, CURLOPT_INTERFACE, bind_address.c_str());
256 #if LIBCURL_VERSION_NUM >= 0x071304
257 // Restrict protocols so that curl vulnerabilities in
258 // other protocols don't affect us.
259 // These settings were introduced in curl 7.19.4.
265 curl_easy_setopt(curl, CURLOPT_PROTOCOLS, protocols);
266 curl_easy_setopt(curl, CURLOPT_REDIR_PROTOCOLS, protocols);
269 // Set cURL options based on HTTPFetchRequest
270 curl_easy_setopt(curl, CURLOPT_URL,
271 request.url.c_str());
272 curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS,
274 curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT_MS,
275 request.connect_timeout);
277 if (!request.useragent.empty())
278 curl_easy_setopt(curl, CURLOPT_USERAGENT, request.useragent.c_str());
280 // Set up a write callback that writes to the
281 // ostringstream ongoing->oss, unless the data
282 // is to be discarded
283 if (request.caller == HTTPFETCH_DISCARD) {
284 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
285 httpfetch_discardfunction);
286 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
288 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
289 httpfetch_writefunction);
290 curl_easy_setopt(curl, CURLOPT_WRITEDATA, &oss);
293 // Set POST (or GET) data
294 if (request.post_fields.empty() && request.post_data.empty()) {
295 curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
296 } else if (request.multipart) {
297 curl_httppost *last = NULL;
298 for (StringMap::iterator it = request.post_fields.begin();
299 it != request.post_fields.end(); ++it) {
300 curl_formadd(&post, &last,
301 CURLFORM_NAMELENGTH, it->first.size(),
302 CURLFORM_PTRNAME, it->first.c_str(),
303 CURLFORM_CONTENTSLENGTH, it->second.size(),
304 CURLFORM_PTRCONTENTS, it->second.c_str(),
307 curl_easy_setopt(curl, CURLOPT_HTTPPOST, post);
308 // request.post_fields must now *never* be
309 // modified until CURLOPT_HTTPPOST is cleared
310 } else if (request.post_data.empty()) {
311 curl_easy_setopt(curl, CURLOPT_POST, 1);
313 for (auto &post_field : request.post_fields) {
316 str += urlencode(post_field.first);
318 str += urlencode(post_field.second);
320 curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
322 curl_easy_setopt(curl, CURLOPT_COPYPOSTFIELDS,
325 curl_easy_setopt(curl, CURLOPT_POST, 1);
326 curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
327 request.post_data.size());
328 curl_easy_setopt(curl, CURLOPT_POSTFIELDS,
329 request.post_data.c_str());
330 // request.post_data must now *never* be
331 // modified until CURLOPT_POSTFIELDS is cleared
333 // Set additional HTTP headers
334 for (const std::string &extra_header : request.extra_headers) {
335 http_header = curl_slist_append(http_header, extra_header.c_str());
337 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_header);
339 if (!g_settings->getBool("curl_verify_cert")) {
340 curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, false);
344 CURLcode HTTPFetchOngoing::start(CURLM *multi_)
347 return CURLE_FAILED_INIT;
350 // Easy interface (sync)
351 return curl_easy_perform(curl);
354 // Multi interface (async)
355 CURLMcode mres = curl_multi_add_handle(multi_, curl);
356 if (mres != CURLM_OK) {
357 errorstream << "curl_multi_add_handle"
358 << " returned error code " << mres
360 return CURLE_FAILED_INIT;
362 multi = multi_; // store for curl_multi_remove_handle
366 const HTTPFetchResult * HTTPFetchOngoing::complete(CURLcode res)
368 result.succeeded = (res == CURLE_OK);
369 result.timeout = (res == CURLE_OPERATION_TIMEDOUT);
370 result.data = oss.str();
372 // Get HTTP/FTP response code
373 result.response_code = 0;
374 if (curl && (curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE,
375 &result.response_code) != CURLE_OK)) {
376 // We failed to get a return code, make sure it is still 0
377 result.response_code = 0;
380 if (res != CURLE_OK) {
381 errorstream << request.url << " not found ("
382 << curl_easy_strerror(res) << ")"
383 << " (response code " << result.response_code << ")"
390 HTTPFetchOngoing::~HTTPFetchOngoing()
393 CURLMcode mres = curl_multi_remove_handle(multi, curl);
394 if (mres != CURLM_OK) {
395 errorstream << "curl_multi_remove_handle"
396 << " returned error code " << mres
401 // Set safe options for the reusable cURL handle
402 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
403 httpfetch_discardfunction);
404 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
405 curl_easy_setopt(curl, CURLOPT_POSTFIELDS, NULL);
407 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, NULL);
408 curl_slist_free_all(http_header);
411 curl_easy_setopt(curl, CURLOPT_HTTPPOST, NULL);
415 // Store the cURL handle for reuse
420 class CurlFetchThread : public Thread
431 HTTPFetchRequest fetch_request;
436 MutexedQueue<Request> m_requests;
437 size_t m_parallel_limit;
439 // Variables exclusively used within thread
440 std::vector<HTTPFetchOngoing*> m_all_ongoing;
441 std::list<HTTPFetchRequest> m_queued_fetches;
444 CurlFetchThread(int parallel_limit) :
447 if (parallel_limit >= 1)
448 m_parallel_limit = parallel_limit;
450 m_parallel_limit = 1;
453 void requestFetch(const HTTPFetchRequest &fetch_request)
457 req.fetch_request = fetch_request;
459 m_requests.push_back(req);
462 void requestClear(unsigned long caller, Event *event)
466 req.fetch_request.caller = caller;
468 m_requests.push_back(req);
474 req.type = RT_WAKEUP;
476 m_requests.push_back(req);
480 // Handle a request from some other thread
481 // E.g. new fetch; clear fetches for one caller; wake up
482 void processRequest(const Request &req)
484 if (req.type == RT_FETCH) {
485 // New fetch, queue until there are less
486 // than m_parallel_limit ongoing fetches
487 m_queued_fetches.push_back(req.fetch_request);
489 // see processQueued() for what happens next
492 else if (req.type == RT_CLEAR) {
493 unsigned long caller = req.fetch_request.caller;
495 // Abort all ongoing fetches for the caller
496 for (std::vector<HTTPFetchOngoing*>::iterator
497 it = m_all_ongoing.begin();
498 it != m_all_ongoing.end();) {
499 if ((*it)->getRequest().caller == caller) {
501 it = m_all_ongoing.erase(it);
507 // Also abort all queued fetches for the caller
508 for (std::list<HTTPFetchRequest>::iterator
509 it = m_queued_fetches.begin();
510 it != m_queued_fetches.end();) {
511 if ((*it).caller == caller)
512 it = m_queued_fetches.erase(it);
517 else if (req.type == RT_WAKEUP) {
518 // Wakeup: Nothing to do, thread is awake at this point
521 if (req.event != NULL)
525 // Start new ongoing fetches if m_parallel_limit allows
526 void processQueued(CurlHandlePool *pool)
528 while (m_all_ongoing.size() < m_parallel_limit &&
529 !m_queued_fetches.empty()) {
530 HTTPFetchRequest request = m_queued_fetches.front();
531 m_queued_fetches.pop_front();
533 // Create ongoing fetch data and make a cURL handle
534 // Set cURL options based on HTTPFetchRequest
535 HTTPFetchOngoing *ongoing =
536 new HTTPFetchOngoing(request, pool);
538 // Initiate the connection (curl_multi_add_handle)
539 CURLcode res = ongoing->start(m_multi);
540 if (res == CURLE_OK) {
541 m_all_ongoing.push_back(ongoing);
544 httpfetch_deliver_result(*ongoing->complete(res));
550 // Process CURLMsg (indicates completion of a fetch)
551 void processCurlMessage(CURLMsg *msg)
553 // Determine which ongoing fetch the message pertains to
556 for (i = 0; i < m_all_ongoing.size(); ++i) {
557 if (m_all_ongoing[i]->getEasyHandle() == msg->easy_handle) {
562 if (msg->msg == CURLMSG_DONE && found) {
563 // m_all_ongoing[i] succeeded or failed.
564 HTTPFetchOngoing *ongoing = m_all_ongoing[i];
565 httpfetch_deliver_result(*ongoing->complete(msg->data.result));
567 m_all_ongoing.erase(m_all_ongoing.begin() + i);
571 // Wait for a request from another thread, or timeout elapses
572 void waitForRequest(long timeout)
574 if (m_queued_fetches.empty()) {
576 Request req = m_requests.pop_front(timeout);
579 catch (ItemNotFoundException &e) {}
583 // Wait until some IO happens, or timeout elapses
584 void waitForIO(long timeout)
590 long select_timeout = -1;
591 struct timeval select_tv;
594 FD_ZERO(&read_fd_set);
595 FD_ZERO(&write_fd_set);
596 FD_ZERO(&exc_fd_set);
598 mres = curl_multi_fdset(m_multi, &read_fd_set,
599 &write_fd_set, &exc_fd_set, &max_fd);
600 if (mres != CURLM_OK) {
601 errorstream<<"curl_multi_fdset"
602 <<" returned error code "<<mres
607 mres = curl_multi_timeout(m_multi, &select_timeout);
608 if (mres != CURLM_OK) {
609 errorstream<<"curl_multi_timeout"
610 <<" returned error code "<<mres
615 // Limit timeout so new requests get through
616 if (select_timeout < 0 || select_timeout > timeout)
617 select_timeout = timeout;
619 if (select_timeout > 0) {
620 // in Winsock it is forbidden to pass three empty
621 // fd_sets to select(), so in that case use sleep_ms
623 select_tv.tv_sec = select_timeout / 1000;
624 select_tv.tv_usec = (select_timeout % 1000) * 1000;
625 int retval = select(max_fd + 1, &read_fd_set,
626 &write_fd_set, &exc_fd_set,
630 errorstream<<"select returned error code "
631 <<WSAGetLastError()<<std::endl;
633 errorstream<<"select returned error code "
639 sleep_ms(select_timeout);
648 m_multi = curl_multi_init();
649 if (m_multi == NULL) {
650 errorstream<<"curl_multi_init returned NULL\n";
654 FATAL_ERROR_IF(!m_all_ongoing.empty(), "Expected empty");
656 while (!stopRequested()) {
657 BEGIN_DEBUG_EXCEPTION_HANDLER
660 Handle new async requests
663 while (!m_requests.empty()) {
664 Request req = m_requests.pop_frontNoEx();
667 processQueued(&pool);
670 Handle ongoing async requests
673 int still_ongoing = 0;
674 while (curl_multi_perform(m_multi, &still_ongoing) ==
675 CURLM_CALL_MULTI_PERFORM)
679 Handle completed async requests
681 if (still_ongoing < (int) m_all_ongoing.size()) {
684 msg = curl_multi_info_read(m_multi, &msgs_in_queue);
685 while (msg != NULL) {
686 processCurlMessage(msg);
687 msg = curl_multi_info_read(m_multi, &msgs_in_queue);
692 If there are ongoing requests, wait for data
693 (with a timeout of 100ms so that new requests
696 If no ongoing requests, wait for a new request.
697 (Possibly an empty request that signals
698 that the thread should be stopped.)
700 if (m_all_ongoing.empty())
701 waitForRequest(100000000);
705 END_DEBUG_EXCEPTION_HANDLER
708 // Call curl_multi_remove_handle and cleanup easy handles
709 for (HTTPFetchOngoing *i : m_all_ongoing) {
712 m_all_ongoing.clear();
714 m_queued_fetches.clear();
716 CURLMcode mres = curl_multi_cleanup(m_multi);
717 if (mres != CURLM_OK) {
718 errorstream<<"curl_multi_cleanup"
719 <<" returned error code "<<mres
727 CurlFetchThread *g_httpfetch_thread = NULL;
729 void httpfetch_init(int parallel_limit)
731 verbosestream<<"httpfetch_init: parallel_limit="<<parallel_limit
734 CURLcode res = curl_global_init(CURL_GLOBAL_DEFAULT);
735 FATAL_ERROR_IF(res != CURLE_OK, "CURL init failed");
737 g_httpfetch_thread = new CurlFetchThread(parallel_limit);
739 // Initialize g_callerid_randomness for httpfetch_caller_alloc_secure
741 porting::secure_rand_fill_buf(randbuf, sizeof(u64) * 2);
742 g_callerid_randomness = PcgRandom(randbuf[0], randbuf[1]);
745 void httpfetch_cleanup()
747 verbosestream<<"httpfetch_cleanup: cleaning up"<<std::endl;
749 g_httpfetch_thread->stop();
750 g_httpfetch_thread->requestWakeUp();
751 g_httpfetch_thread->wait();
752 delete g_httpfetch_thread;
754 curl_global_cleanup();
757 void httpfetch_async(const HTTPFetchRequest &fetch_request)
759 g_httpfetch_thread->requestFetch(fetch_request);
760 if (!g_httpfetch_thread->isRunning())
761 g_httpfetch_thread->start();
764 static void httpfetch_request_clear(unsigned long caller)
766 if (g_httpfetch_thread->isRunning()) {
768 g_httpfetch_thread->requestClear(caller, &event);
771 g_httpfetch_thread->requestClear(caller, NULL);
775 void httpfetch_sync(const HTTPFetchRequest &fetch_request,
776 HTTPFetchResult &fetch_result)
778 // Create ongoing fetch data and make a cURL handle
779 // Set cURL options based on HTTPFetchRequest
781 HTTPFetchOngoing ongoing(fetch_request, &pool);
782 // Do the fetch (curl_easy_perform)
783 CURLcode res = ongoing.start(NULL);
784 // Update fetch result
785 fetch_result = *ongoing.complete(res);
/*
	Dummy httpfetch implementation that always returns an error.
*/
// Dummy implementation: nothing to initialise without cURL.
void httpfetch_init(int parallel_limit)
{
}
// Dummy implementation: nothing to clean up without cURL.
void httpfetch_cleanup()
{
}
804 void httpfetch_async(const HTTPFetchRequest &fetch_request)
806 errorstream << "httpfetch_async: unable to fetch " << fetch_request.url
807 << " because USE_CURL=0" << std::endl;
809 HTTPFetchResult fetch_result(fetch_request); // sets succeeded = false etc.
810 httpfetch_deliver_result(fetch_result);
// Dummy implementation: there are never pending fetches to cancel.
static void httpfetch_request_clear(unsigned long caller)
{
}
817 void httpfetch_sync(const HTTPFetchRequest &fetch_request,
818 HTTPFetchResult &fetch_result)
820 errorstream << "httpfetch_sync: unable to fetch " << fetch_request.url
821 << " because USE_CURL=0" << std::endl;
823 fetch_result = HTTPFetchResult(fetch_request); // sets succeeded = false etc.