3 Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 #include "socket.h" // for select()
21 #include "porting.h" // for sleep_ms(), get_sysinfo()
22 #include "httpfetch.h"
28 #include "jthread/jevent.h"
30 #include "exceptions.h"
33 #include "util/container.h"
34 #include "util/thread.h"
// Mutex guarding g_httpfetch_results; the functions in this file lock it
// before reading or modifying the map.
38 JMutex g_httpfetch_mutex;
// Finished fetch results waiting to be collected: caller ID -> FIFO queue of
// the results delivered for that caller.
39 std::map<unsigned long, std::queue<HTTPFetchResult> > g_httpfetch_results;
// Default-initialises a fetch request.
// NOTE(review): only part of the member initialisation is visible here;
// confirm the remaining members against the full file.
41 HTTPFetchRequest::HTTPFetchRequest()
// Results of a request whose caller is HTTPFETCH_DISCARD are dropped by
// httpfetch_deliver_result(), so by default nobody collects the result.
44 caller = HTTPFETCH_DISCARD;
// Both timeouts default to the "curl_timeout" setting; connect_timeout is
// later handed to CURLOPT_CONNECTTIMEOUT_MS, i.e. it is in milliseconds.
46 timeout = g_settings->getS32("curl_timeout");
47 connect_timeout = timeout;
// Default user agent: "<PROJECT_NAME_C>/<g_version_hash> (<porting::get_sysinfo()>)"
50 useragent = std::string(PROJECT_NAME_C "/") + g_version_hash + " (" + porting::get_sysinfo() + ")";
// Hands a finished fetch result over to the caller that requested it.
// Results whose caller is HTTPFETCH_DISCARD are dropped; every other result
// is appended to that caller's queue in g_httpfetch_results, from where
// httpfetch_async_get() pops it.
54 static void httpfetch_deliver_result(const HTTPFetchResult &fetch_result)
56 unsigned long caller = fetch_result.caller;
57 if (caller != HTTPFETCH_DISCARD) {
// Take the global lock before touching g_httpfetch_results.
58 JMutexAutoLock lock(g_httpfetch_mutex);
// operator[] creates the queue if this caller ID has no entry yet.
59 g_httpfetch_results[caller].push(fetch_result);
// Forward declaration: httpfetch_caller_free() below calls this before the
// definitions further down in the file.
63 static void httpfetch_request_clear(unsigned long caller);
// Reserves an unused caller ID and creates its (empty) result queue.
// Candidate IDs are probed starting at HTTPFETCH_DISCARD + 1; the unsigned
// counter wraps around and the loop ends once it arrives back at
// HTTPFETCH_DISCARD, so every value except HTTPFETCH_DISCARD is tried.
// If all IDs are taken, FATAL_ERROR is raised.
65 unsigned long httpfetch_caller_alloc()
// The whole search runs under the lock so the chosen ID cannot be taken
// concurrently.
67 JMutexAutoLock lock(g_httpfetch_mutex);
69 // Check each caller ID except HTTPFETCH_DISCARD
70 const unsigned long discard = HTTPFETCH_DISCARD;
71 for (unsigned long caller = discard + 1; caller != discard; ++caller) {
72 std::map<unsigned long, std::queue<HTTPFetchResult> >::iterator
73 it = g_httpfetch_results.find(caller);
// An ID is free exactly when it has no entry in the result map.
74 if (it == g_httpfetch_results.end()) {
75 verbosestream << "httpfetch_caller_alloc: allocating "
76 << caller << std::endl;
77 // Access element to create it
78 g_httpfetch_results[caller];
// NOTE(review): the statement returning `caller` is not visible at this
// point - confirm it directly follows the element creation.
83 FATAL_ERROR("httpfetch_caller_alloc: ran out of caller IDs");
// Releases a caller ID obtained from httpfetch_caller_alloc().
// First asks httpfetch_request_clear() to clear the caller's requests, then
// erases the caller's entry from g_httpfetch_results.
// The map entry is only erased for IDs other than HTTPFETCH_DISCARD.
87 void httpfetch_caller_free(unsigned long caller)
89 verbosestream<<"httpfetch_caller_free: freeing "
// Clear pending work first, before the caller's result queue is removed.
92 httpfetch_request_clear(caller);
93 if (caller != HTTPFETCH_DISCARD) {
94 JMutexAutoLock lock(g_httpfetch_mutex);
95 g_httpfetch_results.erase(caller);
// Non-blocking poll for the oldest finished result of `caller`.
// When a result is available it is copied into `fetch_result` and removed
// from the caller's queue.
// NOTE(review): the return statements are not visible here; presumably the
// function returns false when the caller is unknown or its queue is empty
// and true after a result was popped - confirm.
99 bool httpfetch_async_get(unsigned long caller, HTTPFetchResult &fetch_result)
101 JMutexAutoLock lock(g_httpfetch_mutex);
103 // Check that caller exists
104 std::map<unsigned long, std::queue<HTTPFetchResult> >::iterator
105 it = g_httpfetch_results.find(caller);
106 if (it == g_httpfetch_results.end())
109 // Check that result queue is nonempty
110 std::queue<HTTPFetchResult> &caller_results = it->second;
111 if (caller_results.empty())
// Results are handed out in delivery order (front of the queue first).
115 fetch_result = caller_results.front();
116 caller_results.pop();
121 #include <curl/curl.h>
124 USE_CURL is on: use cURL based httpfetch implementation
/*
	cURL write callback (installed with CURLOPT_WRITEFUNCTION) that appends
	each received chunk to the std::ostringstream registered as userdata.

	ptr      - start of the received chunk (not NUL-terminated)
	size     - size of one element in bytes
	nmemb    - number of elements in the chunk
	userdata - the std::ostringstream* passed via CURLOPT_WRITEDATA

	Returns the number of bytes taken care of. libcurl treats any value
	other than size * nmemb as a write error and aborts the transfer, so
	the full chunk size is always reported.
*/
static size_t httpfetch_writefunction(
		char *ptr, size_t size, size_t nmemb, void *userdata)
{
	std::ostringstream *stream = static_cast<std::ostringstream *>(userdata);
	const size_t count = size * nmemb;
	stream->write(ptr, static_cast<std::streamsize>(count));
	return count;
}
// cURL write callback used when the response body is not wanted: it is
// installed for HTTPFETCH_DISCARD requests and again when an easy handle is
// reset before being reused (both with a NULL CURLOPT_WRITEDATA).
// NOTE(review): the body is not visible here; a cURL write callback has to
// report the number of bytes it handled - confirm against the full file.
136 static size_t httpfetch_discardfunction(
137 char *ptr, size_t size, size_t nmemb, void *userdata)
// Members of the cURL easy-handle pool (used as CurlHandlePool elsewhere in
// this file; the class header and some signatures are not visible here).
// Idle easy handles kept around for reuse.
144 std::list<CURL*> handles;
// Releases every pooled handle with curl_easy_cleanup().
// NOTE(review): presumably the destructor body - confirm.
150 for (std::list<CURL*>::iterator it = handles.begin();
151 it != handles.end(); ++it) {
152 curl_easy_cleanup(*it);
// Handle allocation (called as pool->alloc()): an empty pool creates a fresh
// handle with curl_easy_init(); otherwise the front pooled handle is reused.
158 if (handles.empty()) {
159 curl = curl_easy_init();
// A failed curl_easy_init() is only logged at this point.
161 errorstream<<"curl_easy_init returned NULL"<<std::endl;
165 curl = handles.front();
// Puts a handle back into the pool so a later allocation can reuse it.
170 void free(CURL *handle)
173 handles.push_back(handle);
// State of a single HTTP fetch: the request, the result being assembled and
// the cURL easy handle plus the buffers that handle points into.
177 class HTTPFetchOngoing
// Takes an easy handle from `pool` and configures it for `request`.
180 HTTPFetchOngoing(HTTPFetchRequest request, CurlHandlePool *pool);
// Starts the transfer: either performs it directly with curl_easy_perform()
// or registers the easy handle with the given multi handle.
183 CURLcode start(CURLM *multi);
// Fills in the result from the final cURL status code; callers dereference
// the returned pointer right away.
184 const HTTPFetchResult * complete(CURLcode res);
186 const HTTPFetchRequest &getRequest() const { return request; };
187 const CURL *getEasyHandle() const { return curl; };
// Pool the easy handle is allocated from.
190 CurlHandlePool *pool;
// Own copy of the request; cURL is given pointers into its strings.
193 HTTPFetchRequest request;
194 HTTPFetchResult result;
// Receives the response body through httpfetch_writefunction.
195 std::ostringstream oss;
// Extra request headers as a cURL string list (freed in the destructor).
196 struct curl_slist *http_header;
// Prepares a fetch: obtains a cURL easy handle from the pool and sets every
// option needed for `request_` (URL, timeouts, user agent, output sink,
// GET/POST payload, extra headers, certificate verification).
// Nothing is transferred yet; that happens in start().
// NOTE(review): libcurl documents numeric options as taking a `long`; the
// literals 1 / false below are passed as int through a variadic call -
// consider 1L / 0L.
201 HTTPFetchOngoing::HTTPFetchOngoing(HTTPFetchRequest request_, CurlHandlePool *pool_):
207 oss(std::ios::binary),
// May be NULL if curl_easy_init() failed inside the pool.
211 curl = pool->alloc();
216 // Set static cURL options
217 curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
218 curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
219 curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
220 curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 1);
// Use the configured "bind_address" (if any) as the outgoing interface.
222 std::string bind_address = g_settings->get("bind_address");
223 if (!bind_address.empty()) {
224 curl_easy_setopt(curl, CURLOPT_INTERFACE, bind_address.c_str());
227 #if LIBCURL_VERSION_NUM >= 0x071304
228 // Restrict protocols so that curl vulnerabilities in
229 // other protocols don't affect us.
230 // These settings were introduced in curl 7.19.4.
// `protocols` is defined on lines not visible here; the same set is applied
// to the initial request and to redirects.
236 curl_easy_setopt(curl, CURLOPT_PROTOCOLS, protocols);
237 curl_easy_setopt(curl, CURLOPT_REDIR_PROTOCOLS, protocols);
240 // Set cURL options based on HTTPFetchRequest
241 curl_easy_setopt(curl, CURLOPT_URL,
242 request.url.c_str());
243 curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS,
245 curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT_MS,
246 request.connect_timeout);
// An empty user agent string leaves CURLOPT_USERAGENT untouched.
248 if (request.useragent != "")
249 curl_easy_setopt(curl, CURLOPT_USERAGENT, request.useragent.c_str());
251 // Set up a write callback that writes to the
252 // ostringstream ongoing->oss, unless the data
253 // is to be discarded
254 if (request.caller == HTTPFETCH_DISCARD) {
255 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
256 httpfetch_discardfunction);
257 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
259 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
260 httpfetch_writefunction);
261 curl_easy_setopt(curl, CURLOPT_WRITEDATA, &oss);
264 // Set POST (or GET) data
// Payload selection, in order:
//   post_fields empty      -> plain GET
//   multipart              -> multipart form built from post_fields
//   post_data empty        -> POST with URL-encoded post_fields
//   otherwise              -> POST with the raw post_data
// NOTE(review): because the first test only looks at post_fields, a request
// with post_data but no post_fields is sent as GET - confirm this is intended.
265 if (request.post_fields.empty()) {
266 curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
267 } else if (request.multipart) {
268 curl_httppost *last = NULL;
269 for (StringMap::iterator it = request.post_fields.begin();
270 it != request.post_fields.end(); ++it) {
// PTRNAME / PTRCONTENTS make cURL keep pointers to the strings instead of
// copying them, hence the lifetime remark below.
271 curl_formadd(&post, &last,
272 CURLFORM_NAMELENGTH, it->first.size(),
273 CURLFORM_PTRNAME, it->first.c_str(),
274 CURLFORM_CONTENTSLENGTH, it->second.size(),
275 CURLFORM_PTRCONTENTS, it->second.c_str(),
278 curl_easy_setopt(curl, CURLOPT_HTTPPOST, post);
279 // request.post_fields must now *never* be
280 // modified until CURLOPT_HTTPPOST is cleared
281 } else if (request.post_data.empty()) {
282 curl_easy_setopt(curl, CURLOPT_POST, 1);
// Build the URL-encoded body in a local string; COPYPOSTFIELDS makes cURL
// take its own copy, so the local may go out of scope afterwards.
284 for (StringMap::iterator it = request.post_fields.begin();
285 it != request.post_fields.end(); ++it) {
288 str += urlencode(it->first);
290 str += urlencode(it->second);
292 curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
294 curl_easy_setopt(curl, CURLOPT_COPYPOSTFIELDS,
297 curl_easy_setopt(curl, CURLOPT_POST, 1);
298 curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
299 request.post_data.size());
300 curl_easy_setopt(curl, CURLOPT_POSTFIELDS,
301 request.post_data.c_str());
302 // request.post_data must now *never* be
303 // modified until CURLOPT_POSTFIELDS is cleared
305 // Set additional HTTP headers
306 for (std::vector<std::string>::iterator it = request.extra_headers.begin();
307 it != request.extra_headers.end(); ++it) {
308 http_header = curl_slist_append(http_header, it->c_str());
310 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_header);
// Peer certificate verification is switched off only when the
// "curl_verify_cert" setting is false.
312 if (!g_settings->getBool("curl_verify_cert")) {
313 curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, false);
// Starts the transfer prepared by the constructor.
// Two modes are visible below: the easy interface runs the whole transfer
// synchronously via curl_easy_perform(); the multi interface registers the
// easy handle with `multi_` so the fetch thread can drive it.
// Returns CURLE_FAILED_INIT when the handle cannot be added to the multi
// handle (and on an earlier failure path whose condition is not visible here).
// NOTE(review): the conditions selecting the branches are not visible;
// httpfetch_sync() passes NULL to get the synchronous path - confirm.
317 CURLcode HTTPFetchOngoing::start(CURLM *multi_)
320 return CURLE_FAILED_INIT;
323 // Easy interface (sync)
324 return curl_easy_perform(curl);
327 // Multi interface (async)
328 CURLMcode mres = curl_multi_add_handle(multi_, curl);
329 if (mres != CURLM_OK) {
330 errorstream << "curl_multi_add_handle"
331 << " returned error code " << mres
333 return CURLE_FAILED_INIT;
// Remembered so the destructor can detach the easy handle again.
335 multi = multi_; // store for curl_multi_remove_handle
// Finalises the result of this fetch from the cURL status code `res`:
//   succeeded     - res == CURLE_OK
//   timeout       - res == CURLE_OPERATION_TIMEDOUT
//   data          - everything collected in `oss`
//   response_code - as reported by cURL, or 0 if it cannot be queried
// Failures are written to errorstream.
// NOTE(review): the return statement is not visible here; callers dereference
// the returned pointer, presumably the address of `result` - confirm.
339 const HTTPFetchResult * HTTPFetchOngoing::complete(CURLcode res)
341 result.succeeded = (res == CURLE_OK);
342 result.timeout = (res == CURLE_OPERATION_TIMEDOUT);
343 result.data = oss.str();
345 // Get HTTP/FTP response code
346 result.response_code = 0;
// `curl` may be NULL when handle allocation failed, so check it first.
347 if (curl && (curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE,
348 &result.response_code) != CURLE_OK)) {
349 // We failed to get a return code, make sure it is still 0
350 result.response_code = 0;
// NOTE(review): the log text says "not found" for every cURL failure,
// including timeouts and connection errors.
353 if (res != CURLE_OK) {
354 errorstream << request.url << " not found ("
355 << curl_easy_strerror(res) << ")"
356 << " (response code " << result.response_code << ")"
// Tears the fetch down: detaches the easy handle from the multi handle,
// clears every option that points into this object (output stream, POST
// data, header list, form data) and releases the header list, so the easy
// handle can safely be reused for another request.
// NOTE(review): the guarding conditions and the statement that returns the
// handle to the pool are not visible here.
363 HTTPFetchOngoing::~HTTPFetchOngoing()
366 CURLMcode mres = curl_multi_remove_handle(multi, curl);
367 if (mres != CURLM_OK) {
368 errorstream << "curl_multi_remove_handle"
369 << " returned error code " << mres
374 // Set safe options for the reusable cURL handle
375 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
376 httpfetch_discardfunction);
377 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
378 curl_easy_setopt(curl, CURLOPT_POSTFIELDS, NULL);
// Unset the header option before freeing the list it refers to.
380 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, NULL);
381 curl_slist_free_all(http_header);
384 curl_easy_setopt(curl, CURLOPT_HTTPPOST, NULL);
388 // Store the cURL handle for reuse
// Background thread that runs asynchronous fetches through the cURL multi
// interface. Other threads talk to it by pushing Request items onto
// m_requests.
393 class CurlFetchThread : public JThread
// Payload of a Request: the fetch to start, or (for a clear request) just
// the carrier of the caller ID.
404 HTTPFetchRequest fetch_request;
// Requests posted by other threads, drained by the fetch thread.
409 MutexedQueue<Request> m_requests;
// Upper bound on simultaneously ongoing fetches (at least 1).
410 size_t m_parallel_limit;
412 // Variables exclusively used within thread
// Fetches whose transfer has been started.
413 std::vector<HTTPFetchOngoing*> m_all_ongoing;
// Fetches accepted but still waiting for a free slot.
414 std::list<HTTPFetchRequest> m_queued_fetches;
// parallel_limit: maximum number of fetches run at the same time.
417 CurlFetchThread(int parallel_limit)
419 if (parallel_limit >= 1)
420 m_parallel_limit = parallel_limit;
// Fallback limit of one fetch at a time.
// NOTE(review): presumably the else-branch of the check above (the joining
// keyword is not visible here) - confirm.
422 m_parallel_limit = 1;
// Posts a copy of `fetch_request` to the thread's request queue.
// processRequest() later moves it to m_queued_fetches.
425 void requestFetch(const HTTPFetchRequest &fetch_request)
429 req.fetch_request = fetch_request;
431 m_requests.push_back(req);
// Posts a request asking the thread to drop all fetches of `caller`.
// Only the caller ID of the embedded fetch_request is meaningful here.
// NOTE(review): `event` is presumably signalled by processRequest() once the
// request has been handled (it checks req.event != NULL) - confirm.
434 void requestClear(unsigned long caller, Event *event)
438 req.fetch_request.caller = caller;
440 m_requests.push_back(req);
// Posts an RT_WAKEUP request. processRequest() does nothing for this type;
// its only effect is that a thread blocked on the request queue wakes up
// (httpfetch_cleanup() uses this after asking the thread to stop).
446 req.type = RT_WAKEUP;
448 m_requests.push_back(req);
// Runs on the fetch thread for every Request taken from m_requests.
452 // Handle a request from some other thread
453 // E.g. new fetch; clear fetches for one caller; wake up
454 void processRequest(const Request &req)
456 if (req.type == RT_FETCH) {
457 // New fetch, queue until there are less
458 // than m_parallel_limit ongoing fetches
459 m_queued_fetches.push_back(req.fetch_request);
461 // see processQueued() for what happens next
464 else if (req.type == RT_CLEAR) {
465 unsigned long caller = req.fetch_request.caller;
467 // Abort all ongoing fetches for the caller
// The iterator is advanced inside the loop body because erase() returns
// the next valid position.
468 for (std::vector<HTTPFetchOngoing*>::iterator
469 it = m_all_ongoing.begin();
470 it != m_all_ongoing.end();) {
471 if ((*it)->getRequest().caller == caller) {
// NOTE(review): only the removal from the vector is visible here; confirm
// that the HTTPFetchOngoing object itself is destroyed as well.
473 it = m_all_ongoing.erase(it);
479 // Also abort all queued fetches for the caller
480 for (std::list<HTTPFetchRequest>::iterator
481 it = m_queued_fetches.begin();
482 it != m_queued_fetches.end();) {
483 if ((*it).caller == caller)
484 it = m_queued_fetches.erase(it);
489 else if (req.type == RT_WAKEUP) {
490 // Wakeup: Nothing to do, thread is awake at this point
// Requests may carry an optional event (see requestClear()).
// NOTE(review): the action taken on the event is not visible here.
493 if (req.event != NULL)
// Moves fetches from m_queued_fetches to m_all_ongoing while fewer than
// m_parallel_limit fetches are running. `pool` supplies the easy handles.
497 // Start new ongoing fetches if m_parallel_limit allows
498 void processQueued(CurlHandlePool *pool)
500 while (m_all_ongoing.size() < m_parallel_limit &&
501 !m_queued_fetches.empty()) {
// Oldest queued fetch first.
502 HTTPFetchRequest request = m_queued_fetches.front();
503 m_queued_fetches.pop_front();
505 // Create ongoing fetch data and make a cURL handle
506 // Set cURL options based on HTTPFetchRequest
507 HTTPFetchOngoing *ongoing =
508 new HTTPFetchOngoing(request, pool);
510 // Initiate the connection (curl_multi_add_handle)
511 CURLcode res = ongoing->start(m_multi);
512 if (res == CURLE_OK) {
513 m_all_ongoing.push_back(ongoing);
// Start failed: report the failure to the caller immediately.
// NOTE(review): confirm `ongoing` is deleted on this path (not visible here).
516 httpfetch_deliver_result(*ongoing->complete(res));
// Handles one message read from the multi handle. For a CURLMSG_DONE
// message that matches a tracked fetch, the result is delivered to the
// caller and the fetch is removed from m_all_ongoing.
522 // Process CURLMsg (indicates completion of a fetch)
523 void processCurlMessage(CURLMsg *msg)
525 // Determine which ongoing fetch the message pertains to
// Linear search by easy handle; `i` keeps the index of the match.
528 for (i = 0; i < m_all_ongoing.size(); ++i) {
529 if (m_all_ongoing[i]->getEasyHandle() == msg->easy_handle) {
534 if (msg->msg == CURLMSG_DONE && found) {
535 // m_all_ongoing[i] succeeded or failed.
536 HTTPFetchOngoing *ongoing = m_all_ongoing[i];
// msg->data.result carries the final CURLcode of the transfer.
537 httpfetch_deliver_result(*ongoing->complete(msg->data.result));
// NOTE(review): confirm `ongoing` is deleted before its slot is erased
// (not visible here).
539 m_all_ongoing.erase(m_all_ongoing.begin() + i);
// Blocks on the request queue only when no fetch is waiting in
// m_queued_fetches.
543 // Wait for a request from another thread, or timeout elapses
544 void waitForRequest(long timeout)
546 if (m_queued_fetches.empty()) {
548 Request req = m_requests.pop_front(timeout);
// Running into the exception is not an error: it is swallowed on purpose.
// NOTE(review): presumably pop_front() throws it when the timeout elapses.
551 catch (ItemNotFoundException &e) {}
// Sleeps until one of cURL's sockets becomes active or the wait time is
// over. `timeout` is in milliseconds (it is split into seconds and
// microseconds for select() below).
555 // Wait until some IO happens, or timeout elapses
556 void waitForIO(long timeout)
// -1 means "no timeout known yet".
562 long select_timeout = -1;
563 struct timeval select_tv;
566 FD_ZERO(&read_fd_set);
567 FD_ZERO(&write_fd_set);
568 FD_ZERO(&exc_fd_set);
// Ask cURL which file descriptors it wants to be watched.
570 mres = curl_multi_fdset(m_multi, &read_fd_set,
571 &write_fd_set, &exc_fd_set, &max_fd);
572 if (mres != CURLM_OK) {
573 errorstream<<"curl_multi_fdset"
574 <<" returned error code "<<mres
// Ask cURL how long it is willing to wait before being called again.
579 mres = curl_multi_timeout(m_multi, &select_timeout);
580 if (mres != CURLM_OK) {
581 errorstream<<"curl_multi_timeout"
582 <<" returned error code "<<mres
587 // Limit timeout so new requests get through
// Also covers the case where no timeout was obtained (negative value).
588 if (select_timeout < 0 || select_timeout > timeout)
589 select_timeout = timeout;
// A timeout of 0 means there is work to do right away: do not wait at all.
591 if (select_timeout > 0) {
592 // in Winsock it is forbidden to pass three empty
593 // fd_sets to select(), so in that case use sleep_ms
595 select_tv.tv_sec = select_timeout / 1000;
596 select_tv.tv_usec = (select_timeout % 1000) * 1000;
597 int retval = select(max_fd + 1, &read_fd_set,
598 &write_fd_set, &exc_fd_set,
// The error code is fetched differently on Winsock and elsewhere.
602 errorstream<<"select returned error code "
603 <<WSAGetLastError()<<std::endl;
605 errorstream<<"select returned error code "
// Fallback when select() cannot be used (see the Winsock remark above).
611 sleep_ms(select_timeout);
// Main function of the fetch thread (its signature line is not visible
// here). It creates the cURL multi handle, then loops until a stop is
// requested:
//   1. process requests posted by other threads and start queued fetches,
//   2. let cURL make progress on all transfers,
//   3. deliver the results of completed transfers,
//   4. wait for new requests or socket activity.
// On exit all remaining fetches are destroyed and the multi handle is
// cleaned up.
619 log_register_thread("CurlFetchThread");
620 DSTACK(__FUNCTION_NAME);
622 porting::setThreadName("CurlFetchThread");
626 m_multi = curl_multi_init();
627 if (m_multi == NULL) {
628 errorstream<<"curl_multi_init returned NULL\n";
632 FATAL_ERROR_IF(!m_all_ongoing.empty(), "Expected empty");
634 while (!StopRequested()) {
635 BEGIN_DEBUG_EXCEPTION_HANDLER
638 Handle new async requests
// Drain everything that is currently in the request queue without blocking.
641 while (!m_requests.empty()) {
642 Request req = m_requests.pop_frontNoEx();
// `pool` is the handle pool declared on a line not visible here.
645 processQueued(&pool);
648 Handle ongoing async requests
// Keep calling curl_multi_perform() for as long as cURL asks for it.
651 int still_ongoing = 0;
652 while (curl_multi_perform(m_multi, &still_ongoing) ==
653 CURLM_CALL_MULTI_PERFORM)
657 Handle completed async requests
// Fewer running transfers than tracked fetches: at least one has finished,
// so read the pending completion messages.
659 if (still_ongoing < (int) m_all_ongoing.size()) {
662 msg = curl_multi_info_read(m_multi, &msgs_in_queue);
663 while (msg != NULL) {
664 processCurlMessage(msg);
665 msg = curl_multi_info_read(m_multi, &msgs_in_queue);
670 If there are ongoing requests, wait for data
671 (with a timeout of 100ms so that new requests
674 If no ongoing requests, wait for a new request.
675 (Possibly an empty request that signals
676 that the thread should be stopped.)
// Nothing in flight: block on the request queue with a very large timeout.
678 if (m_all_ongoing.empty())
679 waitForRequest(100000000);
683 END_DEBUG_EXCEPTION_HANDLER(errorstream)
686 // Call curl_multi_remove_handle and cleanup easy handles
// Deleting an HTTPFetchOngoing runs its destructor, which detaches and
// resets the easy handle.
687 for (size_t i = 0; i < m_all_ongoing.size(); ++i) {
688 delete m_all_ongoing[i];
690 m_all_ongoing.clear();
692 m_queued_fetches.clear();
694 CURLMcode mres = curl_multi_cleanup(m_multi);
695 if (mres != CURLM_OK) {
696 errorstream<<"curl_multi_cleanup"
697 <<" returned error code "<<mres
// The single fetch thread object: created by httpfetch_init(), started
// lazily by httpfetch_async(), destroyed by httpfetch_cleanup().
705 CurlFetchThread *g_httpfetch_thread = NULL;
// Initialises libcurl globally and creates the fetch thread object.
// parallel_limit: maximum number of simultaneous asynchronous fetches.
// A failing curl_global_init() is fatal.
// The thread is only created here; httpfetch_async() starts it on first use.
707 void httpfetch_init(int parallel_limit)
709 verbosestream<<"httpfetch_init: parallel_limit="<<parallel_limit
712 CURLcode res = curl_global_init(CURL_GLOBAL_DEFAULT);
713 FATAL_ERROR_IF(res != CURLE_OK, "CURL init failed");
715 g_httpfetch_thread = new CurlFetchThread(parallel_limit);
// Shuts the fetch thread down and releases libcurl's global state.
718 void httpfetch_cleanup()
720 verbosestream<<"httpfetch_cleanup: cleaning up"<<std::endl;
// Ask the thread to stop, then wake it in case it is blocked waiting for a
// request, and wait until it has finished before deleting it.
722 g_httpfetch_thread->Stop();
723 g_httpfetch_thread->requestWakeUp();
724 g_httpfetch_thread->Wait();
725 delete g_httpfetch_thread;
727 curl_global_cleanup();
// Queues `fetch_request` for asynchronous execution on the fetch thread.
// Unless the request's caller is HTTPFETCH_DISCARD, the result is later
// picked up with httpfetch_async_get().
730 void httpfetch_async(const HTTPFetchRequest &fetch_request)
732 g_httpfetch_thread->requestFetch(fetch_request);
// The thread is started lazily, on the first asynchronous request.
733 if (!g_httpfetch_thread->IsRunning())
734 g_httpfetch_thread->Start();
// Asks the fetch thread to drop every ongoing and queued fetch of `caller`.
// While the thread is running the request carries an event; otherwise the
// request is just queued without one.
// NOTE(review): the event's declaration and the wait on it are not visible
// here - presumably this call blocks until the thread has processed the
// request; confirm.
737 static void httpfetch_request_clear(unsigned long caller)
739 if (g_httpfetch_thread->IsRunning()) {
741 g_httpfetch_thread->requestClear(caller, &event);
745 g_httpfetch_thread->requestClear(caller, NULL);
// Performs `fetch_request` on the calling thread and stores the outcome in
// `fetch_result`. Does not involve the fetch thread.
749 void httpfetch_sync(const HTTPFetchRequest &fetch_request,
750 HTTPFetchResult &fetch_result)
752 // Create ongoing fetch data and make a cURL handle
753 // Set cURL options based on HTTPFetchRequest
// `pool` is a handle pool declared on a line not visible here.
755 HTTPFetchOngoing ongoing(fetch_request, &pool);
756 // Do the fetch (curl_easy_perform)
// Passing NULL instead of a multi handle requests the synchronous mode.
757 CURLcode res = ongoing.start(NULL);
758 // Update fetch result
759 fetch_result = *ongoing.complete(res);
767 Dummy httpfetch implementation that always returns an error.
// httpfetch_init() of the dummy (non-cURL) implementation.
// NOTE(review): the body is not visible here - presumably empty; confirm.
770 void httpfetch_init(int parallel_limit)
// httpfetch_cleanup() of the dummy (non-cURL) implementation.
// NOTE(review): the body is not visible here - presumably empty; confirm.
774 void httpfetch_cleanup()
// Dummy asynchronous fetch: logs that fetching is unavailable and
// immediately delivers a failed result for the request, so callers polling
// httpfetch_async_get() still receive an answer.
778 void httpfetch_async(const HTTPFetchRequest &fetch_request)
780 errorstream << "httpfetch_async: unable to fetch " << fetch_request.url
781 << " because USE_CURL=0" << std::endl;
783 HTTPFetchResult fetch_result(fetch_request); // sets succeeded = false etc.
784 httpfetch_deliver_result(fetch_result);
// httpfetch_request_clear() of the dummy (non-cURL) implementation.
// NOTE(review): the body is not visible here - presumably empty, as the
// dummy httpfetch_async() never leaves a request pending; confirm.
787 static void httpfetch_request_clear(unsigned long caller)
// Dummy synchronous fetch: logs that fetching is unavailable and sets
// `fetch_result` to a failed result built from the request.
791 void httpfetch_sync(const HTTPFetchRequest &fetch_request,
792 HTTPFetchResult &fetch_result)
794 errorstream << "httpfetch_sync: unable to fetch " << fetch_request.url
795 << " because USE_CURL=0" << std::endl;
797 fetch_result = HTTPFetchResult(fetch_request); // sets succeeded = false etc.