3 Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License along
16 with this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 #include "socket.h" // for select()
21 #include "porting.h" // for sleep_ms()
22 #include "httpfetch.h"
28 #include "jthread/jevent.h"
30 #include "exceptions.h"
33 #include "util/container.h"
34 #include "util/thread.h"
// Guards g_httpfetch_results: every access to the map that is visible in
// this file is made while a JMutexAutoLock on this mutex is alive.
36 JMutex g_httpfetch_mutex;
// Finished fetch results, keyed by caller ID. Each caller has its own list;
// results are appended with push_back() and consumed with pop_front().
37 std::map<unsigned long, std::list<HTTPFetchResult> > g_httpfetch_results;
// Queue a finished fetch result for the caller that requested it.
// Results whose caller is HTTPFETCH_DISCARD are dropped; all others are
// appended to that caller's list in g_httpfetch_results with
// g_httpfetch_mutex locked through a JMutexAutoLock.
39 static void httpfetch_deliver_result(const HTTPFetchResult &fetchresult)
41 unsigned long caller = fetchresult.caller;
42 if (caller != HTTPFETCH_DISCARD) {
43 JMutexAutoLock lock(g_httpfetch_mutex);
// operator[] creates the caller's list if it does not exist yet
44 g_httpfetch_results[caller].push_back(fetchresult);
// Forward declaration: httpfetch_caller_free() calls this function before
// its definition appears later in the file.
48 static void httpfetch_request_clear(unsigned long caller);
// Allocate a caller ID that has no entry in g_httpfetch_results yet.
// Candidates are tried in increasing order starting at HTTPFETCH_DISCARD + 1;
// the loop relies on unsigned wrap-around to end once it gets back to
// HTTPFETCH_DISCARD, which is never handed out. The chosen ID is reserved by
// creating its (empty) result list. If every ID is taken, the assert at the
// end fires.
// NOTE(review): the return statements are not visible in this listing;
// presumably the newly reserved ID is returned -- confirm.
50 unsigned long httpfetch_caller_alloc()
// Held for the whole search so the lookup and the reservation are not
// interleaved with other users of g_httpfetch_results
52 JMutexAutoLock lock(g_httpfetch_mutex);
54 // Check each caller ID except HTTPFETCH_DISCARD
55 const unsigned long discard = HTTPFETCH_DISCARD;
56 for (unsigned long caller = discard + 1; caller != discard; ++caller) {
57 std::map<unsigned long, std::list<HTTPFetchResult> >::iterator
58 it = g_httpfetch_results.find(caller);
// No entry for this ID means it is free
59 if (it == g_httpfetch_results.end()) {
60 verbosestream<<"httpfetch_caller_alloc: allocating "
62 // Access element to create it
63 g_httpfetch_results[caller];
// Written as a string literal compared with 0 so the message shows up in
// the assertion failure text
68 assert("httpfetch_caller_alloc: ran out of caller IDs" == 0);
// Release a caller ID. The caller's fetch requests are cleared first through
// httpfetch_request_clear(); then, unless the ID is HTTPFETCH_DISCARD, its
// entry is erased from g_httpfetch_results (with g_httpfetch_mutex locked),
// which also drops any results that were never collected.
72 void httpfetch_caller_free(unsigned long caller)
74 verbosestream<<"httpfetch_caller_free: freeing "
// Called before g_httpfetch_mutex is taken below
77 httpfetch_request_clear(caller);
78 if (caller != HTTPFETCH_DISCARD) {
79 JMutexAutoLock lock(g_httpfetch_mutex);
80 g_httpfetch_results.erase(caller);
// Move the oldest finished result of `caller` into `fetchresult`.
// The result is copied from the front of the caller's list and then removed
// from it, all with g_httpfetch_mutex locked.
// NOTE(review): the return statements are not visible in this listing;
// presumably false is returned when the caller is unknown or has no pending
// result, and true after a result has been copied out -- confirm.
84 bool httpfetch_async_get(unsigned long caller, HTTPFetchResult &fetchresult)
86 JMutexAutoLock lock(g_httpfetch_mutex);
88 // Check that caller exists
// find() is used instead of operator[] so an unknown caller is not created
89 std::map<unsigned long, std::list<HTTPFetchResult> >::iterator
90 it = g_httpfetch_results.find(caller);
91 if (it == g_httpfetch_results.end())
94 // Check that result queue is nonempty
95 std::list<HTTPFetchResult> &callerresults = it->second;
96 if (callerresults.empty())
// Hand out results in the order they were delivered
100 fetchresult = callerresults.front();
101 callerresults.pop_front();
106 #include <curl/curl.h>
109 USE_CURL is on: use cURL based httpfetch implementation
// cURL write callback (installed with CURLOPT_WRITEFUNCTION): appends the
// size * nmemb received bytes at `ptr` to the std::ostringstream that was
// registered as `userdata` with CURLOPT_WRITEDATA.
// NOTE(review): the return statement is not visible in this listing; a cURL
// write callback is expected to return the number of bytes it handled --
// confirm that `count` is returned.
112 static size_t httpfetch_writefunction(
113 char *ptr, size_t size, size_t nmemb, void *userdata)
115 std::ostringstream *stream = (std::ostringstream*)userdata;
116 size_t count = size * nmemb;
117 stream->write(ptr, count);
// cURL write callback used when the response body is not wanted: it is
// installed for HTTPFETCH_DISCARD requests and on easy handles that are
// being reset for reuse (both times with a NULL CURLOPT_WRITEDATA).
// NOTE(review): the body is not visible in this listing; presumably it
// reports all bytes as handled without storing them -- confirm.
121 static size_t httpfetch_discardfunction(
122 char *ptr, size_t size, size_t nmemb, void *userdata)
// Pool of reusable cURL easy handles. It is used as `CurlHandlePool` by
// the rest of this file; the class header itself is not visible in this listing.
// Idle handles that are available for reuse
129 std::list<CURL*> handles;
// Release every idle handle with curl_easy_cleanup().
// NOTE(review): presumably this loop is the destructor body -- its header
// is not visible here.
135 for (std::list<CURL*>::iterator it = handles.begin();
136 it != handles.end(); ++it) {
137 curl_easy_cleanup(*it);
// Handle allocation (used as pool->alloc() elsewhere in this file): a new
// easy handle is created only when no idle one is available; otherwise the
// handle at the front of the list is reused.
143 if (handles.empty()) {
144 curl = curl_easy_init();
// NOTE(review): presumably logged only when curl_easy_init() failed; the
// guarding condition is not visible here.
146 errorstream<<"curl_easy_init returned NULL"<<std::endl;
150 curl = handles.front();
// Hand a handle back to the pool so that a later allocation can reuse it.
155 void free(CURL *handle)
158 handles.push_back(handle);
// State of a single HTTP fetch: the request, the result being filled in, the
// cURL easy handle used for the transfer and the buffers that cURL reads
// from or writes to while the transfer runs.
// NOTE(review): several member declarations and initializers (e.g. `curl`
// and `multi`, which are used below) are not visible in this listing.
162 struct HTTPFetchOngoing
// Pool the easy handle is obtained from (see pool->alloc() below)
164 CurlHandlePool *pool;
167 HTTPFetchRequest request;
168 HTTPFetchResult result;
// Collects the response body through httpfetch_writefunction
169 std::ostringstream oss;
// Extra request headers handed to cURL; freed with curl_slist_free_all()
171 struct curl_slist *httpheader;
// Obtains an easy handle from the pool and configures it for `request_`.
173 HTTPFetchOngoing(HTTPFetchRequest request_, CurlHandlePool *pool_):
179 oss(std::ios::binary),
182 curl = pool->alloc();
184 // Set static cURL options
185 curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
186 curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
// Redirects are followed, but at most one per fetch
187 curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
188 curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 1);
// 0x071304 encodes version 7.19.4
190 #if LIBCURL_VERSION_NUM >= 0x071304
191 // Restrict protocols so that curl vulnerabilities in
192 // other protocols don't affect us.
193 // These settings were introduced in curl 7.19.4.
// NOTE(review): the definition of `protocols` is not visible here.
199 curl_easy_setopt(curl, CURLOPT_PROTOCOLS, protocols);
200 curl_easy_setopt(curl, CURLOPT_REDIR_PROTOCOLS, protocols);
203 // Set cURL options based on HTTPFetchRequest
204 curl_easy_setopt(curl, CURLOPT_URL,
205 request.url.c_str());
// Both timeouts go to the *_MS options, i.e. they are in milliseconds
206 curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS,
208 curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT_MS,
209 request.connect_timeout);
// An empty user agent string leaves the option untouched
211 if (request.useragent != "")
212 curl_easy_setopt(curl, CURLOPT_USERAGENT, request.useragent.c_str());
214 // Set up a write callback that writes to the
215 // ostringstream ongoing->oss, unless the data
216 // is to be discarded
217 if (request.caller == HTTPFETCH_DISCARD) {
218 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
219 httpfetch_discardfunction);
220 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
223 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
224 httpfetch_writefunction);
225 curl_easy_setopt(curl, CURLOPT_WRITEDATA, &oss);
227 // Set POST (or GET) data
// No POST fields means a plain GET request
228 if (request.post_fields.empty()) {
229 curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
232 curl_easy_setopt(curl, CURLOPT_POST, 1);
233 curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
234 request.post_fields.size());
235 curl_easy_setopt(curl, CURLOPT_POSTFIELDS,
236 request.post_fields.c_str());
237 // request.post_fields must now *never* be
238 // modified until CURLOPT_POSTFIELDS is cleared
240 // Set additional HTTP headers
// Each extra header is appended to the curl_slist before it is installed
241 for (size_t i = 0; i < request.extra_headers.size(); ++i) {
242 httpheader = curl_slist_append(
244 request.extra_headers[i].c_str());
246 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, httpheader);
// Start the transfer. The callers in this file pass a multi handle for
// asynchronous fetches and NULL for synchronous ones.
// NOTE(review): the conditions selecting the branches below are not visible
// in this listing; presumably CURLE_FAILED_INIT is returned up front when no
// easy handle is available, and `multi_` decides between the multi and the
// easy interface -- confirm.
250 CURLcode start(CURLM *multi_)
253 return CURLE_FAILED_INIT;
256 // Multi interface (async)
257 CURLMcode mres = curl_multi_add_handle(multi_, curl);
258 if (mres != CURLM_OK) {
259 errorstream<<"curl_multi_add_handle"
260 <<" returned error code "<<mres
262 return CURLE_FAILED_INIT;
264 multi = multi_; // store for curl_multi_remove_handle
268 // Easy interface (sync)
// curl_easy_perform() returns only after the transfer has finished
269 return curl_easy_perform(curl);
// Record the outcome of the transfer in `result`: the success and timeout
// flags derived from `res`, the collected body and the response code.
// Failed transfers are logged to infostream.
273 void complete(CURLcode res)
275 result.succeeded = (res == CURLE_OK);
276 result.timeout = (res == CURLE_OPERATION_TIMEDOUT);
277 result.data = oss.str();
279 // Get HTTP/FTP response code
280 result.response_code = 0;
282 if (curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE,
283 &result.response_code) != CURLE_OK) {
284 // We failed to get a response code; make sure it is still 0
285 result.response_code = 0;
289 if (res != CURLE_OK) {
290 infostream<<request.url<<" not found ("
291 <<curl_easy_strerror(res)<<")"
292 <<" (response code "<<result.response_code<<")"
// Cleanup: detach the easy handle from the multi handle, reset the options
// that point into this object and store the handle for reuse.
// NOTE(review): the lines between the log statement above and this point are
// not visible; presumably this section is the destructor and is guarded by
// checks that `multi` / `curl` are set -- confirm.
300 CURLMcode mres = curl_multi_remove_handle(multi, curl);
301 if (mres != CURLM_OK) {
302 errorstream<<"curl_multi_remove_handle"
303 <<" returned error code "<<mres
308 // Set safe options for the reusable cURL handle
// The handle must not keep pointers to oss, post_fields or httpheader
309 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
310 httpfetch_discardfunction);
311 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
312 curl_easy_setopt(curl, CURLOPT_POSTFIELDS, NULL);
313 if (httpheader != NULL) {
// Uninstall the header list from the handle before freeing it
314 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, NULL);
315 curl_slist_free_all(httpheader);
318 // Store the cURL handle for reuse
// Worker thread that performs asynchronous fetches with the cURL multi
// interface. Other threads post requests to m_requests; the queued and
// ongoing fetch containers are used only by the thread itself.
323 class CurlFetchThread : public JThread
// Payload of a request. For a clear request only its `caller` field is
// filled in (see requestClear()).
// NOTE(review): the declaration of the enclosing Request record (with the
// `type` and `event` fields used below) is not visible in this listing.
334 HTTPFetchRequest fetchrequest;
// Requests posted by other threads and consumed by the fetch thread
339 MutexedQueue<Request> m_requests;
// Maximum number of simultaneously ongoing fetches (at least 1)
340 size_t m_parallel_limit;
342 // Variables exclusively used within thread
// Fetches that have been started on the multi handle
343 std::vector<HTTPFetchOngoing*> m_all_ongoing;
// Fetches waiting for a free slot below m_parallel_limit
344 std::list<HTTPFetchRequest> m_queued_fetches;
// Values of parallel_limit below 1 are replaced by 1.
347 CurlFetchThread(int parallel_limit)
349 if (parallel_limit >= 1)
350 m_parallel_limit = parallel_limit;
352 m_parallel_limit = 1;
// Post a fetch request to the thread.
// NOTE(review): the request type assignment is not visible here;
// presumably RT_FETCH.
355 void requestFetch(const HTTPFetchRequest &fetchrequest)
359 req.fetchrequest = fetchrequest;
361 m_requests.push_back(req);
// Post a request to drop all fetches that belong to `caller`.
// NOTE(review): `event` is presumably stored in the request so that the
// thread can signal when the request has been processed -- the assignment
// is not visible here.
364 void requestClear(unsigned long caller, Event *event)
368 req.fetchrequest.caller = caller;
370 m_requests.push_back(req);
// Post a wake-up request.
// NOTE(review): presumably the body of requestWakeUp(), which is called
// from httpfetch_cleanup(); the method header is not visible here.
376 req.type = RT_WAKEUP;
378 m_requests.push_back(req);
382 // Handle a request from some other thread
383 // E.g. new fetch; clear fetches for one caller; wake up
384 void processRequest(const Request &req)
386 if (req.type == RT_FETCH) {
387 // New fetch, queue until there are less
388 // than m_parallel_limit ongoing fetches
389 m_queued_fetches.push_back(req.fetchrequest);
391 // see processQueued() for what happens next
394 else if (req.type == RT_CLEAR) {
395 unsigned long caller = req.fetchrequest.caller;
397 // Abort all ongoing fetches for the caller
// The iterator is advanced through the value returned by erase(), so the
// for statement has no increment expression
398 for (std::vector<HTTPFetchOngoing*>::iterator
399 it = m_all_ongoing.begin();
400 it != m_all_ongoing.end();) {
401 if ((*it)->request.caller == caller) {
403 it = m_all_ongoing.erase(it);
409 // Also abort all queued fetches for the caller
410 for (std::list<HTTPFetchRequest>::iterator
411 it = m_queued_fetches.begin();
412 it != m_queued_fetches.end();) {
413 if ((*it).caller == caller)
414 it = m_queued_fetches.erase(it);
419 else if (req.type == RT_WAKEUP) {
420 // Wakeup: Nothing to do, thread is awake at this point
// NOTE(review): presumably the event is signalled here so that the
// requesting thread can continue -- the statement is not visible.
423 if (req.event != NULL)
427 // Start new ongoing fetches if m_parallel_limit allows
428 void processQueued(CurlHandlePool *pool)
430 while (m_all_ongoing.size() < m_parallel_limit &&
431 !m_queued_fetches.empty()) {
// Oldest queued fetch first
432 HTTPFetchRequest request = m_queued_fetches.front();
433 m_queued_fetches.pop_front();
435 // Create ongoing fetch data and make a cURL handle
436 // Set cURL options based on HTTPFetchRequest
437 HTTPFetchOngoing *ongoing =
438 new HTTPFetchOngoing(request, pool);
440 // Initiate the connection (curl_multi_add_handle)
441 CURLcode res = ongoing->start(m_multi);
442 if (res == CURLE_OK) {
443 m_all_ongoing.push_back(ongoing);
// Starting failed: report the failure to the caller right away
446 ongoing->complete(res);
447 httpfetch_deliver_result(ongoing->result);
453 // Process CURLMsg (indicates completion of a fetch)
454 void processCurlMessage(CURLMsg *msg)
456 // Determine which ongoing fetch the message pertains to
// Fetches are matched by comparing easy handles
459 for (i = 0; i < m_all_ongoing.size(); ++i) {
460 if (m_all_ongoing[i]->curl == msg->easy_handle) {
// Messages other than CURLMSG_DONE, or for unknown handles, are ignored
465 if (msg->msg == CURLMSG_DONE && found) {
466 // m_all_ongoing[i] succeeded or failed.
467 HTTPFetchOngoing *ongoing = m_all_ongoing[i];
468 ongoing->complete(msg->data.result);
469 httpfetch_deliver_result(ongoing->result);
471 m_all_ongoing.erase(m_all_ongoing.begin() + i);
475 // Wait for a request from another thread, or timeout elapses
476 void waitForRequest(long timeout)
// Only wait when nothing is queued locally
478 if (m_queued_fetches.empty()) {
480 Request req = m_requests.pop_front(timeout);
// NOTE(review): presumably pop_front(timeout) throws this exception when
// the timeout elapses without a request; it is deliberately ignored.
483 catch (ItemNotFoundException &e) {}
487 // Wait until some IO happens, or timeout elapses
488 void waitForIO(long timeout)
// -1 means that no timeout has been determined yet
494 long select_timeout = -1;
495 struct timeval select_tv;
498 FD_ZERO(&read_fd_set);
499 FD_ZERO(&write_fd_set);
500 FD_ZERO(&exc_fd_set);
// Ask cURL which file descriptors it is currently interested in
502 mres = curl_multi_fdset(m_multi, &read_fd_set,
503 &write_fd_set, &exc_fd_set, &max_fd);
504 if (mres != CURLM_OK) {
505 errorstream<<"curl_multi_fdset"
506 <<" returned error code "<<mres
// Ask cURL how long it is willing to wait
511 mres = curl_multi_timeout(m_multi, &select_timeout);
512 if (mres != CURLM_OK) {
513 errorstream<<"curl_multi_timeout"
514 <<" returned error code "<<mres
519 // Limit timeout so new requests get through
520 if (select_timeout < 0 || select_timeout > timeout)
521 select_timeout = timeout;
523 if (select_timeout > 0) {
524 // in Winsock it is forbidden to pass three empty
525 // fd_sets to select(), so in that case use sleep_ms
// select_timeout is in milliseconds: split it into seconds and microseconds
527 select_tv.tv_sec = select_timeout / 1000;
528 select_tv.tv_usec = (select_timeout % 1000) * 1000;
529 int retval = select(max_fd + 1, &read_fd_set,
530 &write_fd_set, &exc_fd_set,
// NOTE(review): two variants of the error message follow; presumably a
// platform conditional that is not visible here selects one of them.
534 errorstream<<"select returned error code "
535 <<WSAGetLastError()<<std::endl;
537 errorstream<<"select returned error code "
543 sleep_ms(select_timeout);
// Thread main function body: set up the multi handle, run the request loop
// until a stop is requested, then release all remaining resources.
// NOTE(review): the function header and the declaration of `pool` are not
// visible in this listing.
551 log_register_thread("CurlFetchThread");
552 DSTACK(__FUNCTION_NAME);
556 m_multi = curl_multi_init();
557 if (m_multi == NULL) {
558 errorstream<<"curl_multi_init returned NULL\n";
562 assert(m_all_ongoing.empty());
564 while (!StopRequested()) {
565 BEGIN_DEBUG_EXCEPTION_HANDLER
568 Handle new async requests
// Drain everything other threads have posted so far
571 while (!m_requests.empty()) {
572 Request req = m_requests.pop_front();
575 processQueued(&pool);
578 Handle ongoing async requests
// Repeat while cURL asks to be called again immediately
581 int still_ongoing = 0;
582 while (curl_multi_perform(m_multi, &still_ongoing) ==
583 CURLM_CALL_MULTI_PERFORM)
587 Handle completed async requests
// Fewer running transfers than tracked fetches means some have finished
589 if (still_ongoing < (int) m_all_ongoing.size()) {
592 msg = curl_multi_info_read(m_multi, &msgs_in_queue);
593 while (msg != NULL) {
594 processCurlMessage(msg);
595 msg = curl_multi_info_read(m_multi, &msgs_in_queue);
600 If there are ongoing requests, wait for data
601 (with a timeout of 100ms so that new requests
604 If no ongoing requests, wait for a new request.
605 (Possibly an empty request that signals
606 that the thread should be stopped.)
608 if (m_all_ongoing.empty())
609 waitForRequest(100000000);
613 END_DEBUG_EXCEPTION_HANDLER(errorstream)
616 // Call curl_multi_remove_handle and cleanup easy handles
617 for (size_t i = 0; i < m_all_ongoing.size(); ++i) {
618 delete m_all_ongoing[i];
620 m_all_ongoing.clear();
// Fetches that were never started are dropped without delivering a result
622 m_queued_fetches.clear();
624 CURLMcode mres = curl_multi_cleanup(m_multi);
625 if (mres != CURLM_OK) {
626 errorstream<<"curl_multi_cleanup"
627 <<" returned error code "<<mres
// The fetch thread object; created by httpfetch_init() and deleted by
// httpfetch_cleanup().
635 CurlFetchThread *g_httpfetch_thread = NULL;
// Initialize the cURL based implementation: run cURL's global
// initialization and create the fetch thread object with the given limit of
// parallel fetches. The thread is only created here, not started;
// httpfetch_async() starts it when it is not running.
637 void httpfetch_init(int parallel_limit)
639 verbosestream<<"httpfetch_init: parallel_limit="<<parallel_limit
642 CURLcode res = curl_global_init(CURL_GLOBAL_DEFAULT);
// NOTE(review): a failed global init is only caught by this assert, so it
// goes unnoticed in builds where assert() is compiled out.
643 assert(res == CURLE_OK);
645 g_httpfetch_thread = new CurlFetchThread(parallel_limit);
// Shut down the cURL based implementation: stop and join the fetch thread,
// delete it and run cURL's global cleanup.
648 void httpfetch_cleanup()
650 verbosestream<<"httpfetch_cleanup: cleaning up"<<std::endl;
652 g_httpfetch_thread->Stop();
// The thread may be blocked waiting for a request; the wake-up request
// makes it return to its loop and notice the stop request
653 g_httpfetch_thread->requestWakeUp();
654 g_httpfetch_thread->Wait();
655 delete g_httpfetch_thread;
657 curl_global_cleanup();
// Queue an asynchronous fetch on the fetch thread and start the thread if
// it is not running yet.
660 void httpfetch_async(const HTTPFetchRequest &fetchrequest)
// The request is queued first so it is already waiting when the thread starts
662 g_httpfetch_thread->requestFetch(fetchrequest);
663 if (!g_httpfetch_thread->IsRunning())
664 g_httpfetch_thread->Start();
// Ask the fetch thread to drop all fetches that belong to `caller`.
// NOTE(review): when the thread is running an event is passed along;
// presumably this function then waits on that event until the thread has
// processed the request -- the declaration of `event` and the wait are not
// visible in this listing. When the thread is not running the request is
// only queued, without an event.
667 static void httpfetch_request_clear(unsigned long caller)
669 if (g_httpfetch_thread->IsRunning()) {
671 g_httpfetch_thread->requestClear(caller, &event);
675 g_httpfetch_thread->requestClear(caller, NULL);
// Perform a fetch synchronously and store its outcome in `fetchresult`.
// The fetch thread is not involved: start(NULL) uses the easy interface
// instead of a multi handle.
// NOTE(review): the declaration of `pool` is not visible in this listing;
// presumably a local CurlHandlePool.
679 void httpfetch_sync(const HTTPFetchRequest &fetchrequest,
680 HTTPFetchResult &fetchresult)
682 // Create ongoing fetch data and make a cURL handle
683 // Set cURL options based on HTTPFetchRequest
685 HTTPFetchOngoing ongoing(fetchrequest, &pool);
686 // Do the fetch (curl_easy_perform)
687 CURLcode res = ongoing.start(NULL);
688 // Update fetchresult
689 ongoing.complete(res);
690 fetchresult = ongoing.result;
698 Dummy httpfetch implementation that always returns an error.
// Dummy variant of httpfetch_init() for builds without cURL.
// NOTE(review): the body is not visible in this listing; presumably empty.
701 void httpfetch_init(int parallel_limit)
// Dummy variant of httpfetch_cleanup() for builds without cURL.
// NOTE(review): the body is not visible in this listing; presumably empty.
705 void httpfetch_cleanup()
// Dummy variant of httpfetch_async() for builds without cURL: logs an error
// and immediately delivers a failed result for the request, so the caller
// still receives an answer through the usual result queue.
709 void httpfetch_async(const HTTPFetchRequest &fetchrequest)
711 errorstream<<"httpfetch_async: unable to fetch "<<fetchrequest.url
712 <<" because USE_CURL=0"<<std::endl;
714 HTTPFetchResult fetchresult(fetchrequest); // sets succeeded = false etc.
715 httpfetch_deliver_result(fetchresult);
// Dummy variant of httpfetch_request_clear() for builds without cURL.
// NOTE(review): the body is not visible in this listing; presumably empty,
// since the dummy httpfetch_async() never leaves a fetch pending.
718 static void httpfetch_request_clear(unsigned long caller)
// Dummy variant of httpfetch_sync() for builds without cURL: logs an error
// and sets `fetchresult` to a failed result built from the request.
722 void httpfetch_sync(const HTTPFetchRequest &fetchrequest,
723 HTTPFetchResult &fetchresult)
725 errorstream<<"httpfetch_sync: unable to fetch "<<fetchrequest.url
726 <<" because USE_CURL=0"<<std::endl;
728 fetchresult = HTTPFetchResult(fetchrequest); // sets succeeded = false etc.