/*
Copyright (C) 2013 celeron55, Perttu Ahola <celeron55@gmail.com>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
20 #include "socket.h" // for select()
21 #include "httpfetch.h"
27 #include "jthread/jevent.h"
29 #include "exceptions.h"
32 #include "util/container.h"
33 #include "util/thread.h"
35 JMutex g_httpfetch_mutex;
36 std::map<unsigned long, std::list<HTTPFetchResult> > g_httpfetch_results;
38 static void httpfetch_deliver_result(const HTTPFetchResult &fetchresult)
40 unsigned long caller = fetchresult.caller;
41 if (caller != HTTPFETCH_DISCARD) {
42 JMutexAutoLock lock(g_httpfetch_mutex);
43 g_httpfetch_results[caller].push_back(fetchresult);
// Forward declaration; the definitions (one for the cURL implementation, one
// for the dummy implementation) follow further down in this file.
static void httpfetch_request_clear(unsigned long caller);
49 unsigned long httpfetch_caller_alloc()
51 JMutexAutoLock lock(g_httpfetch_mutex);
53 // Check each caller ID except HTTPFETCH_DISCARD
54 const unsigned long discard = HTTPFETCH_DISCARD;
55 for (unsigned long caller = discard + 1; caller != discard; ++caller) {
56 std::map<unsigned long, std::list<HTTPFetchResult> >::iterator
57 it = g_httpfetch_results.find(caller);
58 if (it == g_httpfetch_results.end()) {
59 verbosestream<<"httpfetch_caller_alloc: allocating "
61 // Access element to create it
62 g_httpfetch_results[caller];
67 assert("httpfetch_caller_alloc: ran out of caller IDs" == 0);
71 void httpfetch_caller_free(unsigned long caller)
73 verbosestream<<"httpfetch_caller_free: freeing "
76 httpfetch_request_clear(caller);
77 if (caller != HTTPFETCH_DISCARD) {
78 JMutexAutoLock lock(g_httpfetch_mutex);
79 g_httpfetch_results.erase(caller);
83 bool httpfetch_async_get(unsigned long caller, HTTPFetchResult &fetchresult)
85 JMutexAutoLock lock(g_httpfetch_mutex);
87 // Check that caller exists
88 std::map<unsigned long, std::list<HTTPFetchResult> >::iterator
89 it = g_httpfetch_results.find(caller);
90 if (it == g_httpfetch_results.end())
93 // Check that result queue is nonempty
94 std::list<HTTPFetchResult> &callerresults = it->second;
95 if (callerresults.empty())
99 fetchresult = callerresults.front();
100 callerresults.pop_front();
105 #include <curl/curl.h>
/* USE_CURL is on: use cURL based httpfetch implementation */
// cURL write callback that appends the received bytes to a
// std::ostringstream.
//
// ptr      - start of the received chunk
// size     - size of one element
// nmemb    - number of elements; the chunk is size * nmemb bytes long
// userdata - the std::ostringstream* to append to
//
// Returns the number of bytes written to the stream, which is always the
// whole chunk.
static size_t httpfetch_writefunction(
		char *ptr, size_t size, size_t nmemb, void *userdata)
{
	std::ostringstream *stream = static_cast<std::ostringstream*>(userdata);
	size_t count = size * nmemb;
	stream->write(ptr, count);
	return count;
}
// cURL write callback that throws the received data away.
//
// The whole chunk (size * nmemb bytes) is reported as consumed, because
// libcurl treats any other return value from a write callback as an error
// and aborts the transfer.
static size_t httpfetch_discardfunction(
		char *ptr, size_t size, size_t nmemb, void *userdata)
{
	return size * nmemb;
}
128 std::list<CURL*> handles;
134 for (std::list<CURL*>::iterator it = handles.begin();
135 it != handles.end(); ++it) {
136 curl_easy_cleanup(*it);
142 if (handles.empty()) {
143 curl = curl_easy_init();
145 errorstream<<"curl_easy_init returned NULL"<<std::endl;
149 curl = handles.front();
154 void free(CURL *handle)
157 handles.push_back(handle);
// Everything that belongs to one HTTP fetch: the request, the result that is
// being built, the buffer the response body is written into and the cURL
// handle that performs the transfer.
// NOTE(review): this text looks like a lossy extraction -- every line carries
// a leading number, and several short lines (braces, `else`, some member
// declarations, initializers and statements) appear to be missing. Restore
// the block from the original file before relying on it.
161 struct HTTPFetchOngoing
// Pool the cURL handle is taken from
163 CurlHandlePool *pool;
166 HTTPFetchRequest request;
167 HTTPFetchResult result;
// Collects the response body (filled by httpfetch_writefunction)
168 std::ostringstream oss;
// Extra request headers in the list form cURL expects (CURLOPT_HTTPHEADER)
170 struct curl_slist *httpheader;
// Takes a cURL handle from the pool and configures it for the request.
172 HTTPFetchOngoing(HTTPFetchRequest request_, CurlHandlePool *pool_):
178 oss(std::ios::binary),
181 curl = pool->alloc();
183 // Set static cURL options
184 curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1);
185 curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
186 curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
187 curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 1);
189 #if LIBCURL_VERSION_NUM >= 0x071304
190 // Restrict protocols so that curl vulnerabilities in
191 // other protocols don't affect us.
192 // These settings were introduced in curl 7.19.4.
// NOTE(review): the definition of `protocols` is not visible here.
198 curl_easy_setopt(curl, CURLOPT_PROTOCOLS, protocols);
199 curl_easy_setopt(curl, CURLOPT_REDIR_PROTOCOLS, protocols);
202 // Set cURL options based on HTTPFetchRequest
203 curl_easy_setopt(curl, CURLOPT_URL,
204 request.url.c_str());
// NOTE(review): the value argument of CURLOPT_TIMEOUT_MS is not visible here.
205 curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS,
207 curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT_MS,
208 request.connect_timeout);
// Only set a user agent when the request specifies one
210 if (request.useragent != "")
211 curl_easy_setopt(curl, CURLOPT_USERAGENT, request.useragent.c_str());
213 // Set up a write callback that writes to the
214 // ostringstream ongoing->oss, unless the data
215 // is to be discarded
216 if (request.caller == HTTPFETCH_DISCARD) {
217 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
218 httpfetch_discardfunction);
219 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
222 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
223 httpfetch_writefunction);
224 curl_easy_setopt(curl, CURLOPT_WRITEDATA, &oss);
226 // Set POST (or GET) data
227 if (request.post_fields.empty()) {
228 curl_easy_setopt(curl, CURLOPT_HTTPGET, 1);
231 curl_easy_setopt(curl, CURLOPT_POST, 1);
232 curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE,
233 request.post_fields.size());
234 curl_easy_setopt(curl, CURLOPT_POSTFIELDS,
235 request.post_fields.c_str());
236 // request.post_fields must now *never* be
237 // modified until CURLOPT_POSTFIELDS is cleared
239 // Set additional HTTP headers
240 for (size_t i = 0; i < request.extra_headers.size(); ++i) {
241 httpheader = curl_slist_append(
243 request.extra_headers[i].c_str());
245 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, httpheader);
// Starts the transfer. With a multi handle the easy handle is registered
// with it (asynchronous); otherwise the transfer is performed right away
// with curl_easy_perform. The condition lines selecting between the return
// paths are not visible here.
249 CURLcode start(CURLM *multi_)
252 return CURLE_FAILED_INIT;
255 // Multi interface (async)
256 CURLMcode mres = curl_multi_add_handle(multi_, curl);
257 if (mres != CURLM_OK) {
258 errorstream<<"curl_multi_add_handle"
259 <<" returned error code "<<mres
261 return CURLE_FAILED_INIT;
263 multi = multi_; // store for curl_multi_remove_handle
267 // Easy interface (sync)
268 return curl_easy_perform(curl);
// Fills in `result` from the cURL result code `res` and the collected
// response body, and logs failed fetches.
272 void complete(CURLcode res)
274 result.succeeded = (res == CURLE_OK);
275 result.timeout = (res == CURLE_OPERATION_TIMEDOUT);
276 result.data = oss.str();
278 // Get HTTP/FTP response code
279 result.response_code = 0;
281 if (curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE,
282 &result.response_code) != CURLE_OK) {
283 //we failed to get a return code make sure it is still 0
284 result.response_code = 0;
288 if (res != CURLE_OK) {
289 infostream<<request.url<<" not found ("
290 <<curl_easy_strerror(res)<<")"
291 <<" (response code "<<result.response_code<<")"
// NOTE(review): the lines below are presumably the destructor (its header is
// not visible here): detach from the multi handle, reset the per-request
// options and free the header list.
299 CURLMcode mres = curl_multi_remove_handle(multi, curl);
300 if (mres != CURLM_OK) {
301 errorstream<<"curl_multi_remove_handle"
302 <<" returned error code "<<mres
307 // Set safe options for the reusable cURL handle
308 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,
309 httpfetch_discardfunction);
310 curl_easy_setopt(curl, CURLOPT_WRITEDATA, NULL);
311 curl_easy_setopt(curl, CURLOPT_POSTFIELDS, NULL);
312 if (httpheader != NULL) {
313 curl_easy_setopt(curl, CURLOPT_HTTPHEADER, NULL);
314 curl_slist_free_all(httpheader);
317 // Store the cURL handle for reuse
// Worker thread that carries out asynchronous fetches with the cURL multi
// interface. Other threads hand it work through m_requests; the remaining
// state is only touched by the thread itself.
// NOTE(review): this text looks like a lossy extraction -- every line carries
// a leading number, and several short lines (braces, `else`, the request
// type/struct declarations, some method headers and statements) appear to be
// missing. Restore the block from the original file before relying on it.
322 class CurlFetchThread : public JThread
// Payload of a queued request: the fetch to start, or (via its caller field)
// the caller whose fetches are to be cleared
333 HTTPFetchRequest fetchrequest;
// Requests posted by other threads
338 MutexedQueue<Request> m_requests;
// Upper bound on simultaneously ongoing fetches
339 size_t m_parallel_limit;
341 // Variables exclusively used within thread
// Fetches currently in progress
342 std::vector<HTTPFetchOngoing*> m_all_ongoing;
// Fetches waiting for a free slot
343 std::list<HTTPFetchRequest> m_queued_fetches;
// parallel_limit is used when it is at least 1; a limit of 1 is the fallback
// (the `else` line is not visible here).
346 CurlFetchThread(int parallel_limit)
348 if (parallel_limit >= 1)
349 m_parallel_limit = parallel_limit;
351 m_parallel_limit = 1;
// Posts a request to start a new fetch.
354 void requestFetch(const HTTPFetchRequest &fetchrequest)
358 req.fetchrequest = fetchrequest;
360 m_requests.push_back(req);
// Posts a request to drop all fetches of `caller`. `event` is presumably
// signalled once the request has been handled -- TODO confirm.
363 void requestClear(unsigned long caller, Event *event)
367 req.fetchrequest.caller = caller;
369 m_requests.push_back(req);
// Posts a wake-up request (the enclosing function header is not visible here)
375 req.type = RT_WAKEUP;
377 m_requests.push_back(req);
381 // Handle a request from some other thread
382 // E.g. new fetch; clear fetches for one caller; wake up
383 void processRequest(const Request &req)
385 if (req.type == RT_FETCH) {
386 // New fetch, queue until there are less
387 // than m_parallel_limit ongoing fetches
388 m_queued_fetches.push_back(req.fetchrequest);
390 // see processQueued() for what happens next
393 else if (req.type == RT_CLEAR) {
394 unsigned long caller = req.fetchrequest.caller;
396 // Abort all ongoing fetches for the caller
397 for (std::vector<HTTPFetchOngoing*>::iterator
398 it = m_all_ongoing.begin();
399 it != m_all_ongoing.end();) {
400 if ((*it)->request.caller == caller) {
402 it = m_all_ongoing.erase(it);
408 // Also abort all queued fetches for the caller
409 for (std::list<HTTPFetchRequest>::iterator
410 it = m_queued_fetches.begin();
411 it != m_queued_fetches.end();) {
412 if ((*it).caller == caller)
413 it = m_queued_fetches.erase(it);
418 else if (req.type == RT_WAKEUP) {
419 // Wakeup: Nothing to do, thread is awake at this point
// NOTE(review): the statement guarded by this check is not visible here.
422 if (req.event != NULL)
426 // Start new ongoing fetches if m_parallel_limit allows
427 void processQueued(CurlHandlePool *pool)
429 while (m_all_ongoing.size() < m_parallel_limit &&
430 !m_queued_fetches.empty()) {
431 HTTPFetchRequest request = m_queued_fetches.front();
432 m_queued_fetches.pop_front();
434 // Create ongoing fetch data and make a cURL handle
435 // Set cURL options based on HTTPFetchRequest
436 HTTPFetchOngoing *ongoing =
437 new HTTPFetchOngoing(request, pool);
439 // Initiate the connection (curl_multi_add_handle)
440 CURLcode res = ongoing->start(m_multi);
441 if (res == CURLE_OK) {
442 m_all_ongoing.push_back(ongoing);
// Could not be started: report the failure to the caller right away
445 ongoing->complete(res);
446 httpfetch_deliver_result(ongoing->result);
452 // Process CURLMsg (indicates completion of a fetch)
453 void processCurlMessage(CURLMsg *msg)
455 // Determine which ongoing fetch the message pertains to
458 for (i = 0; i < m_all_ongoing.size(); ++i) {
459 if (m_all_ongoing[i]->curl == msg->easy_handle) {
464 if (msg->msg == CURLMSG_DONE && found) {
465 // m_all_ongoing[i] succeeded or failed.
466 HTTPFetchOngoing *ongoing = m_all_ongoing[i];
467 ongoing->complete(msg->data.result);
468 httpfetch_deliver_result(ongoing->result);
470 m_all_ongoing.erase(m_all_ongoing.begin() + i);
474 // Wait for a request from another thread, or timeout elapses
475 void waitForRequest(long timeout)
// Only wait on the request queue when no fetches are waiting to be started
477 if (m_queued_fetches.empty()) {
479 Request req = m_requests.pop_front(timeout);
// Presumably thrown when the timeout elapses without a request -- TODO confirm
482 catch (ItemNotFoundException &e) {}
486 // Wait until some IO happens, or timeout elapses
487 void waitForIO(long timeout)
493 long select_timeout = -1;
494 struct timeval select_tv;
497 FD_ZERO(&read_fd_set);
498 FD_ZERO(&write_fd_set);
499 FD_ZERO(&exc_fd_set);
// Collect the file descriptors cURL wants to have watched
501 mres = curl_multi_fdset(m_multi, &read_fd_set,
502 &write_fd_set, &exc_fd_set, &max_fd);
503 if (mres != CURLM_OK) {
504 errorstream<<"curl_multi_fdset"
505 <<" returned error code "<<mres
// Ask cURL how long it is willing to wait
510 mres = curl_multi_timeout(m_multi, &select_timeout);
511 if (mres != CURLM_OK) {
512 errorstream<<"curl_multi_timeout"
513 <<" returned error code "<<mres
518 // Limit timeout so new requests get through
519 if (select_timeout < 0 || select_timeout > timeout)
520 select_timeout = timeout;
522 if (select_timeout > 0) {
// Timeout is in milliseconds; split into seconds and microseconds
523 select_tv.tv_sec = select_timeout / 1000;
524 select_tv.tv_usec = (select_timeout % 1000) * 1000;
525 int retval = select(max_fd + 1, &read_fd_set,
526 &write_fd_set, &exc_fd_set,
530 errorstream<<"select returned error code "
531 <<WSAGetLastError()<<std::endl;
533 errorstream<<"select returned error code "
// Thread main function (its header is not visible here): sets up the cURL
// multi handle, then loops until a stop is requested.
543 log_register_thread("CurlFetchThread");
544 DSTACK(__FUNCTION_NAME);
548 m_multi = curl_multi_init();
549 if (m_multi == NULL) {
550 errorstream<<"curl_multi_init returned NULL\n";
554 assert(m_all_ongoing.empty());
556 while (!StopRequested()) {
557 BEGIN_DEBUG_EXCEPTION_HANDLER
560 Handle new async requests
563 while (!m_requests.empty()) {
564 Request req = m_requests.pop_front();
567 processQueued(&pool);
570 Handle ongoing async requests
573 int still_ongoing = 0;
574 while (curl_multi_perform(m_multi, &still_ongoing) ==
575 CURLM_CALL_MULTI_PERFORM)
579 Handle completed async requests
// Fewer transfers are running than are tracked, so some have finished
581 if (still_ongoing < (int) m_all_ongoing.size()) {
584 msg = curl_multi_info_read(m_multi, &msgs_in_queue);
585 while (msg != NULL) {
586 processCurlMessage(msg);
587 msg = curl_multi_info_read(m_multi, &msgs_in_queue);
592 If there are ongoing requests, wait for data
593 (with a timeout of 100ms so that new requests
596 If no ongoing requests, wait for a new request.
597 (Possibly an empty request that signals
598 that the thread should be stopped.)
600 if (m_all_ongoing.empty())
601 waitForRequest(100000000);
605 END_DEBUG_EXCEPTION_HANDLER(errorstream)
608 // Call curl_multi_remove_handle and cleanup easy handles
609 for (size_t i = 0; i < m_all_ongoing.size(); ++i) {
610 delete m_all_ongoing[i];
612 m_all_ongoing.clear();
614 m_queued_fetches.clear();
616 CURLMcode mres = curl_multi_cleanup(m_multi);
617 if (mres != CURLM_OK) {
618 errorstream<<"curl_multi_cleanup"
619 <<" returned error code "<<mres
627 CurlFetchThread *g_httpfetch_thread = NULL;
629 void httpfetch_init(int parallel_limit)
631 verbosestream<<"httpfetch_init: parallel_limit="<<parallel_limit
634 CURLcode res = curl_global_init(CURL_GLOBAL_DEFAULT);
635 assert(res == CURLE_OK);
637 g_httpfetch_thread = new CurlFetchThread(parallel_limit);
640 void httpfetch_cleanup()
642 verbosestream<<"httpfetch_cleanup: cleaning up"<<std::endl;
644 g_httpfetch_thread->Stop();
645 g_httpfetch_thread->requestWakeUp();
646 g_httpfetch_thread->Wait();
647 delete g_httpfetch_thread;
649 curl_global_cleanup();
652 void httpfetch_async(const HTTPFetchRequest &fetchrequest)
654 g_httpfetch_thread->requestFetch(fetchrequest);
655 if (!g_httpfetch_thread->IsRunning())
656 g_httpfetch_thread->Start();
659 static void httpfetch_request_clear(unsigned long caller)
661 if (g_httpfetch_thread->IsRunning()) {
663 g_httpfetch_thread->requestClear(caller, &event);
667 g_httpfetch_thread->requestClear(caller, NULL);
671 void httpfetch_sync(const HTTPFetchRequest &fetchrequest,
672 HTTPFetchResult &fetchresult)
674 // Create ongoing fetch data and make a cURL handle
675 // Set cURL options based on HTTPFetchRequest
677 HTTPFetchOngoing ongoing(fetchrequest, &pool);
678 // Do the fetch (curl_easy_perform)
679 CURLcode res = ongoing.start(NULL);
680 // Update fetchresult
681 ongoing.complete(res);
682 fetchresult = ongoing.result;
/* Dummy httpfetch implementation that always returns an error. */
// Dummy implementation: there is nothing to initialize without cURL.
void httpfetch_init(int parallel_limit)
{
}
// Dummy implementation: there is nothing to clean up without cURL.
void httpfetch_cleanup()
{
}
701 void httpfetch_async(const HTTPFetchRequest &fetchrequest)
703 errorstream<<"httpfetch_async: unable to fetch "<<fetchrequest.url
704 <<" because USE_CURL=0"<<std::endl;
706 HTTPFetchResult fetchresult(fetchrequest); // sets succeeded = false etc.
707 httpfetch_deliver_result(fetchresult);
// Dummy implementation: no fetches are ever pending, so there is nothing to
// clear.
static void httpfetch_request_clear(unsigned long caller)
{
}
714 void httpfetch_sync(const HTTPFetchRequest &fetchrequest,
715 HTTPFetchResult &fetchresult)
717 errorstream<<"httpfetch_sync: unable to fetch "<<fetchrequest.url
718 <<" because USE_CURL=0"<<std::endl;
720 fetchresult = HTTPFetchResult(fetchrequest); // sets succeeded = false etc.