From 1bb72501dd6928f4a2f317120f49fd2a71db1fbf Mon Sep 17 00:00:00 2001
From: Stefan Weil
Date: Thu, 18 Jan 2024 08:38:49 +0100
Subject: [PATCH] Set User-Agent: header field in HTTP request for curl downloads

Some servers (for example wikimedia.org) don't allow downloads with the
default user agent of libcurl and send HTTP status 403, so OCR for images
on such servers fails.

Setting the user agent to "Tesseract OCR" allows OCR for images on those
servers.

Signed-off-by: Stefan Weil
---
 src/api/baseapi.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index a21798429a..42a1badb98 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -1184,6 +1184,10 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
       if (curlcode != CURLE_OK) {
         return error("curl_easy_setopt");
       }
+      curlcode = curl_easy_setopt(curl, CURLOPT_USERAGENT, "Tesseract OCR");
+      if (curlcode != CURLE_OK) {
+        return error("curl_easy_setopt");
+      }
       curlcode = curl_easy_perform(curl);
       if (curlcode != CURLE_OK) {
         return error("curl_easy_perform");