Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Firefox curl-impersonate #186

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions curl_cffi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
__all__ = [
"Curl",
"CurlChrome",
"CurlFirefox",
"CurlInfo",
"CurlOpt",
"CurlMOpt",
"CurlECode",
"CurlHttpVersion",
"CurlError",
"AsyncCurl",
"AsyncCurlChrome",
"AsyncCurlFirefox",
"ffi",
"lib",
"ffi_ff",
"lib_ff",
]

# This line includes _wrapper.so into the wheel
from ._wrapper import ffi, lib
# These imports make sure _wrapper_{chrome,ff}.so are bundled into the wheel
from ._wrapper_chrome import ffi, lib
from ._wrapper_ff import ffi as ffi_ff, lib as lib_ff

from .const import CurlInfo, CurlMOpt, CurlOpt, CurlECode, CurlHttpVersion
from .curl import Curl, CurlError
from .aio import AsyncCurl
from .curl import Curl, CurlChrome, CurlFirefox, CurlError
from .aio import AsyncCurl, AsyncCurlChrome, AsyncCurlFirefox
Comment on lines +28 to +29
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should Curl and AsyncCurl be deprecated in favor of CurlChrome and AsyncCurlChrome?


from .__version__ import __title__, __version__, __description__, __curl_version__
from .__version__ import __title__, __version__, __description__, __curl_version__, __curl_chrome_version__, __curl_firefox_version__
4 changes: 3 additions & 1 deletion curl_cffi/__version__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# New in version 3.8.
# from importlib import metadata
from .curl import Curl
from .curl import Curl, CurlFirefox


__title__ = "curl_cffi"
Expand All @@ -9,3 +9,5 @@
__description__ = "libcurl ffi bindings for Python, with impersonation support"
__version__ = "0.5.10"
__curl_version__ = Curl().version().decode()
__curl_chrome_version__ = __curl_version__
__curl_firefox_version__ = CurlFirefox().version().decode()
131 changes: 74 additions & 57 deletions curl_cffi/aio.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from typing import Any
from weakref import WeakSet, WeakKeyDictionary

from ._wrapper import ffi, lib # type: ignore
from ._wrapper_chrome import ffi as ffi_chrome, lib as lib_chrome # type: ignore
from ._wrapper_ff import ffi as ffi_ff, lib as lib_ff # type: ignore
from .const import CurlMOpt
from .curl import Curl, DEFAULT_CACERT

Expand Down Expand Up @@ -81,54 +82,60 @@ def _get_selector_noop(loop) -> asyncio.AbstractEventLoop:
CURLMSG_DONE = 1


@ffi.def_extern()
def timer_function(curlm, timeout_ms: int, clientp: Any):
    """Timer callback handed to libcurl's multi interface.

    A ``timeout_ms`` of -1 cancels every timer scheduled so far; any other
    value schedules ``process_data`` on the owner's event loop after that
    many milliseconds.

    see: https://curl.se/libcurl/c/CURLMOPT_TIMERFUNCTION.html
    """
    # clientp carries the handle of the Python object that owns the multi handle
    owner = ffi.from_handle(clientp)

    if timeout_ms != -1:
        scheduled = owner.loop.call_later(
            timeout_ms / 1000,  # libcurl speaks milliseconds, asyncio seconds
            owner.process_data,
            CURL_SOCKET_TIMEOUT,  # -1
            CURL_POLL_NONE,  # 0
        )
        owner._timers.add(scheduled)
        return

    for pending in owner._timers:
        pending.cancel()
    owner._timers = WeakSet()


@ffi.def_extern()
def socket_function(curl, sockfd: int, what: int, clientp: Any, data: Any):
    """Socket callback: mirror libcurl's interest in ``sockfd`` onto the event loop.

    Any existing reader/writer registration for the descriptor is dropped
    first, then re-added according to the ``what`` bitmask.

    Raises:
        TypeError: libcurl asked to remove a descriptor that is not tracked.
    """
    owner = ffi.from_handle(clientp)
    event_loop = owner.loop

    wants_read = what & CURL_POLL_IN
    wants_write = what & CURL_POLL_OUT
    wants_remove = what & CURL_POLL_REMOVE

    if wants_read or wants_write or wants_remove:
        if sockfd in owner._sockfds:
            # Start from a clean slate; the wanted direction(s) are re-added below
            event_loop.remove_reader(sockfd)
            event_loop.remove_writer(sockfd)
            owner._sockfds.remove(sockfd)
        elif wants_remove:
            raise TypeError(f"File descriptor {sockfd} not found.")

    if wants_read:
        event_loop.add_reader(sockfd, owner.process_data, sockfd, CURL_CSELECT_IN)
        owner._sockfds.add(sockfd)
    if wants_write:
        event_loop.add_writer(sockfd, owner.process_data, sockfd, CURL_CSELECT_OUT)
        owner._sockfds.add(sockfd)
async_curl._sockfds.add(sockfd)
def _register_callbacks(ffi):
    """Define the curl_multi timer and socket callbacks on one cffi binding.

    ``AsyncCurl`` looks the callbacks up as ``lib.timer_function`` and
    ``lib.socket_function`` on whichever binding it was created with, so they
    have to be registered on every binding.

    ``ffi`` is a parameter on purpose: each callback closes over the binding
    it was registered on. Defining the callbacks directly inside a
    module-level loop would make them read the loop variable at call time,
    i.e. always the last binding, regardless of which library invoked them.
    """

    @ffi.def_extern()
    def timer_function(curlm, timeout_ms: int, clientp: Any):
        """
        see: https://curl.se/libcurl/c/CURLMOPT_TIMERFUNCTION.html
        """
        async_curl = ffi.from_handle(clientp)
        if timeout_ms == -1:
            # -1 cancels everything scheduled so far
            for timer in async_curl._timers:
                timer.cancel()
            async_curl._timers = WeakSet()
        else:
            timer = async_curl.loop.call_later(
                timeout_ms / 1000,
                async_curl.process_data,
                CURL_SOCKET_TIMEOUT,  # -1
                CURL_POLL_NONE,  # 0
            )
            async_curl._timers.add(timer)

    @ffi.def_extern()
    def socket_function(curl, sockfd: int, what: int, clientp: Any, data: Any):
        """Mirror libcurl's interest in ``sockfd`` onto the asyncio loop.

        Raises:
            TypeError: removal was requested for a descriptor not tracked.
        """
        async_curl = ffi.from_handle(clientp)
        loop = async_curl.loop

        if what & CURL_POLL_IN or what & CURL_POLL_OUT or what & CURL_POLL_REMOVE:
            if sockfd in async_curl._sockfds:
                # Drop the old registration; the wanted directions are re-added below
                loop.remove_reader(sockfd)
                loop.remove_writer(sockfd)
                async_curl._sockfds.remove(sockfd)
            elif what & CURL_POLL_REMOVE:
                message = f"File descriptor {sockfd} not found."
                raise TypeError(message)

        if what & CURL_POLL_IN:
            loop.add_reader(sockfd, async_curl.process_data, sockfd, CURL_CSELECT_IN)
            async_curl._sockfds.add(sockfd)
        if what & CURL_POLL_OUT:
            loop.add_writer(sockfd, async_curl.process_data, sockfd, CURL_CSELECT_OUT)
            async_curl._sockfds.add(sockfd)


# NOTE: `ffi` / `lib` deliberately stay bound at module level after this loop
# (to the last pair), in case other code in this module still refers to them.
for ffi, lib in ((ffi_chrome, lib_chrome), (ffi_ff, lib_ff)):
    _register_callbacks(ffi)


class AsyncCurl:
"""Wrapper around curl_multi handle to provide asyncio support. It uses the libcurl
socket_action APIs."""

def __init__(self, cacert: str = DEFAULT_CACERT, loop=None):
self._curlm = lib.curl_multi_init()
def __init__(self, cacert: str = DEFAULT_CACERT, loop = None, _ffi = ffi_chrome, _lib = lib_chrome):
self._ffi = _ffi
self._lib = _lib
self._curlm = self._lib.curl_multi_init()
self._cacert = cacert
self._curl2future = {} # curl to future map
self._curl2curl = {} # c curl to Curl
Expand All @@ -141,9 +148,9 @@ def __init__(self, cacert: str = DEFAULT_CACERT, loop=None):
self._setup()

def _setup(self):
    """Install the timer/socket callbacks and the handle passed back to them.

    Everything goes through ``self._lib`` / ``self._ffi`` so the callbacks and
    the handle come from the same binding that created ``self._curlm``.
    """
    self.setopt(CurlMOpt.TIMERFUNCTION, self._lib.timer_function)
    self.setopt(CurlMOpt.SOCKETFUNCTION, self._lib.socket_function)
    # Stored on self: the callbacks turn this handle back into the instance
    # with ffi.from_handle(), which needs the handle object to stay alive.
    self._self_handle = self._ffi.new_handle(self)
    self.setopt(CurlMOpt.SOCKETDATA, self._self_handle)
    self.setopt(CurlMOpt.TIMERDATA, self._self_handle)

Expand All @@ -153,11 +160,11 @@ def close(self):
self._checker.cancel()
# Close all pending futures
for curl, future in self._curl2future.items():
lib.curl_multi_remove_handle(self._curlm, curl._curl)
self._lib.curl_multi_remove_handle(self._curlm, curl._curl)
if not future.done() and not future.cancelled():
future.set_result(None)
# Cleanup curl_multi handle
lib.curl_multi_cleanup(self._curlm)
self._lib.curl_multi_cleanup(self._curlm)
self._curlm = None
# Remove add readers and writers
for sockfd in self._sockfds:
Expand All @@ -180,16 +187,16 @@ def add_handle(self, curl: Curl):
`perform` in the async world."""
# import pdb; pdb.set_trace()
curl._ensure_cacert()
lib.curl_multi_add_handle(self._curlm, curl._curl)
self._lib.curl_multi_add_handle(self._curlm, curl._curl)
future = self.loop.create_future()
self._curl2future[curl] = future
self._curl2curl[curl._curl] = curl
return future

def socket_action(self, sockfd: int, ev_bitmask: int) -> int:
    """Call libcurl socket_action function

    Invokes ``curl_multi_socket_action`` exactly once, on this instance's own
    binding, and returns the integer libcurl wrote to the running-handles
    out-parameter.
    """
    running_handle = self._ffi.new("int *")
    self._lib.curl_multi_socket_action(self._curlm, sockfd, ev_bitmask, running_handle)
    return running_handle[0]

def process_data(self, sockfd: int, ev_bitmask: int):
Expand All @@ -200,11 +207,11 @@ def process_data(self, sockfd: int, ev_bitmask: int):

self.socket_action(sockfd, ev_bitmask)

msg_in_queue = ffi.new("int *")
msg_in_queue = self._ffi.new("int *")
while True:
curl_msg = lib.curl_multi_info_read(self._curlm, msg_in_queue)
curl_msg = self._lib.curl_multi_info_read(self._curlm, msg_in_queue)
# print("message in queue", msg_in_queue[0], curl_msg)
if curl_msg == ffi.NULL:
if curl_msg == self._ffi.NULL:
break
if curl_msg.msg == CURLMSG_DONE:
# print("curl_message", curl_msg.msg, curl_msg.data.result)
Expand All @@ -219,7 +226,7 @@ def process_data(self, sockfd: int, ev_bitmask: int):
print("NOT DONE") # Will not reach, for no other code being defined.

def _pop_future(self, curl: Curl):
    """Detach ``curl`` from the multi handle and return its pending future.

    The handle is removed once, through this instance's own library binding.
    Returns ``None`` when no future is registered for ``curl``.
    """
    self._lib.curl_multi_remove_handle(self._curlm, curl._curl)
    self._curl2curl.pop(curl._curl, None)
    return self._curl2future.pop(curl, None)

Expand All @@ -243,4 +250,14 @@ def set_exception(self, curl: Curl, exception):

def setopt(self, option, value):
    """Wrapper around curl_multi_setopt.

    Uses this instance's own library binding (``self._lib``) and returns
    whatever ``curl_multi_setopt`` returns.
    """
    return self._lib.curl_multi_setopt(self._curlm, option, value)


class AsyncCurlChrome(AsyncCurl):
    """AsyncCurl under an explicit Chrome name.

    Adds nothing of its own, so the base-class constructor defaults apply.
    """


class AsyncCurlFirefox(AsyncCurl):
    """AsyncCurl bound to the Firefox wrapper (``ffi_ff`` / ``lib_ff``)."""

    def __init__(self, loop=None):
        # NOTE(review): cacert is forced to None here rather than exposed as a
        # parameter like on the base class — confirm this is intentional.
        super().__init__(
            cacert=None,
            loop=loop,
            _ffi=ffi_ff,
            _lib=lib_ff,
        )
65 changes: 33 additions & 32 deletions curl_cffi/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,42 +3,43 @@

from cffi import FFI

# arch = "%s-%s" % (os.uname().sysname, os.uname().machine)
uname = platform.uname()

_HERE = os.path.dirname(__file__)

# Every wrapper shares the same declarations, so read them only once.
with open(os.path.join(_HERE, "ffi/cdef.c")) as f:
    cdef_content = f.read()


def _make_ffibuilder(distro):
    """Return a configured FFI builder for ``curl_cffi._wrapper_<distro>``.

    ``distro`` selects both the extension module name and, outside Windows,
    the ``curl-impersonate-<distro>`` library it links against.
    """
    builder = FFI()
    builder.set_source(
        "curl_cffi._wrapper_" + distro,
        """
            #include "shim.h"
        """,
        libraries=(
            ["curl-impersonate-" + distro] if uname.system != "Windows" else ["libcurl"]
        ),
        library_dirs=[
            "/Users/runner/work/_temp/install/lib"
            if uname.system == "Darwin" and uname.machine == "x86_64"
            else "./lib"
            if uname.system == "Windows"
            else "/usr/local/lib"  # Linux and macOS arm64
        ],
        source_extension=".c",
        include_dirs=[
            os.path.join(_HERE, "include"),
            os.path.join(_HERE, "ffi"),
        ],
        sources=[
            os.path.join(_HERE, "ffi/shim.c"),
        ],
        extra_compile_args=(
            ["-Wno-implicit-function-declaration"] if uname.system == "Darwin" else []
        ),
        # extra_link_args=["-Wl,-rpath,$ORIGIN/../libcurl/" + arch],
    )
    builder.cdef(cdef_content)
    return builder


# One builder per impersonation target, in build order.
ffibuilders = [_make_ffibuilder(distro) for distro in ("chrome", "ff")]

# `ffibuilder` stays available as a module-level name, bound to the last
# builder ("ff").
# NOTE(review): if packaging config points at `build.py:ffibuilder`, only this
# one builder is visible to it — confirm how the chrome wrapper gets built there.
ffibuilder = ffibuilders[-1]


if __name__ == "__main__":
    # Compile every wrapper, not just the last one configured.
    for _builder in ffibuilders:
        _builder.compile(verbose=False)
Loading
Loading