From 57eada7d7b75cc34a44d7a2f43b42c391b00b65a Mon Sep 17 00:00:00 2001 From: ramsy0dev <0ramsy0@gmail.com> Date: Wed, 13 Sep 2023 01:59:14 +0100 Subject: [PATCH] Init commit --- README.md | 0 poetry.lock | 365 +++++++++++++++++ proxycrawler/__main__.py | 4 + proxycrawler/cli.py | 196 +++++++++ proxycrawler/constants.py | 27 ++ proxycrawler/helpers.py | 33 ++ proxycrawler/messages/debug.py | 6 + proxycrawler/messages/errors.py | 23 ++ proxycrawler/messages/info.py | 20 + proxycrawler/src/database/database_handler.py | 120 ++++++ proxycrawler/src/database/tables.py | 34 ++ .../src/models/free_proxy_list_model.py | 121 ++++++ proxycrawler/src/models/geonode_model.py | 177 ++++++++ proxycrawler/src/models/proxy_model.py | 116 ++++++ proxycrawler/src/proxycrawler.py | 378 ++++++++++++++++++ proxycrawler/src/services/freeproxylist.py | 63 +++ proxycrawler/src/services/geonode.py | 97 +++++ proxycrawler/src/thread_task.py | 25 ++ pyproject.toml | 20 + 19 files changed, 1825 insertions(+) create mode 100644 README.md create mode 100644 poetry.lock create mode 100644 proxycrawler/__main__.py create mode 100644 proxycrawler/cli.py create mode 100644 proxycrawler/constants.py create mode 100644 proxycrawler/helpers.py create mode 100644 proxycrawler/messages/debug.py create mode 100644 proxycrawler/messages/errors.py create mode 100644 proxycrawler/messages/info.py create mode 100644 proxycrawler/src/database/database_handler.py create mode 100644 proxycrawler/src/database/tables.py create mode 100644 proxycrawler/src/models/free_proxy_list_model.py create mode 100644 proxycrawler/src/models/geonode_model.py create mode 100644 proxycrawler/src/models/proxy_model.py create mode 100644 proxycrawler/src/proxycrawler.py create mode 100644 proxycrawler/src/services/freeproxylist.py create mode 100644 proxycrawler/src/services/geonode.py create mode 100644 proxycrawler/src/thread_task.py create mode 100644 pyproject.toml diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..6bce833 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,365 @@ +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. + +[[package]] +name = "beautifulsoup4" +version = "4.12.2" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[[package]] +name = "certifi" +version = "2023.7.22" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, + {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.2.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, + {file = 
"charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, + {file = 
"charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, + {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, +] + +[[package]] +name = "greenlet" +version = "2.0.2" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" +files = [ + {file = "greenlet-2.0.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:bdfea8c661e80d3c1c99ad7c3ff74e6e87184895bbaca6ee8cc61209f8b9b85d"}, + {file = "greenlet-2.0.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9d14b83fab60d5e8abe587d51c75b252bcc21683f24699ada8fb275d7712f5a9"}, + {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"}, + {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"}, + {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"}, + {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"}, + {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"}, + {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"}, + {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d75209eed723105f9596807495d58d10b3470fa6732dd6756595e89925ce2470"}, + {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:3a51c9751078733d88e013587b108f1b7a1fb106d402fb390740f002b6f6551a"}, + {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"}, + {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"}, + {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"}, + {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"}, + {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"}, + {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"}, + {file = "greenlet-2.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:eff4eb9b7eb3e4d0cae3d28c283dc16d9bed6b193c2e1ace3ed86ce48ea8df19"}, + {file = "greenlet-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5454276c07d27a740c5892f4907c86327b632127dd9abec42ee62e12427ff7e3"}, + {file = "greenlet-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:7cafd1208fdbe93b67c7086876f061f660cfddc44f404279c1585bbf3cdc64c5"}, + {file = "greenlet-2.0.2-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:910841381caba4f744a44bf81bfd573c94e10b3045ee00de0cbf436fe50673a6"}, + {file = "greenlet-2.0.2-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:18a7f18b82b52ee85322d7a7874e676f34ab319b9f8cce5de06067384aa8ff43"}, + {file = "greenlet-2.0.2-cp35-cp35m-win32.whl", hash = "sha256:03a8f4f3430c3b3ff8d10a2a86028c660355ab637cee9333d63d66b56f09d52a"}, + {file = "greenlet-2.0.2-cp35-cp35m-win_amd64.whl", hash = "sha256:4b58adb399c4d61d912c4c331984d60eb66565175cdf4a34792cd9600f21b394"}, + {file = "greenlet-2.0.2-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:703f18f3fda276b9a916f0934d2fb6d989bf0b4fb5a64825260eb9bfd52d78f0"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:32e5b64b148966d9cccc2c8d35a671409e45f195864560829f395a54226408d3"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2dd11f291565a81d71dab10b7033395b7a3a5456e637cf997a6f33ebdf06f8db"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0f72c9ddb8cd28532185f54cc1453f2c16fb417a08b53a855c4e6a418edd099"}, + {file = "greenlet-2.0.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd021c754b162c0fb55ad5d6b9d960db667faad0fa2ff25bb6e1301b0b6e6a75"}, + {file = "greenlet-2.0.2-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:3c9b12575734155d0c09d6c3e10dbd81665d5c18e1a7c6597df72fd05990c8cf"}, + {file = "greenlet-2.0.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b9ec052b06a0524f0e35bd8790686a1da006bd911dd1ef7d50b77bfbad74e292"}, + {file = "greenlet-2.0.2-cp36-cp36m-win32.whl", hash = "sha256:dbfcfc0218093a19c252ca8eb9aee3d29cfdcb586df21049b9d777fd32c14fd9"}, + {file = "greenlet-2.0.2-cp36-cp36m-win_amd64.whl", hash = "sha256:9f35ec95538f50292f6d8f2c9c9f8a3c6540bbfec21c9e5b4b751e0a7c20864f"}, + {file = "greenlet-2.0.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:d5508f0b173e6aa47273bdc0a0b5ba055b59662ba7c7ee5119528f466585526b"}, + {file = 
"greenlet-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:f82d4d717d8ef19188687aa32b8363e96062911e63ba22a0cff7802a8e58e5f1"}, + {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9c59a2120b55788e800d82dfa99b9e156ff8f2227f07c5e3012a45a399620b7"}, + {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2780572ec463d44c1d3ae850239508dbeb9fed38e294c68d19a24d925d9223ca"}, + {file = "greenlet-2.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:937e9020b514ceedb9c830c55d5c9872abc90f4b5862f89c0887033ae33c6f73"}, + {file = "greenlet-2.0.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:36abbf031e1c0f79dd5d596bfaf8e921c41df2bdf54ee1eed921ce1f52999a86"}, + {file = "greenlet-2.0.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:18e98fb3de7dba1c0a852731c3070cf022d14f0d68b4c87a19cc1016f3bb8b33"}, + {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"}, + {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"}, + {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"}, + {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:acd2162a36d3de67ee896c43effcd5ee3de247eb00354db411feb025aa319857"}, + {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0bf60faf0bc2468089bdc5edd10555bab6e85152191df713e2ab1fcc86382b5a"}, + {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"}, + {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"}, + {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"}, + {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be4ed120b52ae4d974aa40215fcdfde9194d63541c7ded40ee12eb4dda57b76b"}, + {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94c817e84245513926588caf1152e3b559ff794d505555211ca041f032abbb6b"}, + {file = "greenlet-2.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1a819eef4b0e0b96bb0d98d797bef17dc1b4a10e8d7446be32d1da33e095dbb8"}, + {file = 
"greenlet-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7efde645ca1cc441d6dc4b48c0f7101e8d86b54c8530141b09fd31cef5149ec9"}, + {file = "greenlet-2.0.2-cp39-cp39-win32.whl", hash = "sha256:ea9872c80c132f4663822dd2a08d404073a5a9b5ba6155bea72fb2a79d1093b5"}, + {file = "greenlet-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:db1a39669102a1d8d12b57de2bb7e2ec9066a6f2b3da35ae511ff93b01b5d564"}, + {file = "greenlet-2.0.2.tar.gz", hash = "sha256:e7c8dc13af7db097bed64a051d2dd49e9f0af495c26995c00a9ee842690d34c0"}, +] + +[package.extras] +docs = ["Sphinx", "docutils (<0.18)"] +test = ["objgraph", "psutil"] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "soupsieve" +version = "2.5" +description = "A modern CSS selector implementation for Beautiful Soup." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, + {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, +] + +[[package]] +name = "sqlalchemy" +version = "2.0.20" +description = "Database Abstraction Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "SQLAlchemy-2.0.20-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759b51346aa388c2e606ee206c0bc6f15a5299f6174d1e10cadbe4530d3c7a98"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1506e988ebeaaf316f183da601f24eedd7452e163010ea63dbe52dc91c7fc70e"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5768c268df78bacbde166b48be788b83dddaa2a5974b8810af422ddfe68a9bc8"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f0dd6d15b6dc8b28a838a5c48ced7455c3e1fb47b89da9c79cc2090b072a50"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:243d0fb261f80a26774829bc2cee71df3222587ac789b7eaf6555c5b15651eed"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6eb6d77c31e1bf4268b4d61b549c341cbff9842f8e115ba6904249c20cb78a61"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-win32.whl", hash = "sha256:bcb04441f370cbe6e37c2b8d79e4af9e4789f626c595899d94abebe8b38f9a4d"}, + {file = "SQLAlchemy-2.0.20-cp310-cp310-win_amd64.whl", hash = "sha256:d32b5ffef6c5bcb452723a496bad2d4c52b346240c59b3e6dba279f6dcc06c14"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dd81466bdbc82b060c3c110b2937ab65ace41dfa7b18681fdfad2f37f27acdd7"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6fe7d61dc71119e21ddb0094ee994418c12f68c61b3d263ebaae50ea8399c4d4"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4e571af672e1bb710b3cc1a9794b55bce1eae5aed41a608c0401885e3491179"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3364b7066b3c7f4437dd345d47271f1251e0cfb0aba67e785343cdbdb0fff08c"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1be86ccea0c965a1e8cd6ccf6884b924c319fcc85765f16c69f1ae7148eba64b"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1d35d49a972649b5080557c603110620a86aa11db350d7a7cb0f0a3f611948a0"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-win32.whl", hash = "sha256:27d554ef5d12501898d88d255c54eef8414576f34672e02fe96d75908993cf53"}, + {file = "SQLAlchemy-2.0.20-cp311-cp311-win_amd64.whl", hash = "sha256:411e7f140200c02c4b953b3dbd08351c9f9818d2bd591b56d0fa0716bd014f1e"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3c6aceebbc47db04f2d779db03afeaa2c73ea3f8dcd3987eb9efdb987ffa09a3"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d3f175410a6db0ad96b10bfbb0a5530ecd4fcf1e2b5d83d968dd64791f810ed"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea8186be85da6587456c9ddc7bf480ebad1a0e6dcbad3967c4821233a4d4df57"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c3d99ba99007dab8233f635c32b5cd24fb1df8d64e17bc7df136cedbea427897"}, 
+ {file = "SQLAlchemy-2.0.20-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:76fdfc0f6f5341987474ff48e7a66c3cd2b8a71ddda01fa82fedb180b961630a"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-win32.whl", hash = "sha256:d3793dcf5bc4d74ae1e9db15121250c2da476e1af8e45a1d9a52b1513a393459"}, + {file = "SQLAlchemy-2.0.20-cp37-cp37m-win_amd64.whl", hash = "sha256:79fde625a0a55220d3624e64101ed68a059c1c1f126c74f08a42097a72ff66a9"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:599ccd23a7146e126be1c7632d1d47847fa9f333104d03325c4e15440fc7d927"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1a58052b5a93425f656675673ef1f7e005a3b72e3f2c91b8acca1b27ccadf5f4"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79543f945be7a5ada9943d555cf9b1531cfea49241809dd1183701f94a748624"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63e73da7fb030ae0a46a9ffbeef7e892f5def4baf8064786d040d45c1d6d1dc5"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3ce5e81b800a8afc870bb8e0a275d81957e16f8c4b62415a7b386f29a0cb9763"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cb0d3e94c2a84215532d9bcf10229476ffd3b08f481c53754113b794afb62d14"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-win32.whl", hash = "sha256:8dd77fd6648b677d7742d2c3cc105a66e2681cc5e5fb247b88c7a7b78351cf74"}, + {file = "SQLAlchemy-2.0.20-cp38-cp38-win_amd64.whl", hash = "sha256:6f8a934f9dfdf762c844e5164046a9cea25fabbc9ec865c023fe7f300f11ca4a"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:26a3399eaf65e9ab2690c07bd5cf898b639e76903e0abad096cd609233ce5208"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4cde2e1096cbb3e62002efdb7050113aa5f01718035ba9f29f9d89c3758e7e4e"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1b09ba72e4e6d341bb5bdd3564f1cea6095d4c3632e45dc69375a1dbe4e26ec"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b74eeafaa11372627ce94e4dc88a6751b2b4d263015b3523e2b1e57291102f0"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:77d37c1b4e64c926fa3de23e8244b964aab92963d0f74d98cbc0783a9e04f501"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:eefebcc5c555803065128401a1e224a64607259b5eb907021bf9b175f315d2a6"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-win32.whl", hash = "sha256:3423dc2a3b94125094897118b52bdf4d37daf142cbcf26d48af284b763ab90e9"}, + {file = "SQLAlchemy-2.0.20-cp39-cp39-win_amd64.whl", hash = "sha256:5ed61e3463021763b853628aef8bc5d469fe12d95f82c74ef605049d810f3267"}, + {file = "SQLAlchemy-2.0.20-py3-none-any.whl", hash = "sha256:63a368231c53c93e2b67d0c5556a9836fdcd383f7e3026a39602aad775b14acf"}, + {file = "SQLAlchemy-2.0.20.tar.gz", hash = "sha256:ca8a5ff2aa7f3ade6c498aaafce25b1eaeabe4e42b73e25519183e4566a16fc6"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +typing-extensions = ">=4.2.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet 
(!=0.4.17)", "typing-extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx-oracle (>=7)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3-binary"] + +[[package]] +name = "typing-extensions" +version = "4.7.1" +description = "Backported and Experimental Type Hints for Python 3.7+" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, + {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, +] + +[[package]] +name = "urllib3" +version = "2.0.4" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.7" +files = [ + {file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"}, + {file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "user-agent" +version = "0.1.10" +description = "User-Agent generator" +optional = false +python-versions = "*" +files = [ + {file = "user_agent-0.1.10.tar.gz", hash = "sha256:b86537cb2a9d3bda0e2afcc654ec15b383502836877a67520654acadf73f1723"}, +] + +[package.dependencies] +six = "*" + +[metadata] +lock-version = "2.0" +python-versions = "^3.11" +content-hash = "17b7d85ea22370b8fe472c31e12effc91b660c075e2bf9d5001d3fa049ce4c77" diff --git a/proxycrawler/__main__.py b/proxycrawler/__main__.py new file mode 100644 index 0000000..69e69b8 --- /dev/null +++ b/proxycrawler/__main__.py @@ -0,0 +1,4 @@ +from proxycrawler.cli import run + +if __name__ == "__main__": + run() diff --git a/proxycrawler/cli.py b/proxycrawler/cli.py new file mode 100644 index 0000000..81b048e --- /dev/null +++ b/proxycrawler/cli.py @@ -0,0 +1,196 @@ +import os +import sys +import typer + +from rich import print +from rich.console import Console + +from proxycrawler import helpers +from proxycrawler import constants +from proxycrawler.messages import ( + info, + errors +) +from proxycrawler.src.proxycrawler import ProxyCrawler +from proxycrawler.src.database.database_handler import DatabaseHandler + +# Init cli +cli = typer.Typer() + +@cli.command() +def version(): + """ proxycrawler's version """ + print(f"[bold white]Version [bold cyan]{constants.VERSION}[bold white]") + +@cli.command() +def scrap( + enable_save_on_run: bool = typer.Option(True, "--enable-save-on-run", help="Save valid proxies while proxycrawler is 
still running (can be useful in case of a bad internet connection)"), + group_by_protocol: bool = typer.Option(False, "--group-by-protocol", help="Save proxies into seperate files based on the supported protocols [http, https, socks4, sock5]"), + output_file_path: str = typer.Option(None, "--output-file-path", help="Costum output file path to save results (.txt)") +): + """ Start scrapping proxies """ + console = Console() + + # Configuring console + console._log_render.omit_repeated_times = False # Repeat the timestamp even if the logs were logged on the same time + + # Check output file path + if output_file_path is not None and not os.path.exists("/".join(output_file_path.split("/")[:-1])): + console.log( + errors.UNVALID_OUTPUT_FILE_PATH( + output_file_path=output_file_path + ) + ) + sys.exit(1) + + # Init database handler + database_handler = DatabaseHandler() + + # Init ProxyCrawler + proxy_crawler = ProxyCrawler( + database_handler=database_handler, + console=console, + ) + + # Fetching proxies and validating them + proxy_crawler.crawl_proxies( + enable_save_on_run=enable_save_on_run, + group_by_protocol=group_by_protocol, + output_file_path=output_file_path + ) + +@cli.command() +def export_db( + proxies_count: int = typer.Option(None, "--proxies-count", help="Number of proxies to export (exports all by default)"), + validate_proxies: bool = typer.Option(True, "--validate", help="Validate proxies"), + group_by_protocol: bool = typer.Option(False, "--group-by-protocol", help="Save proxies into seperate files based on the supported protocols [http, https, sock4, sock5]"), + output_file_path: str = typer.Option(None, "--output-file-path", help="Costum output file path to save results (.txt)") +): + """ Export proxies from the database """ + console = Console() + + # Configuring console + console._log_render.omit_repeated_times = False # Repeat the timestamp even if the logs were logged on the same time + + # Check output file path + if output_file_path is not None and not os.path.exists("/".join(output_file_path.split("/")[:-1])): + console.log( + errors.UNVALID_OUTPUT_FILE_PATH( + output_file_path=output_file_path + ) + ) + sys.exit(1) + + # Init database handler + database_handler = DatabaseHandler() + + # Init proxycrawler + proxy_crawler = ProxyCrawler( + database_handler=database_handler, + console=console, + ) + + console.log( + info.FETCHING_AND_VALIDATING_PROXIES_FROM_DATABASE + ) + + proxy_crawler.export_database_proxies( + proxies_count=proxies_count, + group_by_protocol=group_by_protocol, + validate_proxies=validate_proxies, + output_file_path=output_file_path + ) + +@cli.command() +def validate( + proxy_file_path: str = typer.Option(None, "--proxy-file", help="path to the proxy file"), + protocol: str = typer.Option(None, "--protocol", help="Set a specific protocol to test the proxies on"), + test_all_protocols: bool = typer.Option(False, "--test-all-protocols", help="Test all the protocols on a proxy"), + group_by_protocol: bool = typer.Option(False, "--group-by-protocol", help="Save proxies into seperate files based on the supported protocols [http, https, sock4, sock5]"), + output_file_path: str = typer.Option(None, "--output-file-path", help="Costum output file path to save results (.txt)") +): + """ Validate a proxies list file """ + console = Console() + + # Configuring console + console._log_render.omit_repeated_times = False # Repeat the timestamp even if the logs were logged on the same time + + # Init database handler + database_handler = DatabaseHandler() + + # 
Init proxycrawler + proxy_crawler = ProxyCrawler( + database_handler=database_handler, + console=console, + ) + + # Check output file path + if output_file_path is not None and not os.path.exists("/".join(output_file_path.split("/")[:-1])): + console.log( + errors.UNVALID_OUTPUT_FILE_PATH( + output_file_path=output_file_path + ) + ) + sys.exit(1) + + # Check if the proxies file exists + if not os.path.exists(proxy_file_path): + console.log(errors.PROXY_FILE_DOESNT_EXIST) + sys.exit(1) + + # Check the file's extension + if not proxy_file_path.endswith(".txt"): + console.log(errors.FILE_EXTENSION_NOT_SUPPORTED) + sys.exit(1) + + # Check the format of the proxies + proxies = [proxy.strip() for proxy in open(proxy_file_path, "r").readlines()] + results = [] + + for proxy in proxies: + if not proxy_crawler.check_proxy_fromat(proxy=proxy): + results.append(proxy) + + if len(results) != 0: + console.log(errors.UNVALID_PROXY_FORMAT) + sys.exit(1) + + # Check the protocol + protocols = [ + "http", + "https", + "socks4", + "socks5" + ] + if protocol is not None and protocol not in protocols: + console.log( + errors.UNVALID_PROXY_PROTOCOL( + protocol=protocol + ) + ) + sys.exit(1) + + # Validate the list of proxies + console.log( + info.VALIDATING_PROXIES_FROM_FILE( + proxies_count=len(proxies), + proxy_file_path=proxy_file_path + ) + ) + + proxy_crawler.validate_proxies( + proxies=proxies, + protocol=protocol, + test_all_protocols=test_all_protocols, + group_by_protocol=group_by_protocol, + proxy_file_path=proxy_file_path, + output_file_path=output_file_path + ) + +def run(): + """ Runs proxycrawler """ + helpers.banner() + cli() + +if __name__ == "__main__": + run() diff --git a/proxycrawler/constants.py b/proxycrawler/constants.py new file mode 100644 index 0000000..ae593bd --- /dev/null +++ b/proxycrawler/constants.py @@ -0,0 +1,27 @@ +import os + +# Package main info +PACKAGE = "proxycrawler" +VERSION = "0.1.0" +AUTHOR = "ramsy0dev" +GITHUB = "https://github.com/ramsy0dev/proxycrawler" + +# Banner +BANNER = f"""[bold white] + __ + ____ _________ _ ____ ________________ __ __/ /__ _____ + / __ \/ ___/ __ \| |/_/ / / / ___/ ___/ __ `/ | /| / / / _ \/ ___/ + / /_/ / / / /_/ /> None: + """ proxycrawler's banner """ + print(constants.BANNER) + +# def log_json(json_data: str, console) -> None: +# """ Logs out the json data in a beautified way """ +# splited_json_data = json_data.split("\n") + +# for log_line in splited_json_data: +# console.log(log_line) + +def date() -> datetime: + """ Returns the current date """ + return datetime.datetime.now() + +def generate_uid(data: str) -> str: + """ Generates a UID based on the given data """ + data = f"{data}{''.join([char for char in random.choices(string.ascii_letters)])}" + + hashed_data_salt = hashlib.md5(data.encode()).hexdigest() + generated_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, hashed_data_salt) + + return str(generated_uuid) diff --git a/proxycrawler/messages/debug.py b/proxycrawler/messages/debug.py new file mode 100644 index 0000000..329d021 --- /dev/null +++ b/proxycrawler/messages/debug.py @@ -0,0 +1,6 @@ +""" + Debug messages used through out proxycrawler + to help in debugging +""" + +EXCEPTION_RAISED_WHEN_VALIDATING_PROXY = lambda proxy, error: f"[bold blue][DEBUG] [bold white]Exception raised when validating proxy:[bold green]{proxy}[bold white]. 
Error: {error}" diff --git a/proxycrawler/messages/errors.py b/proxycrawler/messages/errors.py new file mode 100644 index 0000000..70fade7 --- /dev/null +++ b/proxycrawler/messages/errors.py @@ -0,0 +1,23 @@ +""" + Error messages used throughout proxycrawler + to log to the end-user +""" + +FILE_EXTENSION_NOT_SUPPORTED = f"[bold red][ERROR] [bold white]The provided proxy file's extension is not supported. Please make sure it's a plain text file (.txt) and try again" + +PROXY_FILE_DOESNT_EXIST = f"[bold red][ERROR] [bold white]The provided proxy file path doesn't seem to exist. Please verify it and try again" + +UNVALID_OUTPUT_FILE_PATH = lambda output_file_path: f"[bold red][ERROR] [bold white]Invalid output file path [bold red]'{output_file_path}'[bold white]. Please change it and try again (or you can leave it empty)" + +FAILD_TO_REQUEST_GEONODE_API = lambda error: f"[bold red][ERROR] [bold white]Failed to request [bold green]geonode[bold white]'s API. Error: {error}" +FAILD_TO_REQUEST_FREE_PROXY_LIST = lambda error: f"[bold red][ERROR] [bold white]Failed to request [bold green]free-proxy-list.net[bold white]. Error: {error}" + +UNVALID_COUNTRY_CODE = lambda country_code, supported_country_code: f"[bold red][ ! ] [bold white]Invalid country code [bold red]'{country_code}'[bold white]. Supported country codes: \n{supported_country_code}" + +UNVALID_PROXY_FORMAT = f"[bold red][ERROR] [bold white]Invalid proxy format. The format should be [bold green]://ip:port[bold white]. Please fix it and try again" + +UNVALID_PROXY_PROTOCOL = lambda protocol, protocols: f"[bold red][ERROR] [bold white]Invalid proxy protocol [bold red]'{protocol}'. The supported protocols are [bold green]{protocols}[bold white] (you may leave --protocol unset to test all protocols)" + +NO_PROXIES_WHERE_GATHERED = lambda proxies: f"[bold red][ERROR] [bold white]No proxies were gathered. proxies:[bold red]{proxies}[bold white]" + +NO_PROXIES_WHERE_FOUND_IN_THE_DATABASE = "[bold red][ERROR] [bold white]No proxies were found in the database" diff --git a/proxycrawler/messages/info.py b/proxycrawler/messages/info.py new file mode 100644 index 0000000..e115abd --- /dev/null +++ b/proxycrawler/messages/info.py @@ -0,0 +1,20 @@ +""" + Info messages used throughout proxycrawler + to log to the end-user +""" + +USING_SERVICE = lambda service_name, service_url: f"[bold green][INFO] [bold white]Using service [bold green]'{service_name}'[bold white] with url:[bold red]'{service_url}'[bold white]" + +REQUESTING_GEONODE_API = lambda api_url, payload: f"[bold green][INFO] [bold white]Requesting [bold green]Geonode[bold white]'s API at api_url:[bold green]'{api_url}'[bold white] with payload: {payload}" + +REQUESTING_FREE_PROXY_LIST = lambda url: f"[bold green][INFO] [bold white]Scraping [bold green]free-proxy-list[bold white] at url:[bold green]'{url}'[bold white]" + +FOUND_A_VALID_PROXY = lambda proxy: f"[bold green][INFO] [bold white]Found a valid proxy: [bold green]{proxy.proxy}[bold white]" + +PROXIES_SAVED_IN_PATHS = lambda output_file_paths: "[bold green][INFO] [bold white]Proxies saved in the following files:{}".format("".join([f"\n\t[bold green]->[bold white] {path}" for path in output_file_paths])) + +FETCHING_AND_VALIDATING_PROXIES_FROM_DATABASE = f"[bold green][INFO] [bold white]Fetching and validating proxies from the database" + +FETCHED_PROXIES_FROM_THE_DATABASE = lambda count: f"[bold green][INFO] [bold white]Fetched [bold green]'{count}'[bold white] proxies from the database. Validating them ..."
+ +VALIDATING_PROXIES_FROM_FILE = lambda proxies_count, proxy_file_path: f"[bold green][INFO] [bold white]Found [bold green]'{proxies_count}'[bold white] proxies from [bold green]'{proxy_file_path}'[bold white]. Validating them..." diff --git a/proxycrawler/src/database/database_handler.py b/proxycrawler/src/database/database_handler.py new file mode 100644 index 0000000..901cc9b --- /dev/null +++ b/proxycrawler/src/database/database_handler.py @@ -0,0 +1,120 @@ +import os + +from typing import List + +from sqlalchemy import ( + create_engine, + select, + update +) +from sqlalchemy.orm import sessionmaker + +from proxycrawler import constants +from proxycrawler.src.database.tables import Base, Proxies + +class DatabaseHandler (object): + """ proxycrawler's database handler """ + def __init__(self) -> None: + self.database_url = constants.DATABASE_URL + + # Check the database url + if not self._check_database_url(): + self._create_database() + + # Init engine + self.engine: create_engine = self.create_engine() + + # Create tables in case they don't exist + self.create_tables() + + def create_engine(self) -> create_engine: + """ Creates an egine object and returnes it """ + return create_engine( + url=self.database_url, + ) + + def create_tables(self) -> None: + """ Creates all the needed tables """ + session = sessionmaker(bind=self.engine) + + with session() as session: + Base.metadata.create_all(bind=self.engine) # Create all the tables + session.commit() + + def save_proxy(self, proxy: Proxies) -> None: + """ Saves the proxy into the `proxies` table """ + session = sessionmaker(bind=self.engine) + + with session() as session: + # Check if the proxy already exists in the database + if len(session.execute(select(Proxies).where(Proxies.ip == proxy.ip and Proxies.port == proxy.port)).fetchall()) > 0: + # Check if the protocols are the same + if len(session.execute(select(Proxies).where(Proxies.proxy == proxy.proxy and Proxies.protocols == proxy.protocols)).fetchall()) == 0: + session.execute( + update( + Proxies + ).where( + Proxies.proxy_id == proxy.proxy_id + ).values( + proxy=proxy.proxy, + protocols=proxy.protocols + ) + ) + session.commit() + + return + + # Save the proxy to the database + session.add(proxy) + session.commit() + + def fetch_proxies(self, proxies_count: int | None = None) -> List[tuple[Proxies]]: + """ Fetch proxies from the database """ + session = sessionmaker(bind=self.engine) + + proxies = None + with session() as session: + if proxies_count is not None: + proxies = session.execute( + select(Proxies).limit(proxies_count) + ).fetchall() + else: + proxies = session.execute( + select(Proxies) + ).fetchall() + + return proxies + + def update_proxy_valid_value(self, proxy: Proxies) -> None: + """ Updates the value of `is_valid` of proxies """ + session = sessionmaker( + bind=self.engine + ) + + with session() as session: + session.execute( + update(Proxies).where( + Proxies.proxy_id == proxy.proxy_id + ).values( + is_valid=proxy.is_valid + ) + ) + + session.commit() + + def _check_database_url(self) -> bool: + """ Checks if the database url is valid """ + database_path = self.database_url.replace("sqlite+pysqlite:///", "") + + return os.path.exists(database_path) + + def _create_database(self) -> None: + """ Create the sqlite database as the corresponding path """ + database_path = self.database_url.replace("sqlite+pysqlite:///", "") + + os.makedirs( + database_path.replace("/database.db", ""), + exist_ok=True + ) # Create the directory leading to the database file + + 
open(database_path, "a").close() # Creating the database file diff --git a/proxycrawler/src/database/tables.py b/proxycrawler/src/database/tables.py new file mode 100644 index 0000000..dd3db44 --- /dev/null +++ b/proxycrawler/src/database/tables.py @@ -0,0 +1,34 @@ +from sqlalchemy import ( + Column, + String, + Integer, + Boolean, + DateTime, + JSON +) + +from sqlalchemy.orm import DeclarativeBase + +from proxycrawler import helpers + +class Base(DeclarativeBase): + """ Base """ + pass + +# Tables +class Proxies(Base): + """ Proxies table """ + __tablename__ = "proxies" + + # Columns + proxy_id = Column(String, primary_key=True) + ip = Column(String(30)) + port = Column(Integer) + proxy = Column(JSON) + protocols = Column(String) + country = Column(String(10)) + is_valid = Column(Boolean, default=True) + added_at = Column(DateTime, default=helpers.date()) + + def __repr__(self) -> str: + return f"Proxies(proxy_id={self.proxy_id!r}, ip={self.ip!r}, port={self.port!r}, proxy={self.proxy!r}, protocols={self.protocols!r}, country={self.country!r}, is_valid={self.is_valid!r}, added_at={self.added_at!r})" diff --git a/proxycrawler/src/models/free_proxy_list_model.py b/proxycrawler/src/models/free_proxy_list_model.py new file mode 100644 index 0000000..2f72f0e --- /dev/null +++ b/proxycrawler/src/models/free_proxy_list_model.py @@ -0,0 +1,121 @@ +import json +import time +import requests + +from rich.console import Console +from user_agent import generate_user_agent + +from proxycrawler import helpers +from proxycrawler import constants +from proxycrawler.messages import ( + info, + debug, + errors +) +from proxycrawler.src.database.tables import Proxies + +class FreeProxyListModel(object): + """ Proxy model """ + ip : str + port : str + proxy_country_code : str + country : str + provider : str + google : str + https : str + last_checked : str + proxy : dict = dict() + is_valid : bool = False + + def __init__(self, console: Console) -> None: + self.protocols = list() + self.console = console + + def validate(self) -> bool: + """ Validate the proxy """ + protocols = ["http", "https", "socks4", "socks5"] + proxy = { + + } + delay_time = 3 + + for protocol in protocols: + try: + headers = { + "User-Agent": generate_user_agent() + } + proxies = { + protocol: f"{protocol}://{self.ip}:{self.port}" + } + status_codes = [] + + for _ in range(3): + time.sleep(delay_time) + response = requests.get( + "https://google.com", + headers=headers, + proxies=proxies + ) + status_codes.append(response.status_code) + + if status_codes.count(200) >= 2: + proxy[protocol] = proxies[protocol] + self.protocols.append(protocol) + except requests.exceptions.ProxyError as error: + if constants.DEBUG: + self.console.log( + debug.EXCEPTION_RAISED_WHEN_VALIDATING_PROXY( + proxy=proxies, + error=error + ) + ) + except Exception as error: + if constants.DEBUG: + self.console.log( + debug.EXCEPTION_RAISED_WHEN_VALIDATING_PROXY( + proxy=proxies, + error=error + ) + ) + + if len(proxy) != 0: + self.is_valid = True + + self.proxy = proxy + + return self.is_valid + + def export_dict(self) -> dict: + """ Exports the fields into a dict """ + return { + "ip" : self.ip, + "port" : self.port, + "proxy_country_code" : self.proxy_country_code, + "country" : self.country, + "provider" : self.provider, + "google" : self.google, + "https" : self.https, + "last_checked" : self.last_checked, + "proxy" : self.proxy, + "is_valid" : self.is_valid + } + + def export_table_row(self) -> Proxies: + """ Exports the current proxies data into a `Proxies` 
table row """ + proxy_id = helpers.generate_uid( + data=json.dumps( + self.export_dict() + ) + ) + + proxy = Proxies( + proxy_id=proxy_id, + ip=self.ip, + port=self.port, + proxy=json.dumps(self.proxy), + protocols=str(self.protocols), + country=self.country, + is_valid=self.is_valid + ) + + return proxy diff --git a/proxycrawler/src/models/geonode_model.py b/proxycrawler/src/models/geonode_model.py new file mode 100644 index 0000000..3fb6b39 --- /dev/null +++ b/proxycrawler/src/models/geonode_model.py @@ -0,0 +1,177 @@ +import json +import time +import requests + +from rich.console import Console +from user_agent import generate_user_agent + +from proxycrawler import helpers +from proxycrawler import constants +from proxycrawler.messages import ( + info, + debug, + errors +) +from proxycrawler.src.database.tables import Proxies + +class GeonodeModel(object): + """ Geonode proxies service model """ + ip: str + anonymityLevel: str + protocols: list + asn: str + city: str + country: str + created_at: str + google: bool + isp: str + lastChecked: int + latency: float + org: str + port: str + region: str | None + responseTime: int + speed: int + updated_at: str + workingPercent: float | None + upTime: float + upTimeSuccessCount: int + upTimeTryCount: int + proxy: dict = dict() + is_valid: bool = False + + def __init__(self, console: Console) -> None: + self.console = console + + def set_fields(self, data: dict) -> None: + """ Set the values for the fields """ + for field in self.__annotations__: + if field in ["proxy", "is_valid"]: + continue + + setattr(self, str(field), data.get(field, None)) + + def validate(self) -> bool: + """ Validate the proxy """ + protocols = self.protocols + proxy = { + + } + delay_time = 3 + + for protocol in protocols: + try: + headers = { + "User-Agent": generate_user_agent() + } + proxies = { + protocol: f"{protocol}://{self.ip}:{self.port}" + } + status_codes = [] + + for _ in range(3): + time.sleep(delay_time) + response = requests.get( + "https://google.com", + headers=headers, + proxies=proxies + ) + status_codes.append(response.status_code) + + if status_codes.count(200) >= 2: + proxy[protocol] = proxies[protocol] + except requests.exceptions.ProxyError as error: + if constants.DEBUG: + self.console.log( + debug.EXCEPTION_RAISED_WHEN_VALIDATING_PROXY( + proxy=proxies, + error=error + ) + ) + except Exception as error: + if constants.DEBUG: + self.console.log( + debug.EXCEPTION_RAISED_WHEN_VALIDATING_PROXY( + proxy=proxies, + error=error + ) + ) + + if len(proxy) != 0: + self.is_valid = True + + self.proxy = proxy + + return self.is_valid + + def export_tuple(self) -> tuple: + """ Export the fields into a tuple """ + return ( + self.ip, + self.anonymityLevel, + self.asn, + self.city, + self.country, + self.created_at, + self.google, + self.isp, + self.lastChecked, + self.latency, + self.org, + self.port, + self.protocols, + self.region, + self.responseTime, + self.speed, + self.updated_at, + self.workingPercent, + self.upTime, + self.upTimeSuccessCount, + self.upTimeTryCount + ) + + def export_dict(self) -> dict: + """ Exports the fields into a dict """ + return { + "ip": self.ip, + "anonymityLevel": self.anonymityLevel, + "asn": self.asn, + "city": self.city, + "country": self.country, + "created_at": self.created_at, + "google": self.google, + "isp": self.isp, + "lastChecked": self.lastChecked, + "latency": self.latency, + "org": self.org, + "port": self.port, + "protocols": self.protocols, + "region": self.region, + "responseTime": self.responseTime, + 
"speed": self.speed, + "updated_at": self.updated_at, + "workingPercent": self.workingPercent, + "upTime": self.upTime, + "upTimeSuccessCount": self.upTimeSuccessCount, + "upTimeTryCount": self.upTimeTryCount + } + + def export_table_row(self) -> Proxies: + """ Exports the current proxies data into a `Proxies` table row """ + proxy_id = helpers.generate_uid( + data=json.dumps( + self.export_dict() + ) + ) + + proxy = Proxies( + proxy_id=proxy_id, + ip=self.ip, + port=self.port, + proxy=json.dumps(self.proxy), + protocols=str(self.protocols), + country=self.country, + is_valid=self.is_valid + ) + + return proxy diff --git a/proxycrawler/src/models/proxy_model.py b/proxycrawler/src/models/proxy_model.py new file mode 100644 index 0000000..fe15154 --- /dev/null +++ b/proxycrawler/src/models/proxy_model.py @@ -0,0 +1,116 @@ +import json +import time +import requests + +from rich.console import Console +from user_agent import generate_user_agent + +from proxycrawler import helpers +from proxycrawler import constants +from proxycrawler.messages import ( + info, + debug, + errors +) +from proxycrawler.src.database.tables import Proxies + +class ProxyModel(object): + """ Proxy model """ + proxy: dict = dict() + country: str = "Null" + is_valid: bool = False + + def __init__(self, ip: str, port: int, protocols: list[str], console: Console) -> None: + self.ip = ip + self.port = port + self.protocols = protocols + self.console = console + + def validate(self) -> bool: + """ Validate proxy """ + proxy = { + + } + delay_time = 3 + + for protocol in self.protocols: + try: + headers = { + "User-Agent": generate_user_agent() + } + proxies = { + protocol: f"{protocol}://{self.ip}:{self.port}" + } + status_codes = [] + + for _ in range(3): + time.sleep(delay_time) + response = requests.get( + "https://google.com", + headers=headers, + proxies=proxies + ) + status_codes.append(response.status_code) + + if status_codes.count(200) >= 2: + proxy[protocol] = proxies[protocol] + except requests.exceptions.ProxyError as error: + if constants.DEBUG: + self.console.log( + debug.EXCEPTION_RAISED_WHEN_VALIDATING_PROXY( + proxy=proxies, + error=error + ) + ) + except Exception as error: + if constants.DEBUG: + self.console.log( + debug.EXCEPTION_RAISED_WHEN_VALIDATING_PROXY( + proxy=proxies, + error=error + ) + ) + + if len(proxy) != 0: + self.is_valid = True + + self.proxy = proxy + + protocols = [] + for protocol in self.proxy: + protocols.append(protocol) + + self.protocols = protocols + + return self.is_valid + + def export_dict(self) -> dict: + """ Exports fields into a dict """ + return { + "ip" : self.ip, + "port" : self.port, + "country" : self.country, + "proxy" : self.proxy, + "protocols" : self.protocols, + "is_valid" : self.is_valid + } + + def export_table_row(self) -> Proxies: + """ Exports the current proxies data into a `Proxies` table row """ + proxy_id = helpers.generate_uid( + data=json.dumps( + self.export_dict() + ) + ) + + proxy = Proxies( + proxy_id=proxy_id, + ip=self.ip, + port=self.port, + proxy=json.dumps(self.proxy), + protocols=str(self.protocols), + country=self.country, + is_valid=self.is_valid + ) + + return proxy diff --git a/proxycrawler/src/proxycrawler.py b/proxycrawler/src/proxycrawler.py new file mode 100644 index 0000000..1910422 --- /dev/null +++ b/proxycrawler/src/proxycrawler.py @@ -0,0 +1,378 @@ +import re +import ast +import sys +import json +import time +import requests + +from user_agent import generate_user_agent +from rich.console import Console + +from proxycrawler 
import constants +from proxycrawler.messages import ( + info, + debug, + errors +) +from proxycrawler.src.database.database_handler import DatabaseHandler +from proxycrawler.src.database.tables import Proxies + +# Services +from proxycrawler.src.services.geonode import Geonode +from proxycrawler.src.services.freeproxylist import FreeProxyList + +# Models +from proxycrawler.src.models.proxy_model import ProxyModel + +class ProxyCrawler: + """ ProxyCrawler """ + free_proxy_list: list = list() + geonode_proxies_list: list = list() + + output_save_paths: list = list() + + def __init__( + self, database_handler: DatabaseHandler, console: Console | None = Console()) -> None: + self.database_handler = database_handler + self.console = console + + def crawl_proxies( + self, + enable_save_on_run: bool, + group_by_protocol: bool, + output_file_path: str + ) -> None: + """ Starts crawling proxies from all the known services """ + geonode = Geonode( + console=self.console, + + ) + free_proxy_list = FreeProxyList( + console=self.console + ) + + services = { + "free_proxy_list": free_proxy_list, + "geonode": geonode + } + + for service in services: + service_name = service + service = services[service] + service_url = service.url + + self.console.log( + info.USING_SERVICE( + service_name=service_name, + service_url=service_url + ) + ) + + # Fetching and validating proxies from `service_name` + service.fetch_proxies() + + # Saving the proxies to the database + proxies = service.valid_proxies + + for proxy in proxies: + proxy = proxy.export_table_row() + + self.database_handler.save_proxy( + proxy=proxy + ) + + # Save to the output file on the run in case + # `self.enable_save_on_run` was enabled + if not enable_save_on_run: + continue + + self.output_save_paths = self.save_to_file( + proxies=proxies, + group_by_protocol=group_by_protocol, + output_file_path=output_file_path + ) + + if not enable_save_on_run: + for service in services: + proxies = services[service].valid_proxies + + self.output_save_paths = self.save_to_file( + proxies=proxies, + group_by_protocol=group_by_protocol, + output_file_path=output_file_path + ) + + self.console.log( + info.PROXIES_SAVED_IN_PATHS( + output_file_paths=self.output_save_paths + ) + ) + + def export_database_proxies( + self, + proxies_count: int, + group_by_protocol: bool | None = False, + validate_proxies: bool | None = True, + output_file_path: str | None = None + ) -> None: + """ Export the proxies from the database and validate them """ + saved_database_proxies = self.database_handler.fetch_proxies( + proxies_count=proxies_count + ) + valid_proxies = [] + + if len(saved_database_proxies) == 0: + self.console.log( + errors.NO_PROXIES_WHERE_FOUND_IN_THE_DATABASE + ) + sys.exit(1) + + self.console.log( + info.FETCHED_PROXIES_FROM_THE_DATABASE( + count=len(saved_database_proxies) + ) + ) + + for proxy in saved_database_proxies: + proxy = proxy[0] + + if validate_proxies: + if self.validate_db_proxies(proxy=proxy): + valid_proxies.append(proxy) + + self.database_handler.update_proxy_valid_value( + proxy=proxy + ) + + self.console.log( + info.FOUND_A_VALID_PROXY( + proxy=proxy + ) + ) + + else: + proxy.is_valid = False + + self.database_handler.update_proxy_valid_value( + proxy=proxy + ) + + if len(valid_proxies) == 0 and not validate_proxies: + self.output_save_paths = self.save_to_file( + proxies=saved_database_proxies, + group_by_protocol=group_by_protocol, + output_file_path=output_file_path + ) + else: + self.output_save_paths = self.save_to_file( + 
proxies=valid_proxies,
+                group_by_protocol=group_by_protocol,
+                output_file_path=output_file_path
+            )
+
+        self.console.log(
+            info.PROXIES_SAVED_IN_PATHS(
+                output_file_paths=self.output_save_paths
+            )
+        )
+
+    def validate_db_proxies(self, proxy: Proxies) -> bool:
+        """ Validate a proxy loaded from the database """
+        if type(proxy.proxy) == str:
+            proxy.proxy = json.loads(proxy.proxy)
+
+        delay_time = 7
+        status_codes = []
+        headers = {
+            "User-Agent": generate_user_agent()
+        }
+
+        for _ in range(3):
+            time.sleep(delay_time)
+            try:
+                response = requests.get(
+                    "https://google.com",
+                    headers=headers,
+                    proxies=proxy.proxy
+                )
+                status_codes.append(response.status_code)
+            except requests.exceptions.ProxyError as error:
+                if constants.DEBUG:
+                    self.console.log(
+                        debug.EXCEPTION_RAISED_WHEN_VALIDATING_PROXY(
+                            proxy=proxy.proxy,
+                            error=error
+                        )
+                    )
+            except Exception as error:
+                if constants.DEBUG:
+                    self.console.log(
+                        debug.EXCEPTION_RAISED_WHEN_VALIDATING_PROXY(
+                            proxy=proxy.proxy,
+                            error=error
+                        )
+                    )
+
+        if status_codes.count(200) >= 2:
+            proxy.is_valid = True
+
+        return proxy.is_valid
+
+    def validate_proxies(self, proxies: list[str], protocol: str | None = None, test_all_protocols: bool | None = False, group_by_protocol: bool | None = False, proxy_file_path: str | None = None, output_file_path: str | None = None) -> None:
+        """ Validates a list of proxies """
+        protocols = [
+            "http",
+            "https",
+            "socks4",
+            "socks5"
+        ]
+        valid_proxies = []
+
+        # When a specific protocol was given, or when the user chose to test
+        # all protocols, we need to make sure we are not revalidating the
+        # same proxy: the list may contain the same proxy under different
+        # protocols, which is why we keep track of the proxies that were
+        # already handled in `processed_proxies`
+        processed_proxies = []
+
+        for proxy in proxies:
+            if proxy == "":
+                continue
+
+            ip = proxy.split("/")[2].split(":")[0]
+            port = proxy.split(":")[-1]
+            proxy_protocols = None
+
+            # Determine which protocols to use
+            if not test_all_protocols:
+                if protocol is None:
+                    proxy_protocols = [proxy.split("/")[0].replace(":", "")]
+                else:
+                    # Skip the proxy if already processed
+                    if proxy.split("/")[2] in processed_proxies:
+                        continue
+
+                    proxy_protocols = [protocol]
+            else:
+                # Skip the proxy if already processed
+                if proxy.split("/")[2] in processed_proxies:
+                    continue
+
+                proxy_protocols = protocols
+
+            proxy = ProxyModel(
+                ip=ip,
+                port=port,
+                protocols=proxy_protocols,
+                console=self.console
+            )
+
+            if proxy.validate():
+                valid_proxies.append(proxy)
+
+                self.console.log(
+                    info.FOUND_A_VALID_PROXY(
+                        proxy=proxy
+                    )
+                )
+
+                # Save proxy to the database
+                self.database_handler.save_proxy(
+                    proxy=proxy.export_table_row()
+                )
+
+            processed_proxies.append(f"{proxy.ip}:{proxy.port}")
+
+        if output_file_path is None:
+            output_file_path = f"{proxy_file_path.split('/')[-1].replace('.txt', '')}-valid.txt"
+
+        self.output_save_paths = self.save_to_file(
+            proxies=valid_proxies,
+            group_by_protocol=group_by_protocol,
+            output_file_path=output_file_path
+        )
+
+        self.console.log(
+            info.PROXIES_SAVED_IN_PATHS(
+                output_file_paths=self.output_save_paths
+            )
+        )
+
+    def check_proxy_fromat(self, proxy: str) -> bool:
+        """ Checks the format of the proxy """
+        regex = r"^(https?|socks[45])://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+"
+
+        return re.match(regex, proxy) is not None
+
+    def save_to_file(
+        self,
+        proxies: list,
+        group_by_protocol: bool,
+        output_file_path: str | None
+    ) -> list[str]:
+        """ Saves proxies to the output file path """
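+        # When `group_by_protocol` is False every valid proxy URL is appended
+        # to a single output file; otherwise the proxies are split into one
+        # file per protocol (proxies-http.txt, proxies-https.txt, and so on).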
output_save_paths = [] + + if output_file_path is None: + output_file_path = f"./proxycrawler-proxies.txt" + + if not group_by_protocol: + with open(output_file_path, "a") as save_proxies: + data = [] + + for proxy_data in proxies: + if type(proxy_data.proxy) == str: + proxy_data.proxy = json.loads(proxy_data.proxy) # Reattache to a session + + for proxy in proxy_data.proxy: + data.append(f"{proxy_data.proxy[proxy]}\n") + + save_proxies.write(''.join(data)) + + output_save_paths.append(output_file_path) + + return output_save_paths + + protocols = { + "http": { + "output_file_path": f"{'/'.join(output_file_path.split('/')[:-1])}/proxies-http.txt", + "proxies": set() + }, + "https": { + "output_file_path": f"{'/'.join(output_file_path.split('/')[:-1])}/proxies-https.txt", + "proxies": set() + }, + "socks4": { + "output_file_path": f"{'/'.join(output_file_path.split('/')[:-1])}/proxies-socks4.txt", + "proxies": set() + }, + "socks5": { + "output_file_path": f"{'/'.join(output_file_path.split('/')[:-1])}/proxies-socks5.txt", + "proxies": set() + } + } + + for proxy in proxies: + if type(proxy.proxy) == str: + proxy.proxy = json.loads(proxy.proxy) + + if type(proxy.protocols) == str: + proxy.protocols = ast.literal_eval(proxy.protocols) + + for protocol in proxy.protocols: + protocols[protocol]["proxies"].add(f"{proxy.proxy[protocol]}\n") + + for protocol in protocols: + # Don't save in case no proxies have this `protocol` + if not len(protocols[protocol]["proxies"]) > 0: + continue + + with open(protocols[protocol]["output_file_path"], "a") as save_proxies: + save_proxies.write(''.join(protocols[protocol]["proxies"])) + + output_save_paths.append(protocols[protocol]["output_file_path"]) + + self.console.log(output_save_paths) + + return output_save_paths diff --git a/proxycrawler/src/services/freeproxylist.py b/proxycrawler/src/services/freeproxylist.py new file mode 100644 index 0000000..4bb09a3 --- /dev/null +++ b/proxycrawler/src/services/freeproxylist.py @@ -0,0 +1,63 @@ +import requests + +from rich.console import Console +from bs4 import BeautifulSoup +from user_agent import generate_user_agent + +from proxycrawler.messages import ( + info, + errors +) +from proxycrawler.src.models.free_proxy_list_model import FreeProxyListModel + +class FreeProxyList(object): + """ free-proxy-list.net """ + url: str = "https://free-proxy-list.net" + valid_proxies: list[FreeProxyListModel] = list() + + def __init__(self, console: Console): + self.console = console + + def fetch_proxies(self) -> list[FreeProxyListModel]: + """ Fetches proxies with filter `country_code` """ + headers = { + "User-Agent": generate_user_agent() + } + self.console.log(info.REQUESTING_FREE_PROXY_LIST(url=self.url)) + + response = requests.get( + self.url, + headers=headers + ) + + if response.status_code != 200: + self.console.log(errors.FAILD_TO_REQUEST_FREE_PROXY_LIST(error=response.text)) + + soup = BeautifulSoup( + response.content, + "html.parser" + ) + rows = soup.find_all("tr") + + for row in rows: + proxy = FreeProxyListModel( + console=self.console + ) + parts = row.find_all("td") + + if len(parts) == 8: + proxy.ip = parts[0].text + proxy.port = parts[1].text + proxy.proxy_country_code = parts[2].text + proxy.country = parts[3].text + proxy.provider = parts[4].text + proxy.google = parts[5].text + proxy.https = parts[6].text + proxy.last_checked = parts[7].text + + if proxy.https == "yes": + # Check if the proxy already exists and validate the proxy + if proxy.validate(): + self.valid_proxies.append(proxy) + + 
self.console.log(info.FOUND_A_VALID_PROXY(proxy=proxy))
diff --git a/proxycrawler/src/services/geonode.py b/proxycrawler/src/services/geonode.py
new file mode 100644
index 0000000..3efd763
--- /dev/null
+++ b/proxycrawler/src/services/geonode.py
@@ -0,0 +1,97 @@
+import requests
+
+from user_agent import generate_user_agent
+from rich.console import Console
+
+from proxycrawler.messages import (
+    info,
+    errors
+)
+from proxycrawler.src.models.geonode_model import GeonodeModel
+
+class Geonode(object):
+    """ Geonode """
+    url: str = "https://geonode.com/free-proxy-list"
+    api_url: str = "https://proxylist.geonode.com/api/proxy-list"
+    params: dict = {
+        "limit": 500,
+        "page": 1, # NOTE: page limit is 100
+        "sort_by": "lastChecked",
+        "sort_type": "desc"
+    }
+    valid_proxies: list[GeonodeModel] = list()
+
+    def __init__(self, console: Console) -> None:
+        self.console = console
+
+    def fetch_proxies(self) -> list[GeonodeModel]:
+        """ Fetches the proxies from Geonode """
+        page_limit = 100
+
+        for page_number in range(1, page_limit + 1):
+            payload = self.params
+            payload["page"] = page_number
+            headers = {
+                "Host": "proxylist.geonode.com",
+                "User-Agent": generate_user_agent(),
+                "Accept": "application/json, text/plain, */*",
+                "Accept-Language": "en-US,en;q=0.5",
+                "Accept-Encoding": "gzip, deflate", # 'br' is left out of the accepted encodings because the response content would not be readable
+                "Origin": "https://geonode.com",
+                "Connection": "keep-alive",
+                "Referer": "https://geonode.com/",
+                "Sec-Fetch-Dest": "empty",
+                "Sec-Fetch-Mode": "cors",
+                "Sec-Fetch-Site": "same-site"
+            }
+
+            proxies = None
+
+            try:
+                self.console.log(
+                    info.REQUESTING_GEONODE_API(
+                        api_url=self.api_url,
+                        payload=payload
+                    )
+                )
+
+                response = requests.get(
+                    self.api_url,
+                    params=payload,
+                    headers=headers
+                )
+
+                if response.status_code != 200:
+                    continue
+
+                proxies = response.json()["data"]
+            except Exception as error:
+                self.console.log(
+                    errors.FAILD_TO_REQUEST_GEONODE_API(
+                        error=error
+                    )
+                )
+
+            # In case no proxies were retrieved from this page, skip it
+            if proxies is None:
+                continue
+
+            # Validating proxies
+            for proxy_info in proxies:
+                proxy = GeonodeModel(
+                    console=self.console
+                )
+
+                proxy.set_fields(
+                    data=proxy_info
+                )
+                if proxy.validate():
+                    self.valid_proxies.append(proxy)
+
+                    self.console.log(
+                        info.FOUND_A_VALID_PROXY(
+                            proxy=proxy
+                        )
+                    )
+
+        return self.valid_proxies
diff --git a/proxycrawler/src/thread_task.py b/proxycrawler/src/thread_task.py
new file mode 100644
index 0000000..4e34f48
--- /dev/null
+++ b/proxycrawler/src/thread_task.py
@@ -0,0 +1,25 @@
+# import threading
+
+# class ThreadTask(object):
+#     """ Handles threads """
+#     def __init__(self, target_function, args):
+#         self.target_function = target_function
+#         self.args = args
+#         self.is_running = False
+#         self.thread = None
+
+#     def start(self):
+#         if not self.is_running:
+#             self.is_running = True
+#             self.thread = threading.Thread(target=self._run)
+#             self.thread.start()
+
+#     def stop(self):
+#         if self.is_running:
+#             self.is_running = False
+#             self.thread.join() # Wait for the thread to finish
+
+#     def _run(self):
+#         self.target_function(
+#             *self.args
+#         )
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..954c094
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,20 @@
+[tool.poetry]
+name = "proxycrawler"
+version = "0.1.0"
+description = "A proxy scraper and validator"
+authors = ["ramsy0dev <0ramsy0@gmail.com>"]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.11"
+beautifulsoup4 = "^4.12.2"
+requests = "^2.31.0"
+user-agent = "^0.1.10"
+sqlalchemy = "^2.0.20"
+
+[tool.poetry.scripts]
+proxycrawler = "proxycrawler:__main__.run"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"