-
Notifications
You must be signed in to change notification settings - Fork 1
/
http_user_agent.ml
101 lines (84 loc) · 3.43 KB
/
http_user_agent.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
(*
OCaml HTTP - do it yourself (fully OCaml) HTTP daemon
Copyright (C) <2002-2005> Stefano Zacchiroli <[email protected]>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as
published by the Free Software Foundation, version 2.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
USA
*)
open Printf
open Http_common
exception Http_error of (int * string) (* code, body *)
let http_scheme_RE = Pcre.regexp ~flags:[`CASELESS] "^http://"
let url_RE = Pcre.regexp "^([\\w.-]+)(:(\\d+))?(/.*)?$"
let tcp_bufsiz = 4096 (* for TCP I/O *)
let parse_url url =
try
let subs =
Pcre.extract ~rex:url_RE (Pcre.replace ~rex:http_scheme_RE url)
in
(subs.(1),
(if subs.(2) = "" then 80 else int_of_string subs.(3)),
(if subs.(4) = "" then "/" else subs.(4)))
with exc ->
failwith
(sprintf "Can't parse url: %s (exception: %s)"
url (Printexc.to_string exc))
let init_socket addr port =
let inet_addr = (Unix.gethostbyname addr).Unix.h_addr_list.(0) in
let sockaddr = Unix.ADDR_INET (inet_addr, port) in
let suck = Unix.socket Unix.PF_INET Unix.SOCK_STREAM 0 in
Unix.connect suck sockaddr;
let outchan = Unix.out_channel_of_descr suck in
let inchan = Unix.in_channel_of_descr suck in
(inchan, outchan)
let submit_request kind url =
let (address, port, path) = parse_url url in
let (inchan, outchan) = init_socket address port in
let req_string = match kind with `GET -> "GET" | `HEAD -> "HEAD" in
output_string outchan (sprintf "%s %s HTTP/1.0\r\n" req_string path);
output_string outchan (sprintf "Host: %s\r\n\r\n" address);
flush outchan;
(inchan, outchan)
let head url =
let (inchan, outchan) = submit_request `HEAD url in
let (_, status) = Http_parser.parse_response_fst_line inchan in
(match code_of_status status with
| 200 -> ()
| code -> raise (Http_error (code, "")));
let buf = Http_misc.buf_of_inchan inchan in
close_in inchan; (* close also outchan, same fd *)
Buffer.contents buf
let get_iter ?(head_callback = fun _ _ -> ()) callback url =
let (inchan, outchan) = submit_request `GET url in
let buf = Bytes.create tcp_bufsiz in
let (_, status) = Http_parser.parse_response_fst_line inchan in
(match code_of_status status with
| 200 -> ()
| code -> raise (Http_error (code, "")));
let headers = Http_parser.parse_headers inchan in
head_callback status headers;
(try
while true do
match input inchan buf 0 tcp_bufsiz with
| 0 -> raise End_of_file
| bytes when bytes = tcp_bufsiz -> (* buffer full, no need to slice it *)
callback buf
| bytes when bytes < tcp_bufsiz -> (* buffer not full, slice it *)
callback (Bytes.sub buf 0 bytes)
| _ -> (* ( bytes < 0 ) || ( bytes > tcp_bufsiz ) *)
assert false
done
with End_of_file -> ());
close_in inchan (* close also outchan, same fd *)
let get ?head_callback url =
let buf = Buffer.create 10240 in
get_iter ?head_callback (Buffer.add_bytes buf) url;
Buffer.contents buf