View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2002-2020, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(http_open,
   38          [ http_open/3,                % +URL, -Stream, +Options
   39            http_set_authorization/2,   % +URL, +Authorization
   40            http_close_keep_alive/1     % +Address
   41          ]).   42:- autoload(library(aggregate),[aggregate_all/3]).   43:- autoload(library(apply),[foldl/4,include/3]).   44:- autoload(library(base64),[base64/3]).   45:- autoload(library(debug),[debug/3,debugging/1]).   46:- autoload(library(error),
   47	    [ domain_error/2, must_be/2, existence_error/2, instantiation_error/1
   48	    ]).   49:- autoload(library(lists),[last/2,member/2]).   50:- autoload(library(option),
   51	    [ meta_options/3, option/2, select_option/4, merge_options/3,
   52	      option/3, select_option/3
   53	    ]).   54:- autoload(library(readutil),[read_line_to_codes/2]).   55:- autoload(library(uri),
   56	    [ uri_resolve/3, uri_components/2, uri_data/3,
   57              uri_authority_components/2, uri_authority_data/3,
   58	      uri_encoded/3, uri_query_components/2, uri_is_global/1
   59	    ]).   60:- autoload(library(http/http_header),
   61            [ http_parse_header/2, http_post_data/3 ]).   62:- autoload(library(http/http_stream),[stream_range_open/3]).   63:- if(exists_source(library(ssl))).   64:- autoload(library(ssl), [ssl_upgrade_legacy_options/2]).   65:- endif.   66:- use_module(library(socket)).   67
   68
   69/** <module> HTTP client library
   70
   71This library defines http_open/3, which opens an URL as a Prolog stream.
   72The functionality of the  library  can   be  extended  by  loading two
   73additional modules that act as plugins:
   74
   75    * library(http/http_ssl_plugin)
   76    Loading this library causes http_open/3 to handle HTTPS connections.
   77    Relevant options for SSL certificate handling are handed to
   78    ssl_context/3. This plugin is loaded automatically if the scheme
   79    `https` is requested using a default SSL context. See the plugin for
   80    additional information regarding security.
   81
   82    * library(zlib)
   83    Loading this library supports the `gzip` transfer encoding.  This
   84    plugin is lazily loaded if a connection is opened that claims this
   85    transfer encoding.
   86
   87    * library(http/http_cookie)
   88    Loading this library adds tracking cookies to http_open/3. Returned
   89    cookies are collected in the Prolog database and supplied for
   90    subsequent requests.
   91
   92    * library(http/http_stream)
   93    This library adds support for _chunked_ encoding and makes the
   94    http_open/3 advertise itself as HTTP/1.1 instead of HTTP/1.0.
   95
   96
   97Here is a simple example to fetch a web-page:
   98
   99```
  100?- http_open('http://www.google.com/search?q=prolog', In, []),
  101   copy_stream_data(In, user_output),
  102   close(In).
  103<!doctype html><head><title>prolog - Google Search</title><script>
  104...
  105```
  106
  107The example below fetches the modification time of a web-page. Note that
  108=|Modified|= is =|''|= (the empty atom) if the  web-server does not provide a
  109time-stamp for the resource. See also parse_time/2.
  110
  111```
  112modified(URL, Stamp) :-
  113       http_open(URL, In,
  114                 [ method(head),
  115                   header(last_modified, Modified)
  116                 ]),
  117       close(In),
  118       Modified \== '',
  119       parse_time(Modified, Stamp).
  120```
  121
  122Then next example uses Google search. It exploits library(uri) to manage
  123URIs, library(sgml) to load  an  HTML   document  and  library(xpath) to
  124navigate the parsed HTML. Note that  you   may  need to adjust the XPath
  125queries if the data returned by Google changes (this example indeed
  126no longer works and currently fails at the first xpath/3 call)
  127
  128```
  129:- use_module(library(http/http_open)).
  130:- use_module(library(xpath)).
  131:- use_module(library(sgml)).
  132:- use_module(library(uri)).
  133
  134google(For, Title, HREF) :-
  135        uri_encoded(query_value, For, Encoded),
  136        atom_concat('http://www.google.com/search?q=', Encoded, URL),
  137        http_open(URL, In, []),
  138        call_cleanup(
  139            load_html(In, DOM, []),
  140            close(In)),
  141        xpath(DOM, //h3(@class=r), Result),
  142        xpath(Result, //a(@href=HREF0, text), Title),
  143        uri_components(HREF0, Components),
  144        uri_data(search, Components, Query),
  145        uri_query_components(Query, Parts),
  146        memberchk(q=HREF, Parts).
  147```
  148
  149An example query is below:
  150
  151```
  152?- google(prolog, Title, HREF).
  153Title = 'SWI-Prolog',
  154HREF = 'http://www.swi-prolog.org/' ;
  155Title = 'Prolog - Wikipedia',
  156HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
  157Title = 'Prolog - Wikipedia, the free encyclopedia',
  158HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
  159Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
  160HREF = 'http://www.pro-log.nl/' ;
  161Title = 'Learn Prolog Now!',
  162HREF = 'http://www.learnprolognow.org/' ;
  163Title = 'Free Online Version - Learn Prolog
  164...
  165```
  166
  167@see load_html/3 and xpath/3 can be used to parse and navigate HTML
  168     documents.
  169@see http_get/3 and http_post/4 provide an alternative interface that
  170     convert the reply depending on the =|Content-Type|= header.
  171*/
  172
  173:- multifile
  174    http:encoding_filter/3,           % +Encoding, +In0, -In
  175    http:current_transfer_encoding/1, % ?Encoding
  176    http:disable_encoding_filter/1,   % +ContentType
  177    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
  178                                      % -NewStreamPair, +Options
  179    http:open_options/2,              % +Parts, -Options
  180    http:write_cookies/3,             % +Out, +Parts, +Options
  181    http:update_cookies/3,            % +CookieLine, +Parts, +Options
  182    http:authenticate_client/2,       % +URL, +Action
  183    http:http_connection_over_proxy/6.  184
  185:- meta_predicate
  186    http_open(+,-,:).  187
  188:- predicate_options(http_open/3, 3,
  189                     [ authorization(compound),
  190                       final_url(-atom),
  191                       header(+atom, -atom),
  192                       headers(-list),
  193                       connection(+atom),
  194                       method(oneof([delete,get,put,head,post,patch,options])),
  195                       size(-integer),
  196                       status_code(-integer),
  197                       output(-stream),
  198                       timeout(number),
  199                       unix_socket(+atom),
  200                       proxy(atom, integer),
  201                       proxy_authorization(compound),
  202                       bypass_proxy(boolean),
  203                       request_header(any),
  204                       user_agent(atom),
  205                       version(-compound),
  206        % The option below applies if library(http/http_header) is loaded
  207                       post(any),
  208        % The options below apply if library(http/http_ssl_plugin)) is loaded
  209                       pem_password_hook(callable),
  210                       cacert_file(atom),
  211                       cert_verify_hook(callable)
  212                     ]).  213
  214%!  user_agent(-Agent) is det.
  215%
  216%   Default value for =|User-Agent|=,  can   be  overruled using the
  217%   option user_agent(Agent) of http_open/3.
  218
  219user_agent('SWI-Prolog').
  220
  221%!  http_open(+URL, -Stream, +Options) is det.
  222%
  223%   Open the data at the HTTP  server   as  a  Prolog stream. URL is
  224%   either an atom  specifying  a  URL   or  a  list  representing a
  225%   broken-down  URL  as  specified  below.   After  this  predicate
  226%   succeeds the data can be read from Stream. After completion this
  227%   stream must be  closed  using   the  built-in  Prolog  predicate
  228%   close/1. Options provides additional options:
  229%
  230%     * authenticate(+Boolean)
  231%     If `false` (default `true`), do _not_ try to automatically
  232%     authenticate the client if a 401 (Unauthorized) status code
  233%     is received.
  234%
  235%     * authorization(+Term)
  236%     Send authorization. See also http_set_authorization/2. Supported
  237%     schemes:
  238%
  239%       - basic(+User, +Password)
  240%       HTTP Basic authentication.
  241%       - bearer(+Token)
  242%       HTTP Bearer authentication.
  243%       - digest(+User, +Password)
  244%       HTTP Digest authentication.  This option is only provided
  245%       if the plugin library(http/http_digest) is also loaded.
  246%
  247%     * unix_socket(+Path)
  248%     Connect to the given Unix domain socket.  In this scenario
  249%     the host name and port or ignored.  If the server replies
  250%     with a _redirect_ message and the host differs from the
  251%     original host as normal TCP connection is used to handle
  252%     the redirect.  This option is inspired by curl(1)'s option
  253%     `--unix-socket`.
  254%
  255%     * connection(+Connection)
  256%     Specify the =Connection= header.  Default is =close=.  The
  257%     alternative is =|Keep-alive|=.  This maintains a pool of
  258%     available connections as determined by keep_connection/1.
  259%     The library(http/websockets) uses =|Keep-alive, Upgrade|=.
  260%     Keep-alive connections can be closed explicitly using
  261%     http_close_keep_alive/1. Keep-alive connections may
  262%     significantly improve repetitive requests on the same server,
  263%     especially if the IP route is long, HTTPS is used or the
  264%     connection uses a proxy.
  265%
  266%     * final_url(-FinalURL)
  267%     Unify FinalURL with the final   destination. This differs from
  268%     the  original  URL  if  the  returned  head  of  the  original
  269%     indicates an HTTP redirect (codes 301,  302 or 303). Without a
  270%     redirect, FinalURL is the same as URL if  URL is an atom, or a
  271%     URL constructed from the parts.
  272%
  273%     * header(Name, -AtomValue)
  274%     If provided, AtomValue is  unified  with   the  value  of  the
  275%     indicated  field  in  the  reply    header.  Name  is  matched
  276%     case-insensitive and the underscore  (_)   matches  the hyphen
  277%     (-). Multiple of these options  may   be  provided  to extract
  278%     multiple  header  fields.  If  the  header  is  not  available
  279%     AtomValue is unified to the empty atom ('').
  280%
  281%     * headers(-List)
  282%     If provided, List is unified with  a list of Name(Value) pairs
  283%     corresponding to fields in the reply   header.  Name and Value
  284%     follow the same conventions  used   by  the header(Name,Value)
  285%     option.
  286%
  287%     * method(+Method)
  288%     One of =get= (default), =head=, =delete=, =post=,   =put=   or
  289%     =patch=.
  290%     The  =head= message can be
  291%     used in combination with  the   header(Name,  Value) option to
  292%     access information on the resource   without actually fetching
  293%     the resource itself.  The  returned   stream  must  be  closed
  294%     immediately.
  295%
  296%     If post(Data) is provided, the default is =post=.
  297%
  298%     * size(-Size)
  299%     Size is unified with the   integer value of =|Content-Length|=
  300%     in the reply header.
  301%
  302%     * version(-Version)
  303%     Version is a _pair_ `Major-Minor`, where `Major` and `Minor`
  304%     are integers representing the HTTP version in the reply header.
  305%
  306%     * range(+Range)
  307%     Ask for partial content. Range   is  a term _|Unit(From,To)|_,
  308%     where `From` is an integer and `To`   is  either an integer or
  309%     the atom `end`. HTTP 1.1 only   supports Unit = `bytes`. E.g.,
  310%     to   ask   for    bytes    1000-1999,     use    the    option
  311%     range(bytes(1000,1999))
  312%
  313%     * redirect(+Boolean)
  314%     If `false` (default `true`), do _not_ automatically redirect
  315%     if a 3XX code is received.  Must be combined with
  316%     status_code(Code) and one of the header options to read the
  317%     redirect reply. In particular, without status_code(Code) a
  318%     redirect is mapped to an exception.
  319%
  320%     * status_code(-Code)
  321%     If this option is  present  and   Code  unifies  with the HTTP
  322%     status code, do *not* translate errors (4xx, 5xx) into an
  323%     exception. Instead, http_open/3 behaves as if 2xx (success) is
  324%     returned, providing the application to read the error document
  325%     from the returned stream.
  326%
  327%     * output(-Out)
  328%     Unify the output stream with Out and do not close it. This can
  329%     be used to upgrade a connection.
  330%
  331%     * timeout(+Timeout)
  332%     If provided, set a timeout on   the stream using set_stream/2.
  333%     With this option if no new data arrives within Timeout seconds
  334%     the stream raises an exception.  Default   is  to wait forever
  335%     (=infinite=).
  336%
  337%     * post(+Data)
  338%     Issue a =POST= request on the HTTP server.  Data is
  339%     handed to http_post_data/3.
  340%
  341%     * proxy(+Host:Port)
  342%     Use an HTTP proxy to connect to the outside world.  See also
  343%     socket:proxy_for_url/3.  This option overrules the proxy
  344%     specification defined by socket:proxy_for_url/3.
  345%
  346%     * proxy(+Host, +Port)
  347%     Synonym for proxy(+Host:Port).  Deprecated.
  348%
  349%     * proxy_authorization(+Authorization)
  350%     Send authorization to the proxy.  Otherwise   the  same as the
  351%     =authorization= option.
  352%
  353%     * bypass_proxy(+Boolean)
  354%     If =true=, bypass proxy hooks.  Default is =false=.
  355%
  356%     * request_header(Name = Value)
  357%     Additional  name-value  parts  are  added   in  the  order  of
  358%     appearance to the HTTP request   header.  No interpretation is
  359%     done.
  360%
  361%     * max_redirect(+Max)
  362%     Sets the maximum length of a redirection chain.  This is needed
  363%     for some IRIs that redirect indefinitely to other IRIs without
  364%     looping (e.g., redirecting to IRIs with a random element in them).
  365%     Max must be either a non-negative integer or the atom `infinite`.
  366%     The default value is `10`.
  367%
  368%     * user_agent(+Agent)
  369%     Defines the value of the  =|User-Agent|=   field  of  the HTTP
  370%     header. Default is =SWI-Prolog=.
  371%
  372%   The hook http:open_options/2 can  be   used  to  provide default
  373%   options   based   on   the   broken-down     URL.   The   option
  374%   status_code(-Code)  is  particularly  useful   to  query  *REST*
  375%   interfaces that commonly return status   codes  other than `200`
  376%   that need to be be processed by the client code.
  377%
  378%   @param URL is either an atom or string (url) or a list of _parts_.
  379%
  380%               When provided, this list may contain the fields
  381%               =scheme=, =user=, =password=, =host=, =port=, =path=
  382%               and either =query_string= (whose argument is an atom)
  383%               or =search= (whose argument is a list of
  384%               =|Name(Value)|= or =|Name=Value|= compound terms).
  385%               Only =host= is mandatory.  The example below opens the
  386%               URL =|http://www.example.com/my/path?q=Hello%20World&lang=en|=.
  387%               Note that values must *not* be quoted because the
  388%               library inserts the required quotes.
  389%
  390%               ```
  391%               http_open([ host('www.example.com'),
  392%                           path('/my/path'),
  393%                           search([ q='Hello world',
  394%                                    lang=en
  395%                                  ])
  396%                         ])
  397%               ```
  398%
  399%   @throws error(existence_error(url, Id),Context) is raised if the
  400%   HTTP result code is not in the range 200..299. Context has the
  401%   shape context(Message, status(Code, TextCode)), where `Code` is the
  402%   numeric HTTP code and `TextCode` is the textual description thereof
  403%   provided by the server. `Message` may provide additional details or
  404%   may be unbound.
  405%
  406%   @see ssl_context/3 for SSL related options if
  407%   library(http/http_ssl_plugin) is loaded.
  408
  409:- multifile
  410    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
  411
  412http_open(URL, Stream, QOptions) :-
  413    meta_options(is_meta, QOptions, Options0),
  414    (   atomic(URL)
  415    ->  parse_url_ex(URL, Parts)
  416    ;   Parts = URL
  417    ),
  418    autoload_https(Parts),
  419    upgrade_ssl_options(Parts, Options0, Options),
  420    add_authorization(Parts, Options, Options1),
  421    findall(HostOptions, hooked_options(Parts, HostOptions), AllHostOptions),
  422    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
  423    (   option(bypass_proxy(true), Options)
  424    ->  try_http_proxy(direct, Parts, Stream, Options2)
  425    ;   term_variables(Options2, Vars2),
  426        findall(Result-Vars2,
  427                try_a_proxy(Parts, Result, Options2),
  428                ResultList),
  429        last(ResultList, Status-Vars2)
  430    ->  (   Status = true(_Proxy, Stream)
  431        ->  true
  432        ;   throw(error(proxy_error(tried(ResultList)), _))
  433        )
  434    ;   try_http_proxy(direct, Parts, Stream, Options2)
  435    ).
  436
  437try_a_proxy(Parts, Result, Options) :-
  438    parts_uri(Parts, AtomicURL),
  439    option(host(Host), Parts),
  440    (   option(unix_socket(Path), Options)
  441    ->  Proxy = unix_socket(Path)
  442    ;   (   option(proxy(ProxyHost:ProxyPort), Options)
  443        ;   is_list(Options),
  444            memberchk(proxy(ProxyHost,ProxyPort), Options)
  445        )
  446    ->  Proxy = proxy(ProxyHost, ProxyPort)
  447    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
  448    ),
  449    debug(http(proxy),
  450          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
  451    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
  452    ->  (   var(E)
  453        ->  !, Result = true(Proxy, Stream)
  454        ;   Result = error(Proxy, E)
  455        )
  456    ;   Result = false(Proxy)
  457    ),
  458    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
  459
  460try_http_proxy(Method, Parts, Stream, Options0) :-
  461    option(host(Host), Parts),
  462    proxy_request_uri(Method, Parts, RequestURI),
  463    select_option(visited(Visited0), Options0, OptionsV, []),
  464    Options = [visited([Parts|Visited0])|OptionsV],
  465    parts_scheme(Parts, Scheme),
  466    default_port(Scheme, DefPort),
  467    url_part(port(Port), Parts, DefPort),
  468    host_and_port(Host, DefPort, Port, HostPort),
  469    (   option(connection(Connection), Options0),
  470        keep_alive(Connection),
  471        get_from_pool(Host:Port, StreamPair),
  472        debug(http(connection), 'Trying Keep-alive to ~p using ~p',
  473              [ Host:Port, StreamPair ]),
  474        catch(send_rec_header(StreamPair, Stream, HostPort,
  475                              RequestURI, Parts, Options),
  476              error(E,_),
  477              keep_alive_error(E))
  478    ->  true
  479    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
  480                                        SocketStreamPair, Options, Options1),
  481        (   catch(http:http_protocol_hook(Scheme, Parts,
  482                                          SocketStreamPair,
  483                                          StreamPair, Options),
  484                  Error,
  485                  ( close(SocketStreamPair, [force(true)]),
  486                    throw(Error)))
  487        ->  true
  488        ;   StreamPair = SocketStreamPair
  489        ),
  490        send_rec_header(StreamPair, Stream, HostPort,
  491                        RequestURI, Parts, Options1)
  492    ),
  493    return_final_url(Options).
  494
  495proxy_request_uri(direct, Parts, RequestURI) :-
  496    !,
  497    parts_request_uri(Parts, RequestURI).
  498proxy_request_uri(unix_socket(_), Parts, RequestURI) :-
  499    !,
  500    parts_request_uri(Parts, RequestURI).
  501proxy_request_uri(_, Parts, RequestURI) :-
  502    parts_uri(Parts, RequestURI).
  503
  504http:http_connection_over_proxy(unix_socket(Path), _, _,
  505                                StreamPair, Options, Options) :-
  506    !,
  507    unix_domain_socket(Socket),
  508    tcp_connect(Socket, Path),
  509    tcp_open_socket(Socket, In, Out),
  510    stream_pair(StreamPair, In, Out).
  511http:http_connection_over_proxy(direct, _, Host:Port,
  512                                StreamPair, Options, Options) :-
  513    !,
  514    open_socket(Host:Port, StreamPair, Options).
  515http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
  516                                StreamPair, Options, Options) :-
  517    \+ ( memberchk(scheme(Scheme), Parts),
  518         secure_scheme(Scheme)
  519       ),
  520    !,
  521    % We do not want any /more/ proxy after this
  522    open_socket(ProxyHost:ProxyPort, StreamPair,
  523                [bypass_proxy(true)|Options]).
  524http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
  525                                StreamPair, Options, Options) :-
  526    !,
  527    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
  528    catch(negotiate_socks_connection(Host:Port, StreamPair),
  529          Error,
  530          ( close(StreamPair, [force(true)]),
  531            throw(Error)
  532          )).
  533
  534%!  hooked_options(+Parts, -Options) is nondet.
  535%
  536%   Calls  http:open_options/2  and  if  necessary    upgrades  old  SSL
  537%   cacerts_file(File) option to a cacerts(List) option to ensure proper
  538%   merging of options.
  539
  540hooked_options(Parts, Options) :-
  541    http:open_options(Parts, Options0),
  542    upgrade_ssl_options(Parts, Options0, Options).
  543
  544:- if(current_predicate(ssl_upgrade_legacy_options/2)).  545upgrade_ssl_options(Parts, Options0, Options) :-
  546    requires_ssl(Parts),
  547    !,
  548    ssl_upgrade_legacy_options(Options0, Options).
  549:- endif.  550upgrade_ssl_options(_, Options, Options).
  551
  552merge_options_rev(Old, New, Merged) :-
  553    merge_options(New, Old, Merged).
  554
  555is_meta(pem_password_hook).             % SSL plugin callbacks
  556is_meta(cert_verify_hook).
  557
  558
  559http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
  560
  561default_port(https, 443) :- !.
  562default_port(wss,   443) :- !.
  563default_port(_,     80).
  564
  565host_and_port(Host, DefPort, DefPort, Host) :- !.
  566host_and_port(Host, _,       Port,    Host:Port).
  567
  568%!  autoload_https(+Parts) is det.
  569%
  570%   If the requested scheme is https or wss, load the HTTPS plugin.
  571
  572autoload_https(Parts) :-
  573    requires_ssl(Parts),
  574    memberchk(scheme(S), Parts),
  575    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
  576    exists_source(library(http/http_ssl_plugin)),
  577    !,
  578    use_module(library(http/http_ssl_plugin)).
  579autoload_https(_).
  580
  581requires_ssl(Parts) :-
  582    memberchk(scheme(S), Parts),
  583    secure_scheme(S).
  584
  585secure_scheme(https).
  586secure_scheme(wss).
  587
  588%!  send_rec_header(+StreamPair, -Stream,
  589%!                  +Host, +RequestURI, +Parts, +Options) is det.
  590%
  591%   Send header to Out and process reply.  If there is an error or
  592%   failure, close In and Out and return the error or failure.
  593
  594send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  595    (   catch(guarded_send_rec_header(StreamPair, Stream,
  596                                      Host, RequestURI, Parts, Options),
  597              E, true)
  598    ->  (   var(E)
  599        ->  (   option(output(StreamPair), Options)
  600            ->  true
  601            ;   true
  602            )
  603        ;   close(StreamPair, [force(true)]),
  604            throw(E)
  605        )
  606    ;   close(StreamPair, [force(true)]),
  607        fail
  608    ).
  609
  610guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  611    user_agent(Agent, Options),
  612    method(Options, MNAME),
  613    http_version(Version),
  614    option(connection(Connection), Options, close),
  615    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
  616    debug(http(send_request), "> Host: ~w", [Host]),
  617    debug(http(send_request), "> User-Agent: ~w", [Agent]),
  618    debug(http(send_request), "> Connection: ~w", [Connection]),
  619    format(StreamPair,
  620           '~w ~w HTTP/~w\r\n\c
  621               Host: ~w\r\n\c
  622               User-Agent: ~w\r\n\c
  623               Connection: ~w\r\n',
  624           [MNAME, RequestURI, Version, Host, Agent, Connection]),
  625    parts_uri(Parts, URI),
  626    x_headers(Options, URI, StreamPair),
  627    write_cookies(StreamPair, Parts, Options),
  628    (   option(post(PostData), Options)
  629    ->  http_post_data(PostData, StreamPair, [])
  630    ;   format(StreamPair, '\r\n', [])
  631    ),
  632    flush_output(StreamPair),
  633                                    % read the reply header
  634    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
  635    update_cookies(Lines, Parts, Options),
  636    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
  637            StreamPair, Stream).
  638
  639
  640%!  http_version(-Version:atom) is det.
  641%
  642%   HTTP version we publish. We  can  only   use  1.1  if we support
  643%   chunked encoding.
  644
  645http_version('1.1') :-
  646    http:current_transfer_encoding(chunked),
  647    !.
  648http_version('1.0').
  649
  650method(Options, MNAME) :-
  651    option(post(_), Options),
  652    !,
  653    option(method(M), Options, post),
  654    (   map_method(M, MNAME0)
  655    ->  MNAME = MNAME0
  656    ;   domain_error(method, M)
  657    ).
  658method(Options, MNAME) :-
  659    option(method(M), Options, get),
  660    (   map_method(M, MNAME0)
  661    ->  MNAME = MNAME0
  662    ;   map_method(_, M)
  663    ->  MNAME = M
  664    ;   domain_error(method, M)
  665    ).
  666
  667%!  map_method(+MethodID, -Method)
  668%
  669%   Support additional ``METHOD`` keywords.  Default   are  the official
  670%   HTTP methods as defined by the various RFCs.
  671
  672:- multifile
  673    map_method/2.  674
  675map_method(delete,  'DELETE').
  676map_method(get,     'GET').
  677map_method(head,    'HEAD').
  678map_method(post,    'POST').
  679map_method(put,     'PUT').
  680map_method(patch,   'PATCH').
  681map_method(options, 'OPTIONS').
  682
  683%!  x_headers(+Options, +URI, +Out) is det.
  684%
  685%   Emit extra headers from   request_header(Name=Value)  options in
  686%   Options.
  687%
  688%   @tbd Use user/password fields
  689
  690x_headers(Options, URI, Out) :-
  691    x_headers_(Options, [url(URI)|Options], Out).
  692
  693x_headers_([], _, _).
  694x_headers_([H|T], Options, Out) :-
  695    x_header(H, Options, Out),
  696    x_headers_(T, Options, Out).
  697
  698x_header(request_header(Name=Value), _, Out) :-
  699    !,
  700    debug(http(send_request), "> ~w: ~w", [Name, Value]),
  701    format(Out, '~w: ~w\r\n', [Name, Value]).
  702x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
  703    !,
  704    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
  705x_header(authorization(Authorization), Options, Out) :-
  706    !,
  707    auth_header(Authorization, Options, 'Authorization', Out).
  708x_header(range(Spec), _, Out) :-
  709    !,
  710    Spec =.. [Unit, From, To],
  711    (   To == end
  712    ->  ToT = ''
  713    ;   must_be(integer, To),
  714        ToT = To
  715    ),
  716    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
  717    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
  718x_header(_, _, _).
  719
  720%!  auth_header(+AuthOption, +Options, +HeaderName, +Out)
  721
  722auth_header(basic(User, Password), _, Header, Out) :-
  723    !,
  724    format(codes(Codes), '~w:~w', [User, Password]),
  725    phrase(base64(Codes), Base64Codes),
  726    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
  727    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
  728auth_header(bearer(Token), _, Header, Out) :-
  729    !,
  730    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
  731    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
  732auth_header(Auth, Options, _, Out) :-
  733    option(url(URL), Options),
  734    add_method(Options, Options1),
  735    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
  736    !.
  737auth_header(Auth, _, _, _) :-
  738    domain_error(authorization, Auth).
  739
  740user_agent(Agent, Options) :-
  741    (   option(user_agent(Agent), Options)
  742    ->  true
  743    ;   user_agent(Agent)
  744    ).
  745
  746add_method(Options0, Options) :-
  747    option(method(_), Options0),
  748    !,
  749    Options = Options0.
  750add_method(Options0, Options) :-
  751    option(post(_), Options0),
  752    !,
  753    Options = [method(post)|Options0].
  754add_method(Options0, [method(get)|Options0]).
  755
  756%!  do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header,
  757%!          +Options, +Parts, +Host, +In, -FinalIn) is det.
  758%
  759%   Handle the HTTP status once available. If   200-299, we are ok. If a
  760%   redirect, redo the open,  returning  a   new  stream.  Else issue an
  761%   error.
  762%
  763%   @error  existence_error(url, URL)
  764
  765                                        % Redirections
  766do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
  767    redirect_code(Code),
  768    option(redirect(true), Options0, true),
  769    location(Lines, RequestURI),
  770    !,
  771    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
  772    close(In),
  773    parts_uri(Parts, Base),
  774    uri_resolve(RequestURI, Base, Redirected),
  775    parse_url_ex(Redirected, RedirectedParts),
  776    (   redirect_limit_exceeded(Options0, Max)
  777    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
  778        throw(error(permission_error(redirect, http, Redirected),
  779                    context(_, Comment)))
  780    ;   redirect_loop(RedirectedParts, Options0)
  781    ->  throw(error(permission_error(redirect, http, Redirected),
  782                    context(_, 'Redirection loop')))
  783    ;   true
  784    ),
  785    redirect_options(Parts, RedirectedParts, Options0, Options),
  786    http_open(RedirectedParts, Stream, Options).
  787                                        % Need authentication
  788do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
  789    authenticate_code(Code),
  790    option(authenticate(true), Options0, true),
  791    parts_uri(Parts, URI),
  792    parse_headers(Lines, Headers),
  793    http:authenticate_client(
  794             URI,
  795             auth_reponse(Headers, Options0, Options)),
  796    !,
  797    close(In0),
  798    http_open(Parts, Stream, Options).
  799                                        % Accepted codes
  800do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
  801    (   option(status_code(Code), Options),
  802        Lines \== []
  803    ->  true
  804    ;   successful_code(Code)
  805    ),
  806    !,
  807    parts_uri(Parts, URI),
  808    parse_headers(Lines, Headers),
  809    return_version(Options, Version),
  810    return_size(Options, Headers),
  811    return_fields(Options, Headers),
  812    return_headers(Options, Headers),
  813    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
  814    transfer_encoding_filter(Lines, In1, In),
  815                                    % properly re-initialise the stream
  816    set_stream(In, file_name(URI)),
  817    set_stream(In, record_position(true)).
  818do_open(_, _, _, [], Options, _, _, _, _) :-
  819    option(connection(Connection), Options),
  820    keep_alive(Connection),
  821    !,
  822    throw(error(keep_alive(closed),_)).
  823                                        % report anything else as error
  824do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
  825    parts_uri(Parts, URI),
  826    (   map_error_code(Code, Error)
  827    ->  Formal =.. [Error, url, URI]
  828    ;   Formal = existence_error(url, URI)
  829    ),
  830    throw(error(Formal, context(_, status(Code, Comment)))).
  831
  832
  833successful_code(Code) :-
  834    between(200, 299, Code).
  835
  836%!  redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet.
  837%
  838%   True if we have exceeded the maximum redirection length (default 10).
  839
  840redirect_limit_exceeded(Options, Max) :-
  841    option(visited(Visited), Options, []),
  842    length(Visited, N),
  843    option(max_redirect(Max), Options, 10),
  844    (Max == infinite -> fail ; N > Max).
  845
  846
  847%!  redirect_loop(+Parts, +Options) is semidet.
  848%
  849%   True if we are in  a  redirection   loop.  Note  that some sites
  850%   redirect once to the same place using  cookies or similar, so we
  851%   allow for two tries. In fact,   we  should probably test whether
  852%   authorization or cookie headers have changed.
  853
  854redirect_loop(Parts, Options) :-
  855    option(visited(Visited), Options, []),
  856    include(==(Parts), Visited, Same),
  857    length(Same, Count),
  858    Count > 2.
  859
  860
  861%!  redirect_options(+Parts, +RedirectedParts, +Options0, -Options) is det.
  862%
  863%   A redirect from a POST should do  a   GET  on the returned URI. This
  864%   means we must remove the method(post)   and  post(Data) options from
  865%   the original option-list.
  866%
  867%   If we are connecting over a Unix   domain socket we drop this option
  868%   if the redirect host does not match the initial host.
  869
  870redirect_options(Parts, RedirectedParts, Options0, Options) :-
  871    select_option(unix_socket(_), Options0, Options1),
  872    memberchk(host(Host), Parts),
  873    memberchk(host(RHost), RedirectedParts),
  874    debug(http(redirect), 'http_open: redirecting AF_UNIX ~w to ~w',
  875          [Host, RHost]),
  876    Host \== RHost,
  877    !,
  878    redirect_options(Options1, Options).
  879redirect_options(_, _, Options0, Options) :-
  880    redirect_options(Options0, Options).
  881
  882redirect_options(Options0, Options) :-
  883    (   select_option(post(_), Options0, Options1)
  884    ->  true
  885    ;   Options1 = Options0
  886    ),
  887    (   select_option(method(Method), Options1, Options),
  888        \+ redirect_method(Method)
  889    ->  true
  890    ;   Options = Options1
  891    ).
  892
  893redirect_method(delete).
  894redirect_method(get).
  895redirect_method(head).
  896
  897
  898%!  map_error_code(+HTTPCode, -PrologError) is semidet.
  899%
  900%   Map HTTP error codes to Prolog errors.
  901%
  902%   @tbd    Many more maps. Unfortunately many have no sensible Prolog
  903%           counterpart.
  904
  905map_error_code(401, permission_error).
  906map_error_code(403, permission_error).
  907map_error_code(404, existence_error).
  908map_error_code(405, permission_error).
  909map_error_code(407, permission_error).
  910map_error_code(410, existence_error).
  911
  912redirect_code(301).                     % Moved Permanently
  913redirect_code(302).                     % Found (previously "Moved Temporary")
  914redirect_code(303).                     % See Other
  915redirect_code(307).                     % Temporary Redirect
  916
  917authenticate_code(401).
  918
  919%!  open_socket(+Address, -StreamPair, +Options) is det.
  920%
  921%   Create and connect a client socket to Address.  Options
  922%
  923%       * timeout(+Timeout)
  924%       Sets timeout on the stream, *after* connecting the
  925%       socket.
  926%
  927%   @tbd    Make timeout also work on tcp_connect/4.
  928%   @tbd    This is the same as do_connect/4 in http_client.pl
  929
  930open_socket(Address, StreamPair, Options) :-
  931    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
  932    tcp_connect(Address, StreamPair, Options),
  933    stream_pair(StreamPair, In, Out),
  934    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
  935    set_stream(In, record_position(false)),
  936    (   option(timeout(Timeout), Options)
  937    ->  set_stream(In, timeout(Timeout))
  938    ;   true
  939    ).
  940
  941
  942return_version(Options, Major-Minor) :-
  943    option(version(Major-Minor), Options, _).
  944
  945return_size(Options, Headers) :-
  946    (   memberchk(content_length(Size), Headers)
  947    ->  option(size(Size), Options, _)
  948    ;   true
  949    ).
  950
  951return_fields([], _).
  952return_fields([header(Name, Value)|T], Headers) :-
  953    !,
  954    (   Term =.. [Name,Value],
  955        memberchk(Term, Headers)
  956    ->  true
  957    ;   Value = ''
  958    ),
  959    return_fields(T, Headers).
  960return_fields([_|T], Lines) :-
  961    return_fields(T, Lines).
  962
  963return_headers(Options, Headers) :-
  964    option(headers(Headers), Options, _).
  965
  966%!  parse_headers(+Lines, -Headers:list(compound)) is det.
  967%
  968%   Parse the header lines for   the  headers(-List) option. Invalid
  969%   header   lines   are   skipped,   printing   a   warning   using
  970%   pring_message/2.
  971
  972parse_headers([], []) :- !.
  973parse_headers([Line|Lines], Headers) :-
  974    catch(http_parse_header(Line, [Header]), Error, true),
  975    (   var(Error)
  976    ->  Headers = [Header|More]
  977    ;   print_message(warning, Error),
  978        Headers = More
  979    ),
  980    parse_headers(Lines, More).
  981
  982
  983%!  return_final_url(+Options) is semidet.
  984%
  985%   If Options contains final_url(URL), unify URL with the final
  986%   URL after redirections.
  987
  988return_final_url(Options) :-
  989    option(final_url(URL), Options),
  990    var(URL),
  991    !,
  992    option(visited([Parts|_]), Options),
  993    parts_uri(Parts, URL).
  994return_final_url(_).
  995
  996
  997%!  transfer_encoding_filter(+Lines, +In0, -In) is det.
  998%
  999%   Install filters depending on the transfer  encoding. If In0 is a
 1000%   stream-pair, we close the output   side. If transfer-encoding is
 1001%   not specified, the content-encoding is  interpreted as a synonym
 1002%   for transfer-encoding, because many   servers incorrectly depend
 1003%   on  this.  Exceptions  to  this   are  content-types  for  which
 1004%   disable_encoding_filter/1 holds.
 1005
 1006transfer_encoding_filter(Lines, In0, In) :-
 1007    transfer_encoding(Lines, Encoding),
 1008    !,
 1009    transfer_encoding_filter_(Encoding, In0, In).
 1010transfer_encoding_filter(Lines, In0, In) :-
 1011    content_encoding(Lines, Encoding),
 1012    content_type(Lines, Type),
 1013    \+ http:disable_encoding_filter(Type),
 1014    !,
 1015    transfer_encoding_filter_(Encoding, In0, In).
 1016transfer_encoding_filter(_, In, In).
 1017
 1018transfer_encoding_filter_(Encoding, In0, In) :-
 1019    stream_pair(In0, In1, Out),
 1020    (   nonvar(Out)
 1021    ->  close(Out)
 1022    ;   true
 1023    ),
 1024    (   http:encoding_filter(Encoding, In1, In)
 1025    ->  true
 1026    ;   autoload_encoding(Encoding),
 1027        http:encoding_filter(Encoding, In1, In)
 1028    ->  true
 1029    ;   domain_error(http_encoding, Encoding)
 1030    ).
 1031
 1032:- multifile
 1033    autoload_encoding/1. 1034
 1035:- if(exists_source(library(zlib))). 1036autoload_encoding(gzip) :-
 1037    use_module(library(zlib)).
 1038:- endif. 1039
 1040content_type(Lines, Type) :-
 1041    member(Line, Lines),
 1042    phrase(field('content-type'), Line, Rest),
 1043    !,
 1044    atom_codes(Type, Rest).
 1045
 1046%!  http:disable_encoding_filter(+ContentType) is semidet.
 1047%
 1048%   Do not use  the   =|Content-encoding|=  as =|Transfer-encoding|=
 1049%   encoding for specific values of   ContentType. This predicate is
 1050%   multifile and can thus be extended by the user.
 1051
 1052http:disable_encoding_filter('application/x-gzip').
 1053http:disable_encoding_filter('application/x-tar').
 1054http:disable_encoding_filter('x-world/x-vrml').
 1055http:disable_encoding_filter('application/zip').
 1056http:disable_encoding_filter('application/x-gzip').
 1057http:disable_encoding_filter('application/x-zip-compressed').
 1058http:disable_encoding_filter('application/x-compress').
 1059http:disable_encoding_filter('application/x-compressed').
 1060http:disable_encoding_filter('application/x-spoon').
 1061
 1062%!  transfer_encoding(+Lines, -Encoding) is semidet.
 1063%
 1064%   True if Encoding  is  the   value  of  the =|Transfer-encoding|=
 1065%   header.
 1066
 1067transfer_encoding(Lines, Encoding) :-
 1068    what_encoding(transfer_encoding, Lines, Encoding).
 1069
 1070what_encoding(What, Lines, Encoding) :-
 1071    member(Line, Lines),
 1072    phrase(encoding_(What, Debug), Line, Rest),
 1073    !,
 1074    atom_codes(Encoding, Rest),
 1075    debug(http(What), '~w: ~p', [Debug, Rest]).
 1076
 1077encoding_(content_encoding, 'Content-encoding') -->
 1078    field('content-encoding').
 1079encoding_(transfer_encoding, 'Transfer-encoding') -->
 1080    field('transfer-encoding').
 1081
 1082%!  content_encoding(+Lines, -Encoding) is semidet.
 1083%
 1084%   True if Encoding is the value of the =|Content-encoding|=
 1085%   header.
 1086
 1087content_encoding(Lines, Encoding) :-
 1088    what_encoding(content_encoding, Lines, Encoding).
 1089
 1090%!  read_header(+In:stream, +Parts, -Version, -Code:int,
 1091%!  -Comment:atom, -Lines:list) is det.
 1092%
 1093%   Read the HTTP reply-header.  If the reply is completely empty
 1094%   an existence error is thrown.  If the replied header is
 1095%   otherwise invalid a 500 HTTP error is simulated, having the
 1096%   comment =|Invalid reply header|=.
 1097%
 1098%   @param Parts    A list of compound terms that describe the
 1099%                   parsed request URI.
 1100%   @param Version  HTTP reply version as Major-Minor pair
 1101%   @param Code     Numeric HTTP reply-code
 1102%   @param Comment  Comment of reply-code as atom
 1103%   @param Lines    Remaining header lines as code-lists.
 1104%
 1105%   @error existence_error(http_reply, Uri)
 1106
 1107read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
 1108    read_line_to_codes(In, Line),
 1109    (   Line == end_of_file
 1110    ->  parts_uri(Parts, Uri),
 1111        existence_error(http_reply,Uri)
 1112    ;   true
 1113    ),
 1114    Line \== end_of_file,
 1115    phrase(first_line(Major-Minor, Code, Comment), Line),
 1116    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
 1117    read_line_to_codes(In, Line2),
 1118    rest_header(Line2, In, Lines),
 1119    !,
 1120    (   debugging(http(open))
 1121    ->  forall(member(HL, Lines),
 1122               debug(http(open), '~s', [HL]))
 1123    ;   true
 1124    ).
 1125read_header(_, _, 1-1, 500, 'Invalid reply header', []).
 1126
 1127rest_header([], _, []) :- !.            % blank line: end of header
 1128rest_header(L0, In, [L0|L]) :-
 1129    read_line_to_codes(In, L1),
 1130    rest_header(L1, In, L).
 1131
 1132%!  content_length(+Header, -Length:int) is semidet.
 1133%
 1134%   Find the Content-Length in an HTTP reply-header.
 1135
 1136content_length(Lines, Length) :-
 1137    member(Line, Lines),
 1138    phrase(content_length(Length0), Line),
 1139    !,
 1140    Length = Length0.
 1141
 1142location(Lines, RequestURI) :-
 1143    member(Line, Lines),
 1144    phrase(atom_field(location, RequestURI), Line),
 1145    !.
 1146
 1147connection(Lines, Connection) :-
 1148    member(Line, Lines),
 1149    phrase(atom_field(connection, Connection0), Line),
 1150    !,
 1151    Connection = Connection0.
 1152
 1153first_line(Major-Minor, Code, Comment) -->
 1154    "HTTP/", integer(Major), ".", integer(Minor),
 1155    skip_blanks,
 1156    integer(Code),
 1157    skip_blanks,
 1158    rest(Comment).
 1159
 1160atom_field(Name, Value) -->
 1161    field(Name),
 1162    rest(Value).
 1163
 1164content_length(Len) -->
 1165    field('content-length'),
 1166    integer(Len).
 1167
 1168field(Name) -->
 1169    { atom_codes(Name, Codes) },
 1170    field_codes(Codes).
 1171
 1172field_codes([]) -->
 1173    ":",
 1174    skip_blanks.
 1175field_codes([H|T]) -->
 1176    [C],
 1177    { match_header_char(H, C)
 1178    },
 1179    field_codes(T).
 1180
 1181match_header_char(C, C) :- !.
 1182match_header_char(C, U) :-
 1183    code_type(C, to_lower(U)),
 1184    !.
 1185match_header_char(0'_, 0'-).
 1186
 1187
 1188skip_blanks -->
 1189    [C],
 1190    { code_type(C, white)
 1191    },
 1192    !,
 1193    skip_blanks.
 1194skip_blanks -->
 1195    [].
 1196
 1197%!  integer(-Int)//
 1198%
 1199%   Read 1 or more digits and return as integer.
 1200
 1201integer(Code) -->
 1202    digit(D0),
 1203    digits(D),
 1204    { number_codes(Code, [D0|D])
 1205    }.
 1206
 1207digit(C) -->
 1208    [C],
 1209    { code_type(C, digit)
 1210    }.
 1211
 1212digits([D0|D]) -->
 1213    digit(D0),
 1214    !,
 1215    digits(D).
 1216digits([]) -->
 1217    [].
 1218
 1219%!  rest(-Atom:atom)//
 1220%
 1221%   Get rest of input as an atom.
 1222
 1223rest(Atom) --> call(rest_(Atom)).
 1224
 1225rest_(Atom, L, []) :-
 1226    atom_codes(Atom, L).
 1227
 1228
 1229                 /*******************************
 1230                 *   AUTHORIZATION MANAGEMENT   *
 1231                 *******************************/
 1232
 1233%!  http_set_authorization(+URL, +Authorization) is det.
 1234%
 1235%   Set user/password to supply with URLs   that have URL as prefix.
 1236%   If  Authorization  is  the   atom    =|-|=,   possibly   defined
 1237%   authorization is cleared.  For example:
 1238%
 1239%   ```
 1240%   ?- http_set_authorization('http://www.example.com/private/',
 1241%                             basic('John', 'Secret'))
 1242%   ```
 1243%
 1244%   @tbd    Move to a separate module, so http_get/3, etc. can use this
 1245%           too.
 1246
 1247:- dynamic
 1248    stored_authorization/2,
 1249    cached_authorization/2. 1250
 1251http_set_authorization(URL, Authorization) :-
 1252    must_be(atom, URL),
 1253    retractall(stored_authorization(URL, _)),
 1254    (   Authorization = (-)
 1255    ->  true
 1256    ;   check_authorization(Authorization),
 1257        assert(stored_authorization(URL, Authorization))
 1258    ),
 1259    retractall(cached_authorization(_,_)).
 1260
 1261check_authorization(Var) :-
 1262    var(Var),
 1263    !,
 1264    instantiation_error(Var).
 1265check_authorization(basic(User, Password)) :-
 1266    must_be(atom, User),
 1267    must_be(text, Password).
 1268check_authorization(digest(User, Password)) :-
 1269    must_be(atom, User),
 1270    must_be(text, Password).
 1271
 1272%!  authorization(+URL, -Authorization) is semidet.
 1273%
 1274%   True if Authorization must be supplied for URL.
 1275%
 1276%   @tbd    Cleanup cache if it gets too big.
 1277
 1278authorization(_, _) :-
 1279    \+ stored_authorization(_, _),
 1280    !,
 1281    fail.
 1282authorization(URL, Authorization) :-
 1283    cached_authorization(URL, Authorization),
 1284    !,
 1285    Authorization \== (-).
 1286authorization(URL, Authorization) :-
 1287    (   stored_authorization(Prefix, Authorization),
 1288        sub_atom(URL, 0, _, _, Prefix)
 1289    ->  assert(cached_authorization(URL, Authorization))
 1290    ;   assert(cached_authorization(URL, -)),
 1291        fail
 1292    ).
 1293
 1294add_authorization(_, Options, Options) :-
 1295    option(authorization(_), Options),
 1296    !.
 1297add_authorization(Parts, Options0, Options) :-
 1298    url_part(user(User), Parts),
 1299    url_part(password(Passwd), Parts),
 1300    !,
 1301    Options = [authorization(basic(User,Passwd))|Options0].
 1302add_authorization(Parts, Options0, Options) :-
 1303    stored_authorization(_, _) ->   % quick test to avoid work
 1304    parts_uri(Parts, URL),
 1305    authorization(URL, Auth),
 1306    !,
 1307    Options = [authorization(Auth)|Options0].
 1308add_authorization(_, Options, Options).
 1309
 1310
 1311%!  parse_url_ex(+URL, -Parts)
 1312%
 1313%   Parts:  Scheme,  Host,  Port,    User:Password,  RequestURI  (no
 1314%   fragment).
 1315
 1316parse_url_ex(URL, [uri(URL)|Parts]) :-
 1317    uri_components(URL, Components),
 1318    phrase(components(Components), Parts),
 1319    (   option(host(_), Parts)
 1320    ->  true
 1321    ;   domain_error(url, URL)
 1322    ).
 1323
 1324components(Components) -->
 1325    uri_scheme(Components),
 1326    uri_path(Components),
 1327    uri_authority(Components),
 1328    uri_request_uri(Components).
 1329
 1330uri_scheme(Components) -->
 1331    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
 1332    !,
 1333    [ scheme(Scheme)
 1334    ].
 1335uri_scheme(_) --> [].
 1336
 1337uri_path(Components) -->
 1338    { uri_data(path, Components, Path0), nonvar(Path0),
 1339      (   Path0 == ''
 1340      ->  Path = (/)
 1341      ;   Path = Path0
 1342      )
 1343    },
 1344    !,
 1345    [ path(Path)
 1346    ].
 1347uri_path(_) --> [].
 1348
 1349uri_authority(Components) -->
 1350    { uri_data(authority, Components, Auth), nonvar(Auth),
 1351      !,
 1352      uri_authority_components(Auth, Data)
 1353    },
 1354    [ authority(Auth) ],
 1355    auth_field(user, Data),
 1356    auth_field(password, Data),
 1357    auth_field(host, Data),
 1358    auth_field(port, Data).
 1359uri_authority(_) --> [].
 1360
 1361auth_field(Field, Data) -->
 1362    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
 1363      !,
 1364      (   atom(EncValue)
 1365      ->  uri_encoded(query_value, Value, EncValue)
 1366      ;   Value = EncValue
 1367      ),
 1368      Part =.. [Field,Value]
 1369    },
 1370    [ Part ].
 1371auth_field(_, _) --> [].
 1372
 1373uri_request_uri(Components) -->
 1374    { uri_data(path, Components, Path0),
 1375      uri_data(search, Components, Search),
 1376      (   Path0 == ''
 1377      ->  Path = (/)
 1378      ;   Path = Path0
 1379      ),
 1380      uri_data(path, Components2, Path),
 1381      uri_data(search, Components2, Search),
 1382      uri_components(RequestURI, Components2)
 1383    },
 1384    [ request_uri(RequestURI)
 1385    ].
 1386
 1387%!  parts_scheme(+Parts, -Scheme) is det.
 1388%!  parts_uri(+Parts, -URI) is det.
 1389%!  parts_request_uri(+Parts, -RequestURI) is det.
 1390%!  parts_search(+Parts, -Search) is det.
 1391%!  parts_authority(+Parts, -Authority) is semidet.
 1392
 1393parts_scheme(Parts, Scheme) :-
 1394    url_part(scheme(Scheme), Parts),
 1395    !.
 1396parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
 1397    url_part(protocol(Scheme), Parts),
 1398    !.
 1399parts_scheme(_, http).
 1400
 1401parts_authority(Parts, Auth) :-
 1402    url_part(authority(Auth), Parts),
 1403    !.
 1404parts_authority(Parts, Auth) :-
 1405    url_part(host(Host), Parts, _),
 1406    url_part(port(Port), Parts, _),
 1407    url_part(user(User), Parts, _),
 1408    url_part(password(Password), Parts, _),
 1409    uri_authority_components(Auth,
 1410                             uri_authority(User, Password, Host, Port)).
 1411
 1412parts_request_uri(Parts, RequestURI) :-
 1413    option(request_uri(RequestURI), Parts),
 1414    !.
 1415parts_request_uri(Parts, RequestURI) :-
 1416    url_part(path(Path), Parts, /),
 1417    ignore(parts_search(Parts, Search)),
 1418    uri_data(path, Data, Path),
 1419    uri_data(search, Data, Search),
 1420    uri_components(RequestURI, Data).
 1421
 1422parts_search(Parts, Search) :-
 1423    option(query_string(Search), Parts),
 1424    !.
 1425parts_search(Parts, Search) :-
 1426    option(search(Fields), Parts),
 1427    !,
 1428    uri_query_components(Search, Fields).
 1429
 1430
 1431parts_uri(Parts, URI) :-
 1432    option(uri(URI), Parts),
 1433    !.
 1434parts_uri(Parts, URI) :-
 1435    parts_scheme(Parts, Scheme),
 1436    ignore(parts_authority(Parts, Auth)),
 1437    parts_request_uri(Parts, RequestURI),
 1438    uri_components(RequestURI, Data),
 1439    uri_data(scheme, Data, Scheme),
 1440    uri_data(authority, Data, Auth),
 1441    uri_components(URI, Data).
 1442
 1443parts_port(Parts, Port) :-
 1444    parts_scheme(Parts, Scheme),
 1445    default_port(Scheme, DefPort),
 1446    url_part(port(Port), Parts, DefPort).
 1447
 1448url_part(Part, Parts) :-
 1449    Part =.. [Name,Value],
 1450    Gen =.. [Name,RawValue],
 1451    option(Gen, Parts),
 1452    !,
 1453    Value = RawValue.
 1454
 1455url_part(Part, Parts, Default) :-
 1456    Part =.. [Name,Value],
 1457    Gen =.. [Name,RawValue],
 1458    (   option(Gen, Parts)
 1459    ->  Value = RawValue
 1460    ;   Value = Default
 1461    ).
 1462
 1463
 1464                 /*******************************
 1465                 *            COOKIES           *
 1466                 *******************************/
 1467
 1468write_cookies(Out, Parts, Options) :-
 1469    http:write_cookies(Out, Parts, Options),
 1470    !.
 1471write_cookies(_, _, _).
 1472
 1473update_cookies(_, _, _) :-
 1474    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
 1475    !.
 1476update_cookies(Lines, Parts, Options) :-
 1477    (   member(Line, Lines),
 1478        phrase(atom_field('set_cookie', CookieData), Line),
 1479        http:update_cookies(CookieData, Parts, Options),
 1480        fail
 1481    ;   true
 1482    ).
 1483
 1484
 1485                 /*******************************
 1486                 *           OPEN ANY           *
 1487                 *******************************/
 1488
 1489:- multifile iostream:open_hook/6. 1490
 1491%!  iostream:open_hook(+Spec, +Mode, -Stream, -Close,
 1492%!                     +Options0, -Options) is semidet.
 1493%
 1494%   Hook implementation that makes  open_any/5   support  =http= and
 1495%   =https= URLs for =|Mode == read|=.
 1496
 1497iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
 1498    (atom(URL) -> true ; string(URL)),
 1499    uri_is_global(URL),
 1500    uri_components(URL, Components),
 1501    uri_data(scheme, Components, Scheme),
 1502    http_scheme(Scheme),
 1503    !,
 1504    Options = Options0,
 1505    Close = close(Stream),
 1506    http_open(URL, Stream, Options0).
 1507
 1508http_scheme(http).
 1509http_scheme(https).
 1510
 1511
 1512                 /*******************************
 1513                 *          KEEP-ALIVE          *
 1514                 *******************************/
 1515
 1516%!  consider_keep_alive(+HeaderLines, +Parts, +Host,
 1517%!                      +Stream0, -Stream,
 1518%!                      +Options) is det.
 1519
 1520consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
 1521    option(connection(Asked), Options),
 1522    keep_alive(Asked),
 1523    connection(Lines, Given),
 1524    keep_alive(Given),
 1525    content_length(Lines, Bytes),
 1526    !,
 1527    stream_pair(StreamPair, In0, _),
 1528    connection_address(Host, Parts, HostPort),
 1529    debug(http(connection),
 1530          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
 1531    stream_range_open(In0, In,
 1532                      [ size(Bytes),
 1533                        onclose(keep_alive(StreamPair, HostPort))
 1534                      ]).
 1535consider_keep_alive(_, _, _, Stream, Stream, _).
 1536
 1537connection_address(Host, _, Host) :-
 1538    Host = _:_,
 1539    !.
 1540connection_address(Host, Parts, Host:Port) :-
 1541    parts_port(Parts, Port).
 1542
 1543keep_alive(keep_alive) :- !.
 1544keep_alive(Connection) :-
 1545    downcase_atom(Connection, 'keep-alive').
 1546
 1547:- public keep_alive/4. 1548
 1549keep_alive(StreamPair, Host, _In, 0) :-
 1550    !,
 1551    debug(http(connection), 'Adding connection to ~p to pool', [Host]),
 1552    add_to_pool(Host, StreamPair).
 1553keep_alive(StreamPair, Host, In, Left) :-
 1554    Left < 100,
 1555    debug(http(connection), 'Reading ~D left bytes', [Left]),
 1556    read_incomplete(In, Left),
 1557    add_to_pool(Host, StreamPair),
 1558    !.
 1559keep_alive(StreamPair, _, _, _) :-
 1560    debug(http(connection),
 1561          'Closing connection due to excessive unprocessed input', []),
 1562    (   debugging(http(connection))
 1563    ->  catch(close(StreamPair), E,
 1564              print_message(warning, E))
 1565    ;   close(StreamPair, [force(true)])
 1566    ).
 1567
 1568%!  read_incomplete(+In, +Left) is semidet.
 1569%
 1570%   If we have not all input from  a Keep-alive connection, read the
 1571%   remainder if it is short. Else, we fail and close the stream.
 1572
 1573read_incomplete(In, Left) :-
 1574    catch(setup_call_cleanup(
 1575              open_null_stream(Null),
 1576              copy_stream_data(In, Null, Left),
 1577              close(Null)),
 1578          _,
 1579          fail).
 1580
 1581:- dynamic
 1582    connection_pool/4,              % Hash, Address, Stream, Time
 1583    connection_gc_time/1. 1584
 1585add_to_pool(Address, StreamPair) :-
 1586    keep_connection(Address),
 1587    get_time(Now),
 1588    term_hash(Address, Hash),
 1589    assertz(connection_pool(Hash, Address, StreamPair, Now)).
 1590
 1591get_from_pool(Address, StreamPair) :-
 1592    term_hash(Address, Hash),
 1593    retract(connection_pool(Hash, Address, StreamPair, _)).
 1594
 1595%!  keep_connection(+Address) is semidet.
 1596%
 1597%   Succeeds if we want to keep   the  connection open. We currently
 1598%   keep a maximum of 10 connections  waiting   and  a  maximum of 2
 1599%   waiting for the same address. Connections   older than 2 seconds
 1600%   are closed.
 1601
 1602keep_connection(Address) :-
 1603    close_old_connections(2),
 1604    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
 1605    C =< 10,
 1606    term_hash(Address, Hash),
 1607    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
 1608    Count =< 2.
 1609
 1610close_old_connections(Timeout) :-
 1611    get_time(Now),
 1612    Before is Now - Timeout,
 1613    (   connection_gc_time(GC),
 1614        GC > Before
 1615    ->  true
 1616    ;   (   retractall(connection_gc_time(_)),
 1617            asserta(connection_gc_time(Now)),
 1618            connection_pool(Hash, Address, StreamPair, Added),
 1619            Added < Before,
 1620            retract(connection_pool(Hash, Address, StreamPair, Added)),
 1621            debug(http(connection),
 1622                  'Closing inactive keep-alive to ~p', [Address]),
 1623            close(StreamPair, [force(true)]),
 1624            fail
 1625        ;   true
 1626        )
 1627    ).
 1628
 1629
 1630%!  http_close_keep_alive(+Address) is det.
 1631%
 1632%   Close all keep-alive connections matching Address. Address is of
 1633%   the  form  Host:Port.  In  particular,  http_close_keep_alive(_)
 1634%   closes all currently known keep-alive connections.
 1635
 1636http_close_keep_alive(Address) :-
 1637    forall(get_from_pool(Address, StreamPair),
 1638           close(StreamPair, [force(true)])).
 1639
 1640%!  keep_alive_error(+Error)
 1641%
 1642%   Deal with an error from reusing  a keep-alive connection. If the
 1643%   error is due to an I/O error   or end-of-file, fail to backtrack
 1644%   over get_from_pool/2. Otherwise it is a   real error and we thus
 1645%   re-raise it.
 1646
 1647keep_alive_error(keep_alive(closed)) :-
 1648    !,
 1649    debug(http(connection), 'Keep-alive connection was closed', []),
 1650    fail.
 1651keep_alive_error(io_error(_,_)) :-
 1652    !,
 1653    debug(http(connection), 'IO error on Keep-alive connection', []),
 1654    fail.
 1655keep_alive_error(Error) :-
 1656    throw(Error).
 1657
 1658
 1659                 /*******************************
 1660                 *     HOOK DOCUMENTATION       *
 1661                 *******************************/
 1662
 1663%!  http:open_options(+Parts, -Options) is nondet.
 1664%
 1665%   This hook is used by the HTTP   client library to define default
 1666%   options based on the the broken-down request-URL.  The following
 1667%   example redirects all trafic, except for localhost over a proxy:
 1668%
 1669%       ```
 1670%       :- multifile
 1671%           http:open_options/2.
 1672%
 1673%       http:open_options(Parts, Options) :-
 1674%           option(host(Host), Parts),
 1675%           Host \== localhost,
 1676%           Options = [proxy('proxy.local', 3128)].
 1677%       ```
 1678%
 1679%   This hook may return multiple   solutions.  The returned options
 1680%   are  combined  using  merge_options/3  where  earlier  solutions
 1681%   overrule later solutions.
 1682
 1683%!  http:write_cookies(+Out, +Parts, +Options) is semidet.
 1684%
 1685%   Emit a =|Cookie:|= header for the  current connection. Out is an
 1686%   open stream to the HTTP server, Parts is the broken-down request
 1687%   (see uri_components/2) and Options is the list of options passed
 1688%   to http_open.  The predicate is called as if using ignore/1.
 1689%
 1690%   @see complements http:update_cookies/3.
 1691%   @see library(http/http_cookie) implements cookie handling on
 1692%   top of these hooks.
 1693
 1694%!  http:update_cookies(+CookieData, +Parts, +Options) is semidet.
 1695%
 1696%   Update the cookie database.  CookieData  is   the  value  of the
 1697%   =|Set-Cookie|= field, Parts is  the   broken-down  request  (see
 1698%   uri_components/2) and Options is the list   of options passed to
 1699%   http_open.
 1700%
 1701%   @see complements http:write_cookies
 1702%   @see library(http/http_cookies) implements cookie handling on
 1703%   top of these hooks.