bdep/http-service.cxx


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466

// file      : bdep/http-service.cxx -*- C++ -*-
// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

#include <bdep/http-service.hxx>

#include <cstdlib> // strtoul()

#include <libbutl/fdstream.mxx> // fdterm()

#include <bdep/diagnostics.hxx>

using namespace std;
using namespace butl;

namespace bdep
{
  namespace http_service
  {
    // Post the specified parameters to the service at the specified URL by
    // running the curl program and return the parsed result.
    //
    // Each parameter is converted to a pair of curl arguments: --form with
    // the name=@value value for the file parameters and --form-string with
    // the name=value value for all the others. The curl program and any
    // additional curl options are taken from the common options (o.curl()
    // and o.curl_option()).
    //
    // If the response contains the result manifest with the 200 status, then
    // return the manifest message value, the optional reference value, and
    // the manifest body: the format version pair followed by the name/value
    // pairs that remain after the status, message, and reference values have
    // been consumed. In all other cases (curl failure, unparsable response,
    // redirect, non-200 manifest status) issue the diagnostics via the fail
    // stream.
    //
    result
    post (const common_options& o, const url& u, const parameters& params)
    {
      using parser     = manifest_parser;
      using parsing    = manifest_parsing;
      using name_value = manifest_name_value;

      // The overall plan is to post the data using the curl program, read
      // the HTTP response status and content type, read and parse the body
      // according to the content type, and obtain the result message and
      // optional reference in case of both the request success and failure.
      //
      // The successful request response (HTTP status code 200) is expected to
      // contain the result manifest (text/manifest content type). The faulty
      // response (HTTP status code other than 200) can either contain the
      // result manifest or a plain text error description (text/plain content
      // type) or some other content (for example text/html). We will print
      // the manifest message value, if available or the first line of the
      // plain text error description or, as a last resort, construct the
      // message from the HTTP status code and reason phrase.
      //
      string message;
      optional<uint16_t> status;  // Request result manifest status value.
      optional<string> reference;
      vector<name_value> body;

      // None of the 3XX redirect code semantics assume automatic re-posting.
      // We will treat all such codes as failures, additionally printing the
      // location header value to advise the user to try the other URL for the
      // request.
      //
      // Note that services that move to a new URL may well be responding with
      // the 301 (moved permanently) code.
      //
      optional<url> location;

      // Note that it's a bad idea to issue the diagnostics while curl is
      // running, as it will be messed up with the progress output. Thus, we
      // throw the runtime_error exception on the HTTP response parsing error
      // (rather than use our fail stream) and issue the diagnostics after
      // curl finishes.
      //
      // Also note that we prefer the start/finish process facility for
      // running curl over using butl::curl because in this context it is
      // restrictive and inconvenient.
      //
      process pr;
      bool io (false); // Set if reading the curl output ended with io_error.
      try
      {
        // Map the verbosity level.
        //
        cstrings v;
        if (verb < 1)
        {
          v.push_back ("-s");
          v.push_back ("-S"); // But show errors.
        }
        else if (verb == 1 && fdterm (2))
          v.push_back ("--progress-bar");
        else if (verb > 3)
          v.push_back ("-v");

        // Convert the submit arguments to curl's --form* options.
        //
        // Each parameter results in two consecutive entries: the option name
        // followed by the option value.
        //
        strings fos;
        for (const parameter& p: params)
        {
          fos.emplace_back (p.type == parameter::file
                            ? "--form"
                            : "--form-string");

          fos.emplace_back (p.type == parameter::file
                            ? p.name + "=@" + p.value
                            : p.name + "="  + p.value);
        }

        // Start curl program.
        //
        fdpipe pipe (fdopen_pipe ()); // Text mode seems appropriate.

        // Note that we don't specify any default timeouts, assuming that bdep
        // is an interactive program and the user can always interrupt the
        // command (or pass the timeout with --curl-option).
        //
        pr = start (0          /* stdin  */,
                    pipe       /* stdout */,
                    2          /* stderr */,
                    o.curl (),
                    v,
                    "-A", (BDEP_USER_AGENT " curl"),

                    o.curl_option (),

                    // Include the response headers in the output so we can
                    // get the status code/reason, content type, and the
                    // redirect location.
                    //
                    "--include",

                    fos,
                    u.string ());

        // Close our copy of the write end of the pipe now that curl has been
        // started with it as stdout.
        //
        pipe.out.close ();

        // First we read the HTTP response status line and headers. At this
        // stage we will read until the empty line (containing just CRLF). Not
        // being able to reach such a line is an error, which is the reason
        // for the exception mask choice.
        //
        ifdstream is (
          move (pipe.in),
          fdstream_mode::skip,
          ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit);

        // Parse and return the HTTP status code. Return 0 if the argument is
        // invalid.
        //
        // The valid code is a decimal number in the [100, 600) range that
        // spans the whole string. Note that the empty string is parsed as 0
        // and so is also treated as invalid.
        //
        auto status_code = [] (const string& s)
        {
          char* e (nullptr);
          unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw.
          assert (e != nullptr);

          return *e == '\0' && c >= 100 && c < 600
                 ? static_cast<uint16_t> (c)
                 : 0;
        };

        // Read the CRLF-terminated line from the stream stripping the
        // trailing CRLF.
        //
        auto read_line = [&is] ()
        {
          string l;
          getline (is, l); // Strips the trailing LF (0xA).

          // Note that on POSIX CRLF is not automatically translated into
          // LF, so we need to strip CR (0xD) manually.
          //
          if (!l.empty () && l.back () == '\r')
            l.pop_back ();

          return l;
        };

        // Report the invalid or unexpected response by throwing the
        // runtime_error exception with the specified description (see above
        // for why we don't use the fail stream here).
        //
        auto bad_response = [] (const string& d) {throw runtime_error (d);};

        // Read and parse the HTTP response status line, return the status
        // code and the reason phrase.
        //
        struct http_status
        {
          uint16_t code;
          string reason;
        };

        // The expected line format is:
        //
        // HTTP/<version> <code> [<reason>]
        //
        // Note that the space after the code is required even if the reason
        // phrase is absent. Throw runtime_error (via bad_response()) if the
        // line doesn't match this format.
        //
        auto read_status = [&read_line, &status_code, &bad_response] ()
        {
          string l (read_line ());

          for (;;) // Breakout loop.
          {
            if (l.compare (0, 5, "HTTP/") != 0)
              break;

            size_t p (l.find (' ', 5));             // The protocol end.
            if (p == string::npos)
              break;

            p = l.find_first_not_of (' ', p + 1);   // The code start.
            if (p == string::npos)
              break;

            size_t e (l.find (' ', p + 1));         // The code end.
            if (e == string::npos)
              break;

            uint16_t c (status_code (string (l, p, e - p)));
            if (c == 0)
              break;

            // The reason phrase is optional and is returned with the leading
            // and trailing spaces stripped.
            //
            string r;
            p = l.find_first_not_of (' ', e + 1);   // The reason start.
            if (p != string::npos)
            {
              e = l.find_last_not_of (' ');         // The reason end.
              assert (e != string::npos && e >= p);

              r = string (l, p, e - p + 1);
            }

            return http_status {c, move (r)};
          }

          bad_response ("invalid HTTP response status line '" + l + "'");

          assert (false); // Can't be here.
          return http_status {};
        };

        // The curl output for a successful request looks like this:
        //
        // HTTP/1.1 100 Continue
        //
        // HTTP/1.1 200 OK
        // Content-Length: 83
        // Content-Type: text/manifest;charset=utf-8
        //
        // : 1
        // status: 200
        // message: submission is queued
        // reference: 256910ca46d5
        //
        // curl normally sends the 'Expect: 100-continue' header for uploads,
        // so we need to handle the interim HTTP server response with the
        // continue (100) status code.
        //
        // Interestingly, Apache can respond with the continue (100) code and
        // with the not found (404) code afterwards. Can it be configured to
        // just respond with 404?
        //
        http_status rs (read_status ());

        if (rs.code == 100)
        {
          while (!read_line ().empty ()) ; // Skips the interim response.
          rs = read_status ();             // Reads the final status code.
        }

        // Read through the response headers until the empty line is
        // encountered and obtain the content type and/or the redirect
        // location, if present.
        //
        optional<string> ctype;

        // Check if the line contains the specified header and return its
        // value if that's the case. Return nullopt otherwise.
        //
        // The header name is compared case-insensitively and must be
        // immediately followed by the colon. The returned value has the
        // leading and trailing spaces stripped and can be empty.
        //
        // Note that we don't expect the header values that we are interested
        // in to span over multiple lines.
        //
        string l;
        auto header = [&l] (const char* name) -> optional<string>
        {
          size_t n (string::traits_type::length (name));
          if (!(casecmp (name, l, n) == 0 && l[n] == ':'))
            return nullopt;

          string r;
          size_t p (l.find_first_not_of (' ', n + 1)); // The value begin.
          if (p != string::npos)
          {
            size_t e (l.find_last_not_of (' '));       // The value end.
            assert (e != string::npos && e >= p);

            r = string (l, p, e - p + 1);
          }

          return optional<string> (move (r));
        };

        while (!(l = read_line ()).empty ())
        {
          if (optional<string> v = header ("Content-Type"))
            ctype = move (v);
          else if (optional<string> v = header ("Location"))
          {
            // Only consider the location for the 301, 302, 303, and 307
            // redirect codes.
            //
            if ((rs.code >= 301 && rs.code <= 303) || rs.code == 307)
            try
            {
              location = url (*v);
              location->query = nullopt; // Can possibly contain '?submit'.
            }
            catch (const invalid_argument&)
            {
              // Let's just ignore invalid locations.
              //
            }
          }
        }

        assert (!eof (is)); // Would have already failed otherwise.

        // Now parse the response payload if the content type is specified and
        // is recognized (text/manifest or text/plain), skip it (with the
        // ifdstream's close() function) otherwise.
        //
        // Note that eof and getline() fail conditions are not errors anymore,
        // so we adjust the exception mask accordingly.
        //
        is.exceptions (ifdstream::badbit);

        if (ctype)
        {
          // Note that only the content type prefix is compared, so any
          // parameters that follow it (for example, charset) are ignored.
          //
          if (casecmp ("text/manifest", *ctype, 13) == 0)
          {
            parser p (is, "manifest");
            name_value nv (p.next ());

            if (nv.empty ())
              bad_response ("empty manifest");

            // Note that n and v refer to the nv members and so always denote
            // the name and value of the pair that was parsed last.
            //
            const string& n (nv.name);
            string& v (nv.value);

            // The format version pair is verified by the parser.
            //
            assert (n.empty () && v == "1");

            body.push_back (move (nv)); // Save the format version pair.

            // Throw the manifest_parsing exception that refers to the
            // position of the current value in the manifest.
            //
            auto bad_value = [&p, &nv] (const string& d) {
              throw parsing (p.name (), nv.value_line, nv.value_column, d);};

            // Get and verify the HTTP status.
            //
            nv = p.next ();
            if (n != "status")
              bad_value ("no status specified");

            uint16_t c (status_code (v));
            if (c == 0)
              bad_value ("invalid HTTP status '" + v + "'");

            if (c != rs.code)
              bad_value ("status " + v + " doesn't match HTTP response "
                         "code " + to_string (rs.code));

            // Get the message.
            //
            nv = p.next ();
            if (n != "message" || v.empty ())
              bad_value ("no message specified");

            message = move (v);

            // Try to get an optional reference.
            //
            nv = p.next ();

            if (n == "reference")
            {
              if (v.empty ())
                bad_value ("empty reference specified");

              reference = move (v);

              nv = p.next ();
            }

            // Save the remaining name/value pairs.
            //
            for (; !nv.empty (); nv = p.next ())
              body.push_back (move (nv));

            // Only set the status once the whole manifest has been
            // successfully parsed.
            //
            status = c;
          }
          else if (casecmp ("text/plain", *ctype, 10) == 0)
            getline (is, message); // Can result in the empty message.
        }

        is.close (); // Detect errors.

        // The meaningful result we expect is either manifest (status code is
        // not necessarily 200) or HTTP redirect (location is present). We
        // are unable to interpret any other cases and so report them as a
        // bad response.
        //
        if (!status)
        {
          if (rs.code == 200)
            bad_response ("manifest expected");

          // If there is no plain text description (or its first line is
          // empty), then construct the message from the HTTP status code and
          // the reason phrase, if present.
          //
          if (message.empty ())
          {
            message = "HTTP status code " + to_string (rs.code);

            if (!rs.reason.empty ())
              message += " (" + lcase (rs.reason) + ")";
          }

          if (!location)
            bad_response (message);
        }
      }
      catch (const io_error&)
      {
        // Presumably the child process failed and issued diagnostics so let
        // finish() try to deal with that first.
        //
        io = true;
      }
      // Handle all parsing errors, including the manifest_parsing exception
      // that inherits from the runtime_error exception.
      //
      // Note that the io_error class inherits from the runtime_error class,
      // so this catch-clause must go last.
      //
      catch (const runtime_error& e)
      {
        finish (o.curl (), pr); // Throws on process failure.

        // Finally we can safely issue the diagnostics (see above for
        // details).
        //
        diag_record dr (fail);

        url du (u);
        du.query = nullopt; // Strip URL parameters from the diagnostics.

        dr << e <<
          info << "consider reporting this to " << du << " maintainers";

        if (reference)
          dr << info << "reference: " << *reference;
      }

      // Wait for curl to finish, passing along whether we have encountered
      // an io error while reading its output.
      //
      finish (o.curl (), pr, io);

      // At this point we either have the manifest message (verified to be
      // non-empty above) or the message for the redirect response.
      //
      assert (!message.empty ());

      // Print the request failure reason and fail.
      //
      if (!status || *status != 200)
      {
        diag_record dr (fail);
        dr << message;

        if (reference)
          dr << info << "reference: " << *reference;

        if (location)
          dr << info << "new location: " << *location;

        // In case of a server error advise the user to re-try later, assuming
        // that the issue is temporary (service overload, network connectivity
        // loss, etc.).
        //
        if (status && *status >= 500 && *status < 600)
          dr << info << "try again later";
      }

      return result {move (message), move (reference), move (body)};
    }
  }
}