XRootD
XrdHttpReadRangeHandler.cc
Go to the documentation of this file.
1 //------------------------------------------------------------------------------
2 // This file is part of XrdHTTP: A pragmatic implementation of the
3 // HTTP/WebDAV protocol for the Xrootd framework
4 //
5 // Copyright (c) 2013 by European Organization for Nuclear Research (CERN)
6 // Authors: Cedric Caffy <ccaffy@cern.ch>, David Smith
7 // File Date: Aug 2023
8 //------------------------------------------------------------------------------
9 // XRootD is free software: you can redistribute it and/or modify
10 // it under the terms of the GNU Lesser General Public License as published by
11 // the Free Software Foundation, either version 3 of the License, or
12 // (at your option) any later version.
13 //
14 // XRootD is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public License
20 // along with XRootD. If not, see <http://www.gnu.org/licenses/>.
21 //------------------------------------------------------------------------------
22 
23 #include "XProtocol/XPtypes.hh"
25 #include "XrdOuc/XrdOuca2x.hh"
26 #include "XrdOuc/XrdOucTUtils.hh"
27 #include "XrdOuc/XrdOucUtils.hh"
28 
29 #include <algorithm>
30 #include <climits>
31 #include <cstdint>
32 #include <cstdlib>
33 #include <cstring>
34 #include <memory>
35 #include <sstream>
36 
37 //------------------------------------------------------------------------------
41 //------------------------------------------------------------------------------
43 (
44  XrdSysError &Eroute,
45  const char *const parms,
46  Configuration &cfg)
47 {
48  if( !parms ) return 0;
49 
50  std::vector<std::string> splitArgs;
51  XrdOucTUtils::splitString( splitArgs, parms, "," );
52  if( splitArgs.size() < 2 ) return 0;
53 
54  //----------------------------------------------------------------------------
55  // params is expected to be "<readv_ior_max>,<readv_iov_max>"
56  //----------------------------------------------------------------------------
57  std::string iorstr = splitArgs[0];
58  std::string iovstr = splitArgs[1];
59  XrdOucUtils::trim( iorstr );
60  XrdOucUtils::trim( iovstr );
61 
62  int val;
63  if( XrdOuca2x::a2i( Eroute, "Error reading specific value of readv_ior_max",
64  iorstr.c_str(), &val, 1, -1 ) )
65  {
66  return -1;
67  }
68 
69  cfg.readv_ior_max = val;
70  if( XrdOuca2x::a2i( Eroute, "Error reading specific value of readv_iov_max",
71  iovstr.c_str(), &val, 1, -1 ) )
72  {
73  return -1;
74  }
75 
76  cfg.readv_iov_max = val;
77  cfg.reqs_max = RREQ_MAXSIZE;
78  cfg.haveSizes = true;
79 
80  return 0;
81 }
82 
83 //------------------------------------------------------------------------------
85 //------------------------------------------------------------------------------
87 {
88  return error_;
89 }
90 
91 //------------------------------------------------------------------------------
93 //------------------------------------------------------------------------------
95 {
96  return rawUserRanges_.empty();
97 }
98 
99 //------------------------------------------------------------------------------
101 //------------------------------------------------------------------------------
103 {
104  return rawUserRanges_.empty() ? 1 : rawUserRanges_.size();
105 }
106 
107 //------------------------------------------------------------------------------
109 //------------------------------------------------------------------------------
111 {
112  if( !rangesResolved_ )
113  resolveRanges();
114 
115  return( resolvedUserRanges_.size() <= 1 );
116 }
117 
118 //------------------------------------------------------------------------------
120 //------------------------------------------------------------------------------
122 {
123  static const UserRangeList emptyList;
124 
125  if( !rangesResolved_ )
126  resolveRanges();
127 
128  if( error_ )
129  return emptyList;
130 
131  return resolvedUserRanges_;
132 }
133 
134 //------------------------------------------------------------------------------
136 //------------------------------------------------------------------------------
138 {
139  static const XrdHttpIOList emptyList;
140 
141  if( !rangesResolved_ )
142  resolveRanges();
143 
144  if( error_ )
145  return emptyList;
146 
147  if( !splitRange_.empty() )
148  {
149  if( currSplitRangeIdx_ == 0 && currSplitRangeOff_ == 0 )
150  {
151  //------------------------------------------------------------------------
152  // Nothing read: Prevent scenario where data is expected but none is
153  // actually read E.g. Accessing files which return the results of a script
154  //------------------------------------------------------------------------
155  error_.set( 500, "Stopping request because more data is expected "
156  "but no data has been read." );
157  return emptyList;
158  }
159 
160  //--------------------------------------------------------------------------
161  // we may have some unacknowledged portion of the last range; maybe due to a
162  // short read. so remove what was received and potentially reissue.
163  //--------------------------------------------------------------------------
164 
165  trimSplit();
166  if( !splitRange_.empty() )
167  return splitRange_;
168  }
169 
170  if( splitRangeIdx_ >= resolvedUserRanges_.size() )
171  return emptyList;
172 
173  splitRanges();
174 
175  return splitRange_;
176 }
177 
178 //------------------------------------------------------------------------------
180 //------------------------------------------------------------------------------
182 {
183  if( error_ )
184  return;
185 
186  error_.set( 500, "An error occurred." );
187 }
188 
189 //------------------------------------------------------------------------------
191 //------------------------------------------------------------------------------
193 (
194  const ssize_t ret,
195  const UserRange** const urp,
196  bool &start,
197  bool &allend
198 )
199 {
200  if( error_ )
201  return -1;
202 
203  if( ret == 0 )
204  return 0;
205 
206  if( ret < 0 )
207  {
208  error_.set( 500, "Range handler read failure." );
209  return -1;
210  }
211 
212  if( !rangesResolved_ )
213  {
214  error_.set( 500, "Range handler ranges not yet resolved." );
215  return -1;
216  }
217 
218  if( splitRange_.empty() )
219  {
220  error_.set( 500, "No ranges being read." );
221  return -1;
222  }
223 
224  start = false;
225  allend = false;
226 
227  if( currSplitRangeIdx_ >= splitRange_.size() ||
228  resolvedRangeIdx_ >= resolvedUserRanges_.size() )
229  {
230  error_.set( 500, "Range handler index invalid." );
231  return -1;
232  }
233 
234  if( urp )
235  *urp = &resolvedUserRanges_[resolvedRangeIdx_];
236 
237  if( resolvedRangeOff_ == 0 )
238  start = true;
239 
240  const int clen = splitRange_[currSplitRangeIdx_].size;
241 
242  const off_t ulen = resolvedUserRanges_[resolvedRangeIdx_].end
243  - resolvedUserRanges_[resolvedRangeIdx_].start + 1;
244 
245  currSplitRangeOff_ += ret;
246  resolvedRangeOff_ += ret;
247 
248  if( currSplitRangeOff_ > clen || resolvedRangeOff_ > ulen )
249  {
250  error_.set( 500, "Range handler read crossing chunk boundary." );
251  return -1;
252  }
253 
254  if( currSplitRangeOff_ == clen )
255  {
256  currSplitRangeOff_ = 0;
257  currSplitRangeIdx_++;
258 
259  if( currSplitRangeIdx_ >= splitRange_.size() )
260  {
261  currSplitRangeIdx_ = 0;
262  splitRange_.clear();
263  }
264  }
265 
266  if( resolvedRangeOff_ == ulen )
267  {
268  resolvedRangeIdx_++;
269  resolvedRangeOff_ = 0;
270  if( resolvedRangeIdx_ >= resolvedUserRanges_.size() )
271  allend = true;
272  }
273 
274  return 0;
275 }
276 
277 //------------------------------------------------------------------------------
279 //------------------------------------------------------------------------------
280 void XrdHttpReadRangeHandler::ParseContentRange(const char* const line)
281 {
282  char *str1, *saveptr1, *token;
283 
284  std::unique_ptr< char, decltype(std::free)* >
285  line_copy { strdup( line ), std::free };
286 
287  //----------------------------------------------------------------------------
288  // line_copy is argument of the Range header.
289  //
290  // e.g. "bytes=15-17,20-25"
291  // We skip the unit prefix (upto first '='). We don't
292  // enforce this prefix nor check what it is (e.g. 'bytes')
293  //----------------------------------------------------------------------------
294 
295  str1 = line_copy.get();
296  token = strchr(str1,'=');
297  if (token) str1 = token + 1;
298 
299  //----------------------------------------------------------------------------
300  // break up the ranges and process each
301  //----------------------------------------------------------------------------
302 
303  for( ; ; str1 = NULL )
304  {
305  token = strtok_r( str1, " ,\n\r", &saveptr1 );
306  if( token == NULL )
307  break;
308 
309  if( !strlen(token) ) continue;
310 
311  const int rc = parseOneRange( token );
312  if( rc )
313  {
314  //------------------------------------------------------------------------
315  // on error we ignore the whole range header
316  //------------------------------------------------------------------------
317  rawUserRanges_.clear();
318  return;
319  }
320  }
321 }
322 
323 //------------------------------------------------------------------------------
325 //------------------------------------------------------------------------------
327 {
328  error_.reset();
329  rawUserRanges_.clear();
330  rawUserRanges_.shrink_to_fit();
331  resolvedUserRanges_.clear();
332  resolvedUserRanges_.shrink_to_fit();
333  splitRange_.clear();
334  splitRange_.shrink_to_fit();
335  rangesResolved_ = false;
336  splitRangeIdx_ = 0;
337  splitRangeOff_ = 0;
338  currSplitRangeIdx_ = 0;
339  currSplitRangeOff_ = 0;
340  resolvedRangeIdx_ = 0;
341  resolvedRangeOff_ = 0;
342  filesize_ = 0;
343 }
344 
345 //------------------------------------------------------------------------------
347 //------------------------------------------------------------------------------
349 {
350  if( error_ )
351  return -1;
352 
353  if( rangesResolved_ )
354  {
355  error_.set( 500, "Filesize notified after ranges resolved." );
356  return -1;
357  }
358 
359  filesize_ = fs;
360  return 0;
361 }
362 
363 //------------------------------------------------------------------------------
365 //------------------------------------------------------------------------------
366 int XrdHttpReadRangeHandler::parseOneRange(char* const str)
367 {
368  UserRange ur;
369  char *sep;
370 
371  //----------------------------------------------------------------------------
372  // expected input is an individual range, e.g.
373  // 5-6
374  // 5-
375  // -2
376  //----------------------------------------------------------------------------
377 
378  sep = strchr( str, '-' );
379  if( !sep )
380  {
381  //--------------------------------------------------------------------------
382  // Unexpected range format
383  //--------------------------------------------------------------------------
384  return -1;
385  }
386 
387  *sep = '\0';
388  if( rangeFig( str, ur.start_set, ur.start )<0 )
389  {
390  //--------------------------------------------------------------------------
391  // Error in range start
392  //--------------------------------------------------------------------------
393  *sep = '-';
394  return -1;
395  }
396  *sep = '-';
397  if( rangeFig( sep+1, ur.end_set, ur.end )<0 )
398  {
399  //--------------------------------------------------------------------------
400  // Error in range end
401  //--------------------------------------------------------------------------
402  return -1;
403  }
404 
405  if( !ur.start_set && !ur.end_set )
406  {
407  //--------------------------------------------------------------------------
408  // Unexpected range format
409  //--------------------------------------------------------------------------
410  return -1;
411  }
412 
413  if( ur.start_set && ur.end_set && ur.start > ur.end )
414  {
415  //--------------------------------------------------------------------------
416  // Range start is after range end
417  //--------------------------------------------------------------------------
418  return -1;
419  }
420 
421  if( !ur.start_set && ur.end_set && ur.end == 0 )
422  {
423  //--------------------------------------------------------------------------
424  // Request to return last 0 bytes of file
425  //--------------------------------------------------------------------------
426  return -1;
427  }
428 
429  rawUserRanges_.push_back(ur);
430  return 0;
431 }
432 
433 //------------------------------------------------------------------------------
435 //------------------------------------------------------------------------------
436 int XrdHttpReadRangeHandler::rangeFig(const char* const s, bool &set, off_t &val)
437 {
438  char *endptr = (char*)s;
439  errno = 0;
440  long long int v = strtoll( s, &endptr, 10 );
441  if( (errno == ERANGE && (v == LONG_MAX || v == LONG_MIN))
442  || (errno != 0 && errno != EINVAL && v == 0) )
443  {
444  return -1;
445  }
446  if( *endptr != '\0' )
447  {
448  return -1;
449  }
450  if( endptr == s )
451  {
452  set = false;
453  }
454  else
455  {
456  set = true;
457  val = v;
458  }
459  return 0;
460 }
461 
462 //------------------------------------------------------------------------------
464 //------------------------------------------------------------------------------
465 void XrdHttpReadRangeHandler::resolveRanges()
466 {
467  if( error_ )
468  return;
469 
470  resolvedUserRanges_.clear();
471 
472  for( const auto &rr: rawUserRanges_ )
473  {
474  off_t start = 0;
475  off_t end = 0;
476 
477  if( rr.end_set )
478  {
479  if( rr.start_set )
480  {
481  //----------------------------------------------------------------------
482  // end and start set
483  // e.g. 5-6
484  //----------------------------------------------------------------------
485  start = rr.start;
486  end = rr.end;
487 
488  //----------------------------------------------------------------------
489  // skip ranges outside the file
490  //----------------------------------------------------------------------
491  if( start >= filesize_ )
492  continue;
493 
494  if( end >= filesize_ )
495  {
496  end = filesize_ - 1;
497  }
498  }
499  else // !start
500  {
501  //----------------------------------------------------------------------
502  // end is set but not start
503  // e.g. -5
504  //----------------------------------------------------------------------
505  if( rr.end == 0 )
506  continue;
507  end = filesize_ -1;
508  if( rr.end > filesize_ )
509  {
510  start = 0;
511  }
512  else
513  {
514  start = filesize_ - rr.end;
515  }
516  }
517  }
518  else // !end
519  {
520  //------------------------------------------------------------------------
521  // end is not set
522  // e.g. 5-
523  //------------------------------------------------------------------------
524  if( !rr.start_set ) continue;
525  if( rr.start >= filesize_ )
526  continue;
527  start = rr.start;
528  end = filesize_ - 1;
529  }
530  resolvedUserRanges_.emplace_back( start, end );
531  }
532 
533  if( rawUserRanges_.empty() && filesize_>0 )
534  {
535  //--------------------------------------------------------------------------
536  // special case: no ranges: speficied, return whole file
537  //--------------------------------------------------------------------------
538  resolvedUserRanges_.emplace_back( 0, filesize_ - 1 );
539  }
540 
541  if( !rawUserRanges_.empty() && resolvedUserRanges_.empty() )
542  {
543  error_.set( 416, "None of the range-specifier values in the Range "
544  "request-header field overlap the current extent of the selected resource." );
545  }
546 
547  rangesResolved_ = true;
548 }
549 
550 //------------------------------------------------------------------------------
554 //------------------------------------------------------------------------------
555 void XrdHttpReadRangeHandler::splitRanges()
556 {
557  splitRange_.clear();
558  currSplitRangeIdx_ = 0;
559  currSplitRangeOff_ = 0;
560  resolvedRangeIdx_ = splitRangeIdx_;
561  resolvedRangeOff_ = splitRangeOff_;
562 
563  //----------------------------------------------------------------------------
564  // If we make a list of just one range XrdHttpReq will issue kXR_read,
565  // otherwise kXR_readv.
566  //
567  // If this is a full file read, or single user range, we'll fetch only one
568  // range at a time, so it is sent as a series of kXR_read requests.
569  //
570  // For multi range requests we pack a number of suitably sized ranges, thereby
571  // using kXR_readv. However, if there's a long user range we can we try to
572  // proceed by issuing single range requests and thereby using kXR_read.
573  //
574  // We don't merge user ranges in a single chunk as we always expect to be
575  // able to notify at boundaries with the output bools of NotifyReadResult.
576  //----------------------------------------------------------------------------
577 
578  size_t maxch = vectorReadMaxChunks_;
579  size_t maxchs = vectorReadMaxChunkSize_;
580  if( isSingleRange() )
581  {
582  maxchs = rRequestMaxBytes_;
583  maxch = 1;
584  }
585 
586  splitRange_.reserve( maxch );
587 
588  //----------------------------------------------------------------------------
589  // Start/continue splitting the resolvedUserRanges_ into a XrdHttpIOList.
590  //----------------------------------------------------------------------------
591 
592  const size_t cs = resolvedUserRanges_.size();
593  size_t nc = 0;
594  size_t rsr = rRequestMaxBytes_;
595  UserRange tmpur;
596 
597  while( ( splitRangeIdx_ < cs ) && ( rsr > 0 ) )
598  {
599  //--------------------------------------------------------------------------
600  // Check if we've readed the maximum number of allowed chunks.
601  //--------------------------------------------------------------------------
602  if( nc >= maxch )
603  break;
604 
605  if( !tmpur.start_set )
606  {
607  tmpur = resolvedUserRanges_[splitRangeIdx_];
608  tmpur.start += splitRangeOff_;
609  }
610 
611  const off_t l = tmpur.end - tmpur.start + 1;
612  size_t maxsize = std::min( rsr, maxchs );
613 
614  //--------------------------------------------------------------------------
615  // If we're starting a new set of chunks and we have enough data available
616  // in the current user range we allow a kXR_read of the max request size.
617  //--------------------------------------------------------------------------
618  if( nc == 0 && l >= (off_t)rRequestMaxBytes_ )
619  maxsize = rRequestMaxBytes_;
620 
621  if( l > (off_t)maxsize )
622  {
623  splitRange_.emplace_back( nullptr, tmpur.start, maxsize );
624  tmpur.start += maxsize;
625  splitRangeOff_ += maxsize;
626  rsr -= maxsize;
627  }
628  else
629  {
630  splitRange_.emplace_back( nullptr, tmpur.start, l );
631  rsr -= l;
632  tmpur = UserRange();
633  splitRangeOff_ = 0;
634  splitRangeIdx_++;
635  }
636  nc++;
637  }
638 }
639 
640 //------------------------------------------------------------------------------
642 //------------------------------------------------------------------------------
643 void XrdHttpReadRangeHandler::trimSplit()
644 {
645  if( currSplitRangeIdx_ < splitRange_.size() )
646  {
647  splitRange_.erase( splitRange_.begin(),
648  splitRange_.begin() + currSplitRangeIdx_ );
649  }
650  else
651  splitRange_.clear();
652 
653  if( splitRange_.size() > 0 )
654  {
655  if( currSplitRangeOff_ < splitRange_[0].size )
656  {
657  splitRange_[0].offset += currSplitRangeOff_;
658  splitRange_[0].size -= currSplitRangeOff_;
659  }
660  else
661  splitRange_.clear();
662  }
663 
664  currSplitRangeIdx_ = 0;
665  currSplitRangeOff_ = 0;
666 }
std::vector< XrdOucIOVec2 > XrdHttpIOList
void reset()
resets this handler
const XrdHttpIOList & NextReadList()
return XrdHttpIOList for sending to read or readv
void ParseContentRange(const char *const line)
parse the line after a "Range: " http request header
int SetFilesize(const off_t sz)
sets the filesize, used during resolving and issuing range requests
static int Configure(XrdSysError &Eroute, const char *const parms, Configuration &cfg)
void NotifyError()
Force handler to enter error state.
bool isFullFile()
indicates when there were no valid Range header ranges supplied
std::vector< UserRange > UserRangeList
int NotifyReadResult(const ssize_t ret, const UserRange **const urp, bool &start, bool &allend)
Advance internal counters concerning received bytes.
size_t getMaxRanges() const
return the maximum number of ranges that may be requested
const Error & getError() const
return the Error object
bool isSingleRange()
indicates a single range (implied whole file, or single range) or empty file
static constexpr size_t RREQ_MAXSIZE
const UserRangeList & ListResolvedRanges()
return resolved (i.e. absolute start and end) byte ranges desired
static void splitString(Container &result, const std::string &input, const std::string &delimiter)
Split a string.
Definition: XrdOucTUtils.hh:51
static void trim(std::string &str)
static int a2i(XrdSysError &, const char *emsg, const char *item, int *val, int minv=-1, int maxv=-1)
Definition: XrdOuca2x.cc:45
void set(int rc, const std::string &m)