TPIE

2362a60
tpie::ami Namespace Reference

A version of sort that takes an input stream of elements of type T, and an output stream, and and uses the < operator to sort, see also Sorting in TPIE. More...

Classes

class  heap_element
 This is a heap element. More...
 
class  heap_ptr
 This is a record pointer element. More...
 
class  Internal_Sorter_Base
 The base class for internal sorters. More...
 
class  Internal_Sorter_Obj
 Comparision object based Internal_Sorter_base subclass implementation; uses quick_sort_obj(). More...
 
class  merge_heap_obj
 
class  merge_heap_op
 A merge heap object base class - also serves as the full implementation for objects with a < comparison operator. More...
 
class  merge_heap_ptr_obj
 A record pointer heap that uses a comparison object. More...
 
class  merge_heap_ptr_op
 A record pointer heap base class - also serves as the full implementation for objects with a < comparison operator. More...
 
class  qsort_item
 A simple class that facilitates doing key sorting followed by in-memory permuting to sort items in-memory. More...
 
class  stack
 An implementation of an external-memory stack compatible with the old AMI interface. More...
 
class  stream
 
class  stream_old
 A Stream<T> object stores an ordered collection of objects of type T on external memory. More...
 

Typedefs

typedef TPIE_OS_SIZE_T arity_t
 Intended to signal the number of input streams in a merge. More...
 

Enumerations

enum  err {
  NO_ERROR = 0, IO_ERROR, END_OF_STREAM, READ_ONLY,
  OS_ERROR, BASE_METHOD, BTE_ERROR, MM_ERROR,
  OBJECT_INITIALIZATION, OBJECT_INVALID, PERMISSION_DENIED, INSUFFICIENT_MAIN_MEMORY,
  INSUFFICIENT_AVAILABLE_STREAMS, ENV_UNDEFINED, NO_MAIN_MEMORY_OPERATION, BIT_MATRIX_BOUNDS,
  NOT_POWER_OF_2, NULL_POINTER, GENERIC_ERROR = 0xfff, SCAN_DONE = 0x1000,
  SCAN_CONTINUE, MERGE_DONE = 0x2000, MERGE_CONTINUE, MERGE_OUTPUT,
  MERGE_READ_MULTIPLE, MATRIX_BOUNDS = 0x3000, SORT_ALREADY_SORTED = 0x4000
}
 Legacy TPIE error codes. More...
 
enum  stream_type { READ_STREAM = 1, WRITE_STREAM, APPEND_STREAM, READ_WRITE_STREAM }
 AMI stream types passed to constructors. More...
 
enum  stream_status { STREAM_STATUS_VALID = 0, STREAM_STATUS_INVALID }
 AMI stream status. More...
 

Functions

template<class T , class M >
void merge_sorted_runs (typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator start, typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator end, file_stream< T > *outStream, M *MergeHeap, TPIE_OS_OFFSET cutoff=-1, progress_indicator_base *indicator=NULL)
 This is a common merge routine for all of the AMI_merge_sorted, AMI_ptr_merge_sorted and AMI_key_merge_sorted entry points. More...
 
template<class T , class CMPR >
void merge_sorted (typename tpie::array< std::unique_ptr< file_stream< T > > >::iterator start, typename tpie::array< std::unique_ptr< file_stream< T > > >::iterator end, file_stream< T > *outStream, CMPR *cmp)
 Merging with a heap that contains the records to be merged. More...
 
template<class T , class CMPR >
void ptr_merge_sorted (typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator start, typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator end, file_stream< T > *outStream, CMPR *cmp)
 Merging with a heap that keeps a pointer to the records rather than the records themselves: CMPR is the class of the comparison object, and must contain the method compare() which is called from within the merge. More...
 
 TPIE_DEPRECATED_CLASS_A (template< typename T > class TPIE_DEPRECATED_CLASS_B queue:public tpie::queue< T >{public:queue(){}queue(const std::string &basename):tpie::queue< T >(basename){}bool is_empty(){return this->empty();}err enqueue(const T &t){this->push(t);return NO_ERROR;}err dequeue(const T **t){try{*t=&this->pop();}catch(end_of_stream_exception e){return ami::END_OF_STREAM;}return NO_ERROR;}err peek(const T **t){*t=&this->front();return NO_ERROR;}err trim(){return NO_ERROR;}void persist(persistence){}};)
 
err exception_kind (const exception &e)
 
template<class T >
err sort (stream< T > *instream_ami, stream< T > *outstream_ami, tpie::progress_indicator_base *indicator=NULL)
 
template<class T , class CMPR >
err sort (stream< T > *instream_ami, stream< T > *outstream_ami, CMPR *cmp, progress_indicator_base *indicator=NULL)
 
template<class T >
err ptr_sort (stream< T > *instream_ami, stream< T > *outstream_ami, progress_indicator_base *indicator=NULL)
 
template<class T , class CMPR >
err ptr_sort (stream< T > *instream_ami, stream< T > *outstream_ami, CMPR *cmp, progress_indicator_base *indicator=NULL)
 
template<class T , class KEY , class CMPR >
err key_sort (stream< T > *instream_ami, stream< T > *outstream_ami, KEY, CMPR *cmp, progress_indicator_base *indicator=NULL)
 
template<class T >
err sort (stream< T > *instream_ami, progress_indicator_base *indicator=0)
 
template<class T , class CMPR >
err sort (stream< T > *instream_ami, CMPR *cmp, progress_indicator_base *indicator=NULL)
 
template<class T >
err ptr_sort (stream< T > *instream_ami, progress_indicator_base *indicator=NULL)
 
template<class T , class CMPR >
err ptr_sort (stream< T > *instream_ami, CMPR *cmp, progress_indicator_base *indicator=NULL)
 
template<class T , class KEY , class CMPR >
err key_sort (stream< T > *instream_ami, KEY, CMPR *cmp, progress_indicator_base *indicator=NULL)
 

Detailed Description

A version of sort that takes an input stream of elements of type T, and an output stream, and and uses the < operator to sort, see also Sorting in TPIE.

In-place sorting variant of key_sort(stream<T> instream_ami, stream<T> *outstream_ami, KEY dummykey, CMPR *cmp, progress_indicator_base indicator=NULL), see also In-place Variants for Sorting in TPIE.

In-place sorting variant of ptr_sort(stream<T> instream_ami, stream<T> *outstream_ami, CMPR *cmp, progress_indicator_base indicator=NULL), see also In-place Variants for Sorting in TPIE.

In-place sorting variant of ptr_sort(stream<T> instream_ami, stream<T> *outstream_ami, progress_indicator_base indicator=NULL), see also In-place Variants for Sorting in TPIE.

In-place sorting variant of sort(stream<T> instream_ami, stream<T> *outstream_ami, CMPR *cmp, progress_indicator_base indicator=NULL), see also In-place Variants for Sorting in TPIE.

In-place sorting variant of sort(stream<T> instream_ami, stream<T> *outstream_ami, tpie::progress_indicator_base indicator=NULL), see also In-place Variants for Sorting in TPIE.

A version of sort that takes an input stream of elements of type T, an output stream, and a user-specified comparison object.

Comparing within Sorting:
TPIE's sort() has two polymorphs, namely the comparison operator and comparison class polymorphs. The comparison operator version tends to be the fastest and most straightforward to use. The comparison class version is comparable in speed (maybe slightly slower), but somewhat more flexible, as it can support multiple, different sorts on the same keys.
Comparison operator version:
This version works on streams of objects for which the operator "<" is defined.
Comparison class version:
This version of sort() uses a method of a user-defined comparison object to determine the order of two input objects. The object must have a public member function named compare(), having the following prototype:

inline int compare (const KEY & k1, const KEY & k2);

The user-written compare() function computes the order of the two user-defined keys k1 and k2, and returns -1, 0, or +1 to indicate that k1<k2, k1==k2, or k1>k2 respectively.

The comparison object "cmp", of (user-defined) class represented by CMPR, must have a member function called "compare" which is used for sorting the input stream; see also Comparing within Sorting.

Typedef Documentation

typedef TPIE_OS_SIZE_T tpie::ami::arity_t

Intended to signal the number of input streams in a merge.

Definition at line 46 of file merge_sorted_runs.h.

Enumeration Type Documentation

Legacy TPIE error codes.

Functions in the AMI interface of TPIE typically return error codes of the enumerated type err.

Enumerator
NO_ERROR 

No error occurred.

The call the the entry point returned normally.

IO_ERROR 

A low level I/O error occurred.

END_OF_STREAM 

An attempt was made to read past the end of a stream or write past the end of a substream.

READ_ONLY 

An attempt was made to write to a read-only stream.

OS_ERROR 

An unexpected operating system error occurred.

Details should appear in the log file if logging is enabled.

See also
sec_logging
BASE_METHOD 

An attempt was made to call a member function of the virtual base class of tpie::ami::stream.

This indicates a bug in the implementation of TPIE streams.

BTE_ERROR 

An error occurred at the BTE level.

MM_ERROR 

An error occurred within the memory manager.

OBJECT_INITIALIZATION 

An TPIE entry point was not able to properly initialize the operation management object that was passed to it.

This generally indicates a bug in the operation management object's initialization code.

OBJECT_INVALID 

A passed object is invalid.

PERMISSION_DENIED 

A passed object is inaccessible due to insufficient permissions.

INSUFFICIENT_MAIN_MEMORY 

The memory manager could not make adequate main memory available to complete the requested operation.

Many operations adapt themselves to use whatever main memory is available, but in some cases, when memory is extremely tight, they may not be able to function.

INSUFFICIENT_AVAILABLE_STREAMS 

TPIE could not allocate enough intermediate streams to perform the requested operation.

Certain operating system restrictions limit the number of streams that can be created on certain platforms. Only in unusual circumstances, such as when the application itself has a very large number of open streams, will this error occur.

ENV_UNDEFINED 

An environment variable necessary to initialize the TPIE accessing environment was not defined.

BIT_MATRIX_BOUNDS 

A bit matrix larger than the number of bits in an offset into a stream was passed.

NOT_POWER_OF_2 

The length of a stream on which a bit permutation was to be performed is not a power of two.

NULL_POINTER 

An attempt was made to perform a matrix operation on matrices whose bounds did not match appropriately.

SCAN_DONE 

Value returned by a scan_object: Indicates that the function should be called again with any "taken" inputs replaced by the next objects from their respective streams.

SCAN_CONTINUE 

Value returned by a scan_object: Indicates that the scan is complete and no more input needs to be processed.

MERGE_DONE 

Value returned by a merge_management_object, signaling that the merge() completed.

MERGE_CONTINUE 

Value returned by a merge_management_object, telling merge() to continue to call the operate() member function of the management object with more data.

MERGE_OUTPUT 

Value returned by a merge_management_object, signaling that the last merge() call generated output for the output stream.

MERGE_READ_MULTIPLE 

Value returned by a merge_management_object, telling merge() that more than one input ob ject was consumed and thus the input flags should be consulted.

MATRIX_BOUNDS 

Matrix related error.

SORT_ALREADY_SORTED 

Values returned by sort routines if input does not require sorting.

Definition at line 45 of file err.h.

45  {
47  NO_ERROR = 0,
49  IO_ERROR,
54  READ_ONLY,
57  OS_ERROR,
63  BTE_ERROR,
65  MM_ERROR,
90  NO_MAIN_MEMORY_OPERATION,
100 
101  GENERIC_ERROR = 0xfff,
102 
106  SCAN_DONE = 0x1000,
110 
112  MERGE_DONE = 0x2000,
119  MERGE_OUTPUT,
124 
126  MATRIX_BOUNDS = 0x3000,
127 
129  SORT_ALREADY_SORTED = 0x4000
130 
131  };
An TPIE entry point was not able to properly initialize the operation management object that was pass...
Definition: err.h:69
The length of a stream on which a bit permutation was to be performed is not a power of two...
Definition: err.h:96
Value returned by a merge_management_object, telling merge() to continue to call the operate() member...
Definition: err.h:116
Value returned by a scan_object: Indicates that the scan is complete and no more input needs to be pr...
Definition: err.h:109
No error occurred.
Definition: err.h:47
Value returned by a scan_object: Indicates that the function should be called again with any "taken" ...
Definition: err.h:106
A passed object is invalid.
Definition: err.h:71
Matrix related error.
Definition: err.h:126
A low level I/O error occurred.
Definition: err.h:49
An attempt was made to call a member function of the virtual base class of tpie::ami::stream.
Definition: err.h:61
A passed object is inaccessible due to insufficient permissions.
Definition: err.h:73
Value returned by a merge_management_object, signaling that the merge() completed.
Definition: err.h:112
An environment variable necessary to initialize the TPIE accessing environment was not defined...
Definition: err.h:89
Values returned by sort routines if input does not require sorting.
Definition: err.h:129
An error occurred within the memory manager.
Definition: err.h:65
An unexpected operating system error occurred.
Definition: err.h:57
TPIE could not allocate enough intermediate streams to perform the requested operation.
Definition: err.h:86
A bit matrix larger than the number of bits in an offset into a stream was passed.
Definition: err.h:93
An error occurred at the BTE level.
Definition: err.h:63
Value returned by a merge_management_object, telling merge() that more than one input ob ject was con...
Definition: err.h:123
An attempt was made to write to a read-only stream.
Definition: err.h:54
The memory manager could not make adequate main memory available to complete the requested operation...
Definition: err.h:79
An attempt was made to perform a matrix operation on matrices whose bounds did not match appropriatel...
Definition: err.h:99
An attempt was made to read past the end of a stream or write past the end of a substream.
Definition: err.h:52
Value returned by a merge_management_object, signaling that the last merge() call generated output fo...
Definition: err.h:119

AMI stream status.

Enumerator
STREAM_STATUS_VALID 

Stream is valid.

STREAM_STATUS_INVALID 

Stream is invalid.

Definition at line 61 of file stream_old.h.

61  {
66  };

AMI stream types passed to constructors.

Definition at line 53 of file stream_old.h.

53  {
54  READ_STREAM = 1, // Open existing stream for reading
55  WRITE_STREAM, // Open for writing. Create if non-existent
56  APPEND_STREAM, // Open for writing at end. Create if needed.
57  READ_WRITE_STREAM // Open to read and write.
58  };

Function Documentation

template<class T , class CMPR >
void tpie::ami::merge_sorted ( typename tpie::array< std::unique_ptr< file_stream< T > > >::iterator  start,
typename tpie::array< std::unique_ptr< file_stream< T > > >::iterator  end,
file_stream< T > *  outStream,
CMPR *  cmp 
)

Merging with a heap that contains the records to be merged.

CMPR is the class of the comparison object, and must contain the method compare() which is called from within the merge.

This is one of the merge entry points for merging without the merge_management_object used by TPIE's merge. These routines perform the special case of merging when the the required output is the original records interleaved according to a comparison operator or function.

Definition at line 151 of file merge_sorted_runs.h.

References tpie::ami::merge_heap_op< REC, Compare >::allocate(), and merge_sorted_runs().

153  {
154 
155  // make a merge heap which uses the user's comparison object
156  // and initialize it
157  merge_heap_obj<T,CMPR> mrgheap (cmp);
158  mrgheap.allocate(end-start);
159 
160  //Rewind all the input streams
161  for (typename tpie::array<std::unique_ptr<file_stream<T> > >::iterator i=start;
162  i != end; ++i)
163  i->seek(0);
164 
165  merge_sorted_runs(start, end, outStream, mrgheap);
166  }
A generic array with a fixed size.
Definition: array.h:144
void merge_sorted_runs(typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator start, typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator end, file_stream< T > *outStream, M *MergeHeap, TPIE_OS_OFFSET cutoff=-1, progress_indicator_base *indicator=NULL)
This is a common merge routine for all of the AMI_merge_sorted, AMI_ptr_merge_sorted and AMI_key_merg...
std::unique_ptr< T, tpie_deleter > unique_ptr
like std::unique_ptr, but delete the object with tpie_delete.
Definition: memory.h:338
template<class T , class M >
void tpie::ami::merge_sorted_runs ( typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator  start,
typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator  end,
file_stream< T > *  outStream,
M *  MergeHeap,
TPIE_OS_OFFSET  cutoff = -1,
progress_indicator_base *  indicator = NULL 
)

This is a common merge routine for all of the AMI_merge_sorted, AMI_ptr_merge_sorted and AMI_key_merge_sorted entry points.

It is also used by the sort entry points AMI_sort, AMI_ptr_sort and AMI_key_sort and by the routine AMI_partition_and_merge. Differences are encapsulated within the merge heap object MergeHeap. It is assumed that MergeHeap::allocate() was called before entering ami_merge_sorted_runs. ami_merge_sorted_runs takes both the max number of elements to read from any stream and also a boolean flag for showing a progress indicator.

Sorted Stream Merging:
TPIE provides several merge entry points for merging sorted streams to produce a single, interleaved output stream. merge_sorted_runs has three polymorphs, namely the comparison operator, comparison class and the key-based polymorphs. The comparison operator version tends to be the fastest and most straightforward to use. The comparison class version is comparable in speed (maybe slightly slower), but somewhat more flexible, as it can support multiple, different merges on the same keys.

Definition at line 73 of file merge_sorted_runs.h.

References tpie::file_stream< T >::can_read(), and tpie::file_stream< T >::read().

Referenced by merge_sorted(), and ptr_merge_sorted().

77  {
78 
79  size_t i;
80  size_t arity = end-start;
81 
82  //Pointers to current leading elements of streams
83  tpie::array<const T *> in_objects(arity);
84  tpie::array<TPIE_OS_OFFSET> nread(arity);
85 
86  // **************************************************************
87  // * Read first element from stream. Do not rewind! We may read *
88  // * more elements from the same stream later. *
89  // **************************************************************
90 
91  for (i = 0; i < arity; i++) {
92  file_stream<T> * stream = (start+i)->get();
93  if (stream->can_read()) {
94  in_objects[i] = &stream->read();
95  MergeHeap->insert( in_objects[i], i );
96  } else
97  in_objects[i] = NULL;
98  nread[i] = 1;
99  if (indicator) indicator->step();
100  }
101 
102  // *********************************************************
103  // * Build a heap from the smallest items of each stream *
104  // *********************************************************
105 
106  MergeHeap->initialize ( );
107 
108  // *********************************************************
109  // * Perform the merge until the inputs are exhausted. *
110  // *********************************************************
111  while (MergeHeap->sizeofheap() > 0) {
112  i = MergeHeap->get_min_run_id ();
113  outStream->write(*in_objects[i]);
114 
115  bool eof = false;
116 
117  //Check if we read as many elements as we are allowed to
118  if ( (cutoff != -1) && (nread[i]>=cutoff))
119  eof = true;
120  else {
121  file_stream<T> * stream = (start+i)->get();
122  if (stream->can_read()) in_objects[i] = &stream->read();
123  else eof = true;
124 
125  if (indicator) indicator->step();
126  }
127 
128  if (eof)
129  MergeHeap->delete_min_and_insert(NULL);
130  else {
131  nread[i]++;
132  MergeHeap->delete_min_and_insert (in_objects[i]);
133  }
134  } // while
135  }
A generic array with a fixed size.
Definition: array.h:144
template<class T , class CMPR >
void tpie::ami::ptr_merge_sorted ( typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator  start,
typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator  end,
file_stream< T > *  outStream,
CMPR *  cmp 
)

Merging with a heap that keeps a pointer to the records rather than the records themselves: CMPR is the class of the comparison object, and must contain the method compare() which is called from within the merge.

This is one of the merge entry points for merging without the merge_management_object used by TPIE's merge. These routines perform the special case of merging when the the required output is the original records interleaved according to a comparison operator or function.

Definition at line 183 of file merge_sorted_runs.h.

References tpie::ami::merge_heap_ptr_op< REC, CMPR >::allocate(), and merge_sorted_runs().

186  {
187 
188  // make a merge heap of pointers which uses the user's comparison
189  // object and initialize it
190  merge_heap_ptr_obj<T,CMPR> mrgheap (cmp);
191  mrgheap.allocate(end-start);
192 
193  // Rewind all the input streams
194  for (typename tpie::array<std::unique_ptr<file_stream<T> > >::iteratorarity_t i=start;
195  i != end; ++i)
196  i->seek(0);
197 
198  merge_sorted_runs(start, end, outStream, mrgheap);
199  }
A generic array with a fixed size.
Definition: array.h:144
void merge_sorted_runs(typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator start, typename tpie::array< tpie::unique_ptr< file_stream< T > > >::iterator end, file_stream< T > *outStream, M *MergeHeap, TPIE_OS_OFFSET cutoff=-1, progress_indicator_base *indicator=NULL)
This is a common merge routine for all of the AMI_merge_sorted, AMI_ptr_merge_sorted and AMI_key_merg...
std::unique_ptr< T, tpie_deleter > unique_ptr
like std::unique_ptr, but delete the object with tpie_delete.
Definition: memory.h:338