RandomAccessTableReaderScriptImpl< Holder > Class Template Reference

#include <kaldi-table-inl.h>

Inheritance diagram for RandomAccessTableReaderScriptImpl< Holder >:
Collaboration diagram for RandomAccessTableReaderScriptImpl< Holder >:

Public Types

typedef Holder::T T
 
- Public Types inherited from RandomAccessTableReaderImplBase< Holder >
typedef Holder::T T
 

Public Member Functions

 RandomAccessTableReaderScriptImpl ()
 
virtual bool Open (const std::string &rspecifier)
 
virtual bool IsOpen () const
 
virtual bool Close ()
 
virtual bool HasKey (const std::string &key)
 
virtual const T & Value (const std::string &key)
 
virtual ~RandomAccessTableReaderScriptImpl ()
 
- Public Member Functions inherited from RandomAccessTableReaderImplBase< Holder >
virtual ~RandomAccessTableReaderImplBase ()
 

Private Types

enum  {
  kUninitialized, kNotReadScript, kNotHaveObject, kHaveObject,
  kHaveRange
}
 

Private Member Functions

virtual bool HasKeyInternal (const std::string &key, bool preload)
 
bool LookupKey (const std::string &key, size_t *script_offset)
 

Private Attributes

Input input_
 
RspecifierOptions opts_
 
std::string rspecifier_
 
std::string script_rxfilename_
 
std::string key_
 
Holder holder_
 
Holder range_holder_
 
std::string range_
 
std::string data_rxfilename_
 
std::vector< std::pair< std::string, std::string > > script_
 
size_t last_found_
 
enum kaldi::RandomAccessTableReaderScriptImpl:: { ... }  state_
 

Detailed Description

template<class Holder>
class kaldi::RandomAccessTableReaderScriptImpl< Holder >

Definition at line 1593 of file kaldi-table-inl.h.

Member Typedef Documentation

◆ T

typedef Holder::T T

Definition at line 1596 of file kaldi-table-inl.h.

Member Enumeration Documentation

◆ anonymous enum

anonymous enum
private
Enumerator
kUninitialized 
kNotReadScript 
kNotHaveObject 
kHaveObject 
kHaveRange 

Definition at line 1863 of file kaldi-table-inl.h.

1863  {
1864  // (*) is script_ set up?
1865  // (*) does holder_ contain an object?
1866  // (*) does range_holder_ contain an object?
1867  //
1868  //
1869  kUninitialized, // no no no
1870  kNotReadScript, // no no no
1871  kNotHaveObject, // yes no no
1872  kHaveObject, // yes yes no
1873  kHaveRange, // yes yes yes
1874 
1875  // If we are in a state where holder_ contains an object, it always contains
1876  // the object from 'key_', and the corresponding rxfilename is always
1877  // 'data_rxfilename_'. If range_holder_ contains an object, it always
1878  // corresponds to the range 'range_' of the object in 'holder_', and always
1879  // corresponds to the current key.
1880  } state_;
enum kaldi::RandomAccessTableReaderScriptImpl::@4 state_

Constructor & Destructor Documentation

◆ RandomAccessTableReaderScriptImpl()

◆ ~RandomAccessTableReaderScriptImpl()

virtual ~RandomAccessTableReaderScriptImpl ( )
inline virtual

Definition at line 1693 of file kaldi-table-inl.h.

1693 { }

Member Function Documentation

◆ Close()

virtual bool Close ( )
inline virtual

Implements RandomAccessTableReaderImplBase< Holder >.

Definition at line 1652 of file kaldi-table-inl.h.

References RandomAccessTableReaderScriptImpl< Holder >::IsOpen(), and KALDI_ERR.

1652  {
1653  if (!IsOpen())
1654  KALDI_ERR << "Close() called on RandomAccessTableReader that was not"
1655  " open.";
1656  holder_.Clear();
1657  range_holder_.Clear();
1659  last_found_ = 0;
1660  script_.clear();
1661  key_ = "";
1662  range_ = "";
1663  data_rxfilename_ = "";
1664  // This cannot fail because any errors of a "global" nature would have been
1665  // detected when we did Open(). With archives it's different.
1666  return true;
1667  }
enum kaldi::RandomAccessTableReaderScriptImpl::@4 state_
std::vector< std::pair< std::string, std::string > > script_
#define KALDI_ERR
Definition: kaldi-error.h:147

◆ HasKey()

virtual bool HasKey ( const std::string &  key)
inline virtual

Implements RandomAccessTableReaderImplBase< Holder >.

Definition at line 1669 of file kaldi-table-inl.h.

1669  {
1670  bool preload = opts_.permissive;
1671  // In permissive mode, we have to check that we can read
1672  // the scp entry before we assert that the key is there.
1673  return HasKeyInternal(key, preload);
1674  }
virtual bool HasKeyInternal(const std::string &key, bool preload)

◆ HasKeyInternal()

virtual bool HasKeyInternal ( const std::string &  key,
bool  preload 
)
inline private virtual

Definition at line 1703 of file kaldi-table-inl.h.

References kaldi::ExtractRangeSpecifier(), kaldi::IsToken(), KALDI_ASSERT, KALDI_ERR, KALDI_WARN, and kaldi::PrintableRxfilename().

1703  {
1704  switch (state_) {
1705  case kUninitialized: case kNotReadScript:
1706  KALDI_ERR << "HasKey called on RandomAccessTableReader object that is"
1707  " not open.";
1708  case kHaveObject:
1709  if (key == key_ && range_.empty())
1710  return true;
1711  break;
1712  case kHaveRange:
1713  if (key == key_)
1714  return true;
1715  break;
1716  case kNotHaveObject: default: break;
1717  }
1718  KALDI_ASSERT(IsToken(key));
1719  size_t key_pos = 0;
1720  if (!LookupKey(key, &key_pos)) {
1721  return false;
1722  } else {
1723  if (!preload) {
1724  return true; // we have the key, and were not asked to verify that the
1725  // object could be read.
1726  } else { // preload specified, so we have to attempt to pre-load the
1727  // object before returning.
1728  std::string data_rxfilename, range; // We will split
1729  // script_[key_pos].second (e.g. "1.ark:100[0:2]" into data_rxfilename
1730  // (e.g. "1.ark:100") and range (if any), e.g. "0:2".
1731  if (script_[key_pos].second[script_[key_pos].second.size()-1] == ']') {
1732  if(!ExtractRangeSpecifier(script_[key_pos].second,
1733  &data_rxfilename,
1734  &range)) {
1735  KALDI_ERR << "TableReader: failed to parse range in '"
1736  << script_[key_pos].second << "'";
1737  }
1738  } else {
1739  data_rxfilename = script_[key_pos].second;
1740  }
1741  if (state_ == kHaveRange) {
1742  if (data_rxfilename_ == data_rxfilename && range_ == range) {
1743  // the odd situation where two keys had the same rxfilename and range:
1744  // just change the key and keep the object.
1745  key_ = key;
1746  return true;
1747  } else {
1748  range_holder_.Clear();
1749  state_ = kHaveObject;
1750  }
1751  }
1752  // OK, at this point the state will be kHaveObject or kNotHaveObject.
1753  if (state_ == kHaveObject) {
1754  if (data_rxfilename_ != data_rxfilename) {
1755  // clear out the object.
1756  state_ = kNotHaveObject;
1757  holder_.Clear();
1758  }
1759  }
1760  // At this point we can safely switch to the new key, data_rxfilename
1761  // and range, and we know that if we have an object, it will already be
1762  // the correct one. The state is now kHaveObject or kNotHaveObject.
1763  key_ = key;
1764  data_rxfilename_ = data_rxfilename;
1765  range_ = range;
1766  if (state_ == kNotHaveObject) {
1767  // we need to read the object.
1768  if (!input_.Open(data_rxfilename)) {
1769  KALDI_WARN << "Error opening stream "
1770  << PrintableRxfilename(data_rxfilename);
1771  return false;
1772  } else {
1773  if (holder_.Read(input_.Stream())) {
1774  state_ = kHaveObject;
1775  } else {
1776  KALDI_WARN << "Error reading object from "
1777  "stream " << PrintableRxfilename(data_rxfilename);
1778  return false;
1779  }
1780  }
1781  }
1782  // At this point the state is kHaveObject.
1783  if (range.empty())
1784  return true; // we're done: no range was requested.
1785  if (range_holder_.ExtractRange(holder_, range)) {
1786  state_ = kHaveRange;
1787  return true;
1788  } else {
1789  KALDI_WARN << "Failed to load object from "
1790  << PrintableRxfilename(data_rxfilename)
1791  << "[" << range << "]";
1792  // leave state at kHaveObject.
1793  return false;
1794  }
1795  }
1796  }
1797  }
enum kaldi::RandomAccessTableReaderScriptImpl::@4 state_
bool Open(const std::string &rxfilename, bool *contents_binary=NULL)
Definition: kaldi-io-inl.h:26
bool IsToken(const std::string &token)
Returns true if "token" is nonempty, and all characters are printable and whitespace-free.
Definition: text-utils.cc:105
std::vector< std::pair< std::string, std::string > > script_
std::istream & Stream()
Definition: kaldi-io.cc:826
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:61
bool ExtractRangeSpecifier(const std::string &rxfilename_with_range, std::string *data_rxfilename, std::string *range)
bool LookupKey(const std::string &key, size_t *script_offset)

◆ IsOpen()

◆ LookupKey()

bool LookupKey ( const std::string &  key,
size_t *  script_offset 
)
inline private

Definition at line 1802 of file kaldi-table-inl.h.

1802  {
1803  // First, an optimization: if we're going consecutively, this will
1804  // make the lookup very fast. Since we may call HasKey and then
1805  // Value(), which both may look up the key, we test if either the
1806  // current or next position are correct.
1807  if (last_found_ < script_.size() && script_[last_found_].first == key) {
1808  *script_offset = last_found_;
1809  return true;
1810  }
1811  last_found_++;
1812  if (last_found_ < script_.size() && script_[last_found_].first == key) {
1813  *script_offset = last_found_;
1814  return true;
1815  }
1816  std::pair<std::string, std::string> pr(key, ""); // Important that ""
1817  // compares less than or equal to any string, so lower_bound points to the
1818  // element that has the same key.
1819  typedef typename std::vector<std::pair<std::string, std::string> >
1820  ::const_iterator IterType;
1821  IterType iter = std::lower_bound(script_.begin(), script_.end(), pr);
1822  if (iter != script_.end() && iter->first == key) {
1823  last_found_ = *script_offset = iter - script_.begin();
1824  return true;
1825  } else {
1826  return false;
1827  }
1828  }
std::vector< std::pair< std::string, std::string > > script_

◆ Open()

virtual bool Open ( const std::string &  rspecifier)
inline virtual

Implements RandomAccessTableReaderImplBase< Holder >.

Definition at line 1600 of file kaldi-table-inl.h.

References kaldi::ClassifyRspecifier(), rnnlm::i, KALDI_ASSERT, KALDI_ERR, KALDI_WARN, kaldi::kScriptRspecifier, kaldi::PrintableRxfilename(), and kaldi::ReadScriptFile().

1600  {
1601  switch (state_) {
1602  case kNotHaveObject: case kHaveObject: case kHaveRange:
1603  KALDI_ERR << " Opening already open RandomAccessTableReader:"
1604  " call Close first.";
1605  case kUninitialized: case kNotReadScript:
1606  break;
1607  }
1608  rspecifier_ = rspecifier;
1609  RspecifierType rs = ClassifyRspecifier(rspecifier,
1610  &script_rxfilename_,
1611  &opts_);
1612  KALDI_ASSERT(rs == kScriptRspecifier); // or wrongly called.
1613  KALDI_ASSERT(script_.empty()); // no way it could be nonempty at this point
1614 
1615  if (!ReadScriptFile(script_rxfilename_,
1616  true, // print any warnings
1617  &script_)) { // error reading script file or invalid
1618  // format
1620  return false; // no need to print further warnings. user gets the error.
1621  }
1622 
1623  rspecifier_ = rspecifier;
1624  // If opts_.sorted, the user has asserted that the keys are already sorted.
1625  // Although we could easily sort them, we want to let the user know of this
1626  // mistake. This same mistake could have serious effects if used with an
1627  // archive rather than a script.
1628  if (!opts_.sorted)
1629  std::sort(script_.begin(), script_.end());
1630  for (size_t i = 0; i + 1 < script_.size(); i++) {
1631  if (script_[i].first.compare(script_[i+1].first) >= 0) {
1632  // script[i] not < script[i+1] in lexical order...
1633  bool same = (script_[i].first == script_[i+1].first);
1634  KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_)
1635  << (same ? " contains duplicate key: " :
1636  " is not sorted (remove s, option or add ns, option):"
1637  " key is ") << script_[i].first;
1639  return false;
1640  }
1641  }
1642  state_ = kNotHaveObject;
1643  key_ = ""; // make sure we don't have a key set
1644  return true;
1645  }
enum kaldi::RandomAccessTableReaderScriptImpl::@4 state_
RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, RspecifierOptions *opts)
Definition: kaldi-table.cc:225
std::vector< std::pair< std::string, std::string > > script_
RspecifierType
Definition: kaldi-table.h:219
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
bool ReadScriptFile(const std::string &rxfilename, bool warn, std::vector< std::pair< std::string, std::string > > *script_out)
Definition: kaldi-table.cc:26
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
std::string PrintableRxfilename(const std::string &rxfilename)
PrintableRxfilename turns the rxfilename into a more human-readable form for error reporting...
Definition: kaldi-io.cc:61

◆ Value()

virtual const T& Value ( const std::string &  key)
inline virtual

Implements RandomAccessTableReaderImplBase< Holder >.

Definition at line 1679 of file kaldi-table-inl.h.

References KALDI_ASSERT, and KALDI_ERR.

1679  {
1680  if (!HasKeyInternal(key, true)) // true == preload.
1681  KALDI_ERR << "Could not get item for key " << key
1682  << ", rspecifier is " << rspecifier_ << " [to ignore this, "
1683  << "add the p, (permissive) option to the rspecifier.";
1684  KALDI_ASSERT(key_ == key);
1685  if (state_ == kHaveObject) {
1686  return holder_.Value();
1687  } else {
1689  return range_holder_.Value();
1690  }
1691  }
enum kaldi::RandomAccessTableReaderScriptImpl::@4 state_
virtual bool HasKeyInternal(const std::string &key, bool preload)
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185

Member Data Documentation

◆ data_rxfilename_

std::string data_rxfilename_
private

Definition at line 1849 of file kaldi-table-inl.h.

◆ holder_

Holder holder_
private

Definition at line 1843 of file kaldi-table-inl.h.

◆ input_

Input input_
private

Definition at line 1831 of file kaldi-table-inl.h.

◆ key_

std::string key_
private

Definition at line 1839 of file kaldi-table-inl.h.

◆ last_found_

size_t last_found_
private

Definition at line 1861 of file kaldi-table-inl.h.

◆ opts_

RspecifierOptions opts_
private

Definition at line 1834 of file kaldi-table-inl.h.

◆ range_

std::string range_
private

Definition at line 1847 of file kaldi-table-inl.h.

◆ range_holder_

Holder range_holder_
private

Definition at line 1844 of file kaldi-table-inl.h.

◆ rspecifier_

std::string rspecifier_
private

Definition at line 1835 of file kaldi-table-inl.h.

◆ script_

std::vector<std::pair<std::string, std::string> > script_
private

Definition at line 1860 of file kaldi-table-inl.h.

◆ script_rxfilename_

std::string script_rxfilename_
private

Definition at line 1837 of file kaldi-table-inl.h.

◆ state_

enum { ... } state_

The documentation for this class was generated from the following file: