kaldi-cygwin-io-inl.h
Go to the documentation of this file.
1 // util/kaldi-cygwin-io-inl.h
2 
3 // Copyright 2015 Smart Action Company LLC (author: Kirill Katsnelson)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 
11 // http://www.apache.org/licenses/LICENSE-2.0
12 
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 #ifndef KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_
20 #define KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_
21 
22 #ifndef _MSC_VER
23 #error This is a Windows-compatibility file. Something went wery wrong.
24 #endif
25 
26 #include <string>
27 
28 // This file is included only into kaldi-io.cc, and only if
29 // KALDI_CYGWIN_COMPAT is enabled.
30 //
31 // The routines map unix-ey paths passed to Windows programs from shell
32 // scripts in egs. Since shell scripts run under cygwin, they use cygwin's
33 // own mount table and a mapping to the file system. It is quite possible to
34 // create quite an intricate mapping that only cygwin's own API would be able
35 // to untangle. Unfortunately, the API to map between filenames is not
36 // available to non-cygwin programs. Running cygpath for every file operation
37 // would as well be cumbersome. So this is only a simplistic path resolution,
38 // assuming that the default cygwin prefix /cygdrive is used, and that all
39 // resolved unix-style full paths end up prefixed with /cygdrive. This is
40 // quite a sensible approach. We'll also try to map /dev/null and /tmp/**,
41 // die on all other /dev/** and warn about all other rooted paths.
42 
43 namespace kaldi {
44 
// Returns true when `str` starts with `pfx`. An empty `pfx` is a prefix of
// every string, including the empty one.
static bool prefixp(const std::string& pfx, const std::string& str) {
  // A prefix longer than the string itself can never match.
  if (pfx.length() > str.length())
    return false;
  // Compare only the leading pfx.length() characters of str against pfx.
  return str.compare(0, pfx.length(), pfx) == 0;
}
49 
50 static std::string cygprefix("/cygdrive/");
51 
52 static std::string MapCygwinPathNoTmp(const std::string &filename) {
53  // UNC(?), relative, native Windows and empty paths are ok already.
54  if (prefixp("//", filename) || !prefixp("/", filename))
55  return filename;
56 
57  // /dev/...
58  if (filename == "/dev/null")
59  return "\\\\.\\nul";
60  if (prefixp("/dev/", filename)) {
61  KALDI_ERR << "Unable to resolve path '" << filename
62  << "' - only have /dev/null here.";
63  return "\\\\.\\invalid";
64  }
65 
66  // /cygdrive/?[/....]
67  int preflen = cygprefix.size();
68  if (prefixp(cygprefix, filename)
69  && filename.size() >= preflen + 1 && isalpha(filename[preflen])
70  && (filename.size() == preflen + 1 || filename[preflen + 1] == '/')) {
71  return std::string() + filename[preflen] + ':' +
72  (filename.size() > preflen + 1 ? filename.substr(preflen + 1) : "/");
73  }
74 
75  KALDI_WARN << "Unable to resolve path '" << filename
76  << "' - cannot map unix prefix. "
77  << "Will go on, but breakage will likely ensue.";
78  return filename;
79 }
80 
81 // extern for unit testing.
82 std::string MapCygwinPath(const std::string &filename) {
83  // /tmp[/....]
84  if (filename != "/tmp" && !prefixp("/tmp/", filename)) {
85  return MapCygwinPathNoTmp(filename);
86  }
87  char *tmpdir = std::getenv("TMP");
88  if (tmpdir == nullptr)
89  tmpdir = std::getenv("TEMP");
90  if (tmpdir == nullptr) {
91  KALDI_ERR << "Unable to resolve path '" << filename
92  << "' - unable to find temporary directory. Set TMP.";
93  return filename;
94  }
95  // Map the value of tmpdir again, as cygwin environment actually may contain
96  // unix-style paths.
97  return MapCygwinPathNoTmp(std::string(tmpdir) + filename.substr(4));
98 }
99 
100 // A popen implementation that passes the command line through cygwin
101 // bash.exe. This is necessary since some piped commands are cygwin links
102 // (e. g. fgrep is a soft link to grep), and some are #!-files, such as
103 // gunzip which is a shell script that invokes gzip, or kaldi's own run.pl
104 // which is a perl script.
105 //
106 // _popen uses cmd.exe or whatever shell is specified via the COMSPEC
107 // variable. Unfortunately, it adds a hardcoded " /c " to it, so we cannot
108 // just substitute the environment variable COMSPEC to point to bash.exe.
109 // Instead, quote the command and pass it to bash via its -c switch.
110 static FILE *CygwinCompatPopen(const char* command, const char* mode) {
111  // To speed up command launch marginally, optionally accept full path
112  // to bash.exe. This will not work if the path contains spaces, but
113  // no sane person would install cygwin into a space-ridden path.
114  const char* bash_exe = std::getenv("BASH_EXE");
115  std::string qcmd(bash_exe != nullptr ? bash_exe : "bash.exe");
116  qcmd += " -c \"";
117  for (; *command; ++command) {
118  if (*command == '\"')
119  qcmd += '\"';
120  qcmd += *command;
121  }
122  qcmd += '\"';
123 
124  return _popen(qcmd.c_str(), mode);
125 }
126 
127 } // namespace kaldi
128 
129 #endif // KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
static FILE * CygwinCompatPopen(const char *command, const char *mode)
static std::string MapCygwinPathNoTmp(const std::string &filename)
std::string MapCygwinPath(const std::string &filename)
static std::string cygprefix("/cygdrive/")
static bool prefixp(const std::string &pfx, const std::string &str)