Paranoid strcpy into a buffer of given length (includes terminating
zero). Strips out all but 'a-Z0-9' and the characters in other_safe_chars
and replaces with '_'. Deliberately does *NOT* check for multibyte
- characters. Don't change it !
+ characters. Treats src as an array of bytes, not as a multibyte
+ string. Any byte >0x7f is automatically converted to '_'.
+ other_safe_chars must also contain an ascii string (bytes<=0x7f).
**/
char *alpha_strcpy_fn(const char *fn,
for(i = 0; i < len; i++) {
int val = (src[i] & 0xff);
- if (isupper_ascii(val) || islower_ascii(val) ||
- isdigit(val) || strchr_m(other_safe_chars, val))
+ if (val > 0x7f) {
+ dest[i] = '_';
+ continue;
+ }
+ if (isupper(val) || islower(val) ||
+ isdigit(val) || strchr(other_safe_chars, val))
dest[i] = src[i];
else
dest[i] = '_';
}
/**
- Count the number of UCS2 characters in a string. Normally this will
- be the same as the number of bytes in a string for single byte strings,
- but will be different for multibyte.
-**/
-
-size_t strlen_m(const char *s)
+ * Calculate the number of units (8 or 16-bit, depending on the
+ * destination charset), that would be needed to convert the input
+ * string which is expected to be in CH_UNIX encoding to the
+ * destination charset (which should be a unicode charset).
+ */
+size_t strlen_m_ext(const char *s, const charset_t dst_charset)
{
size_t count = 0;
while (*s) {
size_t c_size;
codepoint_t c = next_codepoint(s, &c_size);
- if (c < 0x10000) {
- /* Unicode char fits into 16 bits. */
+ s += c_size;
+
+ switch(dst_charset) {
+ case CH_UTF16LE:
+ case CH_UTF16BE:
+ case CH_UTF16MUNGED:
+ if (c < 0x10000) {
+ /* Unicode char fits into 16 bits. */
+ count += 1;
+ } else {
+ /* Double-width unicode char - 32 bits. */
+ count += 2;
+ }
+ break;
+ case CH_UTF8:
+ /*
+ * this only checks ranges, and does not
+ * check for invalid codepoints.
+ * UTF-8 lengths: 1 byte below 0x80, 2 below 0x800,
+ * 3 below 0x10000 (the rest of the BMP), 4 above.
+ */
+ if (c < 0x80) {
+ count += 1;
+ } else if (c < 0x800) {
+ count += 2;
+ } else if (c < 0x10000) {
+ count += 3;
+ } else {
+ count += 4;
+ }
+ break;
+ default:
+ /*
+ * non-unicode encoding:
+ * assume that each codepoint fits into
+ * one unit in the destination encoding.
+ */
count += 1;
- } else {
- /* Double-width unicode char - 32 bits. */
- count += 2;
}
- s += c_size;
}
return count;
}
+size_t strlen_m_ext_term(const char *s, const charset_t dst_charset)
+{
+ if (!s) { /* NULL guard: strlen_m_ext() dereferences s unconditionally */
+ return 0; /* no string at all, so not even a terminator is counted */
+ }
+ return strlen_m_ext(s, dst_charset) + 1; /* presumably the +1 is the terminating unit (per the _term suffix) */
+}
+
+/**
+ Count the number of UCS2 characters in a string. Normally this will
+ be the same as the number of bytes in a string for single byte strings,
+ but will be different for multibyte.
+**/
+
+size_t strlen_m(const char *s)
+{
+ return strlen_m_ext(s, CH_UTF16LE); /* 16-bit units: 1 per codepoint below 0x10000, 2 otherwise */
+}
+
/**
Count the number of UCS2 characters in a string including the null
terminator.
}
/******************************************************************************
- substritute a specific pattern in a string list
+ substitute a specific pattern in a string list
*****************************************************************************/
bool str_list_substitute(char **list, const char *pattern, const char *insert)
#define S_LIST_ABS 16 /* List Allocation Block Size */
-char **str_list_make_v3(TALLOC_CTX *mem_ctx, const char *string, const char *sep)
+char **str_list_make_v3(TALLOC_CTX *mem_ctx, const char *string,
+ const char *sep)
{
char **list;
const char *str;
- char *s;
+ char *s, *tok;
int num, lsize;
- char *tok;
if (!string || !*string)
return NULL;