2 Unix SMB/Netbios implementation.
4 Create unicode map files from unicode_def.XXX files.
6 Copyright (C) Jeremy Allison 1997-1999.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/*
 * Program name printed as the leading %s of the error messages emitted by
 * clean_data, parse_error and do_compile.
 * NOTE(review): the assignment (presumably from argv[0] in main) is not
 * visible in this extract - confirm it is set before any message is printed.
 */
25 static char *prog_name = NULL;
28 * Print program usage and die.
/*
 * unicode_map_usage: write the command-line synopsis to stderr. The
 * synopsis lists three arguments: codepage, inputfile and outputfile.
 *
 * progname: presumably the value substituted for the %s in the message;
 *   the argument list of the fprintf call is not visible in this extract.
 *
 * NOTE(review): the header line says the function also terminates the
 * program, but the exit call is not visible here - confirm.
 */
31 static void unicode_map_usage(char *progname)
33 fprintf(stderr, "Usage is : %s <codepage> <inputfile> <outputfile>\n",
39 * Read a line from a buffer into a line buffer. Ensure null
/*
 * read_line: advance the cursor p until it reaches a NUL byte, a newline
 * or a Ctrl-Z byte (octal 032), then step over that terminating byte.
 *
 * buf:      address of the caller cursor into the text.
 *           NOTE(review): p is presumably initialised from *buf and the
 *           advanced position stored back; neither statement is visible
 *           in this extract - confirm.
 * line_buf: destination for the characters of the line.
 * size:     capacity of line_buf in bytes.
 *
 * NOTE(review): the loop body that copies into line_buf, the bound check
 * against size and the NUL termination promised by the header line are
 * not visible here. The condition guarding the p++ below is not visible
 * either.
 */
43 static void read_line( char **buf, char *line_buf, size_t size)
48 for(; *p && (*p != '\n') && (*p != '\032'); p++) {
53 p++; /* Go past the '\n' */
59 * Strip comment lines and blank lines from the data.
60 * Copies into a new buffer and frees the old.
61 * Returns the number of lines copied.
/*
 * clean_data: rewrite the text in *buf as a sequence of NUL-terminated
 * lines with comments and leading whitespace removed.
 *
 * buf:  address of the text buffer pointer. Per the header lines the old
 *       buffer is freed and replaced; the free and the pointer update are
 *       not visible in this extract.
 * size: address of the input length in bytes. The new buffer is allocated
 *       with *size + 1 bytes.
 *
 * Visible steps for each input line:
 *   - read_line copies the next line into linebuf;
 *   - strchr locates the first # character (the truncation at that point
 *     is described by the existing comment but is not visible here);
 *   - leading whitespace is skipped;
 *   - the remainder is copied to newbuf_p with safe_strcpy, bounded by the
 *     space left in newbuf, and newbuf_p moves past the string and its NUL.
 *
 * NOTE(review): the handling of a failed malloc after the message, the
 * skipping of lines that end up empty, the line counter and the return
 * statement are not visible in this extract.
 */
64 static size_t clean_data( char **buf, size_t *size)
69 char *newbuf = (char *)malloc( *size + 1);
70 char *newbuf_p = NULL;
73 fprintf(stderr, "%s: malloc fail for size %u.\n", prog_name, (unsigned int)(*size + 1));
83 read_line( &p, linebuf, sizeof(linebuf));
84 /* Null terminate after comment. */
85 if((cp = strchr( linebuf, '#'))!= NULL)
/*
 * NOTE(review): isspace is called with a plain char. If char is signed and
 * the input holds bytes above 0x7f the argument is negative, which the C
 * standard leaves undefined for the ctype functions; a cast to unsigned
 * char would avoid that.
 */
88 for(cp = linebuf;*cp && isspace(*cp); cp++)
/* Bound is the number of bytes of newbuf not yet consumed. */
94 safe_strcpy(newbuf_p, cp, *size - (newbuf_p - newbuf));
/* Step over the copied string and its terminating NUL. */
96 newbuf_p += (strlen(newbuf_p) + 1);
105 * Parse a uint16 from a codepage file.
/*
 * parse_uint16: convert the text in buf with strtol using base 0, so
 * decimal, octal (leading 0) and hexadecimal (leading 0x) forms are all
 * accepted.
 *
 * buf: text to convert.
 * uip: presumably receives the converted value on success; the store and
 *      both return statements are not visible in this extract - confirm.
 *
 * The visible check treats the input as invalid when strtol consumed no
 * characters (endptr == buf) or when the converted value exceeds 65535.
 *
 * NOTE(review): the long result is cast to unsigned int before the range
 * check. Negative input wraps to a large value and is rejected, but on a
 * platform where long is wider than unsigned int a value such as
 * 0x100000000 truncates to 0 and passes. Characters following the number
 * are not rejected by the visible condition.
 */
108 static BOOL parse_uint16(char *buf, uint16 *uip)
113 ui = (unsigned int)strtol(buf, &endptr, 0);
114 if(endptr == buf || ui > 65535)
122 * Print a parse error and exit.
/*
 * parse_error: report a parse failure on stderr. The message names the
 * program (prog_name), the input file and the reason, followed by the
 * offending line on a line of its own.
 *
 * buf:        text of the line being parsed when the error occurred.
 * input_file: name of the file being parsed.
 * msg:        description of what went wrong.
 *
 * NOTE(review): the header line says the function exits, but the exit
 * call is not visible in this extract - confirm. The callers in do_compile
 * continue to use the parsed values afterwards, so they rely on it.
 */
125 static void parse_error(const char *buf, const char *input_file, const char *msg)
127 fprintf(stderr, "%s: In file %s : %s whilst parsing line \n%s\n", prog_name,
128 input_file, msg, buf);
133 * Create a compiled unicode map file from a unicode map definition file.
/*
 * do_compile: outline of the steps that are visible in this extract.
 *
 *  1. sys_stat supplies the size of input_file. The file is opened with
 *     sys_fopen and read with a single fread into a malloc buffer of
 *     size + 1 bytes; the extra byte is for a terminating NUL.
 *  2. clean_data is given the addresses of buf and size and returns the
 *     number of lines to process.
 *  3. cp_to_ucs2 is zeroed with memset. Entries 1 to 65535 of ucs2_to_cp
 *     are set to the underscore character.
 *     NOTE(review): entry 0 of ucs2_to_cp is not written by that loop;
 *     confirm it is initialised on a line not visible here.
 *  4. For each line, next_token extracts two tokens which parse_uint16
 *     converts to cp and ucs2. SSVAL then stores ucs2 in cp_to_ucs2[cp]
 *     and cp in ucs2_to_cp[ucs2]. Failures are reported through
 *     parse_error together with the current line.
 *  5. The output buffer holds UNICODE_MAP_HEADER_SIZE bytes followed by
 *     the cp_to_ucs2 table (2*65536 bytes for a multibyte codepage,
 *     otherwise 2*256 bytes) and the 2*65536 byte ucs2_to_cp table. The
 *     header receives the version id, the codepage name and both table
 *     lengths.
 *  6. The buffer is written to output_file with a single fwrite.
 *
 * codepage:    name copied into the header of the output file.
 * input_file:  path of the map definition file.
 * output_file: path of the compiled map to create.
 *
 * NOTE(review): the return statements, the error exits after each
 * message, the fclose calls and the step that advances offset between
 * the two table copies are not visible in this extract.
 */
136 static int do_compile(const char *codepage, const char *input_file, const char *output_file)
142 char *output_buf = NULL;
/*
 * NOTE(review): both tables are automatic variables of 65536 uint16
 * entries each, roughly 256 KiB of stack in total if uint16 is two bytes.
 */
143 uint16 cp_to_ucs2[65536];
144 uint16 ucs2_to_cp[65536];
145 BOOL multibyte_code_page = False;
150 /* Get the size of the input file. Read the entire thing into memory. */
151 if(sys_stat((char *)input_file, &st)!= 0) {
152 fprintf(stderr, "%s: failed to get the file size for file %s. Error was %s\n",
153 prog_name, input_file, strerror(errno));
157 size = (size_t)st.st_size;
159 if((fp = sys_fopen(input_file, "r")) == NULL) {
160 fprintf(stderr, "%s: cannot open file %s for input.\n", prog_name, input_file);
164 /* As we will be reading text, allocate one more byte for a '\0' */
165 if((buf = (char *)malloc( size + 1 )) == NULL) {
/*
 * NOTE(review): size is assigned from a size_t cast and its address is
 * passed to clean_data as a size_t pointer, so size + 1 appears to be a
 * size_t, yet it is printed with %d. clean_data casts the same quantity
 * to unsigned int and uses %u.
 */
166 fprintf(stderr, "%s: malloc fail for size %d.\n", prog_name, size + 1);
171 if(fread( buf, 1, size, fp) != size) {
172 fprintf(stderr, "%s: read failed for file %s. Error was %s.\n", prog_name,
173 input_file, strerror(errno));
179 /* Null terminate the text read. */
182 /* Go through the data line by line, strip out comments (anything
183 after a '#' to end-of-line) and blank lines. The rest should be
187 num_lines = clean_data( &buf, &size);
190 * Initialize the output data.
193 memset(cp_to_ucs2, '\0', sizeof(cp_to_ucs2));
195 for (i = 1; i < 65536; i++)
196 ucs2_to_cp[i] = (uint16)'_';
198 /* Now convert the lines into the compiled form. */
200 for(i = 0; i < num_lines; i++) {
206 /* Get the codepage value. */
207 if(!next_token(&p, token_buf, NULL, sizeof(token_buf)))
208 parse_error(buf, input_file, "cannot parse first value");
210 if(!parse_uint16( token_buf, &cp))
211 parse_error(buf, input_file, "first value doesn't resolve to an unsigned 16 bit integer");
/*
 * This flag later selects the full 2*65536 byte cp_to_ucs2 table.
 * NOTE(review): the condition guarding the assignment is not visible.
 */
214 multibyte_code_page = True;
216 /* Get the ucs2 value. */
218 if(!next_token(&p, token_buf, NULL, sizeof(token_buf))) {
221 * Some of the multibyte codepage to unicode map files
222 * list a single byte as a leading multibyte and have no
/* Skip to the next NUL-terminated line in the cleaned buffer. */
226 buf += (strlen(buf) + 1);
230 if(!parse_uint16( token_buf, &ucs2))
231 parse_error(buf, input_file, "second value doesn't resolve to an unsigned 16 bit integer");
234 * Set up the cross reference in little-endian format.
237 SSVAL(((char *)&cp_to_ucs2[cp]),0,ucs2);
238 SSVAL(((char *)&ucs2_to_cp[ucs2]),0,cp);
/* Advance to the next NUL-terminated line in the cleaned buffer. */
243 buf += (strlen(buf) + 1);
/* Header plus both tables; the first table is shorter for single byte codepages. */
246 size = UNICODE_MAP_HEADER_SIZE + (multibyte_code_page ? (4*65536) : (2*256 + 2*65536));
248 if((output_buf = (char *)malloc( size )) == NULL) {
/* NOTE(review): size appears to be a size_t but is printed with %d. */
249 fprintf(stderr, "%s: output buffer malloc fail for size %d.\n", prog_name, size);
254 /* Setup the output file header. */
255 SSVAL(output_buf,UNICODE_MAP_VERSION_OFFSET,UNICODE_MAP_FILE_VERSION_ID);
/* Zero the name field first so the copied name is NUL padded. */
256 memset(&output_buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET],'\0',UNICODE_MAP_CODEPAGE_ID_SIZE);
257 safe_strcpy(&output_buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage, UNICODE_MAP_CODEPAGE_ID_SIZE - 1);
258 output_buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET+UNICODE_MAP_CODEPAGE_ID_SIZE-1] = '\0';
260 offset = UNICODE_MAP_HEADER_SIZE;
262 if (multibyte_code_page) {
263 SIVAL(output_buf,UNICODE_MAP_CP_TO_UNICODE_LENGTH_OFFSET,2*65536);
264 memcpy(output_buf+offset, (char *)cp_to_ucs2, 2*65536);
267 SIVAL(output_buf,UNICODE_MAP_CP_TO_UNICODE_LENGTH_OFFSET,2*256);
268 memcpy(output_buf+offset, (char *)cp_to_ucs2, 2*256);
271 SIVAL(output_buf,UNICODE_MAP_UNICODE_TO_CP_LENGTH_OFFSET,65536*2);
272 memcpy(output_buf+offset, (char *)ucs2_to_cp, 2*65536);
274 /* Now write out the output_buf. */
/*
 * NOTE(review): the file is opened in text mode (w) although the data is
 * binary. This is harmless on POSIX systems but would corrupt the output
 * where text mode translates line endings - confirm the target platforms.
 */
275 if((fp = sys_fopen(output_file, "w"))==NULL) {
276 fprintf(stderr, "%s: Cannot open output file %s. Error was %s.\n",
277 prog_name, output_file, strerror(errno));
281 if(fwrite(output_buf, 1, size, fp) != size) {
282 fprintf(stderr, "%s: Cannot write output file %s. Error was %s.\n",
283 prog_name, output_file, strerror(errno));
/*
 * main: program entry point. argv[2] and argv[3] are taken as the input
 * and output file names, and the value returned by do_compile becomes the
 * exit status.
 *
 * NOTE(review): the argument-count check that guards the usage call and
 * the assignments of prog_name and codepage (presumably from argv[0] and
 * argv[1]) are not visible in this extract - confirm.
 */
292 int main(int argc, char **argv)
294 const char *codepage = NULL;
295 char *input_file = NULL;
296 char *output_file = NULL;
301 unicode_map_usage(prog_name);
304 input_file = argv[2];
305 output_file = argv[3];
307 return do_compile( codepage, input_file, output_file);