001: /*
002: * JacORB - a free Java ORB
003: *
004: * Copyright (C) 1997-2004 Gerald Brose.
005: *
006: * This library is free software; you can redistribute it and/or
007: * modify it under the terms of the GNU Library General Public
008: * License as published by the Free Software Foundation; either
009: * version 2 of the License, or (at your option) any later version.
010: *
011: * This library is distributed in the hope that it will be useful,
012: * but WITHOUT ANY WARRANTY; without even the implied warranty of
013: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014: * Library General Public License for more details.
015: *
016: * You should have received a copy of the GNU Library General Public
017: * License along with this library; if not, write to the Free
018: * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
019: */
020:
021: package org.jacorb.orb.giop;
022:
023: import java.io.BufferedReader;
024: import java.io.ByteArrayOutputStream;
025: import java.io.IOException;
026: import java.io.InputStreamReader;
027: import java.io.OutputStreamWriter;
028: import java.util.Properties;
029:
030: import org.omg.CONV_FRAME.CodeSetContext;
031: import org.omg.CONV_FRAME.CodeSetContextHelper;
032: import org.omg.CONV_FRAME.CodeSetComponent;
033: import org.omg.CONV_FRAME.CodeSetComponentInfo;
034: import org.omg.IOP.ServiceContext;
035: import org.omg.IOP.TAG_CODE_SETS;
036:
037: import org.apache.avalon.framework.configuration.ConfigurationException;
038: import org.apache.avalon.framework.logger.NullLogger;
039:
040: import org.jacorb.config.Configuration;
041: import org.jacorb.orb.CDRInputStream;
042: import org.jacorb.orb.CDROutputStream;
043:
044: /**
045: * @author Gerald Brose
046: * @version $Id: CodeSet.java,v 1.22 2006/07/26 13:24:38 nick.cross Exp $
047: */
048: public class CodeSet {
049: /**
050: * <code>ISO8859_1</code> represents standard ASCII.
051: * It is ISO 8859-1:1987; Latin Alphabet No. 1
052: */
053: public static final int ISO8859_1 = 0x00010001;
054:
055: /**
056: * <code>UTF8</code> represents UTF8 1-6 bytes for every character
057: * X/Open UTF-8; UCS Transformation Format 8 (UTF-8)
058: */
059: public static final int UTF8 = 0x05010001;
060:
061: /**
062: * <code>UTF16</code> represents extended UCS2, 2 or 4 bytes for every char
063: * ISO/IEC 10646-1:1993; UTF-16, UCS Transformation Format 16-bit form
064: */
065: public static final int UTF16 = 0x00010109;
066:
067: /**
068: * <code>ISO8859_STR</code> represents the canonical string form of ISO8859_1.
069: */
070: public static final String ISO8859_STR = "ISO8859_1";
071:
072: /**
073: * <code>UTF8_STR</code> represents the canonical string form of UTF8.
074: */
075: public static final String UTF8_STR = "UTF8";
076:
077: /**
078: * <code>UTF16_STR</code> represents the canonical string form of UTF16
079: */
080: public static final String UTF16_STR = "UTF16";
081:
082: /**
083: * <code>logger</code> is the static logger for Codeset.
084: */
085: private static org.apache.avalon.framework.logger.Logger logger = new NullLogger();
086:
087: /**
088: * static flag that keeps track of the configuration status.
089: */
090: private static boolean isConfigured = false;
091:
092: /**
093: * Describe variable <code>nativeCodeSetChar</code> here.
094: *
095: */
096: private static int nativeCodeSetChar = -1; //ISO8859_1;
097: private static int nativeCodeSetWchar = UTF16;
098:
099: public static String csName(int cs) {
100: switch (cs) {
101: case ISO8859_1:
102: return ISO8859_STR;
103: case UTF16:
104: return UTF16_STR;
105: case UTF8:
106: return UTF8_STR;
107: }
108: return "Unknown TCS: " + Integer.toHexString(cs);
109: }
110:
111: /**
112: * <code>configure</code> configures the logger and codesets. It is
113: * synchronized as the configuration parameters are static and therefore
114: * we do not want to 'collide' with another init.
115: *
116: * This class does not implement configurable which ideally it should. However
117: * as this method is static it would conflict with it.
118: *
119: * @param config a <code>Configuration</code> value
120: * @exception ConfigurationException if an error occurs
121: */
122: public synchronized static void configure(Configuration config)
123: throws ConfigurationException {
124: // Only do this once per JVM.
125: if (!isConfigured) {
126: String ncsc = config.getAttribute(
127: "jacorb.native_char_codeset", "");
128: String ncsw = config.getAttribute(
129: "jacorb.native_wchar_codeset", "");
130:
131: if (ncsc != null && !("".equals(ncsc))) {
132: int value = csInt(ncsc);
133: if (value != -1) {
134: nativeCodeSetChar = value;
135: } else if (logger.isErrorEnabled()) {
136: logger.error("Cannot set default NCSC to " + ncsc);
137: }
138: }
139:
140: if (ncsw != null && !("".equals(ncsw))) {
141: int value = csInt(ncsw);
142: if (value != -1) {
143: nativeCodeSetWchar = value;
144: } else if (logger.isErrorEnabled()) {
145: logger.error("Cannot set default NCSW to " + ncsw);
146: }
147: }
148:
149: logger = config.getNamedLogger("org.jacorb.orb.codeset");
150: isConfigured = true;
151: } else {
152: if (logger.isDebugEnabled()) {
153: logger
154: .debug("CodeSet is already configured; further attempts to reconfigure will be ignored!");
155: }
156:
157: }
158: }
159:
160: public static int csInt(String name) {
161: try {
162: return Integer.parseInt(name, 16);
163: } catch (NumberFormatException ex) {
164: // no problem, go on to match literal strings
165: }
166: String ucName = name.toUpperCase();
167: if (ucName.equals(ISO8859_STR)) {
168: return ISO8859_1;
169: } else if (ucName.equals(UTF8_STR)) {
170: return UTF8;
171: } else if (ucName.equals(UTF16_STR)) {
172: return UTF16;
173: } else {
174: return -1;
175: }
176: }
177:
178: public static int getTCSDefault() {
179: if (nativeCodeSetChar == -1) {
180: // See http://java.sun.com/j2se/1.4.1/docs/guide/intl/encoding.doc.html for
181: // a list of encodings and their canonical names.
182: //
183: // http://developer.java.sun.com/developer/bugParade/bugs/4772857.html
184: //
185: // This allows me to get the actual canonical name of the encoding as the
186: // System property may differ depending upon locale and OS.
187: OutputStreamWriter defaultStream = new OutputStreamWriter(
188: new ByteArrayOutputStream());
189: String sysenc = defaultStream.getEncoding();
190: try {
191: defaultStream.close();
192: } catch (IOException e) {
193: }
194: if (sysenc.equals(ISO8859_STR)) {
195: nativeCodeSetChar = ISO8859_1;
196: } else if (sysenc.equals(UTF8_STR)) {
197: nativeCodeSetChar = UTF8;
198: } else {
199: if (logger.isWarnEnabled()) {
200: logger.warn("Warning - unknown codeset (" + sysenc
201: + ") - defaulting to ISO-8859-1");
202: }
203: nativeCodeSetChar = ISO8859_1;
204: }
205: if (logger.isDebugEnabled()) {
206: logger.debug("TCS set to " + csName(nativeCodeSetChar));
207: }
208: }
209: return nativeCodeSetChar;
210: }
211:
212: public static int getTCSWDefault() {
213: return nativeCodeSetWchar;
214: }
215:
216: // at some point additional codeset alternatives are likely to be
217: // added in which case this single conversion default will not be
218: // sufficient.
219: public static int getConversionDefault() {
220: return UTF8;
221: }
222:
223: /**
224: * This method compares the codesets in the component with our
225: * native codeset.
226: */
227: public static int selectTCS(CodeSetComponentInfo cs_info) {
228: int with_native = selectCodeSet(cs_info.ForCharData,
229: getTCSDefault());
230:
231: if (with_native == -1) {
232: //no match with native codeset, so try with conversion
233: //codeset
234:
235: return selectCodeSet(cs_info.ForCharData,
236: getConversionDefault());
237: } else {
238: return with_native;
239: }
240: }
241:
242: /**
243: * This method compares the wide codesets in the component with our
244: * native wide codeset.
245: */
246: public static int selectTCSW(CodeSetComponentInfo cs_info) {
247: int with_native = selectCodeSet(cs_info.ForWcharData,
248: getTCSWDefault());
249:
250: if (with_native == -1) {
251: //no match with native codeset, so try with conversion
252: //codeset
253:
254: return selectCodeSet(cs_info.ForWcharData,
255: getConversionDefault());
256: } else {
257: return with_native;
258: }
259: }
260:
261: private static int selectCodeSet(CodeSetComponent cs_component,
262: int native_cs) {
263: // check if we support server's native sets
264: if (cs_component.native_code_set == native_cs) {
265: return native_cs;
266: }
267:
268: // is our native CS supported at server ?
269: for (int i = 0; i < cs_component.conversion_code_sets.length; i++) {
270: if (cs_component.conversion_code_sets[i] == native_cs) {
271: return native_cs;
272: }
273: }
274:
275: // can't find supported set ..
276: return -1;
277: }
278:
279: public static ServiceContext createCodesetContext(int tcs, int tcsw) {
280: // encapsulate context
281: final CDROutputStream os = new CDROutputStream();
282: try {
283: os.beginEncapsulatedArray();
284: CodeSetContextHelper.write(os,
285: new CodeSetContext(tcs, tcsw));
286:
287: return new ServiceContext(TAG_CODE_SETS.value, os
288: .getBufferCopy());
289: } finally {
290: os.close();
291: }
292: }
293:
294: public static CodeSetContext getCodeSetContext(
295: ServiceContext[] contexts) {
296: for (int i = 0; i < contexts.length; i++) {
297: if (contexts[i].context_id == TAG_CODE_SETS.value) {
298: // TAG_CODE_SETS found, demarshall
299: CDRInputStream is = new CDRInputStream(
300: (org.omg.CORBA.ORB) null,
301: contexts[i].context_data);
302: is.openEncapsulatedArray();
303:
304: return CodeSetContextHelper.read(is);
305: }
306: }
307:
308: return null;
309: }
310:
311: /*
312: * This is useful for debugging to print out Operating System details and
313: * the encoding of that system.
314: *
315: * Currently this prints the following information:
316: * Operating System
317: * OS Version
318: * OS Architecture
319: * User Region
320: * Java Version
321: * JacORB Version
322: * System File Encoding
323: * Cannonical Encoding
324: * If we are running on a Unix system and have used the command line argument
325: * '-a' then it also runs the commands:
326: * locale
327: * locale -a
328: *
329: * Remember the precendence levels of LC_ALL, LANG, LC_CTYPE etc. Preferred
330: * way to override for *all* categories is to set LC_ALL. If you just set LANG
331: * then if any other LC_* categories are set then these will take precedence.
332: * See http://publib16.boulder.ibm.com/pseries/en_US/aixprggd/nlsgdrf/locale_env.htm
333: */
334: public static void main(String args[]) {
335: if (args != null && args.length > 0
336: && (args[0].equals("-h") || !args[0].equals("-a"))) {
337: System.out
338: .println("Usage: org.jacorb.orb.connection.CodeSet [-a]");
339: System.exit(1);
340: }
341:
342: Properties props = System.getProperties();
343:
344: String osName = (String) props.get("os.name");
345:
346: System.out.println("Operating system name: " + osName);
347: System.out.println("Operating system version: "
348: + props.get("os.version"));
349: System.out.println("Operating system architecture: "
350: + props.get("os.arch"));
351: System.out.println("User region: "
352: + System.getProperty("user.region"));
353: System.out.println("JVM: " + props.get("java.vm.version"));
354: System.out.println("JacORB: "
355: + org.jacorb.util.Version.longVersion);
356:
357: System.out.println("System file encoding property: "
358: + System.getProperty("file.encoding"));
359:
360: String defaultIOEncoding = (new OutputStreamWriter(
361: new ByteArrayOutputStream())).getEncoding();
362: System.out.println("Cannonical encoding: " + defaultIOEncoding);
363: System.out.println("Default WChar encoding: "
364: + csName(nativeCodeSetWchar));
365:
366: // If we're not using Windows do some extra debug, printing out the locale information.
367: if ((osName.toLowerCase()).indexOf("windows") == -1
368: && args != null && args.length == 1
369: && args[0].equals("-a")) {
370: System.out.println("Locale is:");
371: try {
372: Process locale = Runtime.getRuntime().exec("locale");
373:
374: BufferedReader buffer = new BufferedReader(
375: new InputStreamReader(locale.getInputStream()));
376:
377: while (true) {
378: String line = buffer.readLine();
379: if (line == null) {
380: break;
381: }
382: System.out.println(" " + line);
383: }
384: buffer.close();
385: } catch (IOException e) {
386: System.err.println("Caught exception " + e);
387: }
388:
389: System.out.println("All available locales are:");
390: try {
391: Process locale = Runtime.getRuntime().exec("locale -a");
392:
393: BufferedReader buffer = new BufferedReader(
394: new InputStreamReader(locale.getInputStream()));
395:
396: while (true) {
397: String line = buffer.readLine();
398: if (line == null) {
399: break;
400: }
401: System.out.println(" " + line);
402: }
403: buffer.close();
404: } catch (IOException e) {
405: System.err.println("Caught exception " + e);
406: }
407: }
408: }
409: }
|