/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.List;
public class Main {

    /**
     * <p>Splits a String by Character type as returned by
     * <code>java.lang.Character.getType(char)</code>. Groups of contiguous
     * characters of the same type are returned as complete tokens, with the
     * following exception: the character of type
     * <code>Character.UPPERCASE_LETTER</code>, if any, immediately
     * preceding a token of type <code>Character.LOWERCASE_LETTER</code>
     * will belong to the following token rather than to the preceding, if any,
     * <code>Character.UPPERCASE_LETTER</code> token.
     * <pre>
     * Main.splitByCharacterTypeCamelCase(null)         = null
     * Main.splitByCharacterTypeCamelCase("")           = []
     * Main.splitByCharacterTypeCamelCase("ab de fg")   = ["ab", " ", "de", " ", "fg"]
     * Main.splitByCharacterTypeCamelCase("ab   de fg") = ["ab", "   ", "de", " ", "fg"]
     * Main.splitByCharacterTypeCamelCase("ab:cd:ef")   = ["ab", ":", "cd", ":", "ef"]
     * Main.splitByCharacterTypeCamelCase("number5")    = ["number", "5"]
     * Main.splitByCharacterTypeCamelCase("fooBar")     = ["foo", "Bar"]
     * Main.splitByCharacterTypeCamelCase("foo200Bar")  = ["foo", "200", "Bar"]
     * Main.splitByCharacterTypeCamelCase("ASFRules")   = ["ASF", "Rules"]
     * </pre>
     * @param str the String to split, may be <code>null</code>
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.4
     */
    public static String[] splitByCharacterTypeCamelCase(String str) {
        return splitByCharacterType(str, true);
    }

    /**
     * <p>
     * Splits a String by Character type as returned by
     * <code>java.lang.Character.getType(char)</code>. Groups of contiguous
     * characters of the same type are returned as complete tokens, with the
     * following exception: if <code>camelCase</code> is <code>true</code>,
     * the character of type <code>Character.UPPERCASE_LETTER</code>, if any,
     * immediately preceding a token of type
     * <code>Character.LOWERCASE_LETTER</code> will belong to the following
     * token rather than to the preceding, if any,
     * <code>Character.UPPERCASE_LETTER</code> token.
     *
     * @param str
     *            the String to split, may be <code>null</code>
     * @param camelCase
     *            whether to use so-called "camel-case" for letter types
     * @return an array of parsed Strings, <code>null</code> if null String
     *         input; an empty array if the input is the empty String
     * @since 2.4
     */
    private static String[] splitByCharacterType(String str, boolean camelCase) {
        if (str == null) {
            return null;
        }
        if (str.length() == 0) {
            return new String[0];
        }
        char[] c = str.toCharArray();
        List<String> list = new ArrayList<String>();
        int tokenStart = 0;
        int currentType = Character.getType(c[tokenStart]);
        for (int pos = tokenStart + 1; pos < c.length; pos++) {
            int type = Character.getType(c[pos]);
            if (type == currentType) {
                // Still inside the same run of characters; keep scanning.
                continue;
            }
            if (camelCase && type == Character.LOWERCASE_LETTER
                    && currentType == Character.UPPERCASE_LETTER) {
                // Upper -> lower transition: the last upper-case letter starts
                // the new token (e.g. "ASFRules" -> "ASF", "Rules").
                int newTokenStart = pos - 1;
                if (newTokenStart != tokenStart) {
                    // More than one upper-case letter precedes the lower-case
                    // run: emit all but the last one as their own token.
                    list.add(new String(c, tokenStart, newTokenStart - tokenStart));
                    tokenStart = newTokenStart;
                }
                // Otherwise a single upper-case letter simply stays at the
                // head of the current token; nothing is emitted yet.
            } else {
                // Ordinary type change: close the current token at pos.
                list.add(new String(c, tokenStart, pos - tokenStart));
                tokenStart = pos;
            }
            currentType = type;
        }
        // Emit the trailing token that runs to the end of the input.
        list.add(new String(c, tokenStart, c.length - tokenStart));
        return list.toArray(new String[list.size()]);
    }
}
|