// Splits a String by Character type as returned by java.lang.Character.getType(char) : String split « Data Type « Java

    

/*

 * Licensed to the Apache Software Foundation (ASF) under one or more

 * contributor license agreements.  See the NOTICE file distributed with

 * this work for additional information regarding copyright ownership.

 * The ASF licenses this file to You under the Apache License, Version 2.0

 * (the "License"); you may not use this file except in compliance with

 * the License.  You may obtain a copy of the License at

 * 

 *      http://www.apache.org/licenses/LICENSE-2.0

 * 

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

 */

import java.util.ArrayList;

import java.util.List;



public class Main {

  /**
   * Splits a String into tokens by character type, as returned by
   * {@code java.lang.Character.getType(char)}, applying camel-case rules.
   *
   * <p>Groups of contiguous characters of the same type are returned as
   * complete tokens, with the following exception: the character of type
   * {@code Character.UPPERCASE_LETTER}, if any, immediately preceding a token
   * of type {@code Character.LOWERCASE_LETTER} will belong to the following
   * token rather than to the preceding, if any,
   * {@code Character.UPPERCASE_LETTER} token. No characters are dropped:
   * concatenating the returned tokens yields the original String.</p>
   *
   * <pre>
   * Main.splitByCharacterTypeCamelCase(null)         = null
   * Main.splitByCharacterTypeCamelCase("")           = []
   * Main.splitByCharacterTypeCamelCase("ab de fg")   = ["ab", " ", "de", " ", "fg"]
   * Main.splitByCharacterTypeCamelCase("ab   de fg") = ["ab", "   ", "de", " ", "fg"]
   * Main.splitByCharacterTypeCamelCase("ab:cd:ef")   = ["ab", ":", "cd", ":", "ef"]
   * Main.splitByCharacterTypeCamelCase("number5")    = ["number", "5"]
   * Main.splitByCharacterTypeCamelCase("fooBar")     = ["foo", "Bar"]
   * Main.splitByCharacterTypeCamelCase("foo200Bar")  = ["foo", "200", "Bar"]
   * Main.splitByCharacterTypeCamelCase("ASFRules")   = ["ASF", "Rules"]
   * </pre>
   *
   * @param str the String to split, may be {@code null}
   * @return an array of parsed Strings, {@code null} if null String input
   * @since 2.4
   */
  public static String[] splitByCharacterTypeCamelCase(String str) {
    return splitByCharacterType(str, true);
  }

  /**
   * Splits a String into tokens by character type, as returned by
   * {@code java.lang.Character.getType(char)}.
   *
   * <p>Groups of contiguous characters of the same type are returned as
   * complete tokens, with the following exception: if {@code camelCase} is
   * {@code true}, the character of type {@code Character.UPPERCASE_LETTER},
   * if any, immediately preceding a token of type
   * {@code Character.LOWERCASE_LETTER} will belong to the following token
   * rather than to the preceding, if any, {@code Character.UPPERCASE_LETTER}
   * token.</p>
   *
   * @param str
   *          the String to split, may be {@code null}
   * @param camelCase
   *          whether to use so-called "camel-case" for letter types
   * @return an array of parsed Strings, {@code null} if null String input,
   *         an empty array if the input is the empty String
   * @since 2.4
   */
  private static String[] splitByCharacterType(String str, boolean camelCase) {
    if (str == null) {
      return null;
    }
    if (str.length() == 0) {
      return new String[0];
    }

    char[] chars = str.toCharArray();
    List<String> tokens = new ArrayList<String>();
    int tokenStart = 0;
    int currentType = Character.getType(chars[tokenStart]);

    for (int pos = tokenStart + 1; pos < chars.length; pos++) {
      int type = Character.getType(chars[pos]);
      if (type == currentType) {
        // Still inside the current run of same-typed characters.
        continue;
      }
      if (camelCase
          && type == Character.LOWERCASE_LETTER
          && currentType == Character.UPPERCASE_LETTER) {
        // Upper -> lower transition: the last upper-case character starts the
        // new token ("ASFRules" -> "ASF", "Rules").
        int newTokenStart = pos - 1;
        if (newTokenStart != tokenStart) {
          // More than one upper-case character preceded the lower-case run;
          // emit all but the last as their own token.
          tokens.add(new String(chars, tokenStart, newTokenStart - tokenStart));
          tokenStart = newTokenStart;
        }
        // Otherwise the current token is a single upper-case character that
        // simply continues into the lower-case run (e.g. "Bar").
      } else {
        // Ordinary type change: close the current token at this position.
        tokens.add(new String(chars, tokenStart, pos - tokenStart));
        tokenStart = pos;
      }
      currentType = type;
    }

    // Emit the trailing token; the input is non-empty so one always remains.
    tokens.add(new String(chars, tokenStart, chars.length - tokenStart));
    return tokens.toArray(new String[tokens.size()]);
  }

}
// www.java2java.com | Contact Us
// All other trademarks are property of their respective owners.