001/* BreakIterator.java -- Breaks text into elements
002   Copyright (C) 1998, 1999, 2001, 2004, 2005, 2007
003   Free Software Foundation, Inc.
004
005This file is part of GNU Classpath.
006
007GNU Classpath is free software; you can redistribute it and/or modify
008it under the terms of the GNU General Public License as published by
009the Free Software Foundation; either version 2, or (at your option)
010any later version.
011
012GNU Classpath is distributed in the hope that it will be useful, but
013WITHOUT ANY WARRANTY; without even the implied warranty of
014MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
015General Public License for more details.
016
017You should have received a copy of the GNU General Public License
018along with GNU Classpath; see the file COPYING.  If not, write to the
019Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02002110-1301 USA.
021
022Linking this library statically or dynamically with other modules is
023making a combined work based on this library.  Thus, the terms and
024conditions of the GNU General Public License cover the whole
025combination.
026
027As a special exception, the copyright holders of this library give you
028permission to link this library with independent modules to produce an
029executable, regardless of the license terms of these independent
030modules, and to copy and distribute the resulting executable under
031terms of your choice, provided that you also meet, for each linked
032independent module, the terms and conditions of the license of that
033module.  An independent module is a module which is not derived from
034or based on this library.  If you modify this library, you may extend
035this exception to your version of the library, but you are not
036obligated to do so.  If you do not wish to do so, delete this
037exception statement from your version. */
038
039
040package java.text;
041
042import gnu.java.locale.LocaleHelper;
043
044import gnu.java.text.CharacterBreakIterator;
045import gnu.java.text.LineBreakIterator;
046import gnu.java.text.SentenceBreakIterator;
047import gnu.java.text.WordBreakIterator;
048
049import java.text.spi.BreakIteratorProvider;
050
051import java.util.Locale;
052import java.util.MissingResourceException;
053import java.util.ResourceBundle;
054import java.util.ServiceLoader;
055
056/**
057 * This class iterates over text elements such as words, lines, sentences,
058 * and characters.  It can only iterate over one of these text elements at
059 * a time.  An instance of this class configured for the desired iteration
060 * type is created by calling one of the static factory methods, not
061 * by directly calling a constructor.
062 *
063 * The standard iterators created by the factory methods in this
064 * class will be valid upon creation.  That is, their methods will
065 * not cause exceptions if called before you call setText().
066 *
067 * @author Tom Tromey (tromey@cygnus.com)
068 * @author Aaron M. Renn (arenn@urbanophile.com)
069 * @date March 19, 1999
070 */
071/* Written using "Java Class Libraries", 2nd edition, plus online
072 * API docs for JDK 1.2 beta from http://www.javasoft.com.
073 * Status:  Believed complete and correct to 1.1.
074 */
075public abstract class BreakIterator implements Cloneable
076{
077  /**
078   * This value is returned by the <code>next()</code> and
079   * <code>previous</code> in order to indicate that the end of the
080   * text has been reached.
081   */
082  // The value was discovered by writing a test program.
083  public static final int DONE = -1;
084
085  /**
086   * This method initializes a new instance of <code>BreakIterator</code>.
087   * This protected constructor is available to subclasses as a default
088   * no-arg superclass constructor.
089   */
090  protected BreakIterator ()
091  {
092  }
093
094  /**
095   * Create a clone of this object.
096   */
097  public Object clone ()
098  {
099    try
100      {
101        return super.clone();
102      }
103    catch (CloneNotSupportedException e)
104      {
105        return null;
106      }
107  }
108
109  /**
110   * This method returns the index of the current text element boundary.
111   *
112   * @return The current text boundary.
113   */
114  public abstract int current ();
115
116  /**
117   * This method returns the first text element boundary in the text being
118   * iterated over.
119   *
120   * @return The first text boundary.
121   */
122  public abstract int first ();
123
124  /**
125   * This methdod returns the offset of the text element boundary following
126   * the specified offset.
127   *
128   * @param pos The text index from which to find the next text boundary.
129   *
130   * @return The next text boundary following the specified index.
131   */
132  public abstract int following (int pos);
133
134  /**
135   * This method returns a list of locales for which instances of
136   * <code>BreakIterator</code> are available.
137   *
138   * @return A list of available locales
139   */
140  public static synchronized Locale[] getAvailableLocales ()
141  {
142    Locale[] l = new Locale[1];
143    l[0] = Locale.US;
144    return l;
145  }
146
147  private static BreakIterator getInstance (String type, Locale loc)
148  {
149    String className;
150    try
151      {
152        ResourceBundle res
153          = ResourceBundle.getBundle("gnu.java.locale.LocaleInformation",
154                                     loc, ClassLoader.getSystemClassLoader());
155        className = res.getString(type);
156      }
157    catch (MissingResourceException x)
158      {
159        return null;
160      }
161    try
162      {
163        Class k = Class.forName(className);
164        return (BreakIterator) k.newInstance();
165      }
166    catch (ClassNotFoundException x1)
167      {
168        return null;
169      }
170    catch (InstantiationException x2)
171      {
172        return null;
173      }
174    catch (IllegalAccessException x3)
175      {
176        return null;
177      }
178  }
179
180  /**
181   * This method returns an instance of <code>BreakIterator</code> that will
182   * iterate over characters as defined in the default locale.
183   *
184   * @return A <code>BreakIterator</code> instance for the default locale.
185   */
186  public static BreakIterator getCharacterInstance ()
187  {
188    return getCharacterInstance (Locale.getDefault());
189  }
190
191  /**
192   * This method returns an instance of <code>BreakIterator</code> that will
193   * iterate over characters as defined in the specified locale.
194   *
195   * @param locale The desired locale.
196   *
197   * @return A <code>BreakIterator</code> instance for the specified locale.
198   */
199  public static BreakIterator getCharacterInstance (Locale locale)
200  {
201    BreakIterator r = getInstance("CharacterIterator", locale);
202    if (r != null)
203      return r;
204    for (BreakIteratorProvider p :
205           ServiceLoader.load(BreakIteratorProvider.class))
206      {
207        for (Locale loc : p.getAvailableLocales())
208          {
209            if (loc.equals(locale))
210              {
211                BreakIterator bi = p.getCharacterInstance(locale);
212                if (bi != null)
213                  return bi;
214                break;
215              }
216          }
217      }
218    if (locale.equals(Locale.ROOT))
219      return new CharacterBreakIterator();
220    return getCharacterInstance(LocaleHelper.getFallbackLocale(locale));
221  }
222
223  /**
224   * This method returns an instance of <code>BreakIterator</code> that will
225   * iterate over line breaks as defined in the default locale.
226   *
227   * @return A <code>BreakIterator</code> instance for the default locale.
228   */
229  public static BreakIterator getLineInstance ()
230  {
231    return getLineInstance (Locale.getDefault());
232  }
233
234  /**
235   * This method returns an instance of <code>BreakIterator</code> that will
236   * iterate over line breaks as defined in the specified locale.
237   *
238   * @param locale The desired locale.
239   *
240   * @return A <code>BreakIterator</code> instance for the default locale.
241   */
242  public static BreakIterator getLineInstance (Locale locale)
243  {
244    BreakIterator r = getInstance ("LineIterator", locale);
245    if (r != null)
246      return r;
247    for (BreakIteratorProvider p :
248           ServiceLoader.load(BreakIteratorProvider.class))
249      {
250        for (Locale loc : p.getAvailableLocales())
251          {
252            if (loc.equals(locale))
253              {
254                BreakIterator bi = p.getLineInstance(locale);
255                if (bi != null)
256                  return bi;
257                break;
258              }
259          }
260      }
261    if (locale.equals(Locale.ROOT))
262      return new LineBreakIterator();
263    return getLineInstance(LocaleHelper.getFallbackLocale(locale));
264  }
265
266  /**
267   * This method returns an instance of <code>BreakIterator</code> that will
268   * iterate over sentences as defined in the default locale.
269   *
270   * @return A <code>BreakIterator</code> instance for the default locale.
271   */
272  public static BreakIterator getSentenceInstance ()
273  {
274    return getSentenceInstance (Locale.getDefault());
275  }
276
277  /**
278   * This method returns an instance of <code>BreakIterator</code> that will
279   * iterate over sentences as defined in the specified locale.
280   *
281   * @param locale The desired locale.
282   *
283   * @return A <code>BreakIterator</code> instance for the default locale.
284   */
285  public static BreakIterator getSentenceInstance (Locale locale)
286  {
287    BreakIterator r = getInstance ("SentenceIterator", locale);
288    if (r != null)
289      return r;
290    for (BreakIteratorProvider p :
291           ServiceLoader.load(BreakIteratorProvider.class))
292      {
293        for (Locale loc : p.getAvailableLocales())
294          {
295            if (loc.equals(locale))
296              {
297                BreakIterator bi = p.getSentenceInstance(locale);
298                if (bi != null)
299                  return bi;
300                break;
301              }
302          }
303      }
304    if (locale.equals(Locale.ROOT))
305      return new SentenceBreakIterator();
306    return getSentenceInstance(LocaleHelper.getFallbackLocale(locale));
307  }
308
309  /**
310   * This method returns the text this object is iterating over as a
311   * <code>CharacterIterator</code>.
312   *
313   * @return The text being iterated over.
314   */
315  public abstract CharacterIterator getText ();
316
317  /**
318   * This method returns an instance of <code>BreakIterator</code> that will
319   * iterate over words as defined in the default locale.
320   *
321   * @return A <code>BreakIterator</code> instance for the default locale.
322   */
323  public static BreakIterator getWordInstance ()
324  {
325    return getWordInstance (Locale.getDefault());
326  }
327
328  /**
329   * This method returns an instance of <code>BreakIterator</code> that will
330   * iterate over words as defined in the specified locale.
331   *
332   * @param locale The desired locale.
333   *
334   * @return A <code>BreakIterator</code> instance for the default locale.
335   */
336  public static BreakIterator getWordInstance (Locale locale)
337  {
338    BreakIterator r = getInstance ("WordIterator", locale);
339    if (r != null)
340      return r;
341    for (BreakIteratorProvider p :
342           ServiceLoader.load(BreakIteratorProvider.class))
343      {
344        for (Locale loc : p.getAvailableLocales())
345          {
346            if (loc.equals(locale))
347              {
348                BreakIterator bi = p.getWordInstance(locale);
349                if (bi != null)
350                  return bi;
351                break;
352              }
353          }
354      }
355    if (locale.equals(Locale.ROOT))
356      return new WordBreakIterator();
357    return getWordInstance(LocaleHelper.getFallbackLocale(locale));
358  }
359
360  /**
361   * This method tests whether or not the specified position is a text
362   * element boundary.
363   *
364   * @param pos The text position to test.
365   *
366   * @return <code>true</code> if the position is a boundary,
367   * <code>false</code> otherwise.
368   */
369  public boolean isBoundary (int pos)
370  {
371    if (pos == 0)
372      return true;
373    return following (pos - 1) == pos;
374  }
375
376  /**
377   * This method returns the last text element boundary in the text being
378   * iterated over.
379   *
380   * @return The last text boundary.
381   */
382  public abstract int last ();
383
384  /**
385   * This method returns the text element boundary following the current
386   * text position.
387   *
388   * @return The next text boundary.
389   */
390  public abstract int next ();
391
392  /**
393   * This method returns the n'th text element boundary following the current
394   * text position.
395   *
396   * @param n The number of text element boundaries to skip.
397   *
398   * @return The next text boundary.
399   */
400  public abstract int next (int n);
401
402  /**
403   * This methdod returns the offset of the text element boundary preceding
404   * the specified offset.
405   *
406   * @param pos The text index from which to find the preceding text boundary.
407   *
408   * @returns The next text boundary preceding the specified index.
409   */
410  public int preceding (int pos)
411  {
412    if (following (pos) == DONE)
413      last ();
414    while (previous () >= pos)
415      ;
416    return current ();
417  }
418
419  /**
420   * This method returns the text element boundary preceding the current
421   * text position.
422   *
423   * @return The previous text boundary.
424   */
425  public abstract int previous ();
426
427  /**
428   * This method sets the text string to iterate over.
429   *
430   * @param newText The <code>String</code> to iterate over.
431   */
432  public void setText (String newText)
433  {
434    setText (new StringCharacterIterator (newText));
435  }
436
437  /**
438   * This method sets the text to iterate over from the specified
439   * <code>CharacterIterator</code>.
440   *
441   * @param newText The desired <code>CharacterIterator</code>.
442   */
443  public abstract void setText (CharacterIterator newText);
444}