/*
 * CleanWordChars.bsh - a BeanShell macro that cleans weird characters produced
 * by Microsoft Word, applying some substitutions.
 *
 * Copyright (C) 2009 De Franciscis Dimitri, http://www.megadix.it/
 *
 */

import java.util.regex.*;

/*
 * Replaces typographic punctuation in the text of the current text area with
 * plain ASCII equivalents:
 *
 *   U+2013, U+2014 (dashes)          ->  -
 *   U+2018, U+2019 (single quotes)   ->  '
 *   U+201C, U+201D (double quotes)   ->  "
 *   U+2022         (bullet)          ->  *
 *   U+2026         (ellipsis)        ->  ...
 *
 * Relies on the "buffer" and "textArea" variables, which are not defined in
 * this file and are expected to be supplied by the macro host.
 *
 * The text is only written back when at least one of the characters above is
 * present; otherwise the text area is left untouched.
 */
megadix_cleanWordChars() {
	String source = textArea.getText();

	// A plain character class is enough: each match is exactly one character,
	// and none of these characters has a case variant.
	Pattern wordChars = Pattern.compile(
		"[\u2013\u2014\u2018\u2019\u201C\u201D\u2022\u2026]");
	Matcher m = wordChars.matcher(source);

	// Nothing to clean: do not rewrite the text or open a compound edit.
	if (!m.find()) {
		return;
	}

	StringBuffer cleaned = new StringBuffer(source.length());
	do {
		String found = m.group();
		String replacement;
		if (found.equals("\u2013") || found.equals("\u2014")) {
			replacement = "-";
		} else if (found.equals("\u2018") || found.equals("\u2019")) {
			replacement = "'";
		} else if (found.equals("\u201C") || found.equals("\u201D")) {
			replacement = "\"";
		} else if (found.equals("\u2022")) {
			replacement = "*";
		} else {
			// Only U+2026 remains in the character class.
			replacement = "...";
		}
		// None of the replacements contains '$' or '\', so they are safe to
		// pass to appendReplacement without quoting.
		m.appendReplacement(cleaned, replacement);
	} while (m.find());
	m.appendTail(cleaned);

	// Begin the compound edit outside the try block so that endCompoundEdit()
	// is only called when beginCompoundEdit() actually succeeded.
	buffer.beginCompoundEdit();
	try {
		textArea.setText(cleaned.toString());
	} finally {
		buffer.endCompoundEdit();
	}
}

megadix_cleanWordChars();