package info.monitorenter.cpdetector;

import info.monitorenter.cpdetector.io.CodepageDetectorProxy;
import info.monitorenter.cpdetector.io.FileFilterExtensions;
import info.monitorenter.cpdetector.io.ICodepageDetector;
import info.monitorenter.cpdetector.io.JChardetFacade;
import info.monitorenter.cpdetector.io.ParsingDetector;
import info.monitorenter.cpdetector.io.UnknownCharset;
import info.monitorenter.cpdetector.reflect.SingletonLoader;
import info.monitorenter.util.FileUtil;
import jargs.gnu.CmdLineParser;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.Set;
import java.util.SortedMap;
import java.util.StringTokenizer;
import org.apache.thrift.protocol.TMultiplexedProtocol;

/* loaded from: classes3.dex */
/**
 * Command line tool that walks a directory tree, detects the codepage of every
 * accepted file and copies it below an output directory into a sub directory
 * named after the charset (optionally transcoding it to a target charset).
 *
 * <p>Typical use is {@link #main(String[])}: arguments are parsed by
 * {@link #parseArgs(String[])} and the work is done by {@link #process()}.
 *
 * <p>The copy buffers are static and shared by all instances, so several
 * processors must not copy files concurrently.
 */
public class CodepageProcessor extends ACmdLineArgsInheritor {
    /** Platform file separator, used to split the file name off an absolute path. */
    private static final String FILE_SEPARATOR = System.getProperty("file.separator");
    /** Shared character buffer used while transcoding a document. */
    private static final char[] TRANSCODE_BUFFER = new char[1024];
    /** Shared byte buffer used while copying a document unchanged. */
    private static final byte[] RAW_TRANSPORT_BUFFER = new byte[1024];

    /** Decides which files below the collection root are processed. */
    private FileFilter extensionFilter;
    /** Root of the sorted output tree. */
    private File outputDir;
    /** Charsets available on this VM; filled by {@link #loadCodepages()}. */
    private Charset[] parseCodepages;
    /** Root directory of the documents to process. */
    protected File collectionRoot = null;
    /** If true, documents with undetected charset are copied instead of dropped. */
    private boolean moveUnknown = false;
    /** If true, {@link #process()} only prints the available charsets. */
    private boolean printCharsets = false;
    private boolean verbose = false;
    /** Pause before each document, in milliseconds. */
    private long wait = 0;
    /** Charset to transcode detected documents to, or null for plain sorting. */
    private Charset targetCodepage = null;
    /** Detector chain that is asked for the codepage of each document. */
    protected CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();

    /** Registers all command line options understood by this tool. */
    public CodepageProcessor() {
        addCmdLineOption("documents", new CmdLineParser.Option.StringOption('r', "documents"));
        addCmdLineOption("extensions", new CmdLineParser.Option.StringOption('e', "extensions"));
        addCmdLineOption("outputDir", new CmdLineParser.Option.StringOption('o', "outputDir"));
        addCmdLineOption("moveUnknown", new CmdLineParser.Option.BooleanOption('m', "moveUnknown"));
        addCmdLineOption("verbose", new CmdLineParser.Option.BooleanOption('v', "verbose"));
        addCmdLineOption("wait", new CmdLineParser.Option.IntegerOption('w', "wait"));
        addCmdLineOption("transform", new CmdLineParser.Option.StringOption('t', "transform"));
        addCmdLineOption("detectors", new CmdLineParser.Option.StringOption('d', "detectors"));
        addCmdLineOption("charsets", new CmdLineParser.Option.BooleanOption('c', "charsets"));
    }

    /**
     * Program entry point: parses the arguments and runs the processor.
     *
     * @param strArr the command line arguments
     * @throws Exception if argument parsing or processing fails
     */
    public static void main(String[] strArr) throws Exception {
        CodepageProcessor codepageProcessor = new CodepageProcessor();
        codepageProcessor.parseArgs(strArr);
        codepageProcessor.process();
    }

    /**
     * Splits a list separated by ';' or ',' into its tokens.
     *
     * @param str the list to split, may be null
     * @return the tokens in order of appearance, or null if {@code str} is null
     */
    private final String[] parseCSVList(String str) {
        if (str == null) {
            return null;
        }
        LinkedList<String> tokens = new LinkedList<String>();
        StringTokenizer tokenizer = new StringTokenizer(str, ";,");
        while (tokenizer.hasMoreTokens()) {
            tokens.add(tokenizer.nextToken());
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /** Prints every known charset followed by its aliases, loading the charsets on demand. */
    private void printCharsets() {
        if (this.parseCodepages == null || this.parseCodepages.length == 0) {
            loadCodepages();
        }
        for (int i = 0; i < this.parseCodepages.length; i++) {
            Charset charset = this.parseCodepages[i];
            // NOTE(review): the separator constant comes from the Thrift library; presumably the
            // decompiler substituted it for a plain string literal - verify against the original.
            System.out.println("  " + charset.name() + TMultiplexedProtocol.SEPARATOR);
            Iterator<String> aliases = charset.aliases().iterator();
            while (aliases.hasNext()) {
                System.out.println("    " + aliases.next());
            }
        }
    }

    /**
     * Detects the codepage of a single document and copies it into the output tree.
     *
     * <p>Documents with undetected charset are dropped unless {@code moveUnknown} is set. If a
     * target codepage is configured and the charset was detected, the document is transcoded,
     * otherwise it is copied byte by byte.
     *
     * @param file the document to process
     * @throws Exception if detection or copying fails
     */
    private void process(File file) throws Exception {
        try {
            Thread.sleep(this.wait);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to callers instead of silently discarding it.
            Thread.currentThread().interrupt();
        }
        Map.Entry<String, String> pathAndName = FileUtil.cutDirectoryInformation(file.getAbsolutePath());
        String fileName = String.valueOf((Object) pathAndName.getValue());

        // Directory of the document relative to the collection root ("" if it cannot be derived).
        String absolutePath = file.getAbsolutePath();
        String rootPath = this.collectionRoot.getAbsolutePath();
        int lastSeparator = absolutePath.lastIndexOf(FILE_SEPARATOR);
        String relativeDir = rootPath.length() > lastSeparator
                ? ""
                : absolutePath.substring(rootPath.length(), lastSeparator + 1);
        if (this.verbose) {
            System.out.println("Processing document: " + relativeDir + "/" + fileName);
        }

        // toURI().toURL() escapes characters that are illegal in URLs; File.toURL() does not.
        Charset detected = this.detector.detectCodepage(file.toURI().toURL());
        if (detected == null || detected == UnknownCharset.getInstance()) {
            if (this.verbose) {
                System.out.println("  Charset not detected.");
            }
            if (!this.moveUnknown) {
                if (this.verbose) {
                    System.out.println("  Dropping document.");
                }
                return;
            }
            detected = UnknownCharset.getInstance();
        }

        if (this.targetCodepage == null || detected == UnknownCharset.getInstance()) {
            if (this.targetCodepage != null) {
                System.out.println("Skipping transformation of document " + file.getAbsolutePath() + " because it's charset could not be detected.");
            }
            // Fixed locale: the directory name must not depend on the default locale's casing rules.
            String charsetDirName = detected.name().toLowerCase(java.util.Locale.ENGLISH);
            File targetDir = resolveTargetDir(charsetDirName, relativeDir);
            if (targetDir.mkdirs() && this.verbose) {
                System.out.println("Created directory : " + targetDir.getAbsolutePath());
            }
            File target = new File(targetDir.getAbsolutePath() + "/" + fileName);
            if (this.verbose) {
                System.out.println("  Moving to \"" + target.getAbsolutePath() + "\".");
            }
            rawCopy(file, target);
            return;
        }

        File targetDir = resolveTargetDir(this.targetCodepage.name(), relativeDir);
        if (targetDir.mkdirs() && this.verbose) {
            System.out.println("  Created directory : " + targetDir.getAbsolutePath());
        }
        File target = new File(targetDir.getAbsolutePath() + "/" + fileName);
        if (this.verbose) {
            System.out.println("  Moving to \"" + target.getAbsolutePath() + "\".");
        }
        if (target.exists() && target.length() == file.length()) {
            if (this.verbose) {
                System.out.println("  File already exists and has same size. Skipping move.");
            }
            return;
        }
        target.createNewFile();
        transcode(file, target, detected, this.targetCodepage);
    }

    /** Builds the output directory for a charset folder name and a relative source directory. */
    private File resolveTargetDir(String charsetDirName, String relativeDir) {
        StringBuilder path = new StringBuilder(this.outputDir.getAbsolutePath());
        path.append("/").append(charsetDirName).append("/");
        if (relativeDir.length() > 0) {
            path.append(relativeDir).append("/");
        }
        return new File(path.toString());
    }

    /** Copies source to target while converting the text between the two given charsets. */
    private void transcode(File source, File target, Charset sourceCharset, Charset targetCharset) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(source), sourceCharset));
        try {
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(target), targetCharset));
            try {
                int read;
                while ((read = reader.read(TRANSCODE_BUFFER, 0, TRANSCODE_BUFFER.length)) != -1) {
                    writer.write(TRANSCODE_BUFFER, 0, read);
                }
            } finally {
                writer.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Processes a file, or every entry of a directory recursively (last listed entry first).
     *
     * @param file the file or directory to process
     * @throws IllegalArgumentException if {@code file} is null or does not exist
     * @throws Exception if processing a document fails
     */
    private void processRecursive(File file) throws Exception {
        if (file == null) {
            throw new IllegalArgumentException("File argument is null!");
        }
        if (!file.exists()) {
            throw new IllegalArgumentException(file.getAbsolutePath() + " does not exist.");
        }
        if (!file.isDirectory()) {
            if (this.extensionFilter.accept(file)) {
                process(file);
            }
            return;
        }
        File[] children = file.listFiles();
        if (children == null) {
            // listFiles() returns null when the directory cannot be read; skip it and go on.
            System.err.println("Could not list directory: " + file.getAbsolutePath());
            return;
        }
        for (int i = children.length - 1; i >= 0; i--) {
            processRecursive(children[i]);
        }
    }

    /**
     * Copies a file byte by byte. Nothing is copied if the target already exists with the
     * same length as the source.
     *
     * @param file the source file
     * @param file2 the target file, created if missing
     * @throws IOException if reading or writing fails
     */
    private void rawCopy(File file, File file2) throws IOException {
        if (!file2.exists()) {
            file2.createNewFile();
        } else if (file.length() == file2.length()) {
            return;
        }
        BufferedInputStream in = new BufferedInputStream(new FileInputStream(file));
        try {
            BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(file2));
            try {
                int read;
                while ((read = in.read(RAW_TRANSPORT_BUFFER, 0, RAW_TRANSPORT_BUFFER.length)) != -1) {
                    out.write(RAW_TRANSPORT_BUFFER, 0, read);
                }
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /** Prints the effective configuration of this processor to standard out. */
    protected void describe() {
        StringBuilder text = new StringBuilder();
        text.append("Setup:\n");
        text.append("  Collection-Root        : ").append(this.collectionRoot.getAbsolutePath()).append("\n");
        text.append("  Output-Dir             : ").append(this.outputDir.getAbsolutePath()).append("\n");
        text.append("  Move unknown           : ").append(this.moveUnknown).append("\n");
        text.append("  verbose                : ").append(this.verbose).append("\n");
        text.append("  wait                   : ").append(this.wait).append("\n");
        if (this.targetCodepage != null) {
            text.append("  transform to codepage  : ").append(this.targetCodepage.name()).append("\n");
        }
        text.append("  detection algorithm    : ").append("\n");
        text.append(this.detector.toString());
        System.out.println(text.toString());
    }

    /** Loads all charsets available on this VM, printing them with their aliases if verbose. */
    void loadCodepages() {
        SortedMap<String, Charset> available = Charset.availableCharsets();
        if (this.verbose) {
            System.out.println("Loading system codepages...");
        }
        this.parseCodepages = new Charset[available.size()];
        int index = 0;
        for (Charset charset : available.values()) {
            if (this.verbose) {
                System.out.println("Charset: " + charset.name());
                Set<String> aliases = charset.aliases();
                System.out.println("  Aliases: ");
                for (String alias : aliases) {
                    System.out.println("    " + alias);
                }
            }
            this.parseCodepages[index++] = charset;
        }
    }

    /**
     * Parses the command line and configures this processor.
     *
     * <p>If the charsets option is given, only the print flag is set and all other options
     * are ignored.
     *
     * @param strArr the command line arguments
     * @throws MissingResourceException if the collection root or output directory is missing
     * @throws IllegalArgumentException if the target charset is illegal or the detector list
     *         is empty
     * @throws Exception if the superclass fails to parse the arguments
     */
    @Override // info.monitorenter.cpdetector.ACmdLineArgsInheritor
    public void parseArgs(String[] strArr) throws Exception {
        super.parseArgs(strArr);
        Object documentsArg = getParsedCmdLineOption("documents");
        Object extensionsArg = getParsedCmdLineOption("extensions");
        Object outputDirArg = getParsedCmdLineOption("outputDir");
        Object moveUnknownArg = getParsedCmdLineOption("moveUnknown");
        Object verboseArg = getParsedCmdLineOption("verbose");
        Object waitArg = getParsedCmdLineOption("wait");
        Object transformArg = getParsedCmdLineOption("transform");
        Object detectorsArg = getParsedCmdLineOption("detectors");
        Object charsetsArg = getParsedCmdLineOption("charsets");

        if (charsetsArg != null) {
            this.printCharsets = ((Boolean) charsetsArg).booleanValue();
            return;
        }
        if (documentsArg == null) {
            usage();
            throw new MissingResourceException("Parameter for collection root directory is missing.", "String", "-r");
        }
        this.collectionRoot = new File(documentsArg.toString());
        if (outputDirArg == null) {
            usage();
            throw new MissingResourceException("Parameter for output directory is missing.", "String", "-o");
        }
        this.outputDir = new File(outputDirArg.toString());

        if (extensionsArg != null) {
            this.extensionFilter = new FileFilterExtensions(parseCSVList(extensionsArg.toString()));
        } else {
            // No extensions given: accept every file.
            this.extensionFilter = new FileFilter() { // from class: info.monitorenter.cpdetector.CodepageProcessor.1
                @Override // java.io.FileFilter
                public boolean accept(File file) {
                    return true;
                }
            };
        }
        // Honour the parsed value, the same way the verbose and charsets flags do.
        if (Boolean.TRUE.equals(moveUnknownArg)) {
            this.moveUnknown = true;
        }
        if (Boolean.TRUE.equals(verboseArg)) {
            this.verbose = true;
        }
        if (waitArg != null) {
            // Multiply as long so that large second values do not overflow int.
            this.wait = ((Integer) waitArg).intValue() * 1000L;
        }
        if (transformArg != null) {
            String charsetName = (String) transformArg;
            try {
                this.targetCodepage = Charset.forName(charsetName);
            } catch (IllegalArgumentException e) {
                // The list of legal values is needed here but is normally loaded only at the
                // end of this method, so load it now.
                if (this.parseCodepages == null) {
                    loadCodepages();
                }
                StringBuilder message = new StringBuilder();
                message.append("Given charset name: \"");
                message.append(charsetName);
                message.append("\" for option -t is illegal: \n");
                message.append("  ");
                message.append(e.getMessage());
                message.append("\n");
                message.append("   Legal values are: \n");
                for (int i = 0; i < this.parseCodepages.length; i++) {
                    message.append("    ");
                    message.append(this.parseCodepages[i].name());
                    message.append("\n");
                }
                throw new IllegalArgumentException(message.toString(), e);
            }
        }
        if (detectorsArg != null) {
            String[] detectorClassNames = parseCSVList((String) detectorsArg);
            if (detectorClassNames.length == 0) {
                throw new IllegalArgumentException("You specified the codepage detector argument \"-d\" but ommited any comma-separated fully qualified class-name.");
            }
            for (int i = 0; i < detectorClassNames.length; i++) {
                try {
                    ICodepageDetector custom = (ICodepageDetector) SingletonLoader.getInstance().newInstance(detectorClassNames[i]);
                    if (custom != null) {
                        this.detector.add(custom);
                    }
                } catch (InstantiationException e) {
                    // A broken detector is reported and skipped; the remaining ones are still used.
                    System.err.println("Could not instantiate custom ICodepageDetector: " + detectorClassNames[i] + " (argument \"-d\"): " + e.getMessage());
                }
            }
        } else {
            this.detector.add(new ParsingDetector(this.verbose));
            this.detector.add(JChardetFacade.getInstance());
        }
        loadCodepages();
    }

    /**
     * Runs the configured action: either prints the available charsets or verifies the
     * directories, prints the setup and processes the collection root recursively.
     *
     * @throws Exception if verification or processing fails
     */
    public final void process() throws Exception {
        if (this.printCharsets) {
            printCharsets();
        } else {
            verifyFiles();
            describe();
            processRecursive(this.collectionRoot);
        }
        System.out.println("No exceptional program flow occured!");
    }

    /** Prints the command line usage to standard out. */
    @Override // info.monitorenter.cpdetector.ACmdLineArgsInheritor
    protected void usage() {
        StringBuilder text = new StringBuilder();
        text.append("usage: java -cp jargs-1.0.jar");
        // Classpath entries are joined by the path separator, like the entries that follow.
        text.append(File.pathSeparatorChar);
        text.append("cpdetector_1.0.9.jar");
        text.append(File.pathSeparatorChar);
        text.append("antlr-2.7.4.jar");
        text.append(File.pathSeparatorChar);
        text.append("chardet.jar info.monitorenter.cpdetector.CodepageProcessor -r <testdocumentdir> -o <testoutputdir> [options]");
        text.append("\n");
        text.append("options: \n");
        text.append("\n  Optional:\n");
        text.append("  -c              : Only print available charsets on this system.\n");
        text.append("  -e <extensions> : A comma- or semicolon- separated string for document extensions like \"-e txt,dat\" (without dot or space!).\n");
        text.append("  -m              : Move files with unknown charset to directory \"unknown\".\n");
        text.append("  -v              : Verbose output.\n");
        text.append("  -w <int>        : Wait <int> seconds before trying next document (good, if you want to work on the very same machine).\n");
        text.append("  -t <charset>    : Try to transform the document to given charset (codepage) name. \n");
        text.append("                    This is only possible for documents that are detected to have a  \n");
        text.append("                    codepage that is supported by the current java VM. If not possible \n");
        text.append("                    sorting will be done as normal. \n");
        text.append("  -d              : Semicolon-separated list of fully qualified classnames. \n");
        text.append("                    These classes will be casted to ICodepageDetector instances \n");
        text.append("                    and used in the order specified.\n");
        text.append("                    If this argument is ommited, a HTMLCodepageDetector followed by .\n");
        text.append("                    a JChardetFacade is used by default.\n");
        text.append("  Mandatory (if no -c option given) :\n");
        text.append("  -r            : Root directory containing the collection (recursive).\n");
        text.append("  -o            : Output directory containing the sorted collection.\n");
        System.out.print(text.toString());
    }

    /**
     * Checks that the collection root exists and that the output directory is (or can be
     * created as) a directory.
     *
     * @throws IllegalArgumentException listing every problem found
     */
    protected void verifyFiles() throws IllegalArgumentException {
        StringBuilder problems = new StringBuilder();
        if (this.collectionRoot == null) {
            problems.append("-> Collection root directory is null!\n");
        } else if (!this.collectionRoot.exists()) {
            problems.append("-> Collection root directory:\"");
            problems.append(this.collectionRoot.getAbsolutePath());
            problems.append("\" does not exist!\n");
        }
        if (this.outputDir == null) {
            problems.append("-> Output directory is null!\n");
        } else {
            this.outputDir.mkdirs();
            if (!this.outputDir.isDirectory()) {
                problems.append("-> Output directory has to be a directory, no File!\n");
            }
        }
        if (problems.length() > 0) {
            throw new IllegalArgumentException(problems.toString());
        }
        System.out.println("All parameters are valid.");
    }
}
