/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.benchmark.byTask.feeds;

import java.io.IOException;
import java.io.StringReader;
import java.util.Date;
import org.apache.lucene.benchmark.byTask.feeds.DocData;
import org.apache.lucene.benchmark.byTask.feeds.TrecContentSource;
import org.apache.lucene.benchmark.byTask.feeds.TrecDocParser;

/**
 * A {@link TrecDocParser} that skips a leading {@code <DOCHDR>...</DOCHDR>}
 * section of the document buffer, optionally picks a date out of it, and hands
 * the remaining text to the HTML parser supplied by the {@link TrecContentSource}.
 */
public class TrecGov2Parser
extends TrecDocParser {
    /** Prefix that marks the start of the date value. */
    private static final String DATE = "Date: ";
    /** Marker that ends the date value. */
    private static final String DATE_END = TrecContentSource.NEW_LINE;
    /** Opening tag of the header section that precedes the HTML content. */
    private static final String DOCHDR = "<DOCHDR>";
    /** Closing tag of the header section. */
    private static final String TERMINATING_DOCHDR = "</DOCHDR>";

    /**
     * Parses one document held in {@code docBuf}.
     *
     * <p>If the buffer contains {@code <DOCHDR>}, a date string delimited by
     * {@code "Date: "} and {@link TrecContentSource#NEW_LINE} is looked up via the
     * inherited {@code extract} helper and, when found, converted with
     * {@code trecSrc.parseDate}. Everything after the closing {@code </DOCHDR>}
     * tag is then passed to {@code trecSrc.getHtmlParser()}. If there is no header,
     * or the header is not terminated, the whole buffer is passed on.
     *
     * @param docData  document data object forwarded to the HTML parser
     * @param name     document name forwarded to the HTML parser
     * @param trecSrc  content source providing date parsing and the HTML parser
     * @param docBuf   raw text of the document
     * @param pathType not used by this parser
     * @return whatever the HTML parser returns for the remaining content
     * @throws IOException propagated from the HTML parser
     */
    @Override
    public DocData parse(DocData docData, String name, TrecContentSource trecSrc, StringBuilder docBuf, TrecDocParser.ParsePathType pathType) throws IOException {
        Date date = null;
        int start = 0;
        int h1 = docBuf.indexOf(DOCHDR);
        if (h1 >= 0) {
            int h2 = docBuf.indexOf(TERMINATING_DOCHDR, h1);
            // h2 is passed through as the position limit exactly as before, even when it is -1.
            // NOTE(review): presumably extract treats a negative limit as "no limit" - confirm.
            String dateStr = TrecGov2Parser.extract(docBuf, DATE, DATE_END, h2, null);
            if (dateStr != null) {
                date = trecSrc.parseDate(dateStr);
            }
            // Only skip the header when its terminator was actually found. Without this
            // guard a missing </DOCHDR> (h2 == -1) produced start == 8, which chopped the
            // first 8 characters off the buffer regardless of where the header was.
            if (h2 >= 0) {
                start = h2 + TERMINATING_DOCHDR.length();
            }
        }
        String html = docBuf.substring(start);
        return trecSrc.getHtmlParser().parse(docData, name, date, new StringReader(html), trecSrc);
    }
}

