package rabbit.spider;

import java.io.FilterInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.zip.GZIPInputStream;
import org.xbill.DNS.KEYRecord;
import rabbit.html.HTMLBlock;
import rabbit.html.HTMLParseException;
import rabbit.html.HTMLParser;
import rabbit.html.Tag;
import rabbit.html.Token;
import rabbit.http.HTTPHeader;
import rabbit.io.HTTPInputStream;
import rabbit.io.WebConnection;
import rabbit.proxy.Proxy;

/* loaded from: input_file:rabbit/spider/WebSpider.class */
public class WebSpider {
    // Worker threads created and started by the constructor.
    private List workers;
    // Pause between two URLs handled by the same worker; passed directly to
    // Thread.sleep() in Worker.run(), i.e. a value in milliseconds.
    private int sleeptime;
    // Proxy host and port as given to the constructor. They are only stored here;
    // nothing in the visible code reads them back.
    private String proxyHost;
    private int proxyPort;
    // Identification string of the spider.
    // NOTE(review): not referenced by the visible code; presumably used by the
    // non-decompiled Worker.handleURL when building requests - confirm.
    private static final String spiderId = "RabbIT web spider, http://www.khelekore.org/rabbit/web-spider.shtml";
    // Queue of URLs waiting to be fetched. It is also the monitor object that
    // addURL() and getURL() synchronize on and use for wait()/notify().
    private LinkedList urlsToGet = new LinkedList();
    // NOTE(review): not referenced anywhere in the visible code.
    private Map failures = new WeakHashMap();
    // Maps URL string -> URL for every URL that has been queued; addURL() uses it
    // to skip duplicates and only touches it while holding the urlsToGet monitor.
    private Map history = new HashMap();

    /* loaded from: input_file:rabbit/spider/WebSpider$Worker.class */
    private class Worker extends Thread {
        // Base URL against which doAddURL() resolves the links it is given.
        // NOTE(review): nothing in the visible code assigns this field; presumably the
        // non-decompiled handleURL() sets it for each request - confirm.
        private URL currentURL;
        // Explicit reference to the enclosing WebSpider, assigned in the constructor.
        private final WebSpider this$0;

        /**
         * Creates a worker thread named "Worker: " followed by the given number.
         *
         * @param webSpider the spider this worker takes URLs from and reports new links to
         * @param i         number appended to the thread name
         */
        public Worker(WebSpider webSpider, int i) {
            super("Worker: " + i);
            this.this$0 = webSpider;
        }

        /**
         * Worker main loop: takes the next URL from the spider (blocking until one is
         * available), logs it to stderr, handles it and then sleeps for the spider's
         * configured sleep time before asking for the next one.
         *
         * The loop ends when the thread is interrupted while sleeping, or when the
         * interrupt flag is already set as the sleep starts; the interrupt status
         * is restored before returning.
         */
        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            while (true) {
                URL url = this.this$0.getURL();
                System.err.println(this + " got an url: " + url);
                handleURL(url);
                try {
                    Thread.sleep(this.this$0.sleeptime);
                } catch (InterruptedException e) {
                    // An interrupt is the only way to ask a worker to stop: keep the
                    // flag set for whoever inspects the thread and leave the loop
                    // instead of printing a stack trace and carrying on.
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }

        /* JADX WARN: Code restructure failed: missing block: B:26:0x0119, code lost:
        
            r7.this$0.returnWebConnection(r10);
         */
        /* JADX WARN: Code restructure failed: missing block: B:28:?, code lost:
        
            return;
         */
        /* JADX WARN: Code restructure failed: missing block: B:34:0x0119, code lost:
        
            r7.this$0.returnWebConnection(r10);
         */
        /* JADX WARN: Code restructure failed: missing block: B:36:0x0112, code lost:
        
            throw r15;
         */
        /* JADX WARN: Removed duplicated region for block: B:29:0x0121 A[REMOVE] */
        /*
            Code decompiled incorrectly, please refer to instructions dump.
            To view partially-correct add '--show-bad-code' argument
        */
        /**
         * Handles one URL handed out by the spider; called from run() for every URL.
         *
         * NOTE(review): the decompiler could not reconstruct this method. The body below is a
         * placeholder that always throws UnsupportedOperationException, so in this form every
         * worker dies on its first URL. The warnings above show that the lost code called
         * this$0.returnWebConnection(...) before both a normal return and a rethrow; everything
         * else has to be recovered from the original bytecode or sources.
         *
         * @param r8 the URL to handle
         * @throws UnsupportedOperationException always, in this decompiled form
         */
        private void handleURL(java.net.URL r8) {
            /*
                Method dump skipped, instructions count: 292
                To view this dump add '--comments-level debug' option
            */
            throw new UnsupportedOperationException("Method not decompiled: rabbit.spider.WebSpider.Worker.handleURL(java.net.URL):void");
        }

        /**
         * Consumes the body of a response and, when it is HTML, feeds it to an HTML parser
         * so that the links it contains are queued.
         *
         * A body counts as HTML when its Content-Type starts with "text/html"; a
         * Content-Encoding of "gzip" or "x-gzip" is decompressed first. Both header values
         * are compared case-insensitively, as HTTP defines media types and content codings
         * to be case-insensitive. Non-HTML bodies are read and discarded.
         *
         * When webConnection.dataSize() is non-negative, at most that many bytes are
         * processed; a negative size means "read until the stream ends". For HTML the
         * stream is afterwards read until it reports no more data.
         *
         * NOTE(review): every buffer is parsed on its own (no HTMLBlock is carried over
         * between reads), so a tag split across two reads is presumably not recognised.
         * NOTE(review): for gzip bodies the byte count compared with dataSize is the number
         * of decompressed bytes - confirm that this matches what dataSize() reports.
         *
         * @param hTTPHeader      response header, queried for Content-Type and Content-Encoding
         * @param webConnection   connection the response arrived on, queried for the data size
         * @param hTTPInputStream stream positioned at the start of the response body
         * @throws IOException if reading (or gzip decoding) the body fails
         */
        private void handleData(HTTPHeader hTTPHeader, WebConnection webConnection, HTTPInputStream hTTPInputStream) throws IOException {
            String contentType = hTTPHeader.getHeader("Content-Type");
            // NOTE(review): the buffer size constant comes from an unrelated DNS class;
            // presumably the decompiler matched a plain literal to it - confirm and replace.
            byte[] buffer = new byte[KEYRecord.Flags.FLAG5];
            long dataSize = webConnection.dataSize();

            boolean html = contentType != null
                    && contentType.regionMatches(true, 0, "text/html", 0, "text/html".length());
            if (!html) {
                // Nothing to extract: just consume the body.
                readBody(hTTPInputStream, buffer, dataSize, null);
                return;
            }

            String contentEncoding = hTTPHeader.getHeader("Content-Encoding");
            FilterInputStream in = hTTPInputStream;
            if (contentEncoding != null
                    && (contentEncoding.equalsIgnoreCase("gzip") || contentEncoding.equalsIgnoreCase("x-gzip"))) {
                in = new GZIPInputStream(hTTPInputStream);
            }
            readBody(in, buffer, dataSize, new HTMLParser());

            // Drain whatever is left so the stream is read to its end.
            while (in.read(buffer) > 0) {
                // discard
            }
        }

        /**
         * Reads from the stream until it reports no more data or, when dataSize is
         * non-negative, until at least dataSize bytes have been read. Each chunk is passed
         * to the HTML handling when a parser is given and is discarded otherwise.
         */
        private void readBody(FilterInputStream in, byte[] buffer, long dataSize, HTMLParser parser) throws IOException {
            long total = 0;
            int read;
            while ((dataSize < 0 || total < dataSize) && (read = in.read(buffer)) > 0) {
                total += read;
                if (parser != null) {
                    handleData(buffer, parser, null, read);
                }
            }
        }

        /**
         * Parses one chunk of HTML and passes the resulting tokens to checkTokens().
         *
         * When a previous block with unparsed rest data is supplied, a new array is built
         * that holds that rest followed by the first i bytes of bArr, and the combined
         * data is parsed instead. Parse errors are reported on stderr and otherwise ignored.
         *
         * @param bArr       buffer holding the newly read bytes
         * @param hTMLParser parser to run over the data
         * @param hTMLBlock  previously parsed block whose rest should be prepended, or null
         * @param i          number of valid bytes in bArr
         */
        private void handleData(byte[] bArr, HTMLParser hTMLParser, HTMLBlock hTMLBlock, int i) {
            byte[] text = bArr;
            int length = i;
            boolean hasRest = hTMLBlock != null && hTMLBlock.restSize() > 0;
            if (hasRest) {
                length = i + hTMLBlock.restSize();
                text = new byte[length];
                // Rest of the previous block first, then the fresh bytes right behind it.
                hTMLBlock.insertRest(text);
                System.arraycopy(bArr, 0, text, hTMLBlock.restSize(), i);
            }
            hTMLParser.setText(text, length);
            try {
                List tokens = hTMLParser.parse().getTokens();
                checkTokens(tokens);
            } catch (HTMLParseException e) {
                System.err.println("bad HTML: " + e.toString());
            }
        }

        /**
         * Scans parsed tokens for links and queues every link found via doAddURL().
         *
         * Only tokens whose type code is 2 are inspected (NOTE(review): presumably the
         * "tag" token type - confirm against the Token constants). For "a" and "area"
         * tags the "href" attribute is used, for "frame" and "img" tags the "src"
         * attribute; all other tags are skipped.
         *
         * @param list the Token objects produced by the HTML parser
         */
        private void checkTokens(List list) {
            for (int idx = 0; idx < list.size(); idx++) {
                Token token = (Token) list.get(idx);
                if (token.getType() != 2) {
                    continue;
                }
                Tag tag = token.getTag();
                String type = tag.getLowerCaseType();
                String link;
                if (type.equals("a") || type.equals("area")) {
                    link = tag.getAttribute("href");
                } else if (type.equals("frame") || type.equals("img")) {
                    link = tag.getAttribute("src");
                } else {
                    link = null;
                }
                if (link != null) {
                    doAddURL(link);
                }
            }
        }

        /**
         * Normalises a link taken from an HTML attribute and queues it on the spider.
         *
         * One leading and one trailing quote character (" or ') are stripped, anything
         * from the first '#' onwards is dropped, and the result is resolved against
         * currentURL. Only URLs whose protocol is "http" are passed to the spider's
         * addURL(); links that cannot be turned into a URL are silently skipped.
         *
         * @param str raw attribute value of the link
         */
        private void doAddURL(String str) {
            String link = str;
            if (link.startsWith("\"") || link.startsWith("'")) {
                link = link.substring(1);
            }
            if (link.endsWith("\"") || link.endsWith("'")) {
                link = link.substring(0, link.length() - 1);
            }
            int fragmentStart = link.indexOf("#");
            if (fragmentStart >= 0) {
                link = link.substring(0, fragmentStart);
            }

            URL resolved;
            try {
                resolved = new URL(this.currentURL, link);
            } catch (MalformedURLException ignored) {
                // Unusable link in the page: nothing to queue.
                return;
            }
            String protocol = resolved.getProtocol();
            if (protocol != null && protocol.toLowerCase().equals("http")) {
                this.this$0.addURL(resolved);
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * Expected arguments: number of worker threads, sleep time between requests in
     * seconds, proxy host, proxy port, and one or more start URLs. The proxy settings
     * are installed on Proxy, the start URLs are parsed (bad ones are reported and
     * skipped) and a WebSpider is created, which starts the workers.
     *
     * The process exits with status -1 when too few arguments are given, when a numeric
     * argument cannot be parsed or is out of range, when the proxy host is unknown, or
     * when none of the start URLs is valid.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        final String usage = "usage: WebSpider <threads> <sleeptime seconds> proxyhost proxyport url [url*]";
        // The sleep time is given in seconds, but the workers hand it to Thread.sleep(),
        // which expects milliseconds.
        // NOTE(review): the decompiled source multiplied by HTMLParser.UNKNOWN here,
        // presumably because the decompiler matched the literal to an unrelated constant -
        // confirm that constant is 1000.
        final int millisPerSecond = 1000;

        if (strArr.length < 5) {
            System.err.println(usage);
            System.exit(-1);
            return;
        }

        int threads;
        int sleepSeconds;
        int port;
        try {
            threads = Integer.parseInt(strArr[0]);
            sleepSeconds = Integer.parseInt(strArr[1]);
            port = Integer.parseInt(strArr[3]);
        } catch (NumberFormatException e) {
            // Report bad input like any other usage error instead of dying with a stack trace.
            System.err.println("bad numeric argument: " + e.getMessage());
            System.err.println(usage);
            System.exit(-1);
            return;
        }
        int maxSleepSeconds = Integer.MAX_VALUE / millisPerSecond;
        if (threads < 1 || sleepSeconds < 0 || sleepSeconds > maxSleepSeconds) {
            // A negative sleep time would make Thread.sleep() throw in every worker, and a
            // larger value would overflow when converted to milliseconds.
            System.err.println("threads must be at least 1 and sleeptime must be between 0 and "
                    + maxSleepSeconds + " seconds");
            System.err.println(usage);
            System.exit(-1);
            return;
        }
        int sleepMillis = sleepSeconds * millisPerSecond;

        String host = strArr[2];
        try {
            Proxy.setProxy(host);
            Proxy.setProxyPort(port);
        } catch (UnknownHostException e) {
            System.err.println("unable to find proxy host: '" + host + "'");
            e.printStackTrace();
            System.exit(-1);
            return;
        }

        ArrayList startUrls = new ArrayList(strArr.length - 4);
        for (int i = 4; i < strArr.length; i++) {
            try {
                startUrls.add(new URL(strArr[i]));
            } catch (MalformedURLException e2) {
                System.err.println("bad url: " + strArr[i] + ": " + e2);
            }
        }
        if (startUrls.size() > 0) {
            new WebSpider(threads, sleepMillis, host, port, startUrls);
        } else {
            System.err.println("unable to parse urls => nothing to do");
            System.exit(-1);
        }
    }

    /**
     * Creates the spider, starts its worker threads and queues the start URLs.
     *
     * The workers are started before the start URLs are added, so they initially block
     * in getURL() until the first URL is queued.
     *
     * @param i    number of worker threads to create and start
     * @param i2   sleep time handed to Thread.sleep() by each worker between URLs
     * @param str  proxy host (stored only)
     * @param i3   proxy port (stored only)
     * @param list URL objects to start from
     */
    protected WebSpider(int i, int i2, String str, int i3, List list) {
        this.sleeptime = i2;
        this.proxyHost = str;
        this.proxyPort = i3;
        this.workers = new ArrayList(i);

        for (int number = 0; number < i; number++) {
            Worker started = new Worker(this, number);
            this.workers.add(started);
            started.start();
        }

        for (Iterator urls = list.iterator(); urls.hasNext();) {
            URL start = (URL) urls.next();
            addURL(start);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Queues a URL for fetching unless a URL with the same string form was queued before.
     *
     * A new URL is recorded in the history, appended to the queue, and one thread
     * waiting in getURL() is woken up. All of this happens while holding the queue's
     * monitor.
     *
     * @param url the URL to queue
     */
    public void addURL(URL url) {
        synchronized (this.urlsToGet) {
            String key = url.toString();
            boolean alreadySeen = this.history.get(key) != null;
            if (!alreadySeen) {
                this.history.put(key, url);
                this.urlsToGet.add(url);
                // The queue cannot be empty right after the add, so always signal a waiter.
                this.urlsToGet.notify();
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Removes and returns the first URL in the queue, blocking while the queue is empty.
     *
     * An interrupt received while waiting does not abort the wait: the method still
     * returns only once a URL is available. The interrupt is remembered, though, and the
     * calling thread's interrupt status is restored before returning so that the caller
     * can react to it.
     *
     * @return the next URL to fetch, never taken from an empty queue
     */
    public URL getURL() {
        boolean interrupted = false;
        try {
            synchronized (this.urlsToGet) {
                while (this.urlsToGet.isEmpty()) {
                    try {
                        this.urlsToGet.wait();
                    } catch (InterruptedException e) {
                        // Re-interrupting here would make the next wait() throw at once
                        // and spin; note it and restore the flag once we are done.
                        interrupted = true;
                    }
                }
                return (URL) this.urlsToGet.removeFirst();
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Obtains a connection for the given request header by delegating to
     * Proxy.getWebConnection().
     *
     * @param hTTPHeader the request header the connection is needed for
     * @return the connection returned by Proxy
     * @throws IOException if Proxy fails to provide a connection
     */
    protected WebConnection getWebConnection(HTTPHeader hTTPHeader) throws IOException {
        return Proxy.getWebConnection(hTTPHeader);
    }

    /**
     * Hands a connection back by delegating to Proxy.releaseWebConnection().
     *
     * @param webConnection the connection that is no longer needed
     */
    protected void returnWebConnection(WebConnection webConnection) {
        Proxy.releaseWebConnection(webConnection);
    }
}
