package org.wamblee.crawler; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import org.apache.commons.httpclient.Header; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpMethod; import org.apache.commons.httpclient.HttpStatus; import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.methods.PostMethod; import org.apache.commons.httpclient.params.HttpClientParams; import org.apache.commons.httpclient.params.HttpMethodParams; import org.w3c.tidy.Tidy; /* * Copyright 2005 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * */ public final class Main { /** * */ private static final int PROXY_PORT = 10000; /** * */ private static final int MAX_REDIRECTS = 10; /** * Disabled constructor. * */ private Main() { // Empty } private static final String BASE = "http://epg.kml.kiss-technology.com/"; private static int COUNT = 0; public static void main(String[] aArgs) { HttpClientParams clientParams = new HttpClientParams(); clientParams.setIntParameter(HttpClientParams.MAX_REDIRECTS, MAX_REDIRECTS); clientParams.setBooleanParameter(HttpClientParams.REJECT_RELATIVE_REDIRECT, false); HttpClient client = new HttpClient(clientParams); client.getHostConfiguration().setProxy("localhost", PROXY_PORT); clientParams = client.getParams(); Object obj = clientParams.getParameter(HttpClientParams.MAX_REDIRECTS); System.out.println("Max redirects = " + obj); HttpMethod method = new GetMethod(BASE + "l.php"); executeMethod(client, method); PostMethod postMethod = new PostMethod(BASE + "login_core.php"); HttpMethodParams params = new HttpMethodParams(); params.setParameter("user", "erik@brakkee.org"); params.setParameter("passwd", "ebra1969"); params.setParameter("SavePlayerID", ""); params.setParameter("GMode", "TextMode"); params.setParameter("submit", "Login"); NameValuePair[] data = new NameValuePair[] { new NameValuePair("user", "erik@brakkee.org"), new NameValuePair("passwd", "ebra1969"), new NameValuePair("GMode", "TextMode"), new NameValuePair("submit", "Login") }; postMethod.addParameters(data); executeMethod(client, postMethod); Header header = postMethod.getResponseHeader("Location"); System.out.println("Redirecting to: " + header.getValue()); method = new GetMethod(header.getValue()); executeMethod(client, method); } /** * @param aClient * @param aMethod */ private static int executeMethod(HttpClient aClient, HttpMethod aMethod) { //method.setFollowRedirects(true); try { // Execute the method. int statusCode = aClient.executeMethod(aMethod); if (statusCode != HttpStatus.SC_OK) { System.err.println("Method failed: " + aMethod.getStatusLine()); } // Read the response body. String filename = "output" + COUNT++; FileOutputStream os = new FileOutputStream(new File(filename)); //os.write(method.getResponseBody()); Tidy tidy = new Tidy(); tidy.setXHTML(true); tidy.parse(aMethod.getResponseBodyAsStream(), os); os.close(); System.out.println("Written response to file: " + filename); return statusCode; } catch (HttpException e) { throw new RuntimeException("Fatal protocol violation: " + e.getMessage()); } catch (IOException e) { throw new RuntimeException("Fatal transport error: " + e.getMessage()); } finally { // Release the connection. aMethod.releaseConnection(); } } }