8000 Add feature prototype · Dyzio18/java-web-bot-library@90bb450 · GitHub
[go: up one dir, main page]

Skip to content

Commit 90bb450

Browse files
authored
Add feature prototype
1 parent 7b95750 commit 90bb450

File tree

1 file changed

+85
-0
lines changed

1 file changed

+85
-0
lines changed

Main.java

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
Library for working with HTML
3+
https://jsoup.org/
4+
*/
5+
6+
7+
package com.company;
8+
9+
import java.io.*;
10+
import java.net.URL;
11+
import java.net.URLConnection;
12+
import java.util.*;
13+
import java.util.regex.Matcher;
14+
import java.util.regex.Pattern;
15+
import java.util.stream.Stream;
16+
17+
/**
 * Prototype of a small web bot: downloads one page and lists the hyperlinks found in it.
 *
 * <p>Links are extracted with a regular expression rather than an HTML parser, so only
 * double-quoted, lower-case {@code href="..."} attributes are recognised.
 */
public class Main {

    /** Page crawled when no URL is supplied on the command line. */
    private static final String DEFAULT_URL = "http://home.agh.edu.pl/~geoinf/pl/main/";

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11";

    /** Connect and read timeout; without one an unresponsive server blocks the bot forever. */
    private static final int TIMEOUT_MILLIS = 10_000;

    /**
     * Matches the non-empty value of a double-quoted {@code href} attribute.
     *
     * <p>The lookarounds keep the surrounding {@code href="} and closing quote out of the match.
     * There is deliberately no leading {@code \b}: a word boundary there would reject every value
     * that starts with a non-word character, i.e. relative links ({@code /path}, {@code ../x})
     * and anchors ({@code #top}).
     */
    private static final Pattern HREF_PATTERN = Pattern.compile("(?<=href=\")[^\"]+(?=\")");

    /**
     * Prints the set of hyperlinks found on a page to standard output.
     *
     * @param args optional; {@code args[0]} is the URL to scan, otherwise {@link #DEFAULT_URL} is used
     */
    public static void main(String[] args) {
        String startUrl = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        Map<String, String> map = findHyperlinks(startUrl);

        // Printed as a single line in Set.toString() form, e.g. "[a, b, c]".
        System.out.println(map.keySet());
    }

    /**
     * Downloads the page at {@code url} and collects the hyperlinks it contains.
     *
     * <p>This is best-effort: if the page cannot be downloaded the error is reported on
     * standard error and an empty map is returned.
     *
     * @param url address of the page to scan
     * @return a sorted map whose keys are the {@code href} values found on the page and whose
     *         values are all {@code url} (the page the link was found on); never {@code null}
     */
    private static Map<String, String> findHyperlinks(String url) {
        Map<String, String> links = new TreeMap<>();

        String html;
        try {
            html = getURLSource(url);
        } catch (IOException e) {
            System.err.println("Could not download " + url);
            e.printStackTrace();
            return links;
        }

        Matcher matcher = HREF_PATTERN.matcher(html);
        while (matcher.find()) {
            links.put(matcher.group(), url);
        }
        return links;
    }

    /**
     * Fetches the resource at {@code url} and returns its content as a string.
     *
     * <p>The content is decoded as UTF-8 and its lines are concatenated without line terminators.
     *
     * @param url address of the resource to fetch
     * @return the decoded content with line terminators removed
     * @throws IOException if the URL is malformed, the connection fails or times out,
     *         or the content cannot be read
     */
    public static String getURLSource(String url) throws IOException {
        URL urlObject = new URL(url);
        URLConnection urlConnection = urlObject.openConnection();
        urlConnection.setRequestProperty("User-Agent", USER_AGENT);
        urlConnection.setConnectTimeout(TIMEOUT_MILLIS);
        urlConnection.setReadTimeout(TIMEOUT_MILLIS);

        return toString(urlConnection.getInputStream());
    }

    /**
     * Reads {@code inputStream} to the end as UTF-8 text and closes it.
     *
     * @param inputStream stream to drain; closed before this method returns
     * @return all lines of the stream joined together without line terminators
     * @throws IOException if reading from the stream fails
     */
    private static String toString(InputStream inputStream) throws IOException {
        try (BufferedReader bufferedReader =
                new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
            StringBuilder stringBuilder = new StringBuilder();
            String inputLine;
            // readLine() strips the line terminator, so lines are joined back-to-back.
            while ((inputLine = bufferedReader.readLine()) != null) {
                stringBuilder.append(inputLine);
            }
            return stringBuilder.toString();
        }
    }
}

0 commit comments

Comments
 (0)
0