Skip to content

Commit c07ee56

Browse files
committed
feat: extract ratings
1 parent b54da4d commit c07ee56

File tree

11 files changed

+63
-2
lines changed

11 files changed

+63
-2
lines changed

lib/serp_parser/elements/google/organic_result.rb

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ def root_domain
3131
PublicSuffix.parse(domain).domain
3232
end
3333

34+
# Rating of this result; this implementation always returns nil.
# NOTE(review): the variant classes touched in the same commit define their
# own `rating` — presumably they override this default; confirm the
# inheritance chain.
# @return [nil]
def rating
35+
nil
36+
end
37+
3438
def to_h
3539
{
3640
"position" => position,
@@ -39,7 +43,8 @@ def to_h
3943
"url" => url,
4044
"domain" => domain,
4145
"root_domain" => root_domain,
42-
"_serp_parser_meta" => meta
46+
"_serp_parser_meta" => meta,
47+
"rating" => rating
4348
}
4449
end
4550
end

lib/serp_parser/elements/google/variants/organic_result1.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,20 @@ def description
1111
remove_span_elements(match)
1212
end
1313

14+
# Extract the review rating shown for this result, if any.
#
# The rating text is expected to look like "4,0 (90)": a score written with
# a decimal comma, followed by the number of votes in parentheses.
#
# @return [Hash, nil] a hash with "value" (Float), "max_value" (Integer,
#   hard-coded to 5 for this variant) and "votes" (Integer); nil when the
#   selector yields no text or the text is not in the expected format
def rating
  text = element_from_selector(".r0bn4c.rQMQod.tP9Zud").text.strip # "4,0 (90)"
  return nil if text.empty?

  matches = text.match(/(\d+,\d+)\s+\((\d+)\)/)
  # Non-empty text that is not in the rating format would otherwise raise
  # NoMethodError on nil below, so treat it as "no rating".
  return nil unless matches

  {
    # Convert the decimal comma to a dot so to_f parses the fraction
    "value" => matches[1].tr(",", ".").to_f,
    "max_value" => 5,
    "votes" => matches[2].to_i
  }
end
27+
1428
private
1529

1630
# Find the text node that contains the description text

lib/serp_parser/elements/google/variants/organic_result2.rb

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,29 @@ def description
99
element = element_from_selector(".VwiC3b")
1010
element.text
1111
end
12+
13+
# Returns the rating of the result.
#
# Parses text of the form "<value>/<max> · <votes>", where the vote count
# may contain thousands separators.
#
# @return [Hash, nil] a hash with "value" (Float), "max_value" (Integer) and
#   "votes" (Integer); nil when no rating text is found or it does not match
def rating
  text = element_from_selector(".fG8Fp.uo4vr .ChPIuf")&.text&.strip

  # NOTE: the pattern contains an invisible character right before the
  # vote-count group (it appears to be U+200E LEFT-TO-RIGHT MARK); keep it
  # intact when editing this line.
  # `&.` keeps the nil handling consistent with the line above instead of
  # raising NoMethodError when there is no text.
  matches = text&.match(/(\d+\.\d+)\/(\d+) · ‎([\d,]+)/)
  return nil if matches.nil?

  {
    "value" => matches[1].to_f,
    "max_value" => matches[2].to_i,
    # Drop thousands separators before converting, e.g. "60,953" -> 60953
    "votes" => matches[3].delete(",.").to_i
  }
end
1235
end
1336
end
1437
end

lib/serp_parser/helpers.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ module Helpers
44
# Find the nodes below the current element that match a CSS selector.
#
# Delegates to `@element.css`; callers in this codebase call `.text` on the
# result and treat an empty string as "nothing matched".
#
# @param selector [String] CSS selector to search for
# @return [Nokogiri::XML::NodeSet] the matching nodes (assuming @element is
#   a Nokogiri node — confirm against where @element is assigned)
def element_from_selector(selector)
  @element.css(selector)
end
99

1010
# Extract text from selector

spec/files/google/1/organic_results/organic_result.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"url": "https://dalaavfall.se/sorteringsguide/plastforpackning/plastkasse/",
66
"domain": "dalaavfall.se",
77
"root_domain": "dalaavfall.se",
8+
"rating": null,
89
"_serp_parser_meta": {
910
"template": "SerpParser::Elements::Google::Variants::OrganicResult1"
1011
}

spec/files/google/1/organic_results/organic_result_with_date.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
"url": "https://www.testin.se/hushall/matkasse",
66
"domain": "www.testin.se",
77
"root_domain": "testin.se",
8+
"rating": {
9+
"value": 4.7,
10+
"max_value": 5,
11+
"votes": 52
12+
},
813
"_serp_parser_meta": {
914
"template": "SerpParser::Elements::Google::Variants::OrganicResult1"
1015
}

spec/files/google/1/organic_results/organic_result_with_rating.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
"url": "https://www.jordklok.se/sv/articles/2.113.6157/omom-ekologisk-matkasse",
66
"domain": "www.jordklok.se",
77
"root_domain": "jordklok.se",
8+
"rating": {
9+
"value": 4.0,
10+
"max_value": 5,
11+
"votes": 90
12+
},
813
"_serp_parser_meta": {
914
"template": "SerpParser::Elements::Google::Variants::OrganicResult1"
1015
}

spec/files/google/1/organic_results/organic_result_with_sitelinks.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"url": "https://www.linasmatkasse.se/",
66
"domain": "www.linasmatkasse.se",
77
"root_domain": "linasmatkasse.se",
8+
"rating": null,
89
"_serp_parser_meta": {
910
"template": "SerpParser::Elements::Google::Variants::OrganicResult1"
1011
}

spec/files/google/2/organic_results/organic_result.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"url": "https://www.whitehouse.gov/administration/president-biden/",
66
"domain": "www.whitehouse.gov",
77
"root_domain": "whitehouse.gov",
8+
"rating": null,
89
"_serp_parser_meta": {
910
"template": "SerpParser::Elements::Google::Variants::OrganicResult2"
1011
}

spec/files/google/2/organic_results/organic_result_with_rating.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
"url": "https://www.imdb.com/title/tt0112346/",
66
"domain": "www.imdb.com",
77
"root_domain": "imdb.com",
8+
"rating": {
9+
"value": 6.8,
10+
"max_value": 10,
11+
"votes": 60953
12+
},
813
"_serp_parser_meta": {
914
"template": "SerpParser::Elements::Google::Variants::OrganicResult2"
1015
}

spec/files/google/2/organic_results/top_result.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"url": "https://en.wikipedia.org/wiki/President_of_the_United_States#:~:text=In%20all%2C%2045%20individuals%20have,office%20on%20January%2020%2C%202021.",
66
"domain": "en.wikipedia.org",
77
"root_domain": "wikipedia.org",
8+
"rating": null,
89
"_serp_parser_meta": {
910
"template": "SerpParser::Elements::Google::Variants::OrganicTopResult1"
1011
}

0 commit comments

Comments
 (0)