10
10
from sqlalchemy import create_engine
11
11
from sqlalchemy .orm import Session , sessionmaker
12
12
13
- from models .collection import Base , Collection
13
+ from models .collection import Base , Collection , Section
14
14
15
15
# URL of the Ancestry.com collection-catalog search API endpoint.
API_SEARCH = "https://www.ancestry.com/search/collections/catalog/api/search"
16
16
@@ -71,7 +71,7 @@ def random_sleep(min_sec: float = 1, max_sec: float = 30, log=True, factor=1):
71
71
"""
72
72
min_sec = min_sec * factor
73
73
max_sec = max_sec * factor
74
- sleep_time = random .randint (min_sec * 1000 , max_sec * 1000 ) / 1000
74
+ sleep_time = random .randint (int ( min_sec * 1000 ), int ( max_sec * 1000 ) ) / 1000
75
75
if log :
76
76
print (f"Waiting for { sleep_time } s" )
77
77
time .sleep (sleep_time )
@@ -93,6 +93,10 @@ def __init__(self, user: str, pas: str):
93
93
:param pas: Ancestry.com password
94
94
"""
95
95
self ._session = requests .Session ()
96
+ self ._session .headers .update ({
97
+ 'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) '
98
+ 'Chrome/39.0.2171.95 Safari/537.36 '
99
+ })
96
100
self ._username = user
97
101
self ._password = pas
98
102
self ._login ()
@@ -101,19 +105,21 @@ def _login(self):
101
105
"""Authenticate with the ancestry API"""
102
106
if not self ._authenticated :
103
107
print ("Logging in" )
104
- self ._session .post ("https://www.ancestry.com/account/signin/frame/authenticate" ,
105
- {"username" : self ._username , "password" : self ._password })
106
- self ._authenticated = True
108
+ login_body = {"username" : self ._username , "password" : self ._password }
109
+ response = self ._session .post ("https://www.ancestry.com/account/signin/frame/authenticate" ,
110
+ data = login_body ,
111
+ headers = {"referer" : "https://www.ancestry.com/account/signin/frame?" })
112
+ self ._authenticated = response .status_code == 200
107
113
108
114
def _init_db_engine(self):
    """Create the SQLite engine and make sure the model tables exist.

    The engine is stored on the instance and the instance is flagged as
    initialized so the setup is not repeated by ``_get_db_session``.
    """
    db_url = f'sqlite:///{self._sqlite_file}'
    engine = create_engine(db_url, echo=True)
    self._db_engine = engine
    # Emits CREATE TABLE for every model registered on Base's metadata.
    Base.metadata.create_all(engine)
    self._engine_initialized = True
112
118
113
def _get_db_session(self, autocommit=False) -> Session:
    """Open a new database session, setting up the engine on first use.

    :param autocommit: passed through unchanged to ``sessionmaker``
    :return: a new session bound to this instance's engine
    """
    if not self._engine_initialized:
        self._init_db_engine()
    session_factory = sessionmaker(bind=self._db_engine, autocommit=autocommit)
    return session_factory()
117
123
118
124
def _get_metadata_target (self ) -> int :
119
125
""" Selects next target for updating collection metadata
@@ -244,6 +250,70 @@ def get_metadata_loop(self, limit_seconds=600):
244
250
if time .time () - started > limit_seconds :
245
251
break
246
252
253
def get_browse_values(self, dbid: int):
    """Fetch the browse hierarchy of a collection and store it as sections.

    Looks up the collection whose ``collection_id`` equals ``dbid``. When it
    exists, its ``sections`` are replaced with ``Section`` objects built from
    the browse-elements API, descending recursively into child levels.
    Nothing is requested when the collection is not in the database.

    :param dbid: Ancestry database (collection) id
    """
    # Local import: the file's import block is not fully visible from here.
    from typing import List, Optional

    def format_url(path: Optional[List[str]] = None) -> str:
        """Build the browse-elements URL, optionally scoped to a path."""
        url = f"https://www.ancestry.com/imageviewer/api/media/browse-elements?dbId={dbid}"
        if path is not None:
            url += f"&path={'|'.join(path)}"
        return url

    def build_section(element: dict, has_child_levels: bool,
                      parent_path: List[str]) -> Section:
        """Turn one browse sub-element into a Section, children included."""
        value = element["PathValue"]
        section = Section(value=value,
                          locale_value=element["LocalizedPathValue"],
                          has_child_levels=has_child_levels)
        description = element.get("PathDescription")
        if description:
            section.description = description
        if has_child_levels:
            section.children = get_children(parent_path + [value])
        return section

    def get_children(path: Optional[List[str]] = None) -> List[Section]:
        """Fetch and build every section directly below ``path``."""
        if path is None:
            path = []
        print(path)
        response = self._session.get(format_url(path=path)).json()["browseElement"]
        # "ContainsChildLevels" is read per element, so an empty level
        # never touches the key.
        return [
            build_section(element, response["ContainsChildLevels"], path)
            for element in response["BrowseSubElements"]
        ]

    db_session = self._get_db_session()
    try:
        # A single lookup; first() returns None when no row matches.
        collection = (
            db_session.query(Collection).filter_by(collection_id=dbid).first()
        )
        if collection is None:
            return

        collection.sections = []

        res = self._session.get(format_url())
        print(res.text)
        browse_element = res.json()["browseElement"]
        has_children = browse_element["ContainsChildLevels"]
        elements = browse_element["BrowseSubElements"]

        for index, element in enumerate(elements, start=1):
            print(f"section {index} of {len(elements)}")
            if has_children:
                print("getting children for top section")
            collection.sections.append(build_section(element, has_children, []))
            # Commit per top-level section so finished work survives a
            # failure later in the scrape.
            db_session.commit()
        # Still needed when there were no sub-elements: persists the
        # emptied section list.
        db_session.commit()
    finally:
        # Release the connection even when a request or JSON parse fails.
        db_session.close()
316
+
247
317
248
318
if __name__ == '__main__' :
249
319
dotenv .load_dotenv ()
@@ -257,3 +327,5 @@ def get_metadata_loop(self, limit_seconds=600):
257
327
# controller.load_collections_into_db_from_disk()
258
328
259
329
# controller.get_metadata_loop()
330
+
331
+ print (controller .get_browse_values (int (input (">>" ))))
0 commit comments