123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899 |
- #! /usr/bin/env python
- # For general OS related functions
- import os
- # For html parsing
- import re
- # For cli arguments
- import argparse
- # Gets content from a given filename
def file_get_contents(filename):
    """Return the full text content of a file.

    Args:
        filename: Path of the file to read.

    Returns:
        The file content as a string, or None when ``filename`` does not
        exist on disk.
    """
    if not os.path.exists(filename):
        # Explicit rather than falling off the end of the function.
        return None
    # The context manager closes the handle even if read() raises.
    with open(filename, "r") as fp:
        return fp.read()
- # Gets link data list from a given file
def get_link_data_from_file(filename):
    """Extract the bookmark links found in an HTML bookmark file.

    Args:
        filename: Path of the bookmark file to scan.

    Returns:
        A list of ``(url, title)`` tuples, one per ``HREF="http(s)://..."``
        anchor found, in file order. Empty when the file is missing or
        contains no content.
    """
    contents = file_get_contents(filename)
    if not contents:
        # The reader yields None for a missing file; re.findall would raise
        # TypeError on it.
        return []
    # group 1 -> url, group 2 -> page title.
    # The URL stops at the closing quote, the remaining attributes stop at
    # the tag's own '>', and the title is matched lazily. Several anchors on
    # one line are therefore returned separately instead of being merged
    # into a single (first url, last title) pair.
    return re.findall(r'HREF="(https?://[^"\s]+)"[^>]*>(.*?)</A', contents)
- # Generate output from a given linkdata list
def output_from_link_data(linkdata, filename=''):
    """Render link data as a Netscape bookmark file and emit it.

    Args:
        linkdata: Iterable of sequences whose item 0 is the URL and item 1
            is the page title. Both are inserted verbatim (no escaping).
        filename: Destination path. When empty (the default) the generated
            HTML is printed to standard output instead of being written.

    Returns:
        None.
    """
    htmlbegin = """
<!DOCTYPE NETSCAPE-Bookmark-file-1>
<!-- This is an automatically generated file.
It will be read and overwritten.
DO NOT EDIT! -->
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
<TITLE>Bookmarks</TITLE>
<H1>Bookmarks Menu</H1>
<DL><p>
<DT><H3 UNFILED_BOOKMARKS_FOLDER="true">Merged Bookmarks</H3>
<DL><p>
"""
    htmlend = """
</DL><p>"""
    # Build all entries in one join instead of repeated string concatenation.
    entries = ''.join(
        '<DT><A HREF="' + data[0] + '">' + data[1] + '</A>\n\n'
        for data in linkdata
    )
    html = htmlbegin + entries + htmlend

    if not filename:
        print(html)
        return

    # The document declares charset=UTF-8 in its META tag, so write it as
    # UTF-8 rather than in the platform's locale encoding.
    with open(filename, "w", encoding="utf-8") as html_file:
        html_file.write(html)
    print('Output written to: ' + filename)
# Removes duplicates from a list
def remove_duplicates(l):
    """Return a new list with duplicate elements removed.

    The first occurrence of each element is kept and the original order is
    preserved. The input is not modified.

    Args:
        l: Iterable of elements to de-duplicate.

    Returns:
        A list containing each distinct element once.
    """
    items = list(l)
    try:
        # dicts keep insertion order, giving an order-preserving dedupe in
        # linear time instead of a quadratic list-membership scan.
        return list(dict.fromkeys(items))
    except TypeError:
        # Unhashable elements (e.g. lists): fall back to equality scanning.
        unique = []
        for elem in items:
            if elem not in unique:
                unique.append(elem)
        return unique
- ## ---- MAIN PROGRAM ---- ##
parser = argparse.ArgumentParser(prog='bookmerger', description='Merge multiple browser HTML bookmark files into one.')
parser.add_argument('files', metavar='files', type=str, nargs='+',
                    help='HTML bookmark files')
parser.add_argument('--output', metavar='output', type=str, default='',
                    help='Merged output HTML bookmark file')
args = parser.parse_args()

# Positional arguments: the bookmark files to merge.
cli_filenames = args.files

# Collect the links of every readable input file, in argument order.
linkdata = []
for filename in cli_filenames:
    if os.path.exists(filename):
        linkdata.extend(get_link_data_from_file(filename))
    else:
        print('ERROR: '+filename+' is not found, so skipped')

# Single de-duplication pass; the first occurrence of each link wins.
linkdata = remove_duplicates(linkdata)

# --output defaults to '' (never None), which makes the writer print the
# result to stdout, so one call covers both cases.
output_from_link_data(linkdata, args.output)
|