bookmerger.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. #! /usr/bin/env python
  2. # For general OS related functions
  3. import os
  4. # For html parsing
  5. import re
  6. # For cli arguments
  7. import argparse
  8. # Gets content from a given filename
  9. def file_get_contents(filename):
  10. if os.path.exists(filename):
  11. fp = open(filename, "r")
  12. content = fp.read()
  13. fp.close()
  14. return content
  15. # Gets link data list from a given file
  16. def get_link_data_from_file(filename):
  17. contents = file_get_contents(filename)
  18. # 0 -> url
  19. # 1 -> page title
  20. return re.findall(r'HREF="(https?://[^\s]+)".*>(.*)</A', contents)
  21. # Generate output from a given linkdata list
  22. def output_from_link_data(linkdata, filename):
  23. htmlbegin="""
  24. <!DOCTYPE NETSCAPE-Bookmark-file-1>
  25. <!-- This is an automatically generated file.
  26. It will be read and overwritten.
  27. DO NOT EDIT! -->
  28. <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
  29. <TITLE>Bookmarks</TITLE>
  30. <H1>Bookmarks Menu</H1>
  31. <DL><p>
  32. <DT><H3 UNFILED_BOOKMARKS_FOLDER="true">Merged Bookmarks</H3>
  33. <DL><p>
  34. """
  35. htmlend="""
  36. </DL><p>"""
  37. html=htmlbegin
  38. for data in linkdata:
  39. html = html+'<DT><A HREF="'+data[0]+'">'+data[1]+'</A>\n\n'
  40. html = html+htmlend
  41. if filename != '':
  42. html_file = open(filename, "w")
  43. html_file.write(html)
  44. print('Output written to: '+filename)
  45. html_file.close()
  46. else:
  47. print(html)
# Removes duplicates from a list
  49. def remove_duplicates(l):
  50. new_l = []
  51. for elem in l:
  52. if elem not in new_l:
  53. new_l.append(elem)
  54. return new_l
  55. ## ---- MAIN PROGRAM ---- ##
  56. parser = argparse.ArgumentParser(prog='bookmerger', description='Merge multiple browser HTML bookmark files into one.')
  57. parser.add_argument('files', metavar='files', type=str, nargs='+',
  58. help='HTML bookmark files')
  59. parser.add_argument('--output', metavar='output', type=str, default='',
  60. help='Merged output HTML bookmark file')
  61. args = parser.parse_args()
  62. linkdata = list()
  63. # Gets all the cli arguments without the first one (script name)
  64. cli_filenames = args.files
  65. for filename in cli_filenames:
  66. if os.path.exists(filename):
  67. links = get_link_data_from_file(filename)
  68. for elem in links:
  69. if elem not in linkdata:
  70. linkdata.append(elem)
  71. else:
  72. print('ERROR: '+filename+' is not found, so skipped')
  73. linkdata = remove_duplicates(linkdata)
  74. if args.output is None:
  75. output_from_link_data(linkdata)
  76. else:
  77. output_from_link_data(linkdata, args.output)