html.ml 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. (*
  2. * _ _ ____ _
  3. * _| || |_/ ___| ___ _ __ _ __ ___ | |
  4. * |_ .. _\___ \ / _ \ '_ \| '_ \ / _ \| |
  5. * |_ _|___) | __/ |_) | |_) | (_) |_|
  6. * |_||_| |____/ \___| .__/| .__/ \___/(_)
  7. * |_| |_|
  8. *
  9. * Personal Social Web.
  10. *
  11. * Copyright (C) The #Seppo contributors. All rights reserved.
  12. *
  13. * This program is free software: you can redistribute it and/or modify
  14. * it under the terms of the GNU General Public License as published by
  15. * the Free Software Foundation, either version 3 of the License, or
  16. * (at your option) any later version.
  17. *
  18. * This program is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  21. * GNU General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU General Public License
  24. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  25. *)
  26. open Astring
  27. let ( let* ) = Result.bind
  28. module Form = struct
  29. type field = string * string list
  30. (** A single html form field content. Modeled after Uri.query *)
  31. type t = field list
  32. (** Values of a html form *)
  33. let of_string s : t = s
  34. |> String.trim
  35. |> Uri.query_of_encoded
  36. (** application/x-www-form-urlencoded
  37. *
  38. * https://discuss.ocaml.org/t/decoding-x-www-form-urlencoded/4505/3?u=mro *)
  39. let of_channel n ic = really_input_string ic n
  40. |> of_string
  41. (** Read a fixed number of bytes and parse *)
  42. (*
  43. let sort (l : t) : t =
  44. l |> List.sort (fun (a, _) (b, _) -> String.compare a b)
  45. let filter_sort f l = l |> List.filter f |> sort
  46. let filter_sort_keys (ks : string list) l =
  47. l |> filter_sort (fun (k, _) -> List.exists (String.equal k) ks)
  48. *)
  49. type constraint_ = string * string
  50. (** actually an input field attribute like pattern=".+"
  51. *
  52. * https://www.w3.org/TR/xhtml-modularization/abstract_modules.html#s_extformsmodule
  53. *)
  54. type input = string * string * constraint_ list
  55. (** name * type * constraint list.
  56. *
  57. * https://www.w3.org/TR/xhtml-modularization/abstract_modules.html#s_extformsmodule
  58. * and some more, e.g. pattern
  59. *)
  60. let validate name ty va (c_nam,c_cri)
  61. (** Validate one input field against one constraint from a form definition
  62. *) =
  63. let vali v =
  64. Logr.debug (fun m -> m " validate %s='%s'" c_nam c_cri);
  65. match ty,c_nam with
  66. | _,"maxlength" ->
  67. (* http://www.w3.org/TR/html5/forms.html#the-maxlength-and-minlength-attributes
  68. https://wiki.selfhtml.org/wiki/HTML/Elemente/input *)
  69. (match c_cri |> int_of_string_opt with
  70. | None -> Error (name,"invalid maxlength")
  71. | Some max -> if String.length v <= max
  72. then Ok v
  73. else Error (name,"longer than maxlength"))
  74. | _,"minlength" ->
  75. (* http://www.w3.org/TR/html5/forms.html#the-maxlength-and-minlength-attributes
  76. https://wiki.selfhtml.org/wiki/HTML/Elemente/input *)
  77. (match c_cri |> int_of_string_opt with
  78. | None -> Error (name,"invalid minlength")
  79. | Some min -> if String.length v >= min
  80. then Ok v
  81. else Error (name,"shorter than minlength"))
  82. | _,"pattern" ->
  83. (* https://html.spec.whatwg.org/multipage/input.html#attr-input-pattern *)
  84. (try
  85. let rx = Re.Pcre.regexp c_cri in
  86. if Re.execp rx v
  87. then Ok v
  88. else Error (name,"pattern mismatch")
  89. with | _ -> Error (name,"invalid pattern"))
  90. | _ -> Ok v
  91. in
  92. Result.bind va vali
  93. let string_opt (name,ty,constraints : input) (vals : t) : (string option, string * string) result =
  94. Logr.debug (fun m -> m " <input name='%s' type='%s' ..." name ty);
  95. match List.assoc_opt name vals with
  96. | None ->
  97. (match List.assoc_opt "required" constraints with
  98. | None -> Ok None
  99. | Some _ -> Error (name, "required but missing"))
  100. | Some v ->
  101. let* s = List.fold_left
  102. (validate name ty)
  103. (v |> String.concat |> Result.ok)
  104. constraints in
  105. Ok (Some s)
  106. let string (name,ty,contraints) va : (string, string * string) result =
  107. match string_opt (name,ty,contraints) va with
  108. | Error _ as e -> e
  109. | Ok None -> Logr.err (fun m -> m "%s Field '%s' must be 'required' to use 'string'" E.e1012 name);
  110. Error (name, "implicitly required but missing")
  111. | Ok (Some v) -> Ok v
  112. end
  113. let add_class atts c =
  114. let rec f found (src,dst) =
  115. match src with
  116. | [] -> [], if found
  117. then dst
  118. else (("","class"),c) :: dst
  119. | ((_,"class") as n,v) :: tl ->
  120. let is_sep = Char.equal ' ' in
  121. let vs = v |> String.fields ~is_sep in
  122. let vs = match vs |> List.find_opt (String.equal c) with
  123. | None -> c :: vs
  124. | Some _ -> vs in
  125. f true (tl, (n,vs |> String.concat ~sep:" ") :: dst)
  126. | hd :: tl ->
  127. f found (tl, hd :: dst)
  128. in
  129. let _,r = f false (atts,[]) in
  130. r |> List.rev
  131. let of_plain s =
  132. s
  133. |> Lexing.from_string
  134. |> Plain2html.url (Buffer.create 100)
  135. |> Buffer.contents
  136. let to_plain s =
  137. let open Soup in (* https://aantron.github.io/lambdasoup/ *)
  138. (* TODO care about :
  139. * - tags
  140. *)
  141. let at_mention a =
  142. let handle = a |> texts |> String.concat in
  143. if handle |> St.is_prefix ~affix:"@"
  144. then (
  145. (* @TODO check txt not having a host *)
  146. let href = R.attribute "href" a |> Uri.of_string in
  147. let host = href |> Uri.host_with_default ~default:"-" in
  148. let handle = handle ^ "@" ^ host in
  149. a |> delete_attribute "href";
  150. a |> clear;
  151. handle |> create_text |> append_child a;
  152. Some (handle,href)
  153. ) else
  154. None
  155. in
  156. let at_mention' l a =
  157. match a |> at_mention with
  158. | None -> l
  159. | Some e -> e :: l
  160. in
  161. let p_br soup =
  162. soup |> select "br" |> iter (fun br -> create_text "\n" |> replace br);
  163. soup |> select "p" |> iter (fun p -> create_text "\n\n" |> append_child p);
  164. soup
  165. in
  166. (* ocaml 5.1+ *)
  167. let find_index predicate xs =
  168. let rec fi i = function
  169. | [] -> None
  170. | hd :: tl ->
  171. if hd |> predicate
  172. then Some i
  173. else fi (succ i) tl
  174. in
  175. fi 0 xs
  176. in
  177. let a_href_to_footnotes soup =
  178. let fns = ref [] in
  179. soup |> select "a[href]" |> iter (fun a ->
  180. let href = a |> R.attribute "href" in
  181. let href' = href |> Uri.of_string in
  182. let pred x = x |> Uri.of_string |> Uri.equal href' in
  183. let txt = a |> texts |> String.concat |> String.trim in
  184. if txt |> pred
  185. || "http://" ^ txt |> pred
  186. || "https://" ^ txt |> pred
  187. then ( a |> clear;
  188. href |> create_text |> append_child a
  189. ) else (
  190. let no : int = match !fns |> find_index pred with
  191. | Some l -> succ l
  192. | None ->
  193. fns := href :: !fns;
  194. !fns |> List.length
  195. in
  196. let lbl = "[" ^ (no |> string_of_int) ^ "]" in
  197. lbl |> create_text |> insert_after a;
  198. Printf.sprintf "\n%s: %s" lbl href
  199. |> create_text
  200. |> append_root soup )
  201. );
  202. soup
  203. in
  204. let proc f css n =
  205. let m = n |> select css |> fold f [] in
  206. n,m
  207. in
  208. let _de_trunk so =
  209. let open Soup in
  210. so |> select ".ellipsis" |> iter ("…" |> create_text |> insert_after);
  211. so |> select ".invisible" |> iter delete;
  212. so
  213. in
  214. let s = "<div>" ^ s ^ "<br/></div>"
  215. |> parse
  216. in
  217. let s,me = s |> proc at_mention' "a[href]"
  218. and ha = []
  219. and rx_one = Str.regexp " +\n"
  220. and rx_many = Str.regexp "\n\n+"
  221. in
  222. s |> a_href_to_footnotes
  223. |> p_br
  224. |> texts
  225. |> String.concat
  226. (* replace superfluous whitespace? *)
  227. |> Str.global_replace rx_one "\n"
  228. |> Str.global_replace rx_many "\n\n"
  229. |> String.trim , me , ha