/* xscreensaver, Copyright (c) 2012-2016 Jamie Zawinski
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation.  No representations are made about the suitability of this
 * software for any purpose.  It is provided "as is" without express or
 * implied warranty.
 *
 * Loading URLs and returning the underlying text.
 *
 * This is necessary because iOS and Android don't have Perl installed,
 * so we can't just run "xscreensaver-text" at the end of a pipe to do this.
 */

#include "utils.h"

#if defined(USE_IPHONE) || defined(HAVE_ANDROID) /* whole file */

#include "textclient.h"
#include "resources.h"
#include "utf8wc.h"

/* NOTE(review): the header name on this #include was missing from the file;
   <stdio.h> is what the fprintf (stderr, ...) calls below require. */
#include <stdio.h>

#undef countof
#define countof(x) (sizeof((x))/sizeof((*x)))

extern const char *progname;

/* State of one text source. */
struct text_data {
  enum { DATE, LITERAL, URL } mode;  /* which kind of text to produce */
  char *literal, *url;               /* resource strings; freed on close */
  Display *dpy;
  int columns;                       /* wrap width, set by textclient_reshape */
  int max_lines;                     /* line limit, set by textclient_reshape */
  char *buf;                         /* current text; freed on close/reload */
  int buf_size;
  char *fp;                          /* read cursor into buf; not owned */
};


/* Allocates a text_data and configures it from the "textMode",
   "textLiteral" and "textURL" resources.

   textMode "date" or "0" selects DATE, "literal" or "1" selects LITERAL,
   "url" or "3" selects URL; a missing, empty or unrecognised value falls
   back to DATE.

   Returns NULL if the allocation fails.  Otherwise the result must be
   released with textclient_close().
 */
text_data *
textclient_open (Display *dpy)
{
  text_data *d = (text_data *) calloc (1, sizeof (*d));
  char *s;

# ifdef DEBUG
  fprintf (stderr, "%s: textclient: init\n", progname);
# endif

  if (!d)
    return NULL;

  s = get_string_resource (dpy, "textMode", "TextMode");
  if (!s || !*s || !strcasecmp (s, "date") || !strcmp (s, "0"))
    d->mode = DATE;
  else if (!strcasecmp (s, "literal") || !strcmp (s, "1"))
    d->mode = LITERAL;
  else if (!strcasecmp (s, "url") || !strcmp (s, "3"))
    d->mode = URL;
  else
    d->mode = DATE;

  /* The mode string is only needed for the comparison above.  The other
     two resource strings are kept and freed in textclient_close(), so this
     one has to be released here or it leaks. */
  free (s);

  d->dpy = dpy;
  d->literal = get_string_resource (dpy, "textLiteral", "TextLiteral");
  d->url = get_string_resource (dpy, "textURL", "TextURL");

  return d;
}


/* Releases everything owned by D, and D itself.  D may be NULL. */
void
textclient_close (text_data *d)
{
# ifdef DEBUG
  fprintf (stderr, "%s: textclient: free\n", progname);
# endif

  if (!d)
    return;

  /* d->fp points into d->buf, so it is not freed separately. */
  free (d->buf);
  free (d->literal);
  free (d->url);
  free (d);
}


/* Returns a copy of the
string with some basic HTML entities decoded. */ static char * decode_entities (const char *html) { char *ret = (char *) malloc ((strlen(html) * 4) + 1); // room for UTF8 const char *in = html; char *out = ret; *out = 0; const struct { const char *c; const char *e; } entities[] = { { "amp", "&" }, { "lt", "<" }, { "gt", ">" }, // Convert Latin1 to UTF8 { "nbsp", " " }, //   160 { "iexcl", "\302\241" }, // ¡ 161 { "cent", "\302\242" }, // ¢ 162 { "pound", "\302\243" }, // £ 163 { "curren", "\302\244" }, // ¤ 164 { "yen", "\302\245" }, // ¥ 165 { "brvbar", "\302\246" }, // ¦ 166 { "sect", "\302\247" }, // § 167 { "uml", "\302\250" }, // ¨ 168 { "copy", "\302\251" }, // © 169 { "ordf", "\302\252" }, // ª 170 { "laquo", "\302\253" }, // « 171 { "not", "\302\254" }, // ¬ 172 { "shy", "\302\255" }, // ­ 173 { "reg", "\302\256" }, // ® 174 { "macr", "\302\257" }, // ¯ 175 { "deg", "\302\260" }, // ° 176 { "plusmn", "\302\261" }, // ± 177 { "sup2", "\302\262" }, // ² 178 { "sup3", "\302\263" }, // ³ 179 { "acute", "\302\264" }, // ´ 180 { "micro", "\302\265" }, // µ 181 { "para", "\302\266" }, // ¶ 182 { "middot", "\302\267" }, // · 183 { "cedil", "\302\270" }, // ¸ 184 { "sup1", "\302\271" }, // ¹ 185 { "ordm", "\302\272" }, // º 186 { "raquo", "\302\273" }, // » 187 { "frac14", "\302\274" }, // ¼ 188 { "frac12", "\302\275" }, // ½ 189 { "frac34", "\302\276" }, // ¾ 190 { "iquest", "\302\277" }, // ¿ 191 { "Agrave", "\303\200" }, // À 192 { "Aacute", "\303\201" }, // Á 193 { "Acirc", "\303\202" }, // Â 194 { "Atilde", "\303\203" }, // Ã 195 { "Auml", "\303\204" }, // Ä 196 { "Aring", "\303\205" }, // Å 197 { "AElig", "\303\206" }, // Æ 198 { "Ccedil", "\303\207" }, // Ç 199 { "Egrave", "\303\210" }, // È 200 { "Eacute", "\303\211" }, // É 201 { "Ecirc", "\303\212" }, // Ê 202 { "Euml", "\303\213" }, // Ë 203 { "Igrave", "\303\214" }, // Ì 204 { "Iacute", "\303\215" }, // Í 205 { "Icirc", "\303\216" }, // Î 206 { "Iuml", "\303\217" }, // Ï 207 { "ETH", "\303\220" }, // Ð 
208 { "Ntilde", "\303\221" }, // Ñ 209 { "Ograve", "\303\222" }, // Ò 210 { "Oacute", "\303\223" }, // Ó 211 { "Ocirc", "\303\224" }, // Ô 212 { "Otilde", "\303\225" }, // Õ 213 { "Ouml", "\303\226" }, // Ö 214 { "times", "\303\227" }, // × 215 { "Oslash", "\303\230" }, // Ø 216 { "Ugrave", "\303\231" }, // Ù 217 { "Uacute", "\303\232" }, // Ú 218 { "Ucirc", "\303\233" }, // Û 219 { "Uuml", "\303\234" }, // Ü 220 { "Yacute", "\303\235" }, // Ý 221 { "THORN", "\303\236" }, // Þ 222 { "szlig", "\303\237" }, // ß 223 { "agrave", "\303\240" }, // à 224 { "aacute", "\303\241" }, // á 225 { "acirc", "\303\242" }, // â 226 { "atilde", "\303\243" }, // ã 227 { "auml", "\303\244" }, // ä 228 { "aring", "\303\245" }, // å 229 { "aelig", "\303\246" }, // æ 230 { "ccedil", "\303\247" }, // ç 231 { "egrave", "\303\250" }, // è 232 { "eacute", "\303\251" }, // é 233 { "ecirc", "\303\252" }, // ê 234 { "euml", "\303\253" }, // ë 235 { "igrave", "\303\254" }, // ì 236 { "iacute", "\303\255" }, // í 237 { "icirc", "\303\256" }, // î 238 { "iuml", "\303\257" }, // ï 239 { "eth", "\303\260" }, // ð 240 { "ntilde", "\303\261" }, // ñ 241 { "ograve", "\303\262" }, // ò 242 { "oacute", "\303\263" }, // ó 243 { "ocirc", "\303\264" }, // ô 244 { "otilde", "\303\265" }, // õ 245 { "ouml", "\303\266" }, // ö 246 { "divide", "\303\267" }, // ÷ 247 { "oslash", "\303\270" }, // ø 248 { "ugrave", "\303\271" }, // ù 249 { "uacute", "\303\272" }, // ú 250 { "ucirc", "\303\273" }, // û 251 { "uuml", "\303\274" }, // ü 252 { "yacute", "\303\275" }, // ý 253 { "thorn", "\303\276" }, // þ 254 { "yuml", "\303\277" }, // ÿ 255 // And some random others { "bdquo", "\342\200\236" }, // ~ { "bull", "\342\200\242" }, // ~ { "circ", "\313\206" }, // ~ { "cong", "\342\211\205" }, // ~ { "empty", "\342\210\205" }, // ~ { "emsp", "\342\200\203" }, // ~ { "ensp", "\342\200\202" }, // ~ { "equiv", "\342\211\241" }, // ~ { "frasl", "\342\201\204" }, // ~ { "ge", "\342\211\245" }, // ~ { "hArr", "\342\207\224" }, 
// ~ { "harr", "\342\206\224" }, // ~ { "hellip", "\342\200\246" }, // ~ { "lArr", "\342\207\220" }, // ~ { "lang", "\342\237\250" }, // ~ { "larr", "\342\206\220" }, // ~ { "ldquo", "\342\200\234" }, // ~ { "le", "\342\211\244" }, // ~ { "lowast", "\342\210\227" }, // ~ { "loz", "\342\227\212" }, // ~ { "lsaquo", "\342\200\271" }, // ~ { "lsquo", "\342\200\230" }, // ~ { "mdash", "\342\200\224" }, // ~ { "minus", "\342\210\222" }, // ~ { "ndash", "\342\200\223" }, // ~ { "ne", "\342\211\240" }, // ~ { "OElig", "\305\222" }, // ~ { "oelig", "\305\223" }, // ~ { "prime", "\342\200\262" }, // ~ { "quot", "\342\200\235" }, // ~ { "rArr", "\342\207\222" }, // ~ { "rang", "\342\237\251" }, // ~ { "rarr", "\342\206\222" }, // ~ { "rdquo", "\342\200\235" }, // ~ { "rsaquo", "\342\200\272" }, // ~ { "rsquo", "\342\200\231" }, // ~ { "sbquo", "\342\200\232" }, // ~ { "sim", "\342\210\274" }, // ~ { "thinsp", "\342\200\211" }, // ~ { "tilde", "\313\234" }, // ~ { "trade", "\342\204\242" }, // ~ }; while (*in) { if (*in == '&') { int done = 0; if (in[1] == '#' && in[2] == 'x') { // A unsigned long i = 0; in += 2; while ((*in >= '0' && *in <= '9') || (*in >= 'A' && *in <= 'F') || (*in >= 'a' && *in <= 'f')) { i = (i * 16) + (*in >= 'a' ? *in - 'a' + 16 : *in >= 'A' ? 
*in - 'A' + 16 : *in - '0'); in++; } *out += utf8_encode (i, out, strlen(out)); done = 1; } else if (in[1] == '#') { // A unsigned long i = 0; in++; while (*in >= '0' && *in <= '9') { i = (i * 10) + (*in - '0'); in++; } *out += utf8_encode (i, out, strlen(out)); done = 1; } else { int i; for (i = 0; !done && i < countof(entities); i++) { if (!strncmp (in+1, entities[i].c, strlen(entities[i].c))) { strcpy (out, entities[i].e); in += strlen(entities[i].c) + 1; out += strlen(entities[i].e); done = 1; } } } if (done) { if (*in == ';') in++; } else { *out++ = *in++; } } else { *out++ = *in++; } } *out = 0; /* Shrink */ ret = realloc (ret, out - ret + 1); return ret; } /* Returns a copy of the HTML string that has been converted to plain text, in UTF8 encoding. HTML tags are stripped,
<BR> and <P>
are converted to newlines, and some basic HTML entities are decoded. */ /* NOTE(review): from this point through wrap_text the function bodies are visibly incomplete -- text appears to have been dropped wherever it sat between a less-than and a greater-than character, which fuses textclient_strip_html with the wiki-markup stripper that follows it, and textclient_strip_rss with wrap_text.  The code is reproduced here unchanged; restore this region from a pristine copy of the file before building. */ static char * textclient_strip_html (const char *html) { int tag = 0; int comment = 0; int white = 0; int nl = 0; char *ret = (char *) malloc ((strlen(html) * 4) + 1); // room for UTF8 char *out = ret; *out = 0; for (const char *in = html; *in; in++) { if (comment) { if (!strncmp (in, "-->", 3)) { comment = 0; } } else if (tag) { if (*in == '>') { tag = 0; } } else if (*in == '<') { tag = 1; if (!strncmp (in, " */ { char *e = strstr (in+4, "-->"); if (e) in = e + 3; } else if (!strncmp (in, "/*", 2)) /* ... */ { char *e = strstr (in+2, "*/"); if (e) in = e + 2; else *out++ = *in++; } else if (!strncmp (in, "{{Infobox", 9)) /* {{Infobox ... \n}}\n */ { char *e = strstr (in+2, "\n}}"); if (e) in = e + 3; else *out++ = *in++; } else if (!strncmp (in, "{{", 2)) /* {{ ...table... }} */ { char *e = strstr (in+2, "}}"); if (e) in = e + 2; else *out++ = *in++; } else if (!strncmp (in, "{|", 2)) /* {| ...table... |} */ { char *e = strstr (in+2, "|}"); if (e) in = e + 2; else *out++ = *in++; } else if (!strncmp (in, "|-", 2)) /* |- ...table cell... | */ { char *e = strstr (in+2, "|"); if (e) in = e + 1; else *out++ = *in++; } else if (!strncmp (in, "... -> "*" */ { char *e1 = strstr (in+4, "/>"); char *e2 = strstr (in+4, ""); if (e1 && e1 < e2) in = e1 + 2; else if (e2) in = e2 + 6; else *out++ = *in++; *out++ = '*'; } else if (!strncmp (in, "<", 1)) /* <...> */ { char *e = strstr (in+1, ">"); if (e) in = e + 1; else *out++ = *in++; } else if (!strncmp (in, "[[", 2)) /* [[ ... ]] */ { char *e1 = strstr (in+2, "|"); char *e2 = strstr (in+2, "]]"); if (e1 && e2 && e1 < e2) /* [[link|anchor]] */ { long L = e2 - e1 - 1; memmove (out, e1+1, L); out += L; in = e2+2; } else if (e2) /* [[link]] */ { long L = e2 - in - 2; memmove (out, in+2, L); out += L; in = e2+2; } else *out++ = *in++; } else if (!strncmp (in, "[", 1)) /* [ ... 
] */ { char *e1 = strstr (in+2, " "); char *e2 = strstr (in+2, "]"); if (e1 && e2 && e1 < e2) /* [url anchor] */ { long L = e2 - e1 - 1; memmove (out, e1+1, L); out += L; in = e2+2; } /* NOTE(review): the closing bracket matched here is a single character, yet in advances by 2 exactly as in the double-bracket case above; this appears to skip the character that follows the bracket -- confirm. */ else *out++ = *in++; } else if (!strncmp (in, "''''", 4)) /* omit '''' */ in += 4; else if (!strncmp (in, "'''", 3)) /* omit ''' */ in += 3; else if (!strncmp (in, "''", 2) || /* '' or `` or "" -> " */ !strncmp (in, "``", 2) || !strncmp (in, "\"\"", 2)) { *out++ = '"'; in += 2; } else { *out++ = *in++; } } *out = 0; /* Collapse newlines */ in = text; out = text; while (*in) { while (!strncmp(in, "\n\n\n", 3)) in++; *out++ = *in++; } *out = 0; } /* Returns a copy of the RSS document that has been converted to plain text, in UTF8 encoding. Roughly, it uses the contents of one field of each entry, and decodes HTML within it. NOTE(review): the element names are missing from this comment (presumably the description field of each item -- confirm), and the function body below is incomplete as well. */ static char * textclient_strip_rss (const char *rss) { char *ret = malloc (strlen(rss) * 4 + 1); // room for UTF8 char *out = ret; const char *a = 0, *b = 0, *c = 0, *d = 0, *t = 0; int head = 1; int done = 0; int wiki_p = !!strcasestr (rss, "MediaWiki"); *out = 0; for (const char *in = rss; *in; in++) { if (*in == '<') { if (!strncasecmp (in, ""); out += strlen (out); } } else if (head) { // still before first ; } else if (!strncasecmp (in, " columns && last_space) { *last_space = '\n'; col = col - last_col; } last_space = p; last_col = col; } if (*p == '\r' || *p == '\n') { col = 0; last_col = 0; last_space = 0; lines++; if (max_lines && lines >= max_lines) { *p = 0; break; } } else { col++; } } } /* Re-wraps BODY in place to COLUMNS columns.  A NULL body is ignored.  Single newlines are turned into spaces while a pair of consecutive newlines is left alone, then wrap_text is called with a line limit of 0, which the max_lines test in wrap_text treats as no limit. */ static void rewrap_text (char *body, int columns) { if (! 
body) return; /* Undo the previous wrapping: a lone newline becomes a space, while two newlines in a row are skipped over and kept. */ for (char *p = body; *p; p++) { if (*p == '\n') { if (p[1] == '\n') p++; else *p = ' '; } } wrap_text (body, columns, 0); } /* Rewrites S in place, decoding backslash escapes: backslash-n, backslash-r and backslash-t become newline, carriage return and tab, and a backslash before any other character yields that character.  The result is never longer than the input.  NOTE(review): if S ends in a lone backslash, the inner in++ lands on the terminating NUL and the loop's own in++ then steps past it; the one caller visible here appends a newline first -- confirm no other caller passes such a string. */ static void strip_backslashes (char *s) { char *out = s; for (char *in = s; *in; in++) { if (*in == '\\') { in++; if (*in == 'n') *out++ = '\n'; else if (*in == 'r') *out++ = '\r'; else if (*in == 't') *out++ = '\t'; else *out++ = *in; } else { *out++ = *in; } } *out = 0; } /* Load the raw body of a URL, and convert it to plain text. NOTE(review): the remainder of this function and the head of textclient_getc are missing -- the text breaks off inside the first strncasecmp call and resumes in the middle of textclient_getc.  Everything from here to the end of the file is reproduced unchanged; restore it from a pristine copy before building. */ static char * mobile_url_text (Display *dpy, const char *url) { char *body = textclient_mobile_url_string (dpy, url); enum { RSS, HTML, TEXT } type; if (!body) return NULL; if (!strncasecmp (body, "fp || !*d->fp) { if (d->buf) { free (d->buf); d->buf = 0; d->fp = 0; } switch (d->mode) { case DATE: DATE: d->buf = textclient_mobile_date_string(); break; case LITERAL: if (!d->literal || !*d->literal) goto DATE; d->buf = (char *) malloc (strlen (d->literal) + 3); strcpy (d->buf, d->literal); strcat (d->buf, "\n"); strip_backslashes (d->buf); d->fp = d->buf; break; case URL: if (!d->url || !*d->url) goto DATE; d->buf = mobile_url_text (d->dpy, d->url); break; default: abort(); } if (d->columns > 10) wrap_text (d->buf, d->columns, d->max_lines); d->fp = d->buf; } if (!d->fp || !*d->fp) return -1; unsigned char c = (unsigned char) *d->fp++; return (int) c; } Bool textclient_putc (text_data *d, XKeyEvent *k) { return False; } void textclient_reshape (text_data *d, int pix_w, int pix_h, int char_w, int char_h, int max_lines) { d->columns = char_w; d->max_lines = max_lines; rewrap_text (d->buf, d->columns); } #endif /* whole file */