Context Navigation

source: lib/misc.c @ 37c9653

Last change on this file since 37c9653 was 098b430, checked in by dequis <dx@…>, at 2015-10-08T05:09:14Z
Replace <br/> and <br /> with \n in strip_html
Property mode set to `100644`
File size: 16.3 KB

Line
1	/********************************************************************\
2	* BitlBee -- An IRC to other IM-networks gateway *
3	* *
4	* Copyright 2002-2012 Wilmer van der Gaast and others *
5	\********************************************************************/
6
7	/*
8	* Various utility functions. Some are copied from Gaim to support the
9	* IM-modules, most are from BitlBee.
10	*
11	* Copyright (C) 1998-1999, Mark Spencer <markster@marko.net>
12	* (and possibly other members of the Gaim team)
13	* Copyright 2002-2012 Wilmer van der Gaast <wilmer@gaast.net>
14	*/
15
16	/*
17	This program is free software; you can redistribute it and/or modify
18	it under the terms of the GNU General Public License as published by
19	the Free Software Foundation; either version 2 of the License, or
20	(at your option) any later version.
21
22	This program is distributed in the hope that it will be useful,
23	but WITHOUT ANY WARRANTY; without even the implied warranty of
24	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25	GNU General Public License for more details.
26
27	You should have received a copy of the GNU General Public License with
28	the Debian GNU/Linux distribution in /usr/share/common-licenses/GPL;
29	if not, write to the Free Software Foundation, Inc., 51 Franklin St.,
30	Fifth Floor, Boston, MA 02110-1301 USA
31	*/
32
33	#define BITLBEE_CORE
34	#include "nogaim.h"
35	#include "base64.h"
36	#include "md5.h"
37	#include <stdio.h>
38	#include <stdlib.h>
39	#include <string.h>
40	#include <ctype.h>
41	#include <glib.h>
42	#include <time.h>
43
44	#ifdef HAVE_RESOLV_A
45	#include <arpa/nameser.h>
46	#include <resolv.h>
47	#endif
48
49	#include "md5.h"
50	#include "ssl_client.h"
51
/* Remove every carriage return ('\r') from text, in place.
   All other characters, including '\n', keep their relative order.
   The string can only shrink, so it is compacted inside the caller's
   buffer and no temporary copy is needed. */
void strip_linefeed(gchar *text)
{
	const gchar *src;
	gchar *dst = text;

	for (src = text; *src; src++) {
		if (*src != '\r') {
			*dst++ = *src;
		}
	}
	*dst = '\0';
}
67
/* Build a time_t from calendar fields using mktime(), i.e. the fields are
   interpreted as local time.

   year is the full year (1900 is subtracted here), month is 1-based.
   A negative sec means "use the seconds of the current clock time".
   All struct tm fields not listed below (tm_isdst included) stay zero. */
time_t get_time(int year, int month, int day, int hour, int min, int sec)
{
	struct tm tm;

	memset(&tm, 0, sizeof(tm));

	if (sec < 0) {
		sec = time(NULL) % 60;
	}

	tm.tm_year = year - 1900;
	tm.tm_mon = month - 1;
	tm.tm_mday = day;
	tm.tm_hour = hour;
	tm.tm_min = min;
	tm.tm_sec = sec;

	return mktime(&tm);
}
82
/* Convert a broken-down time that is expressed in UTC into a time_t.

   mktime() always interprets its argument as local time, so its result is
   off by the local UTC offset. Rather than touching environment variables
   or relying on the non-portable timegm(), the offset is measured by
   feeding the result back through gmtime_r() + mktime() and then applied
   once.

   Note: *tp is modified (tm_isdst is set to -1 and mktime() normalises the
   fields). As the original author pointed out, this is a hack that can go
   wrong around DST changes. */
time_t mktime_utc(struct tm *tp)
{
	struct tm utc;
	time_t res, tres;

	tp->tm_isdst = -1;
	res = mktime(tp);

	/* res is the timestamp for the given fields read as LOCAL time.
	   Break it down again as UTC to see how far off that is. */
	gmtime_r(&res, &utc);
	utc.tm_isdst = -1;
	if (utc.tm_hour == tp->tm_hour && utc.tm_min == tp->tm_min) {
		/* Local time equals UTC right now, nothing to correct. */
		return res;
	}

	/* (res - tres) is the local offset from UTC; apply it to res. */
	tres = mktime(&utc);
	res += res - tres;

	return res;
}
118
/* One HTML character entity: `code` is the name without the leading '&'
   and trailing ';', `is` is the replacement text (at most two bytes plus
   the terminating NUL, which is enough for the UTF-8 encoded Latin-1
   letters below). */
typedef struct htmlentity {
	char code[7];
	char is[3];
} htmlentity_t;

/* Entities understood by strip_html(). The list is terminated by an entry
   with an empty code. */
static const htmlentity_t ent[] =
{
	{ "lt",     "<" },
	{ "gt",     ">" },
	{ "amp",    "&" },
	{ "apos",   "'" },
	{ "quot",   "\"" },
	{ "aacute", "á" },
	{ "eacute", "é" },
	{ "iacute", "í" },
	{ "oacute", "ó" },
	{ "uacute", "ú" },
	{ "agrave", "à" },
	{ "egrave", "è" },
	{ "igrave", "ì" },
	{ "ograve", "ò" },
	{ "ugrave", "ù" },
	{ "acirc",  "â" },
	{ "ecirc",  "ê" },
	{ "icirc",  "î" },
	{ "ocirc",  "ô" },
	{ "ucirc",  "û" },
	{ "auml",   "ä" },
	{ "euml",   "ë" },
	{ "iuml",   "ï" },
	{ "ouml",   "ö" },
	{ "uuml",   "ü" },
	{ "nbsp",   " " },
	{ "",       "" }
};
154
155	void strip_html(char *in)
156	{
157	char *start = in;
158	char out[strlen(in) + 1];
159	char s = out, cs;
160	int i, matched;
161	int taglen;
162
163	memset(out, 0, sizeof(out));
164
165	while (*in) {
166	if (in == '<' && (g_ascii_isalpha((in + 1)) \|\| *(in + 1) == '/')) {
167	/* If in points at a < and in+1 points at a letter or a slash, this is probably
168	a HTML-tag. Try to find a closing > and continue there. If the > can't be
169	found, assume that it wasn't a HTML-tag after all. */
170
171	cs = in;
172
173	while (in && in != '>') {
174	in++;
175	}
176
177	taglen = in - cs - 1; /* not <0 because the above loop runs at least once */
178	if (*in) {
179	if (g_strncasecmp(cs + 1, "b", taglen) == 0) {
180	*(s++) = '\x02';
181	} else if (g_strncasecmp(cs + 1, "/b", taglen) == 0) {
182	*(s++) = '\x02';
183	} else if (g_strncasecmp(cs + 1, "i", taglen) == 0) {
184	*(s++) = '\x1f';
185	} else if (g_strncasecmp(cs + 1, "/i", taglen) == 0) {
186	*(s++) = '\x1f';
187	} else if (g_strncasecmp(cs + 1, "br", taglen) == 0) {
188	*(s++) = '\n';
189	} else if (g_strncasecmp(cs + 1, "br/", taglen) == 0) {
190	*(s++) = '\n';
191	} else if (g_strncasecmp(cs + 1, "br /", taglen) == 0) {
192	*(s++) = '\n';
193	}
194	in++;
195	} else {
196	in = cs;
197	(s++) = (in++);
198	}
199	} else if (*in == '&') {
200	cs = ++in;
201	while (in && g_ascii_isalpha(in)) {
202	in++;
203	}
204
205	if (*in == ';') {
206	in++;
207	}
208	matched = 0;
209
210	for (i = 0; *ent[i].code; i++) {
211	if (g_strncasecmp(ent[i].code, cs, strlen(ent[i].code)) == 0) {
212	int j;
213
214	for (j = 0; ent[i].is[j]; j++) {
215	*(s++) = ent[i].is[j];
216	}
217
218	matched = 1;
219	break;
220	}
221	}
222
223	/* None of the entities were matched, so return the string */
224	if (!matched) {
225	in = cs - 1;
226	(s++) = (in++);
227	}
228	} else {
229	(s++) = (in++);
230	}
231	}
232
233	strcpy(start, out);
234	}
235
236	char escape_html(const char html)
237	{
238	const char *c = html;
239	GString *ret;
240	char *str;
241
242	if (html == NULL) {
243	return(NULL);
244	}
245
246	ret = g_string_new("");
247
248	while (*c) {
249	switch (*c) {
250	case '&':
251	ret = g_string_append(ret, "&");
252	break;
253	case '<':
254	ret = g_string_append(ret, "<");
255	break;
256	case '>':
257	ret = g_string_append(ret, ">");
258	break;
259	case '"':
260	ret = g_string_append(ret, """);
261	break;
262	default:
263	ret = g_string_append_c(ret, *c);
264	}
265	c++;
266	}
267
268	str = ret->str;
269	g_string_free(ret, FALSE);
270	return(str);
271	}
272
/* Value of one hexadecimal digit, or -1 if c is not a hex digit. */
static int http_hex_value(char c)
{
	if (c >= '0' && c <= '9') {
		return c - '0';
	}
	if (c >= 'a' && c <= 'f') {
		return c - 'a' + 10;
	}
	if (c >= 'A' && c <= 'F') {
		return c - 'A' + 10;
	}
	return -1;
}

/* Decode%20a%20file%20name

   Replaces every "%XX" (two hex digits) in s by the byte it encodes; all
   other characters are kept. Works in place: the decoded text is never
   longer than the input, so the write position never overtakes the read
   position.

   If a '%' is not followed by exactly two hex digits (including a '%' at
   the very end of the string), the input is considered malformed and s is
   set to the empty string. */
void http_decode(char *s)
{
	size_t i, j;

	for (i = j = 0; s[i]; i++, j++) {
		if (s[i] == '%') {
			/* s[i + 1] is at worst the terminator; s[i + 2] is only
			   inspected once s[i + 1] is known to be a digit. */
			int hi = http_hex_value(s[i + 1]);
			int lo = hi < 0 ? -1 : http_hex_value(s[i + 2]);

			if (lo < 0) {
				*s = '\0';
				return;
			}

			s[j] = (char) ((hi << 4) | lo);
			i += 2;
		} else {
			s[j] = s[i];
		}
	}
	s[j] = '\0';
}
299
/* Percent-encode s in place: ASCII letters, digits and the characters
   . _ - ~ are kept, every other byte becomes "%XX" (uppercase hex).

   Warning: This one explodes the string. Worst-cases can make the string
   3x its original size! The function itself does not check bounds, so the
   caller must provide a buffer of at least 3 * strlen(s) + 1 bytes. */
void http_encode(char *s)
{
	char t[strlen(s) + 1];
	int i, j;

	/* Encode from a copy, because the output overwrites s as it grows. */
	strcpy(t, s);
	for (i = j = 0; t[i]; i++, j++) {
		/* Explicit ASCII ranges on purpose: isalnum() is locale-aware
		   and must not decide what gets escaped here. */
		if ((t[i] >= 'A' && t[i] <= 'Z') ||
		    (t[i] >= 'a' && t[i] <= 'z') ||
		    (t[i] >= '0' && t[i] <= '9') ||
		    strchr("._-~", t[i])) {
			s[j] = t[i];
		} else {
			/* Three characters are written; the loop's j++ accounts
			   for the third one. */
			sprintf(s + j, "%%%02X", ((unsigned char *) t)[i]);
			j += 2;
		}
	}
	s[j] = 0;
}
322
/* Replace every '\n' and '\r' in source by a space. The string is modified
   in place and its length does not change. Returns source. */
char *strip_newlines(char *source)
{
	char *p;

	for (p = source; *p != '\0'; p++) {
		if (*p == '\n' || *p == '\r') {
			*p = ' ';
		}
	}

	return source;
}
336
/* Wrap an IPv4 address into IPv6 space ("1.2.3.4" -> "::ffff:1.2.3.4").

   If src contains anything other than digits and dots, it is returned
   unchanged. Otherwise the result is written to a static buffer, so this
   is not thread-safe and the next call overwrites the previous result. */
char *ipv6_wrap(char *src)
{
	static char dst[64];
	int i;

	for (i = 0; src[i]; i++) {
		if ((src[i] < '0' || src[i] > '9') && src[i] != '.') {
			break;
		}
	}

	/* Hmm, it's not even an IP... */
	if (src[i]) {
		return src;
	}

	g_snprintf(dst, sizeof(dst), "::ffff:%s", src);

	return dst;
}
358
/* Unwrap an IPv4 address out of IPv6 space ("::ffff:1.2.3.4" -> "1.2.3.4").

   Returns a pointer into src (seven bytes in) when src starts with
   "::ffff:" (case-insensitive) and the rest consists only of digits and
   dots; otherwise returns src itself. Nothing is copied or modified, which
   is why this one is thread-safe. */
char *ipv6_unwrap(char *src)
{
	int i;

	if (g_strncasecmp(src, "::ffff:", 7) != 0) {
		return src;
	}

	for (i = 7; src[i]; i++) {
		if ((src[i] < '0' || src[i] > '9') && src[i] != '.') {
			break;
		}
	}

	/* Hmm, it's not even an IP... */
	if (src[i]) {
		return src;
	}

	return (src + 7);
}
381
382	/* Convert from one charset to another.
383
384	from_cs, to_cs: Source and destination charsets
385	src, dst: Source and destination strings
386	size: Size if src. 0 == use strlen(). strlen() is not reliable for UNICODE/UTF16 strings though.
387	maxbuf: Maximum number of bytes to write to dst
388
389	Returns the number of bytes written to maxbuf or -1 on an error.
390	*/
391	signed int do_iconv(char from_cs, char to_cs, char src, char dst, size_t size, size_t maxbuf)
392	{
393	GIConv cd;
394	size_t res;
395	size_t inbytesleft, outbytesleft;
396	char *inbuf = src;
397	char *outbuf = dst;
398
399	cd = g_iconv_open(to_cs, from_cs);
400	if (cd == (GIConv) - 1) {
401	return -1;
402	}
403
404	inbytesleft = size ? size : strlen(src);
405	outbytesleft = maxbuf - 1;
406	res = g_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
407	*outbuf = '\0';
408	g_iconv_close(cd);
409
410	if (res != 0) {
411	return -1;
412	} else {
413	return outbuf - dst;
414	}
415	}
416
417	/* A wrapper for /dev/urandom.
418	* If /dev/urandom is not present or not usable, it calls abort()
419	* to prevent bitlbee from working without a decent entropy source */
420	void random_bytes(unsigned char *buf, int count)
421	{
422	int fd;
423
424	if (((fd = open("/dev/urandom", O_RDONLY)) == -1) \|\|
425	(read(fd, buf, count) == -1)) {
426	log_message(LOGLVL_ERROR, "/dev/urandom not present - aborting");
427	abort();
428	}
429
430	close(fd);
431	}
432
/* Return 1 if value is something bool2int() can interpret: one of
   true/yes/on/false/no/off (case-insensitive) or a non-empty string
   consisting only of ASCII digits. Return 0 otherwise, including for the
   empty string. */
int is_bool(char *value)
{
	static const char *const words[] = {
		"true", "yes", "on", "false", "no", "off", NULL
	};
	int i;

	if (*value == 0) {
		return 0;
	}

	for (i = 0; words[i]; i++) {
		if (g_strcasecmp(value, words[i]) == 0) {
			return 1;
		}
	}

	/* Not a keyword, so accept it only if it is entirely numeric. */
	for (; *value; value++) {
		if (!g_ascii_isdigit(*value)) {
			return 0;
		}
	}

	return 1;
}
458
/* Interpret value as a boolean or integer.

   true/yes/on give 1 and false/no/off give 0 (case-insensitive). Otherwise
   the leading integer parsed by sscanf("%d") is returned as-is, or 0 when
   there is none. */
int bool2int(char *value)
{
	int i;

	if (g_strcasecmp(value, "true") == 0 ||
	    g_strcasecmp(value, "yes") == 0 ||
	    g_strcasecmp(value, "on") == 0) {
		return 1;
	}
	if (g_strcasecmp(value, "false") == 0 ||
	    g_strcasecmp(value, "no") == 0 ||
	    g_strcasecmp(value, "off") == 0) {
		return 0;
	}

	if (sscanf(value, "%d", &i) == 1) {
		return i;
	}

	return 0;
}
478
/* Look up the DNS SRV records for _service._protocol.domain.

   Returns a NULL-terminated array of newly allocated replies (release it
   with srv_free()), or NULL when the query fails, yields no usable record,
   or when built without HAVE_RESOLV_A.

   Parsing stops at the first record that is too short or whose target name
   cannot be expanded; records collected up to that point are still
   returned. */
struct ns_srv_reply **srv_lookup(char *service, char *protocol, char *domain)
{
	struct ns_srv_reply **replies = NULL;

#ifdef HAVE_RESOLV_A
	struct ns_srv_reply *reply = NULL;
	char name[1024];
	unsigned char querybuf[1024];
	const unsigned char *buf;
	ns_msg nsh;
	ns_rr rr;
	int n, len, size;

	g_snprintf(name, sizeof(name), "_%s._%s.%s", service, protocol, domain);

	if ((size = res_query(name, ns_c_in, ns_t_srv, querybuf, sizeof(querybuf))) <= 0) {
		return NULL;
	}

	if (ns_initparse(querybuf, size, &nsh) != 0) {
		return NULL;
	}

	n = 0;
	while (ns_parserr(&nsh, ns_s_an, n, &rr) == 0) {
		char target[NS_MAXDNAME];

		/* The record data is read as three 16-bit fields (6 bytes)
		   followed by the target name, so at least 7 bytes are needed. */
		if (ns_rr_rdlen(rr) < 7) {
			break;
		}

		buf = ns_rr_rdata(rr);

		if (dn_expand(querybuf, querybuf + size, &buf[6], target, NS_MAXDNAME) == -1) {
			break;
		}

		len = strlen(target) + 1;

		/* The name is stored directly behind the struct. */
		reply = g_malloc(sizeof(struct ns_srv_reply) + len);
		memcpy(reply->name, target, len);

		/* Each field is two bytes, most significant byte first. */
		reply->prio = (buf[0] << 8) | buf[1];
		reply->weight = (buf[2] << 8) | buf[3];
		reply->port = (buf[4] << 8) | buf[5];

		n++;
		/* n entries plus one slot for the terminating NULL. */
		replies = g_renew(struct ns_srv_reply *, replies, n + 1);
		replies[n - 1] = reply;
	}
	if (replies) {
		replies[n] = NULL;
	}
#endif

	return replies;
}
536
/* Free a NULL-terminated reply array as returned by srv_lookup(): every
   element first, then the array itself. NULL is accepted and ignored. */
void srv_free(struct ns_srv_reply **srv)
{
	struct ns_srv_reply **cur;

	if (srv == NULL) {
		return;
	}

	for (cur = srv; *cur; cur++) {
		g_free(*cur);
	}
	g_free(srv);
}
550
551	/* Word wrapping. Yes, I know this isn't UTF-8 clean. I'm willing to take the risk. */
552	char word_wrap(const char msg, int line_len)
553	{
554	GString *ret = g_string_sized_new(strlen(msg) + 16);
555
556	while (strlen(msg) > line_len) {
557	int i;
558
559	/* First try to find out if there's a newline already. Don't
560	want to add more splits than necessary. */
561	for (i = line_len; i > 0 && msg[i] != '\n'; i--) {
562	;
563	}
564	if (msg[i] == '\n') {
565	g_string_append_len(ret, msg, i + 1);
566	msg += i + 1;
567	continue;
568	}
569
570	for (i = line_len; i > 0; i--) {
571	if (msg[i] == '-') {
572	g_string_append_len(ret, msg, i + 1);
573	g_string_append_c(ret, '\n');
574	msg += i + 1;
575	break;
576	} else if (msg[i] == ' ') {
577	g_string_append_len(ret, msg, i);
578	g_string_append_c(ret, '\n');
579	msg += i + 1;
580	break;
581	}
582	}
583	if (i == 0) {
584	g_string_append_len(ret, msg, line_len);
585	g_string_append_c(ret, '\n');
586	msg += line_len;
587	}
588	}
589	g_string_append(ret, msg);
590
591	return g_string_free(ret, FALSE);
592	}
593
594	gboolean ssl_sockerr_again(void *ssl)
595	{
596	if (ssl) {
597	return ssl_errno == SSL_AGAIN;
598	} else {
599	return sockerr_again();
600	}
601	}
602
603	/* Returns values: -1 == Failure (base64-decoded to something unexpected)
604	0 == Okay
605	1 == Password doesn't match the hash. */
606	int md5_verify_password(char password, char hash)
607	{
608	md5_byte_t *pass_dec = NULL;
609	md5_byte_t pass_md5[16];
610	md5_state_t md5_state;
611	int ret = -1, i;
612
613	if (base64_decode(hash, &pass_dec) == 21) {
614	md5_init(&md5_state);
615	md5_append(&md5_state, (md5_byte_t *) password, strlen(password));
616	md5_append(&md5_state, (md5_byte_t ) pass_dec + 16, 5); / Hmmm, salt! */
617	md5_finish(&md5_state, pass_md5);
618
619	for (i = 0; i < 16; i++) {
620	if (pass_dec[i] != pass_md5[i]) {
621	ret = 1;
622	break;
623	}
624	}
625
626	/* If we reached the end of the loop, it was a match! */
627	if (i == 16) {
628	ret = 0;
629	}
630	}
631
632	g_free(pass_dec);
633
634	return ret;
635	}
636
637	/* Split commands (root-style, not IRC-style). Handles "quoting of"
638	white\ space in 'various ways'. Returns a NULL-terminated static
639	char** so watch out with nested use! Definitely not thread-safe. */
640	char *split_command_parts(char command, int limit)
641	{
642	static char *cmd[IRC_MAX_ARGS + 1];
643	char *s, q = 0;
644	int k;
645
646	memset(cmd, 0, sizeof(cmd));
647	cmd[0] = command;
648	k = 1;
649	for (s = command; *s && k < IRC_MAX_ARGS; s++) {
650	if (*s == ' ' && !q) {
651	*s = 0;
652	while (*++s == ' ') {
653	;
654	}
655	if (k != limit && (s == '"' \|\| s == '\'')) {
656	q = *s;
657	s++;
658	}
659	if (*s) {
660	cmd[k++] = s;
661	if (limit && k > limit) {
662	break;
663	}
664	s--;
665	} else {
666	break;
667	}
668	} else if (*s == '\\' && ((!q && s[1]) \|\| (q && q == s[1]))) {
669	char *cpy;
670
671	for (cpy = s; *cpy; cpy++) {
672	cpy[0] = cpy[1];
673	}
674	} else if (*s == q) {
675	q = *s = 0;
676	}
677	}
678
679	/* Full zero-padding for easier argc checking. */
680	while (k <= IRC_MAX_ARGS) {
681	cmd[k++] = NULL;
682	}
683
684	return cmd;
685	}
686
/* Find a header in an RFC822-style header block and return its value.

   text:   the header block, NULL is allowed and yields NULL
   header: header name to look for, compared case-insensitively against
           the start of each line (only strlen(header) bytes are compared)
   len:    number of bytes of text to consider, 0 == use strlen(text)

   Returns a newly allocated copy of the value (free with g_free()): the
   text after the name and any ':', space and tab characters, up to the end
   of that line. Returns NULL if the header is not found before the blank
   line that ends the headers, or if nothing follows the name. */
char *get_rfc822_header(const char *text, const char *header, int len)
{
	int hlen = strlen(header), i;
	const char *ret;

	if (text == NULL) {
		return NULL;
	}

	if (len == 0) {
		len = strlen(text);
	}

	i = 0;
	while ((i + hlen) < len) {
		/* Maybe this is a bit over-commented, but I just hate this part... */
		if (g_strncasecmp(text + i, header, hlen) == 0) {
			/* Skip to the (probable) end of the header */
			i += hlen;

			/* Find the first non-[: \t] character */
			while (i < len && (text[i] == ':' || text[i] == ' ' || text[i] == '\t')) {
				i++;
			}

			/* Make sure we're still inside the string */
			if (i >= len) {
				return NULL;
			}

			/* Save the position */
			ret = text + i;

			/* Search for the end of this line */
			while (i < len && text[i] != '\r' && text[i] != '\n') {
				i++;
			}

			/* Copy the found data */
			return g_strndup(ret, text + i - ret);
		}

		/* This wasn't the header we were looking for, skip to the next line. */
		while (i < len && (text[i] != '\r' && text[i] != '\n')) {
			i++;
		}
		while (i < len && (text[i] == '\r' || text[i] == '\n')) {
			i++;
		}

		/* End of headers? */
		if ((i >= 4 && strncmp(text + i - 4, "\r\n\r\n", 4) == 0) ||
		    (i >= 2 && (strncmp(text + i - 2, "\n\n", 2) == 0 ||
		                strncmp(text + i - 2, "\r\r", 2) == 0))) {
			break;
		}
	}

	return NULL;
}
747
748	/* Takes a string, truncates it where it's safe, returns the new length */
749	int truncate_utf8(char *string, int maxlen)
750	{
751	char *end;
752
753	g_utf8_validate((const gchar ) string, maxlen, (const gchar *) &end);
754	*end = '\0';
755	return end - string;
756	}
757
758	/* Parses a guint64 from string, returns TRUE on success */
759	gboolean parse_int64(char string, int base, guint64 number)
760	{
761	guint64 parsed;
762	char *endptr;
763
764	errno = 0;
765	parsed = g_ascii_strtoull(string, &endptr, base);
766	if (errno \|\| endptr == string \|\| *endptr != '\0') {
767	return FALSE;
768	}
769	*number = parsed;
770	return TRUE;
771	}
772

Note: See TracBrowser for help on using the repository browser.

Download in other formats: